C API 教程

在本教程中，我们将安装 XGBoost 库，并配置 C/C++ 应用的 CMakeLists.txt 文件以链接 XGBoost 库到我们的应用中。稍后，我们将看到一些使用 C API 的有用技巧和代码片段示例，以演示如何使用 C API 中提供的各种函数来执行加载、训练模型和在测试数据集上进行预测等基本任务。有关 API 参考，请访问 XGBoost C 包

要求

安装 CMake - 按照 cmake 安装文档获取说明。
安装 Conda - 按照 conda 安装文档获取说明。

在 Conda 环境中安装 XGBoost

在你的终端中运行以下命令。这些命令会克隆 XGBoost 仓库，在仓库的 build 文件夹中完成构建，并将 XGBoost 安装到当前激活的 Conda 环境（$CONDA_PREFIX）中。

# Clone the XGBoost repository together with its submodules
git clone --recursive https://github.com/dmlc/xgboost
cd xgboost
# Activate the Conda environment into which XGBoost will be installed
conda activate [env_name]
# Configure the build tree in the "build" folder; the install prefix is the active Conda environment
cmake -B build -S . -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX
# Build XGBoost and install it into the Conda environment (usually under [your home directory]/miniconda3)
cmake --build build --target install

配置应用的 CMakeLists.txt 文件以链接 XGBoost

这里，我们假设你的 C++ 应用使用 CMake 进行构建。

在应用的 CMakeLists.txt 中使用 find_package() 和 target_link_libraries() 来链接 XGBoost 库。

# Minimum CMake version accepted when configuring this project
cmake_minimum_required(VERSION 3.18)
# Replace your_project_name / your_project_version with your own values; both C and C++ are enabled
project(your_project_name LANGUAGES C CXX VERSION your_project_version)
# Locate the installed XGBoost package; REQUIRED makes configuration fail if it cannot be found
find_package(xgboost REQUIRED)
# Build the application executable from your source file
add_executable(your_project_name /path/to/project_file.c)
# Link the executable against the xgboost::xgboost target provided by find_package()
target_link_libraries(your_project_name xgboost::xgboost)

为确保 CMake 能够找到 XGBoost 库，在调用 CMake 时提供 -DCMAKE_PREFIX_PATH=$CONDA_PREFIX 参数。此选项指示 CMake 在 $CONDA_PREFIX 中查找 XGBoost 库，这是你的 Conda 环境所在的路径。

# Activate the Conda environment where XGBoost was previously installed
conda activate [env_name]
# Configure the application; CMAKE_PREFIX_PATH tells find_package() to search the Conda environment
cmake -B build -S . -DCMAKE_PREFIX_PATH=$CONDA_PREFIX
# Build your application inside the "build" folder
cmake --build build

有用的注意事项

以下是使用 C API 时的一些有用技巧：

错误处理：始终检查 C API 函数的返回值。

在 C 应用中：使用以下宏来保护所有对 XGBoost C API 函数的调用。该宏会打印所有发生的错误/异常

/*
 * Evaluates one XGBoost C API call exactly once and checks its return code.
 * On a non-zero return code it prints the source location, the failing call and
 * the message from XGBGetLastError() to stderr, then terminates with exit(1).
 *
 * The body is wrapped in do { ... } while (0) so that `safe_xgboost(x);` is a
 * single statement and stays valid inside an unbraced if/else.
 * Requires <stdio.h> and <stdlib.h>.
 */
#define safe_xgboost(call) do { \
  /* unusual name so the local cannot shadow a variable used inside `call` */ \
  int safe_xgboost_err_ = (call); \
  if (safe_xgboost_err_ != 0) { \
    fprintf(stderr, "%s:%d: error in %s: %s\n", __FILE__, __LINE__, #call, XGBGetLastError());  \
    exit(1); \
  } \
} while (0)

在你的应用中，如下所示将所有 C API 函数调用包装在宏中：

// Handle that receives the DMatrix created from the training file
DMatrixHandle train;
// NOTE(review): `silent` is not declared in this snippet; it must be defined by the surrounding code
safe_xgboost(XGDMatrixCreateFromFile("/path/to/training/dataset/", silent, &train));

在 C++ 应用中：修改宏 safe_xgboost，使其在发生错误时抛出异常。

// Evaluates one XGBoost C API call exactly once and checks its return code.
// On a non-zero return code it throws std::runtime_error whose message contains
// the source location, the failing call and the text from XGBGetLastError().
//
// The body is wrapped in do { ... } while (0) so that `safe_xgboost(x);` is a
// single statement and stays valid inside an unbraced if/else.
// Requires <stdexcept> and <string>.
#define safe_xgboost(call) do { \
  /* unusual name so the local cannot shadow a variable used inside `call` */ \
  int safe_xgboost_err_ = (call); \
  if (safe_xgboost_err_ != 0) { \
    throw std::runtime_error(std::string(__FILE__) + ":" + std::to_string(__LINE__) + \
                        ": error in " + #call + ":" + XGBGetLastError());  \
  } \
} while (0)

断言技术：它在 C/C++ 中都有效。如果表达式求值为 0 (false)，则将表达式、源代码文件名和行号发送到标准错误，然后调用 abort() 函数。这可用于测试你在代码中做出的假设。

DMatrixHandle dmat;
assert( XGDMatrixCreateFromFile("training_data.libsvm", 0, &dmat) == 0);

始终记住要适当地释放 BoosterHandle 和 DMatrixHandle 分配的空间。

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <xgboost/c_api.h>

/*
 * Skeleton program showing that every BoosterHandle and DMatrixHandle must be
 * released with the matching free function once it is no longer needed.
 * argc/argv are accepted but not used by the skeleton.
 */
int main(int argc, char** argv) {
  /* verbosity flag for calls such as XGDMatrixCreateFromFile(); unused in this skeleton */
  int silent = 0;

  /* Start from NULL so the handle never holds an indeterminate pointer
     before the code below creates it. */
  BoosterHandle booster = NULL;

  // do something with booster

  // free the memory
  XGBoosterFree(booster);

  /* Same rule as above: never leave a handle uninitialised. */
  DMatrixHandle DMatrixHandle_param = NULL;

  // do something with DMatrixHandle_param

  // free the memory
  XGDMatrixFree(DMatrixHandle_param);

  return 0;
}

对于树模型，在训练和评分/预测期间使用一致的数据格式非常重要，否则会导致错误的输出。例如，如果我们的训练数据采用 密集矩阵 格式，则预测数据集也应该采用 密集矩阵 格式；如果训练采用 libsvm 格式，则预测数据集也应该采用 libsvm 格式。
在为 Booster 对象（BoosterHandle）的参数设置值时，始终使用字符串。参数值可以是任何数据类型（例如 int、char、float、double 等），但它们应始终编码为字符串。

BoosterHandle booster;
XGBoosterSetParam(booster, "parameter_name", "0.1");

使用 C API 函数的示例和代码片段

如果数据集以文件形式存在，可以使用 XGDMatrixCreateFromFile() 函数将其加载到 DMatrix 对象中。

DMatrixHandle data; // handle to DMatrix
// Load the dataset from the file and store the resulting DMatrix in `data`
// NOTE(review): `silent` is not declared in this snippet; it must be defined by the surrounding code
safe_xgboost(XGDMatrixCreateFromFile("/path/to/file/filename", silent, &data));

你还可以使用 XGDMatrixCreateFromMat() 函数从二维矩阵创建 DMatrix 对象。

// 1D matrix: a single row holding 50 feature values.
// XGDMatrixCreateFromMat() reads the buffer as `const float *`, so the values
// must be stored as float (an int array would be misinterpreted).
const float data1[] = { 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0 };

// 2D matrix
// Enum constants are integer constant expressions in both C and C++, so they
// can size an array that has an initializer (a `const int` cannot in C).
enum { ROWS = 6, COLS = 3 };
const float data2[ROWS][COLS] = { {1, 2, 3}, {2, 4, 6}, {3, -1, 9}, {4, 8, -1}, {2, 5, 1}, {0, 1, 5} };
DMatrixHandle dmatrix1, dmatrix2;
// Pass the matrix together with the number of rows & columns it contains
// here 0 represents the missing value in the matrix dataset
// dmatrix1 will contain the DMatrix created from data1
safe_xgboost(XGDMatrixCreateFromMat(data1, 1, 50, 0, &dmatrix1));
// here -1 represents the missing value in the matrix dataset
// the 2D array is passed as a pointer to its first element (rows are stored contiguously)
safe_xgboost(XGDMatrixCreateFromMat(&data2[0][0], ROWS, COLS, -1, &dmatrix2));

使用 XGBoosterCreate() 函数创建 Booster 对象以在数据集上进行训练和测试。

// Handle that receives the newly created booster
BoosterHandle booster;
// Number of DMatrix objects handed to the booster (train + test).
// An enum constant is used because it is a true compile-time constant in both
// C and C++, which is required to size an array that has an initializer.
enum { eval_dmats_size = 2 };
// We assume that training and test data have been loaded into 'train' and 'test'
DMatrixHandle eval_dmats[eval_dmats_size] = {train, test};
safe_xgboost(XGBoosterCreate(eval_dmats, eval_dmats_size, &booster));

对于每个 DMatrix 对象，使用 XGDMatrixSetFloatInfo() 函数设置标签。之后你可以使用 XGDMatrixGetFloatInfo() 函数访问标签。

// The matrix below has 6 rows and 3 columns. Enum constants are compile-time
// constants in both C and C++, so they can size the initialised array.
enum { ROWS = 6, COLS = 3 };
// XGDMatrixCreateFromMat() reads the buffer as `const float *`, so store floats
const float data[ROWS][COLS] = { {1, 2, 3}, {2, 4, 6}, {3, -1, 9}, {4, 8, -1}, {2, 5, 1}, {0, 1, 5} };
DMatrixHandle dmatrix;

// -1 marks the missing values; the 2D array is passed as a pointer to its first element
safe_xgboost(XGDMatrixCreateFromMat(&data[0][0], ROWS, COLS, -1, &dmatrix));

// variable to store labels for the dataset created from above matrix (one label per row)
float labels[ROWS];

for (int i = 0; i < ROWS; i++) {
  labels[i] = (float)i;
}

// Loading the labels
safe_xgboost(XGDMatrixSetFloatInfo(dmatrix, "label", labels, ROWS));

// reading the labels and store the length of the result
bst_ulong result_len;

// labels result; the pointer is filled in by XGDMatrixGetFloatInfo()
const float *result;

safe_xgboost(XGDMatrixGetFloatInfo(dmatrix, "label", &result_len, &result));

// the index uses the same type as result_len and is printed with a matching format specifier
for (bst_ulong i = 0; i < result_len; i++) {
  printf("label[%lu] = %f\n", (unsigned long)i, result[i]);
}

根据需求使用 XGBoosterSetParam() 函数设置 Booster 对象的参数。请在此查看所有可用参数的完整列表。

// The handle is only declared here; it must have been created with
// XGBoosterCreate() before any parameter is set.
BoosterHandle booster;
// choose the booster type; every parameter value is passed as a string
safe_xgboost(XGBoosterSetParam(booster, "booster", "gblinear"));
// default max_depth = 6
// NOTE(review): max_depth looks like a tree-booster parameter; confirm it has any effect with "gblinear"
safe_xgboost(XGBoosterSetParam(booster, "max_depth", "3"));
// default eta = 0.3
safe_xgboost(XGBoosterSetParam(booster, "eta", "0.1"));

分别使用 XGBoosterUpdateOneIter() 和 XGBoosterEvalOneIter() 函数来训练和评估模型。

// Number of boosting rounds to run
int num_of_iterations = 20;
// Display names for the evaluation sets; booster, train, eval_dmats and
// eval_dmats_size are expected to be defined by the preceding code
const char* eval_names[eval_dmats_size] = {"train", "test"};
// Receives the evaluation summary string produced for each iteration
const char* eval_result = NULL;

for (int i = 0; i < num_of_iterations; ++i) {
  // Run one boosting iteration on the training data
  safe_xgboost(XGBoosterUpdateOneIter(booster, i, train));

  // Evaluate the model on the training & testing datasets after this iteration and print the summary
  safe_xgboost(XGBoosterEvalOneIter(booster, i, eval_dmats, eval_names, eval_dmats_size, &eval_result));
  printf("%s\n", eval_result);
}

注意

对于自定义损失函数，请改用 XGBoosterBoostOneIter() 函数，并手动指定梯度和二阶梯度。

使用 XGBoosterPredictFromDMatrix() 函数在测试集上预测结果。

char const config[] =
    "{\"training\": false, \"type\": 0, "
    "\"iteration_begin\": 0, \"iteration_end\": 0, \"strict_shape\": false}";
/* Shape of output prediction */
uint64_t const* out_shape;
/* Dimension of output prediction */
uint64_t out_dim;
/* Pointer to a thread local contiguous array, assigned in prediction function. */
float const* out_result = NULL;
safe_xgboost(
    XGBoosterPredictFromDMatrix(booster, dmatrix, config, &out_shape, &out_dim, &out_result));

for (unsigned int i = 0; i < output_length; i++){
  printf("prediction[%i] = %f \n", i, output_result[i]);
}

使用 XGBoosterGetNumFeature() 函数获取数据集中的特征数量。

// Receives the number of features reported by the booster
bst_ulong num_of_features = 0;

// Assuming the booster variable of type BoosterHandle is already declared
// and has been trained on a loaded dataset,
// store the result in the num_of_features variable
safe_xgboost(XGBoosterGetNumFeature(booster, &num_of_features));

// Print the number of features; bst_ulong is cast to unsigned long to match the %lu format
printf("num_feature: %lu\n", (unsigned long)(num_of_features));

使用 XGBoosterSaveModel() 函数保存模型。

// The handle is only declared here; it is expected to refer to a trained booster
BoosterHandle booster;
// Destination file for the saved model
const char *model_path = "/path/of/model.json";
safe_xgboost(XGBoosterSaveModel(booster, model_path));

使用 XGBoosterLoadModel() 函数加载模型。

// Handle that receives the booster the model is loaded into
BoosterHandle booster;
// File that contains the previously saved model
const char *model_path = "/path/of/model.json";

// create the booster handle first (no DMatrix is attached: NULL, 0)
safe_xgboost(XGBoosterCreate(NULL, 0, &booster));

// set the model parameters here

// load the model from model_path into the booster
safe_xgboost(XGBoosterLoadModel(booster, model_path));

// run predictions with the loaded model here

使用 XGDMatrixFree() 和 XGBoosterFree() 函数释放代码中使用的所有内部结构。这一步对于防止内存泄漏很重要。

// Release the DMatrix and the booster once they are no longer needed
safe_xgboost(XGDMatrixFree(dmatrix));
safe_xgboost(XGBoosterFree(booster));