xgboost
c-api-demo.c
#include <assert.h>
#include <stddef.h>
#include <stdint.h> /* uint32_t,uint64_t */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <xgboost/c_api.h>
/*
 * Evaluate an XGBoost C API call and terminate the program if it fails.
 *
 * `call` is evaluated exactly once. A non-zero result is treated as failure:
 * the source location, the text of the call and XGBGetLastError() are written
 * to stderr and the process exits with status 1.
 *
 * The body is wrapped in do { ... } while (0) so that `safe_xgboost(x);`
 * expands to exactly one statement and stays valid inside an un-braced
 * if/else.
 */
#define safe_xgboost(call) \
  do { \
    int safe_xgboost_err_ = (call); \
    if (safe_xgboost_err_ != 0) { \
      fprintf(stderr, "%s:%d: 错误在 %s: %s\n", __FILE__, __LINE__, #call, XGBGetLastError()); \
      exit(1); \
    } \
  } while (0)
/*
 * Write a JSON-encoded array interface describing a single-column array.
 *
 * data:    address of the first element, passed as an integer.
 * n:       number of rows; the emitted shape is always [n, 1].
 * typestr: type string placed in the "typestr" field (callers pass e.g. "<f4").
 * length:  capacity of `out` in bytes.
 * out:     destination buffer; it is zero-filled and then receives the
 *          NUL-terminated JSON text.
 *
 * At most `length - 1` characters are written, so text that does not fit is
 * truncated instead of running past the end of `out`.
 */
static void MakeArrayInterface(size_t data, size_t n, char const* typestr, size_t length,
                               char* out) {
  /* %zu is the conversion that matches size_t. */
  static char const kTemplate[] =
      "{\"data\": [%zu, true], \"shape\": [%zu, 1], \"typestr\": \"%s\", \"version\": 3}";
  memset(out, '\0', length);
  snprintf(out, length, kTemplate, data, n, typestr);
}
/*
 * Write a JSON-encoded DMatrix configuration.
 *
 * n_threads: value emitted for the "nthread" field.
 * length:    capacity of `out` in bytes.
 * out:       destination buffer; it is zero-filled and then receives the
 *            NUL-terminated JSON text. "missing" is always emitted as NaN.
 *
 * At most `length - 1` characters are written, so text that does not fit is
 * truncated instead of running past the end of `out`.
 */
static void MakeConfig(int n_threads, size_t length, char* out) {
  static char const kTemplate[] = "{\"missing\": NaN, \"nthread\": %d}";
  memset(out, '\0', length);
  snprintf(out, length, kTemplate, n_threads);
}
/*
 * Print `label`, then at most `limit` of the first `available` entries of
 * `values` (each formatted with "%1.4f "), then a newline.
 */
static void PrintLeadingValues(char const* label, float const* values, uint64_t available,
                               uint64_t limit) {
  uint64_t const count = available < limit ? available : limit;
  printf("%s", label);
  for (uint64_t i = 0; i < count; ++i) {
    printf("%1.4f ", values[i]);
  }
  printf("\n");
}

/*
 * Demo of the XGBoost C API: load the agaricus train/test files, train a
 * binary:logistic booster for 10 iterations while printing the evaluation
 * string of each iteration, then run prediction on the test DMatrix and on
 * one sample row supplied as a dense matrix, a CSR matrix and a CSC matrix.
 *
 * Every API call goes through safe_xgboost, so the first failure ends the
 * program. Returns 0 when every step succeeds.
 */
int main(void) {
  int silent = 0;
  int use_gpu = 0;  // set to 1 to train on the GPU

  // Load the data.
  DMatrixHandle dtrain, dtest;
  safe_xgboost(XGDMatrixCreateFromFile("../../data/agaricus.txt.train?format=libsvm", silent, &dtrain));
  safe_xgboost(XGDMatrixCreateFromFile("../../data/agaricus.txt.test?format=libsvm", silent, &dtest));

  // Create the booster.
  BoosterHandle booster;
  DMatrixHandle eval_dmats[2] = {dtrain, dtest};
  safe_xgboost(XGBoosterCreate(eval_dmats, 2, &booster));

  // Configure training.
  // The available parameters are described at
  // https://docs.xgboost.com.cn/en/release_3.0.0/parameter.html
  safe_xgboost(XGBoosterSetParam(booster, "device", use_gpu ? "cuda" : "cpu"));
  safe_xgboost(XGBoosterSetParam(booster, "objective", "binary:logistic"));
  safe_xgboost(XGBoosterSetParam(booster, "min_child_weight", "1"));
  safe_xgboost(XGBoosterSetParam(booster, "gamma", "0.1"));
  safe_xgboost(XGBoosterSetParam(booster, "max_depth", "3"));
  safe_xgboost(XGBoosterSetParam(booster, "verbosity", silent ? "0" : "1"));

  // Train and evaluate for 10 iterations.
  int n_trees = 10;
  const char* eval_names[2] = {"训练", "测试"};
  const char* eval_result = NULL;
  for (int i = 0; i < n_trees; ++i) {
    safe_xgboost(XGBoosterUpdateOneIter(booster, i, dtrain));
    safe_xgboost(XGBoosterEvalOneIter(booster, i, eval_dmats, eval_names, 2, &eval_result));
    printf("%s\n", eval_result);
  }

  bst_ulong num_feature = 0;
  safe_xgboost(XGBoosterGetNumFeature(booster, &num_feature));
  printf("特征数量: %lu\n", (unsigned long)(num_feature));

  // Predict.
  bst_ulong out_len = 0;
  int n_print = 10;

  /* Run prediction with a DMatrix object. */
  char const config[] =
      "{\"training\": false, \"type\": 0, "
      "\"iteration_begin\": 0, \"iteration_end\": 0, \"strict_shape\": false}";
  /* Shape of the output prediction. */
  uint64_t const* out_shape;
  /* Dimension of the output prediction. */
  uint64_t out_dim;
  /* Pointer to a thread-local contiguous array, assigned by the prediction function. */
  float const* out_result = NULL;

  safe_xgboost(
      XGBoosterPredictFromDMatrix(booster, dtest, config, &out_shape, &out_dim, &out_result));

  /* Total number of predicted values: the product of the reported shape. */
  uint64_t n_predictions = 1;
  for (uint64_t d = 0; d < out_dim; ++d) {
    n_predictions *= out_shape[d];
  }
  /* Never print more values than the prediction actually holds. */
  PrintLeadingValues("y_预测: ", out_result, n_predictions, (uint64_t)n_print);

  // Print the true labels, again clamped to the number of labels returned.
  safe_xgboost(XGDMatrixGetFloatInfo(dtest, "label", &out_len, &out_result));
  PrintLeadingValues("y_测试: ", out_result, out_len, (uint64_t)n_print);

  {
    printf("稠密矩阵示例 (XGDMatrixCreateFromMat): ");

    /* One sample row with 127 feature columns. */
    const float values[] = {0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
                            0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0,
                            1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
                            0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
                            1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                            1, 0, 0, 0, 0, 1, 0, 0, 0, 0};

    DMatrixHandle dmat;
    safe_xgboost(XGDMatrixCreateFromMat(values, 1, 127, 0.0, &dmat));

    const float* dense_pred = NULL;
    safe_xgboost(
        XGBoosterPredictFromDMatrix(booster, dmat, config, &out_shape, &out_dim, &dense_pred));
    assert(out_dim == 1);
    assert(out_shape[0] == 1);

    printf("%1.4f \n", dense_pred[0]);
    safe_xgboost(XGDMatrixFree(dmat));
  }

  {
    printf("稀疏矩阵示例 (XGDMatrixCreateFromCSR): ");

    /* The same sample as a single CSR row: 22 non-zero entries. */
    const uint64_t indptr[] = {0, 22};
    const uint32_t indices[] = {1, 9, 19, 21, 24, 34, 36, 39, 42, 53, 56,
                                65, 69, 77, 86, 88, 92, 95, 102, 106, 117, 122};
    const float data[] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
                          1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};

    /* Each array is handed to XGBoost as a JSON array-interface string. */
    char j_indptr[128];
    MakeArrayInterface((size_t)indptr, sizeof(indptr) / sizeof(indptr[0]), "<u8",
                       sizeof(j_indptr), j_indptr);
    char j_indices[128];
    MakeArrayInterface((size_t)indices, sizeof(indices) / sizeof(indices[0]), "<u4",
                       sizeof(j_indices), j_indices);
    char j_data[128];
    MakeArrayInterface((size_t)data, sizeof(data) / sizeof(data[0]), "<f4", sizeof(j_data),
                       j_data);

    char j_config[64];
    MakeConfig(0, sizeof(j_config), j_config);

    DMatrixHandle dmat;
    safe_xgboost(XGDMatrixCreateFromCSR(j_indptr, j_indices, j_data, 127, j_config, &dmat));

    const float* csr_pred = NULL;
    safe_xgboost(
        XGBoosterPredictFromDMatrix(booster, dmat, config, &out_shape, &out_dim, &csr_pred));
    assert(out_dim == 1);
    assert(out_shape[0] == 1);

    printf("%1.4f \n", csr_pred[0]);
    safe_xgboost(XGDMatrixFree(dmat));
  }

  {
    printf("稀疏矩阵示例 (XGDMatrixCreateFromCSC): ");

    /* The same sample in CSC form: 128 column pointers (127 columns), every entry in row 0. */
    const uint64_t indptr[] = {
        0,  0,  1,  1,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  3,  3,
        4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  8,  9,
        9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11,
        12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15,
        15, 16, 16, 16, 16, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 20, 20, 20,
        20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22};
    const uint32_t indices[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
    const float data[] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
                          1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};

    char j_indptr[128];
    MakeArrayInterface((size_t)indptr, sizeof(indptr) / sizeof(indptr[0]), "<u8",
                       sizeof(j_indptr), j_indptr);
    char j_indices[128];
    MakeArrayInterface((size_t)indices, sizeof(indices) / sizeof(indices[0]), "<u4",
                       sizeof(j_indices), j_indices);
    char j_data[128];
    MakeArrayInterface((size_t)data, sizeof(data) / sizeof(data[0]), "<f4", sizeof(j_data),
                       j_data);

    char j_config[64];
    MakeConfig(0, sizeof(j_config), j_config);

    DMatrixHandle dmat;
    safe_xgboost(XGDMatrixCreateFromCSC(j_indptr, j_indices, j_data, 1, j_config, &dmat));

    const float* csc_pred = NULL;
    safe_xgboost(
        XGBoosterPredictFromDMatrix(booster, dmat, config, &out_shape, &out_dim, &csc_pred));
    assert(out_dim == 1);
    assert(out_shape[0] == 1);

    printf("%1.4f \n", csc_pred[0]);
    safe_xgboost(XGDMatrixFree(dmat));
  }

  // Release all resources.
  safe_xgboost(XGBoosterFree(booster));
  safe_xgboost(XGDMatrixFree(dtrain));
  safe_xgboost(XGDMatrixFree(dtest));
  return 0;
}
XGBoost 的 C API,用于与其他语言进行接口交互。
typedef uint64_t bst_ulong(bst_ulong 是 uint64_t 的类型别名)
定义: c_api.h:29
int XGBoosterGetNumFeature(BoosterHandle handle, bst_ulong *out)
获取特征数量
int XGBoosterFree(BoosterHandle handle)
删除 booster。
int XGBoosterEvalOneIter(BoosterHandle handle, int iter, DMatrixHandle dmats[], const char *evnames[], bst_ulong len, const char **out_result)
获取 xgboost 的评估统计信息
int XGBoosterUpdateOneIter(BoosterHandle handle, int iter, DMatrixHandle dtrain)
使用 dtrain 更新模型一轮
int XGBoosterSetParam(BoosterHandle handle, const char *name, const char *value)
设置参数
int XGBoosterCreate(const DMatrixHandle dmats[], bst_ulong len, BoosterHandle *out)
创建一个 XGBoost 学习器 (booster)
int XGDMatrixFree(DMatrixHandle handle)
释放数据矩阵中的空间
int XGDMatrixCreateFromCSR(char const *indptr, char const *indices, char const *data, bst_ulong ncol, char const *config, DMatrixHandle *out)
从 CSR 矩阵创建 DMatrix。
int XGDMatrixCreateFromFile(const char *fname, int silent, DMatrixHandle *out)
加载数据矩阵
int XGDMatrixCreateFromCSC(char const *indptr, char const *indices, char const *data, bst_ulong nrow, char const *config, DMatrixHandle *out)
从 CSC 矩阵创建 DMatrix。
int XGDMatrixCreateFromMat(const float *data, bst_ulong nrow, bst_ulong ncol, float missing, DMatrixHandle *out)
从稠密矩阵创建矩阵内容
int XGDMatrixGetFloatInfo(const DMatrixHandle handle, const char *field, bst_ulong *out_len, const float **out_dptr)
从矩阵获取浮点信息向量。
typedef void *BoosterHandle(BoosterHandle 是 void * 的类型别名)
指向 Booster 的句柄
定义: c_api.h:52
typedef void *DMatrixHandle(DMatrixHandle 是 void * 的类型别名)
指向 DMatrix 的句柄
定义: c_api.h:50
int XGBoosterPredictFromDMatrix(BoosterHandle handle, DMatrixHandle dmat, char const *config, bst_ulong const **out_shape, bst_ulong *out_dim, float const **out_result)
从 DMatrix 进行预测,替代 XGBoosterPredict。