xgboost
inference.c
#include <inttypes.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <xgboost/c_api.h>
/*
 * Evaluate the result of an XGBoost C API call and abort on failure.
 *
 * `err` is the call's return value; any non-zero value is treated as an
 * error.  On failure the source location, the stringified call expression and
 * the message returned by XGBGetLastError() are written to stderr and the
 * process exits with status 1.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly one
 * statement and stays correct inside an unbraced if/else.
 */
#define safe_xgboost(err)                                                      \
  do {                                                                         \
    if ((err) != 0) {                                                          \
      fprintf(stderr, "%s:%d: error in %s: %s\n", __FILE__, __LINE__, #err,    \
              XGBGetLastError());                                              \
      exit(1);                                                                 \
    }                                                                          \
  } while (0)
/*
 * Abort the program when an allocation result is NULL.
 *
 * `ptr` is the pointer returned by an allocation function.  When it is NULL,
 * the source location and a diagnostic are written to stderr and the process
 * exits with status 1.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly one
 * statement and stays correct inside an unbraced if/else.
 */
#define safe_malloc(ptr)                                                       \
  do {                                                                         \
    if ((ptr) == NULL) {                                                       \
      fprintf(stderr, "%s:%d: Failed to allocate memory.\n", __FILE__,         \
              __LINE__);                                                       \
      exit(1);                                                                 \
    }                                                                          \
  } while (0)
/* Number of rows (samples) in the randomly generated demo data. */
#define N_SAMPLES 128
/* Number of columns (features) in the randomly generated demo data. */
#define N_FEATURES 16
/* Short local aliases for the handle types declared by the XGBoost C API. */
typedef BoosterHandle Booster;
typedef DMatrixHandle DMatrix;
/* Row-major matrix. */
struct _Matrix {
/* Element storage; element (i, j) lives at data[i * shape[1] + j]. */
float *data;
/* shape[0] is the number of samples (rows), shape[1] the number of features
 * (columns). */
size_t shape[2];
/* Private member: buffer that Matrix_ArrayInterface() formats its JSON
 * description into. */
char _array_intrerface[256];
};
/* A custom data type for demonstration.  Note that Matrix is a pointer to the
 * struct, not the struct itself. */
typedef struct _Matrix *Matrix;
/*
 * Allocate a row-major matrix of n_samples x n_features floats.
 *
 * self       - out parameter that receives the new matrix; must not be NULL.
 * data       - optional source buffer.  When non-NULL,
 *              n_samples * n_features floats are copied from it; when NULL
 *              the elements are left uninitialized.
 * n_samples  - number of rows.
 * n_features - number of columns.
 *
 * The process exits if `self` is NULL, if the requested shape does not fit in
 * size_t, or if an allocation fails.  Release the result with Matrix_Free().
 */
void Matrix_Create(Matrix *self, float const *data, size_t n_samples,
                   size_t n_features) {
  if (self == NULL) {
    fprintf(stderr, "指向 %s 的指针无效\n", __func__);
    exit(-1);
  }

  /* Reject shapes whose byte size would wrap around: an undersized buffer
   * would otherwise be overrun by the memcpy below or by later writes. */
  if (n_features != 0 && n_samples > SIZE_MAX / sizeof(float) / n_features) {
    fprintf(stderr, "%s:%d: Matrix shape is too large.\n", __FILE__, __LINE__);
    exit(1);
  }
  size_t const n_bytes = n_samples * n_features * sizeof(float);

  struct _Matrix *matrix = malloc(sizeof *matrix);
  safe_malloc(matrix);

  /* malloc(0) is allowed to return NULL; ask for at least one byte so an
   * empty matrix is not mistaken for an allocation failure. */
  matrix->data = malloc(n_bytes != 0 ? n_bytes : 1);
  safe_malloc(matrix->data);

  matrix->shape[0] = n_samples;
  matrix->shape[1] = n_features;
  if (data != NULL) {
    memcpy(matrix->data, data, n_bytes);
  }
  *self = matrix;
}
/*
 * Create an n_samples x n_features matrix and fill every element with
 * rand() scaled by RAND_MAX, i.e. a pseudo-random value in [0, 1].
 * The result is stored in *self and is released with Matrix_Free().
 */
void Matrix_Random(Matrix *self, size_t n_samples, size_t n_features) {
  Matrix_Create(self, NULL, n_samples, n_features);

  float *cursor = (*self)->data;
  float *const end = cursor + n_samples * n_features;
  while (cursor != end) {
    *cursor++ = (float)rand() / (float)(RAND_MAX);
  }
}
/*
 * Describe the matrix using the array interface specified by numpy.
 *
 * Formats a JSON document holding the address of the element buffer, the
 * shape, the type string "<f4" and the interface version into the matrix's
 * private buffer and returns a pointer to that buffer.  The returned string
 * is owned by `self`: it is overwritten by the next call on the same matrix
 * and becomes invalid once the matrix is freed.
 *
 * The process exits if the description cannot be formatted into the buffer.
 */
char const *Matrix_ArrayInterface(Matrix self) {
  /* snprintf bounds the write and always NUL-terminates.  %zu is the correct
   * conversion for size_t; the pointer is converted through uintptr_t so the
   * pointer-to-integer round trip is well defined. */
  int const written = snprintf(
      self->_array_intrerface, sizeof(self->_array_intrerface),
      "{\"data\": [%zu, true], \"shape\": [%zu, %zu], "
      "\"typestr\": \"<f4\", \"version\": 3}",
      (size_t)(uintptr_t)self->data, self->shape[0], self->shape[1]);
  if (written < 0 || (size_t)written >= sizeof(self->_array_intrerface)) {
    fprintf(stderr, "%s:%d: Failed to format the array interface.\n", __FILE__,
            __LINE__);
    exit(1);
  }
  return self->_array_intrerface;
}
/* Return the number of samples (rows), i.e. the first entry of the shape. */
size_t Matrix_NSamples(Matrix self) {
  size_t const *dims = self->shape;
  return dims[0];
}
/* Return the number of features (columns), i.e. the second entry of the
 * shape. */
size_t Matrix_NFeatures(Matrix self) {
  size_t const *dims = self->shape;
  return dims[1];
}
/*
 * Return the element at row `i`, column `j` of a row-major matrix.
 * No bounds checking is performed.
 */
float Matrix_At(Matrix self, size_t i, size_t j) {
  size_t const row_stride = self->shape[1];
  size_t const offset = i * row_stride + j;
  return self->data[offset];
}
/*
 * Print every element in row-major order to stdout, each followed by ", ",
 * and finish with a single newline after the last element.
 */
void Matrix_Print(Matrix self) {
  size_t const n_rows = Matrix_NSamples(self);
  size_t const n_cols = Matrix_NFeatures(self);

  size_t row = 0;
  while (row < n_rows) {
    size_t col = 0;
    while (col < n_cols) {
      printf("%f, ", Matrix_At(self, row, col));
      ++col;
    }
    ++row;
  }
  printf("\n");
}
/*
 * Release a matrix: frees the element buffer and then the matrix object
 * itself.  Passing NULL is a no-op.
 */
void Matrix_Free(Matrix self) {
  if (self == NULL) {
    return;
  }
  /* free(NULL) does nothing, so the element buffer needs no separate guard. */
  free(self->data);
  free(self);
}
/*
 * Demo entry point: trains a small booster on random data, saves it to
 * "model.json", loads it back, and runs prediction twice -- once through a
 * DMatrix and once in place on the dense array -- printing both results.
 *
 * Every XGBoost call is wrapped in safe_xgboost(), which exits the process on
 * failure.  Returns 0 on success.
 */
int main(void) {
  Matrix X;
  Matrix y;
  Matrix_Random(&X, N_SAMPLES, N_FEATURES);
  Matrix_Random(&y, N_SAMPLES, 1);

  char const *X_interface = Matrix_ArrayInterface(X);
  char config[] = "{\"nthread\": 16, \"missing\": NaN}";
  DMatrix Xy;
  /* "Dense" stands for dense matrix. */
  safe_xgboost(XGDMatrixCreateFromDense(X_interface, config, &Xy));
  /* The label must be in a contiguous array. */
  safe_xgboost(XGDMatrixSetDenseInfo(Xy, "label", y->data, y->shape[0], 1));

  DMatrix cache[] = {Xy};
  Booster booster;
  /* Train a booster for demonstration. */
  safe_xgboost(XGBoosterCreate(cache, 1, &booster));
  /* The iteration argument of XGBoosterUpdateOneIter is an int, so the loop
   * counter is an int as well. */
  int const n_rounds = 10;
  for (int i = 0; i < n_rounds; ++i) {
    safe_xgboost(XGBoosterUpdateOneIter(booster, i, Xy));
  }

  /* Save the trained model in JSON format. */
  safe_xgboost(XGBoosterSaveModel(booster, "model.json"));
  safe_xgboost(XGBoosterFree(booster));

  /* Load it back for inference.  Saving and loading are not required; they
   * are done here only for demonstration purposes. */
  safe_xgboost(XGBoosterCreate(NULL, 0, &booster));
  safe_xgboost(XGBoosterLoadModel(booster, "model.json"));

  {
    /* Run prediction with a DMatrix object. */
    char const predict_config[] =
        "{\"training\": false, \"type\": 0, "
        "\"iteration_begin\": 0, \"iteration_end\": 0, \"strict_shape\": true}";
    /* Shape of the output prediction. */
    uint64_t const *out_shape;
    /* Dimension of the output prediction. */
    uint64_t out_dim;
    /* Pointer to a thread-local contiguous array, assigned inside the
     * prediction function. */
    float const *out_results;
    safe_xgboost(XGBoosterPredictFromDMatrix(booster, Xy, predict_config,
                                             &out_shape, &out_dim,
                                             &out_results));
    if (out_dim != 2 || out_shape[0] != N_SAMPLES || out_shape[1] != 1) {
      fprintf(stderr, "回归模型应将预测输出为向量。");
      exit(-1);
    }

    Matrix predt;
    /* Always copy the output from XGBoost before calling the next API
     * function. */
    Matrix_Create(&predt, out_results, out_shape[0], out_shape[1]);
    printf("预测结果\n");
    Matrix_Print(predt);
    Matrix_Free(predt);
  }

  {
    /* Run in-place prediction, which is faster and more memory efficient but
     * supports only basic inference types. */
    char const inplace_config[] = "{\"type\": 0, \"iteration_begin\": 0, "
                                  "\"iteration_end\": 0, \"strict_shape\": true, "
                                  "\"cache_id\": 0, \"missing\": NaN}";
    /* Shape of the output prediction. */
    uint64_t const *out_shape;
    /* Dimension of the output prediction. */
    uint64_t out_dim;
    /* Pointer to a thread-local contiguous array, assigned inside the
     * prediction function. */
    float const *out_results;
    char const *dense_interface = Matrix_ArrayInterface(X);
    safe_xgboost(XGBoosterPredictFromDense(booster, dense_interface,
                                           inplace_config, NULL, &out_shape,
                                           &out_dim, &out_results));
    if (out_dim != 2 || out_shape[0] != N_SAMPLES || out_shape[1] != 1) {
      /* PRIu64 is the matching conversion for uint64_t; "%lu" is not portable
       * to platforms where unsigned long is 32 bits wide. */
      fprintf(stderr,
              "回归模型应将预测输出为向量,%" PRIu64 ",%" PRIu64,
              out_dim, out_shape[0]);
      exit(-1);
    }

    Matrix predt;
    /* Always copy the output from XGBoost before calling the next API
     * function. */
    Matrix_Create(&predt, out_results, out_shape[0], out_shape[1]);
    printf("就地预测结果\n");
    Matrix_Print(predt);
    Matrix_Free(predt);
  }

  /* Release every handle and buffer acquired above; the DMatrix must be freed
   * as well, otherwise it leaks. */
  safe_xgboost(XGBoosterFree(booster));
  safe_xgboost(XGDMatrixFree(Xy));
  Matrix_Free(X);
  Matrix_Free(y);
  return 0;
}
XGBoost 的 C API,用于与其他语言接口。
int XGBoosterFree(BoosterHandle handle)
删除增强器。
int XGBoosterUpdateOneIter(BoosterHandle handle, int iter, DMatrixHandle dtrain)
使用dtrain更新模型一个回合
int XGBoosterCreate(const DMatrixHandle dmats[], bst_ulong len, BoosterHandle *out)
创建一个XGBoost学习器(booster)
int XGDMatrixFree(DMatrixHandle handle)
释放数据矩阵中的空间
int XGDMatrixCreateFromDense(char const *data, char const *config, DMatrixHandle *out)
从密集数组创建 DMatrix。
int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field, void const *data, bst_ulong size, int type)
从稠密数组设置元信息(例如 label)。
void * BoosterHandle
Booster 句柄
定义: c_api.h:52
void * DMatrixHandle
DMatrix 句柄
定义: c_api.h:50
int XGBoosterPredictFromDense(BoosterHandle handle, char const *values, char const *config, DMatrixHandle m, bst_ulong const **out_shape, bst_ulong *out_dim, const float **out_result)
从CPU稠密矩阵进行就地预测。
int XGBoosterPredictFromDMatrix(BoosterHandle handle, DMatrixHandle dmat, char const *config, bst_ulong const **out_shape, bst_ulong *out_dim, float const **out_result)
从DMatrix进行预测,取代XGBoosterPredict。
int XGBoosterSaveModel(BoosterHandle handle, const char *fname)
将模型保存到现有文件。
int XGBoosterLoadModel(BoosterHandle handle, const char *fname)
从现有文件加载模型。
Tensor< T, 2 > Matrix
定义: linalg.h:943