#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#define safe_xgboost(err) \
if ((err) != 0) { \
fprintf(stderr, "%s:%d: error in %s: %s\n", __FILE__, __LINE__, #err, \
XGBGetLastError()); \
exit(1); \
}
#define N_BATCHS 32
#define BATCH_LEN 512
typedef struct _DataIter {
float **data;
float **labels;
size_t *lengths;
size_t n;
size_t cur_it;
DMatrix _proxy;
char _array[128];
} DataIter;
#define safe_malloc(ptr) \
if ((ptr) == NULL) { \
fprintf(stderr, "%s:%d: 分配内存失败.\n", __FILE__, \
__LINE__); \
exit(1); \
}
void DataIterator_Init(DataIter *self, size_t batch_size, size_t n_batches) {
self->n = n_batches;
self->lengths = (size_t *)malloc(self->n * sizeof(size_t));
safe_malloc(self->lengths);
for (size_t i = 0; i < self->n; ++i) {
self->lengths[i] = batch_size;
}
self->data = (float **)malloc(self->n * sizeof(float *));
safe_malloc(self->data);
self->labels = (float **)malloc(self->n * sizeof(float *));
safe_malloc(self->labels);
for (size_t i = 0; i < self->n; ++i) {
self->data[i] = (float *)malloc(self->lengths[i] * sizeof(float));
safe_malloc(self->data[i]);
for (size_t j = 0; j < self->lengths[i]; ++j) {
float x = (float)rand() / (float)(RAND_MAX);
self->data[i][j] = x;
}
self->labels[i] = (float *)malloc(self->lengths[i] * sizeof(float));
safe_malloc(self->labels[i]);
for (size_t j = 0; j < self->lengths[i]; ++j) {
float y = (float)rand() / (float)(RAND_MAX);
self->labels[i][j] = y;
}
}
self->cur_it = 0;
}
void DataIterator_Free(DataIter *self) {
for (size_t i = 0; i < self->n; ++i) {
free(self->data[i]);
free(self->labels[i]);
}
free(self->data);
free(self->lengths);
free(self->labels);
};
DataIter *self = (DataIter *)(handle);
if (self->cur_it == self->n) {
self->cur_it = 0;
return 0;
}
char array[] = "{\"data\": [%lu, false], \"shape\":[%lu, 1], \"typestr\": "
"\"<f4\", \"version\": 3}";
memset(self->_array, '\0', sizeof(self->_array));
sprintf(self->_array, array, (size_t)self->data[self->cur_it],
self->lengths[self->cur_it]);
self->labels[self->cur_it],
self->lengths[self->cur_it], 1));
self->cur_it++;
return 1;
}
DataIter *self = (DataIter *)(handle);
self->cur_it = 0;
}
void TrainModel(DMatrix Xy) {
Booster booster;
DMatrix cache[] = {Xy};
const char *validation_names[1] = {"train"};
const char *validation_result = NULL;
size_t n_rounds = 10;
for (size_t i = 0; i < n_rounds; ++i) {
&validation_result));
printf("%s\n", validation_result);
}
}
int main() {
DataIter iter;
DataIterator_Init(&iter, BATCH_LEN, N_BATCHS);
char config[] = "{\"missing\": NaN, \"cache_prefix\": \"cache\"}";
DMatrix Xy;
&iter, iter._proxy, DataIterator_Reset, DataIterator_Next, config, &Xy));
TrainModel(Xy);
DataIterator_Free(&iter);
return 0;
}
XGBoost 的 C API,用于与其他语言进行接口交互。
int XGBoosterFree(BoosterHandle handle)
删除 booster。
int XGBoosterEvalOneIter(BoosterHandle handle, int iter, DMatrixHandle dmats[], const char *evnames[], bst_ulong len, const char **out_result)
获取 xgboost 的评估统计信息
int XGBoosterUpdateOneIter(BoosterHandle handle, int iter, DMatrixHandle dtrain)
使用 dtrain 在一轮中更新模型
int XGBoosterSetParam(BoosterHandle handle, const char *name, const char *value)
设置参数
int XGBoosterCreate(const DMatrixHandle dmats[], bst_ulong len, BoosterHandle *out)
创建 XGBoost 学习器 (booster)
int XGDMatrixFree(DMatrixHandle handle)
释放数据矩阵中的空间
int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field, void const *data, bst_ulong size, int type)
void * BoosterHandle
Booster 的句柄
定义于: c_api.h:52
void * DMatrixHandle
DMatrix 的句柄
定义于: c_api.h:50
int XGBoosterSaveModel(BoosterHandle handle, const char *fname)
将模型保存到现有文件。
int XGDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHandle proxy, DataIterResetCallback *reset, XGDMatrixCallbackNext *next, char const *config, DMatrixHandle *out)
使用数据迭代器创建外部内存 DMatrix。
int XGProxyDMatrixCreate(DMatrixHandle *out)
创建一个 DMatrix 代理用于设置数据,可以通过 XGDMatrixFree 释放。
int XGProxyDMatrixSetDataDense(DMatrixHandle handle, char const *data)
在 DMatrix 代理上设置数据。
void * DataIterHandle
外部数据迭代器的句柄
定义于: c_api.h:343