xgboost
base.h
前往此文件文档。
1 
6 #ifndef XGBOOST_BASE_H_
7 #define XGBOOST_BASE_H_
8 
9 #include <dmlc/omp.h> // for omp_uint, omp_ulong
10 // Put the windefs here to guard as many files as possible.
11 #include <xgboost/windefs.h>
12 
13 #include <cstdint> // for int32_t, uint64_t, int16_t
14 #include <ostream> // for ostream
15 #include <string> // for string
16 #include <utility> // for pair
17 #include <vector> // for vector
18 
// Build-time options. Each macro below receives its default value only when
// the build has not already defined it (for example through a -D flag).

/*!
 * \brief Defaults to 0.
 * NOTE(review): the name suggests a stricter mode for R package builds; the
 * code that consumes it is not in this header -- confirm before relying on it.
 */
#ifndef XGBOOST_STRICT_R_MODE
#define XGBOOST_STRICT_R_MODE 0
#endif  // XGBOOST_STRICT_R_MODE

/*!
 * \brief Defaults to 1.
 * NOTE(review): presumably controls whether log messages carry a timestamp --
 * confirm against the logging code.
 */
#ifndef XGBOOST_LOG_WITH_TIME
#define XGBOOST_LOG_WITH_TIME 1
#endif  // XGBOOST_LOG_WITH_TIME

/*!
 * \brief Defaults to 0.
 * NOTE(review): presumably lets an embedding build supply its own global
 * PRNG -- confirm against the random number code.
 */
#ifndef XGBOOST_CUSTOMIZE_GLOBAL_PRNG
#define XGBOOST_CUSTOMIZE_GLOBAL_PRNG 0
#endif  // XGBOOST_CUSTOMIZE_GLOBAL_PRNG
42 
/*!
 * \brief XGBOOST_ALIGNAS(X) expands to alignas(X) when the compiler reports a
 *        GCC version of 4.8 or newer, and to nothing on every other compiler.
 */
#if defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4)
#define XGBOOST_ALIGNAS(X) alignas(X)
#else
#define XGBOOST_ALIGNAS(X)
#endif  // defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4)

/*!
 * \brief XGBOOST_EXPECT(cond, ret) evaluates to cond. Where __builtin_expect
 *        is available it additionally tells the optimiser that the value is
 *        expected to equal ret; elsewhere ret is ignored.
 */
#if defined(__GNUC__)
#define XGBOOST_EXPECT(cond, ret) __builtin_expect((cond), (ret))
#else
#define XGBOOST_EXPECT(cond, ret) (cond)
#endif  // defined(__GNUC__)
57 
/*!
 * \brief Tag function as usable by device. Expands to `__host__ __device__`
 *        when __CUDA__ or __NVCC__ is defined and to nothing otherwise.
 */
#if defined (__CUDA__) || defined(__NVCC__)
#define XGBOOST_DEVICE __host__ __device__
#else
#define XGBOOST_DEVICE
#endif  // defined (__CUDA__) || defined(__NVCC__)

/*!
 * \brief Force-inline function tags. When __CUDA__ or __CUDACC__ is defined,
 *        XGBOOST_HOST_DEV_INLINE is XGBOOST_DEVICE plus __forceinline__ and
 *        XGBOOST_DEV_INLINE is `__device__ __forceinline__`; otherwise both
 *        expand to nothing (no inline request is made in that case).
 */
#if defined(__CUDA__) || defined(__CUDACC__)
#define XGBOOST_HOST_DEV_INLINE XGBOOST_DEVICE __forceinline__
#define XGBOOST_DEV_INLINE __device__ __forceinline__
#else
#define XGBOOST_HOST_DEV_INLINE
#define XGBOOST_DEV_INLINE
#endif  // defined(__CUDA__) || defined(__CUDACC__)
74 
75 
/*!
 * \brief Compiler-specific spelling of the restrict pointer qualifier:
 *        `__restrict` under MSVC, `__restrict__` everywhere else.
 */
#if defined(_MSC_VER)
#define XGBOOST_RESTRICT __restrict
#else
#define XGBOOST_RESTRICT __restrict__
#endif  // defined(_MSC_VER)
82 
// These checks are for the Makefile: when the build has already defined either
// prefetch macro, the compiler-based detection below is skipped entirely.
#if !defined(XGBOOST_MM_PREFETCH_PRESENT) && !defined(XGBOOST_BUILTIN_PREFETCH_PRESENT)
/* default logic for software pre-fetching */
#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))) || defined(__INTEL_COMPILER)
// Enable _mm_prefetch for the Intel compiler and MSVC+x86.
// Both macros are defined on this branch.
#define XGBOOST_MM_PREFETCH_PRESENT
#define XGBOOST_BUILTIN_PREFETCH_PRESENT
#elif defined(__GNUC__)
// Enable __builtin_prefetch for GCC.
#define XGBOOST_BUILTIN_PREFETCH_PRESENT
#endif  // compiler detection

#endif  // !defined(XGBOOST_MM_PREFETCH_PRESENT) && !defined(XGBOOST_BUILTIN_PREFETCH_PRESENT)
96 
97 namespace xgboost {
/*! \brief unsigned integer type used for feature index. */
using bst_uint = std::uint32_t;  // NOLINT
/*! \brief unsigned long integers */
using bst_ulong = std::uint64_t;  // NOLINT
/*! \brief float type, used for storing statistics */
using bst_float = float;  // NOLINT
/*! \brief Categorical value type. */
using bst_cat_t = std::int32_t;  // NOLINT
/*! \brief Type for data column (feature) index. */
using bst_feature_t = std::uint32_t;  // NOLINT
/*! \brief Type for histogram bin index. We sometimes use -1 to indicate an invalid bin. */
using bst_bin_t = std::int32_t;  // NOLINT
/*! \brief Type for data row index (sample). */
using bst_idx_t = std::uint64_t;  // NOLINT
/*! \brief Type for tree node index. */
using bst_node_t = std::int32_t;  // NOLINT
/*! \brief Type for ranking group index. */
using bst_group_t = std::uint32_t;  // NOLINT
/*! \brief Type for indexing into output targets. */
using bst_target_t = std::uint32_t;  // NOLINT
/*! \brief Type for indexing boosted layers. */
using bst_layer_t = std::int32_t;  // NOLINT
/*! \brief Type for indexing trees. */
using bst_tree_t = std::int32_t;  // NOLINT
/*! \brief Ordinal of a CUDA device. */
using bst_d_ordinal_t = std::int16_t;  // NOLINT
140 
141 namespace detail {
145 template <typename T>
148  T grad_{0};
150  T hess_{0};
151 
152  XGBOOST_DEVICE void SetGrad(T g) { grad_ = g; }
153  XGBOOST_DEVICE void SetHess(T h) { hess_ = h; }
154 
155  public
156  using ValueT = T;
157 
158  inline void Add(const ValueT& grad, const ValueT& hess) {
159  grad_ += grad;
160  hess_ += hess;
161  }
162 
163  inline static void Reduce(GradientPairInternal<T>& a, const GradientPairInternal<T>& b) { // NOLINT(*)
164  a += b;
165  }
166 
167  GradientPairInternal() = default;
168 
170  SetGrad(grad);
171  SetHess(hess);
172  }
173 
174  // 如果是相同值类型,则为复制构造函数,标记为 default 以便可 trivially_copyable
179 
180  // 如果值类型不同,则为复制构造函数 - 使用 getter 和 setter 执行转换
181  // perform conversion
182  template <typename T2>
184  SetGrad(g.GetGrad());
185  SetHess(g.GetHess());
186  }
187 
188  XGBOOST_DEVICE T GetGrad() const { return grad_; }
189  XGBOOST_DEVICE T GetHess() const { return hess_; }
190 
192  const GradientPairInternal<T> &rhs) {
193  grad_ += rhs.grad_;
194  hess_ += rhs.hess_;
195  return *this;
196  }
197 
199  const GradientPairInternal<T> &rhs) const {
201  g.grad_ = grad_ + rhs.grad_;
202  g.hess_ = hess_ + rhs.hess_;
203  return g;
204  }
205 
207  const GradientPairInternal<T> &rhs) {
208  grad_ -= rhs.grad_;
209  hess_ -= rhs.hess_;
210  return *this;
211  }
212 
214  const GradientPairInternal<T> &rhs) const {
216  g.grad_ = grad_ - rhs.grad_;
217  g.hess_ = hess_ - rhs.hess_;
218  return g;
219  }
220 
222  grad_ *= multiplier;
223  hess_ *= multiplier;
224  return *this;
225  }
226 
229  g.grad_ = grad_ * multiplier;
230  g.hess_ = hess_ * multiplier;
231  return g;
232  }
233 
235  grad_ /= divisor;
236  hess_ /= divisor;
237  return *this;
238  }
239 
242  g.grad_ = grad_ / divisor;
243  g.hess_ = hess_ / divisor;
244  return g;
245  }
246 
248  return grad_ == rhs.grad_ && hess_ == rhs.hess_;
249  }
250 
251  XGBOOST_DEVICE explicit GradientPairInternal(int value) {
252  *this = GradientPairInternal<T>(static_cast<float>(value),
253  static_cast<float>(value));
254  }
255 
256  friend std::ostream &operator<<(std::ostream &os,
257  const GradientPairInternal<T> &g) {
258  os << g.GetGrad() << "/" << g.GetHess();
259  return os;
260  }
261 };
262 } // namespace detail
263 
268 
272  using T = int64_t;
273  T grad_ = 0;
274  T hess_ = 0;
275 
276  public
277  using ValueT = T;
278 
279  XGBOOST_DEVICE GradientPairInt64(T grad, T hess) : grad_(grad), hess_(hess) {}
280  GradientPairInt64() = default;
281 
282  // 如果是相同值类型,则为复制构造函数,标记为 default 以便可 trivially_copyable
285 
286  [[nodiscard]] XGBOOST_DEVICE T GetQuantisedGrad() const { return grad_; }
287  [[nodiscard]] XGBOOST_DEVICE T GetQuantisedHess() const { return hess_; }
288 
290  grad_ += rhs.grad_;
291  hess_ += rhs.hess_;
292  return *this;
293  }
294 
297  g.grad_ = grad_ + rhs.grad_;
298  g.hess_ = hess_ + rhs.hess_;
299  return g;
300  }
301 
303  grad_ -= rhs.grad_;
304  hess_ -= rhs.hess_;
305  return *this;
306  }
307 
310  g.grad_ = grad_ - rhs.grad_;
311  g.hess_ = hess_ - rhs.hess_;
312  return g;
313  }
314 
315  XGBOOST_DEVICE bool operator==(const GradientPairInt64 &rhs) const {
316  return grad_ == rhs.grad_ && hess_ == rhs.hess_;
317  }
318  friend std::ostream &operator<<(std::ostream &os, const GradientPairInt64 &g) {
319  os << g.GetQuantisedGrad() << "/" << g.GetQuantisedHess();
320  return os;
321  }
322 };
323 
324 using Args = std::vector<std::pair<std::string, std::string> >;
325 
327 constexpr bst_float kRtEps = 1e-6f;
328 
330 using omp_ulong = dmlc::omp_ulong; // NOLINT
332 using bst_omp_uint = dmlc::omp_uint; // NOLINT
334 using XGBoostVersionT = std::int32_t;
335 } // namespace xgboost
336 
337 #endif // XGBOOST_BASE_H_
#define XGBOOST_DEVICE
将函数标记为可在设备上使用。
定义: base.h:64
用于高精度梯度对的定点表示。具有不同的接口,因此我们不会...
定义: base.h:271
T ValueT
定义: base.h:277
XGBOOST_DEVICE GradientPairInt64 operator+(const GradientPairInt64 &rhs) const
定义: base.h:295
XGBOOST_DEVICE bool operator==(const GradientPairInt64 &rhs) const
定义: base.h:315
XGBOOST_DEVICE T GetQuantisedHess() const
定义: base.h:287
GradientPairInt64(GradientPairInt64 const &g)=default
GradientPairInt64 & operator=(GradientPairInt64 const &g)=default
XGBOOST_DEVICE GradientPairInt64 & operator-=(const GradientPairInt64 &rhs)
定义: base.h:302
friend std::ostream & operator<<(std::ostream &os, const GradientPairInt64 &g)
定义: base.h:318
XGBOOST_DEVICE GradientPairInt64 & operator+=(const GradientPairInt64 &rhs)
定义: base.h:289
XGBOOST_DEVICE T GetQuantisedGrad() const
定义: base.h:286
XGBOOST_DEVICE GradientPairInt64(T grad, T hess)
定义: base.h:279
XGBOOST_DEVICE GradientPairInt64 operator-(const GradientPairInt64 &rhs) const
定义: base.h:308
梯度统计对的实现。模板特化可用于重载不同的...
定义: base.h:146
GradientPairInternal(GradientPairInternal &&g)=default
XGBOOST_DEVICE GradientPairInternal< T > operator*(float multiplier) const
定义: base.h:227
XGBOOST_DEVICE GradientPairInternal< T > & operator+=(const GradientPairInternal< T > &rhs)
定义: base.h:191
void Add(const ValueT &grad, const ValueT &hess)
定义: base.h:158
friend std::ostream & operator<<(std::ostream &os, const GradientPairInternal< T > &g)
定义: base.h:256
XGBOOST_DEVICE GradientPairInternal(T grad, T hess)
定义: base.h:169
XGBOOST_DEVICE GradientPairInternal< T > & operator*=(float multiplier)
定义: base.h:221
XGBOOST_DEVICE GradientPairInternal(int value)
定义: base.h:251
XGBOOST_DEVICE GradientPairInternal< T > operator+(const GradientPairInternal< T > &rhs) const
定义: base.h:198
XGBOOST_DEVICE GradientPairInternal< T > & operator/=(float divisor)
定义: base.h:234
XGBOOST_DEVICE GradientPairInternal< T > operator/(float divisor) const
定义: base.h:240
XGBOOST_DEVICE T GetHess() const
定义: base.h:189
XGBOOST_DEVICE GradientPairInternal< T > & operator-=(const GradientPairInternal< T > &rhs)
定义: base.h:206
GradientPairInternal & operator=(GradientPairInternal &&that)=default
XGBOOST_DEVICE GradientPairInternal< T > operator-(const GradientPairInternal< T > &rhs) const
定义: base.h:213
static void Reduce(GradientPairInternal< T > &a, const GradientPairInternal< T > &b)
定义: base.h:163
GradientPairInternal(GradientPairInternal const &g)=default
XGBOOST_DEVICE T GetGrad() const
定义: base.h:188
GradientPairInternal & operator=(GradientPairInternal const &that)=default
XGBOOST_DEVICE GradientPairInternal(const GradientPairInternal< T2 > &g)
定义: base.h:183
XGBOOST_DEVICE bool operator==(const GradientPairInternal< T > &rhs) const
定义: base.h:247
集成目标、gbm和评估的学习器接口。这是用户面临的XGB...
定义: base.h:97
std::vector< std::pair< std::string, std::string > > Args
定义: base.h:324
std::uint32_t bst_group_t
用于排名组索引的类型。
定义: base.h:123
std::int32_t bst_node_t
树节点索引的类型。
定义: base.h:119
dmlc::omp_ulong omp_ulong
为 OpenMP 循环定义无符号长整型
定义: base.h:330
dmlc::omp_uint bst_omp_uint
为 OpenMP 循环定义无符号整型
定义: base.h:332
std::int32_t bst_bin_t
直方图 bin 索引的类型。我们有时使用 -1 表示无效 bin。
定义: base.h:111
std::int32_t XGBoostVersionT
用于以二进制形式表示版本号的类型。
定义: base.h:334
std::uint64_t bst_idx_t
数据行索引(样本)的类型。
定义: base.h:115
std::int32_t bst_tree_t
用于索引树的类型。
定义: base.h:135
std::uint32_t bst_target_t
用于索引输出目标的类型。
定义: base.h:127
std::int16_t bst_d_ordinal_t
CUDA 设备的序号。
定义: base.h:139
std::uint32_t bst_uint
用于特征索引的无符号整型。
定义: base.h:99
std::int32_t bst_layer_t
用于索引提升层的类型。
定义: base.h:131
std::uint32_t bst_feature_t
数据列(特征)索引的类型。
定义: base.h:107
std::uint64_t bst_ulong
无符号长整数
定义: base.h:101
float bst_float
浮点类型,用于存储统计信息
定义: base.h:103
std::int32_t bst_cat_t
类别值类型。
定义: base.h:105
constexpr bst_float kRtEps
最小分割决策的小 epsilon 间隙。
定义: base.h:327