xgboost
linalg.h
#ifndef XGBOOST_LINALG_H_
#define XGBOOST_LINALG_H_

#include <dmlc/endian.h>
#include <xgboost/base.h>
#include <xgboost/context.h>
#include <xgboost/json.h>
#include <xgboost/span.h>

#include <algorithm>
#include <cassert>
#include <cstddef>  // for size_t
#include <cstdint>  // for int32_t
#include <limits>
#include <string>
#include <tuple>  // for make_tuple
#include <type_traits>
#include <utility>
#include <vector>

#if defined(_MSC_VER)
#include <intrin.h>
#endif  // defined(_MSC_VER)

// Decouple it from xgboost.
#ifndef LINALG_HD
#if defined(__CUDA__) || defined(__NVCC__)
#define LINALG_HD __host__ __device__
#else
#define LINALG_HD
#endif  // defined (__CUDA__) || defined(__NVCC__)
#endif  // LINALG_HD

namespace xgboost::linalg {
namespace detail {

struct ArrayInterfaceHandler {
  template <typename T>
  static constexpr char TypeChar() {
    return (std::is_floating_point_v<T>
                ? 'f'
                : (std::is_integral_v<T> ? (std::is_signed_v<T> ? 'i' : 'u') : '\0'));
  }
};

template <size_t dim, typename S, typename Head, size_t D>
constexpr size_t Offset(S (&strides)[D], size_t n, Head head) {
  static_assert(dim < D);
  return n + head * strides[dim];
}

template <size_t dim, typename S, size_t D, typename Head, typename... Tail>
constexpr std::enable_if_t<sizeof...(Tail) != 0, size_t> Offset(S (&strides)[D], size_t n,
                                                                Head head, Tail &&...rest) {
  static_assert(dim < D);
  return Offset<dim + 1>(strides, n + (head * strides[dim]), std::forward<Tail>(rest)...);
}

template <int32_t D, bool f_array = false>
constexpr void CalcStride(size_t const (&shape)[D], size_t (&stride)[D]) {
  if (f_array) {
    stride[0] = 1;
    for (int32_t s = 1; s < D; ++s) {
      stride[s] = shape[s - 1] * stride[s - 1];
    }
  } else {
    stride[D - 1] = 1;
    for (int32_t s = D - 2; s >= 0; --s) {
      stride[s] = shape[s + 1] * stride[s + 1];
    }
  }
}
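
// Usage sketch: for a shape of {2, 3, 4}, C order (row major) and F order
// (column major) produce the following strides.
//
//   std::size_t shape[3] = {2, 3, 4};
//   std::size_t stride[3];
//   CalcStride(shape, stride);           // row major:    {12, 4, 1}
//   CalcStride<3, true>(shape, stride);  // column major: {1, 2, 6}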

struct AllTag {};

struct IntTag {};

template <typename I>
struct RangeTag {
  I beg;
  I end;
  [[nodiscard]] constexpr size_t Size() const { return end - beg; }
};

template <typename T>
constexpr int32_t CalcSliceDim() {
  return std::is_same_v<T, IntTag> ? 0 : 1;
}

template <typename T, typename... S>
constexpr std::enable_if_t<sizeof...(S) != 0, int32_t> CalcSliceDim() {
  return CalcSliceDim<T>() + CalcSliceDim<S...>();
}

template <int32_t D>
constexpr size_t CalcSize(size_t (&shape)[D]) {
  size_t size = 1;
  for (auto d : shape) {
    size *= d;
  }
  return size;
}

template <typename S>
using RemoveCRType = std::remove_const_t<std::remove_reference_t<S>>;

template <typename S>
using IndexToTag = std::conditional_t<std::is_integral_v<RemoveCRType<S>>, IntTag, S>;

template <int32_t n, typename Fn>
LINALG_HD constexpr auto UnrollLoop(Fn fn) {
#if defined __CUDA_ARCH__
#pragma unroll n
#endif  // defined __CUDA_ARCH__
  for (int32_t i = 0; i < n; ++i) {
    fn(i);
  }
}

template <typename T>
int32_t NativePopc(T v) {
  int c = 0;
  for (; v != 0; v &= v - 1) c++;
  return c;
}

inline LINALG_HD int Popc(uint32_t v) {
#if defined(__CUDA_ARCH__)
  return __popc(v);
#elif defined(__GNUC__) || defined(__clang__)
  return __builtin_popcount(v);
#elif defined(_MSC_VER)
  return __popcnt(v);
#else
  return NativePopc(v);
#endif  // compiler
}

inline LINALG_HD int Popc(uint64_t v) {
#if defined(__CUDA_ARCH__)
  return __popcll(v);
#elif defined(__GNUC__) || defined(__clang__)
  return __builtin_popcountll(v);
#elif defined(_MSC_VER) && defined(_M_X64)
  return __popcnt64(v);
#else
  return NativePopc(v);
#endif  // compiler
}
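
// Usage sketch: Popc returns the number of set bits, e.g. Popc(0b1011u) == 3.
// For a power-of-two extent s, Popc(s - 1) equals log2(s), which lets
// UnravelImpl below replace a division with a shift.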

template <std::size_t D, typename Head>
LINALG_HD void IndexToArr(std::size_t (&arr)[D], Head head) {
  static_assert(std::is_integral_v<std::remove_reference_t<Head>>, "Invalid index type.");
  arr[D - 1] = head;
}

template <std::size_t D, typename Head, typename... Rest>
LINALG_HD void IndexToArr(std::size_t (&arr)[D], Head head, Rest &&...index) {
  static_assert(sizeof...(Rest) < D, "Index overflow.");
  static_assert(std::is_integral_v<std::remove_reference_t<Head>>, "Invalid index type.");
  arr[D - sizeof...(Rest) - 1] = head;
  IndexToArr(arr, std::forward<Rest>(index)...);
}

template <class T, std::size_t N, std::size_t... Idx>
constexpr auto ArrToTuple(T (&arr)[N], std::index_sequence<Idx...>) {
  return std::make_tuple(arr[Idx]...);
}

template <class T, std::size_t N>
constexpr auto ArrToTuple(T (&arr)[N]) {
  return ArrToTuple(arr, std::make_index_sequence<N>{});
}

// Uint division optimization inspired by the CIndexer in cupy. Division is slow on both
// CPU and GPU, especially for 64 bit integers. So here we first try to avoid 64 bit when
// the index is small, then try to avoid division when the extent is a power of 2.
template <typename I, std::int32_t D>
LINALG_HD auto UnravelImpl(I idx, common::Span<size_t const, D> shape) {
  std::size_t index[D]{0};
  static_assert(std::is_signed_v<decltype(D)>,
                "Don't change the type without changing the for loop.");
  auto const sptr = shape.data();
  for (int32_t dim = D; --dim > 0;) {
    auto s = static_cast<std::remove_const_t<std::remove_reference_t<I>>>(sptr[dim]);
    if (s & (s - 1)) {
      auto t = idx / s;
      index[dim] = idx - t * s;
      idx = t;
    } else {  // power of 2
      index[dim] = idx & (s - 1);
      idx >>= Popc(s - 1);
    }
  }
  index[0] = idx;
  return ArrToTuple(index);
}
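
// Worked example: for shape (3, 4) and idx = 11, the extent 4 is a power of
// two, so index[1] = 11 & 3 = 3 and idx >>= Popc(3) leaves idx = 2, giving
// (2, 3). For a non-power-of-two extent such as 5, the general branch runs:
// t = 11 / 5 = 2, index[1] = 11 - 2 * 5 = 1, giving (2, 1).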

template <size_t dim, typename I, int32_t D>
void ReshapeImpl(size_t (&out_shape)[D], I s) {
  static_assert(dim < D);
  out_shape[dim] = s;
}

template <size_t dim, int32_t D, typename... S, typename I,
          std::enable_if_t<sizeof...(S) != 0> * = nullptr>
void ReshapeImpl(size_t (&out_shape)[D], I &&s, S &&...rest) {
  static_assert(dim < D);
  out_shape[dim] = s;
  ReshapeImpl<dim + 1>(out_shape, std::forward<S>(rest)...);
}

template <class...>
struct Conjunction : std::true_type {};
template <class B1>
struct Conjunction<B1> : B1 {};
template <class B1, class... Bn>
struct Conjunction<B1, Bn...>
    : std::conditional_t<static_cast<bool>(B1::value), Conjunction<Bn...>, B1> {};

template <typename... Index>
using IsAllIntegral = Conjunction<std::is_integral<std::remove_reference_t<Index>>...>;

template <typename... Index>
using EnableIfIntegral = std::enable_if_t<IsAllIntegral<Index...>::value>;
}  // namespace detail

constexpr detail::AllTag All() { return {}; }

template <typename I>
constexpr detail::RangeTag<I> Range(I beg, I end) {
  return {beg, end};
}

enum Order : std::uint8_t {
  kC,  // Row major.
  kF,  // Column major.
};

template <typename T, int32_t kDim>
class TensorView {
 public:
  using ShapeT = std::size_t[kDim];
  using StrideT = ShapeT;

  using element_type = T;                  // NOLINT
  using value_type = std::remove_cv_t<T>;  // NOLINT

 private:
  StrideT stride_{1};
  ShapeT shape_{0};
  common::Span<T> data_;
  T *ptr_{nullptr};  // pointer into data_ to avoid bound checks.

  size_t size_{0};
  DeviceOrd device_;

  // Unlike `Tensor`, data_ can have arbitrary size since this is just a view.
  LINALG_HD void CalcSize() {
    if (data_.empty()) {
      size_ = 0;
    } else {
      size_ = detail::CalcSize(shape_);
    }
  }

  template <size_t old_dim, size_t new_dim, int32_t D, typename I>
  LINALG_HD size_t MakeSliceDim(std::size_t new_shape[D], std::size_t new_stride[D],
                                detail::RangeTag<I> &&range) const {
    static_assert(new_dim < D);
    static_assert(old_dim < kDim);
    new_stride[new_dim] = stride_[old_dim];
    new_shape[new_dim] = range.Size();
    assert(static_cast<decltype(shape_[old_dim])>(range.end) <= shape_[old_dim]);

    auto offset = stride_[old_dim] * range.beg;
    return offset;
  }

  template <size_t old_dim, size_t new_dim, int32_t D, typename I, typename... S>
  LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D],
                                detail::RangeTag<I> &&range, S &&...slices) const {
    static_assert(new_dim < D);
    static_assert(old_dim < kDim);
    new_stride[new_dim] = stride_[old_dim];
    new_shape[new_dim] = range.Size();
    assert(static_cast<decltype(shape_[old_dim])>(range.end) <= shape_[old_dim]);

    auto offset = stride_[old_dim] * range.beg;
    return MakeSliceDim<old_dim + 1, new_dim + 1, D>(new_shape, new_stride,
                                                     std::forward<S>(slices)...) +
           offset;
  }

  template <size_t old_dim, size_t new_dim, int32_t D>
  LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], detail::AllTag) const {
    static_assert(new_dim < D);
    static_assert(old_dim < kDim);
    new_stride[new_dim] = stride_[old_dim];
    new_shape[new_dim] = shape_[old_dim];
    return 0;
  }

  template <size_t old_dim, size_t new_dim, int32_t D, typename... S>
  LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], detail::AllTag,
                                S &&...slices) const {
    static_assert(new_dim < D);
    static_assert(old_dim < kDim);
    new_stride[new_dim] = stride_[old_dim];
    new_shape[new_dim] = shape_[old_dim];
    return MakeSliceDim<old_dim + 1, new_dim + 1, D>(new_shape, new_stride,
                                                     std::forward<S>(slices)...);
  }

  template <size_t old_dim, size_t new_dim, int32_t D, typename Index>
  LINALG_HD size_t MakeSliceDim(DMLC_ATTRIBUTE_UNUSED size_t new_shape[D],
                                DMLC_ATTRIBUTE_UNUSED size_t new_stride[D], Index i) const {
    static_assert(old_dim < kDim);
    return stride_[old_dim] * i;
  }

  template <size_t old_dim, size_t new_dim, int32_t D, typename Index, typename... S>
  LINALG_HD std::enable_if_t<std::is_integral_v<Index>, size_t> MakeSliceDim(
      size_t new_shape[D], size_t new_stride[D], Index i, S &&...slices) const {
    static_assert(old_dim < kDim);
    auto offset = stride_[old_dim] * i;
    auto res =
        MakeSliceDim<old_dim + 1, new_dim, D>(new_shape, new_stride, std::forward<S>(slices)...);
    return res + offset;
  }

 public:
  size_t constexpr static kValueSize = sizeof(T);
  size_t constexpr static kDimension = kDim;

 public:
  template <typename I, std::int32_t D>
  LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], DeviceOrd device)
      : TensorView{data, shape, device, Order::kC} {}

  template <typename I, int32_t D>
  LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], DeviceOrd device, Order order)
      : data_{data}, ptr_{data_.data()}, device_{device} {
    static_assert(D > 0 && D <= kDim, "Invalid shape.");
    // shape
    detail::UnrollLoop<D>([&](auto i) { shape_[i] = shape[i]; });
    for (auto i = D; i < kDim; ++i) {
      shape_[i] = 1;
    }
    // stride
    switch (order) {
      case Order::kC: {
        detail::CalcStride(shape_, stride_);
        break;
      }
      case Order::kF: {
        detail::CalcStride<kDim, true>(shape_, stride_);
        break;
      }
      default: {
        SPAN_CHECK(false);
      }
    }
    // size
    this->CalcSize();
  }

  template <typename I, std::int32_t D>
  LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], I const (&stride)[D],
                       DeviceOrd device)
      : data_{data}, ptr_{data_.data()}, device_{device} {
    static_assert(D == kDim, "Invalid shape & stride.");
    detail::UnrollLoop<D>([&](auto i) {
      shape_[i] = shape[i];
      stride_[i] = stride[i];
    });
    this->CalcSize();
  }

  template <
      typename U,
      std::enable_if_t<common::detail::IsAllowedElementTypeConversion<U, T>::value> * = nullptr>
  LINALG_HD TensorView(TensorView<U, kDim> const &that)  // NOLINT
      : data_{that.Values()}, ptr_{data_.data()}, size_{that.Size()}, device_{that.Device()} {
    detail::UnrollLoop<kDim>([&](auto i) {
      stride_[i] = that.Stride(i);
      shape_[i] = that.Shape(i);
    });
  }

  template <typename... Index, detail::EnableIfIntegral<Index...> * = nullptr>
  LINALG_HD T &operator()(Index &&...index) {
    static_assert(sizeof...(index) <= kDim, "Invalid index.");
    size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward<Index>(index)...);
    assert(offset < data_.size() && "Out of bound access.");
    return ptr_[offset];
  }

  template <typename... Index, detail::EnableIfIntegral<Index...> * = nullptr>
  LINALG_HD T const &operator()(Index &&...index) const {
    static_assert(sizeof...(index) <= kDim, "Invalid index.");
    size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward<Index>(index)...);
    assert(offset < data_.size() && "Out of bound access.");
    return ptr_[offset];
  }

  template <typename... S>
  LINALG_HD auto Slice(S &&...slices) const {
    static_assert(sizeof...(slices) <= kDim, "Invalid slice.");
    int32_t constexpr kNewDim{detail::CalcSliceDim<detail::IndexToTag<S>...>()};
    size_t new_shape[kNewDim];
    size_t new_stride[kNewDim];
    auto offset = MakeSliceDim<0, 0, kNewDim>(new_shape, new_stride, std::forward<S>(slices)...);
    // ret is a different type since the dimension changes, so we cannot access its
    // private fields.
    TensorView<T, kNewDim> ret{data_.subspan(data_.empty() ? 0 : offset), new_shape, new_stride,
                               device_};
    return ret;
  }

  LINALG_HD auto Shape() const { return common::Span<size_t const, kDim>{shape_}; }
  LINALG_HD auto Shape(size_t i) const { return shape_[i]; }
  LINALG_HD auto Stride() const { return common::Span<size_t const, kDim>{stride_}; }
  LINALG_HD auto Stride(size_t i) const { return stride_[i]; }

  [[nodiscard]] LINALG_HD std::size_t Size() const { return size_; }
  [[nodiscard]] bool Empty() const { return Size() == 0; }
  [[nodiscard]] LINALG_HD bool Contiguous() const {
    return data_.size() == this->Size() || this->CContiguous() || this->FContiguous();
  }
  [[nodiscard]] LINALG_HD bool CContiguous() const {
    StrideT stride;
    static_assert(std::is_same_v<decltype(stride), decltype(stride_)>);
    // It's contiguous if the stride can be calculated from the shape.
    detail::CalcStride(shape_, stride);
    return common::Span<size_t const, kDim>{stride_} == common::Span<size_t const, kDim>{stride};
  }
  [[nodiscard]] LINALG_HD bool FContiguous() const {
    StrideT stride;
    static_assert(std::is_same_v<decltype(stride), decltype(stride_)>);
    // It's contiguous if the stride can be calculated from the shape.
    detail::CalcStride<kDim, true>(shape_, stride);
    return common::Span<size_t const, kDim>{stride_} == common::Span<size_t const, kDim>{stride};
  }
  LINALG_HD auto Values() const -> decltype(data_) const & { return data_; }
  LINALG_HD auto Device() const { return device_; }
};
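
// Usage sketch (host data; the view makes no copy):
//
//   std::vector<float> storage(2 * 3 * 4, 0.0f);
//   auto t = TensorView<float, 3>{
//       {storage.data(), storage.size()}, {2, 3, 4}, DeviceOrd::CPU()};
//   t(0, 1, 2) = 42.0f;
//   auto s = t.Slice(0, All(), Range(1, 3));
//   // s is a TensorView<float, 2> with shape (3, 2), and s(1, 1) == 42.0f.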

template <typename Container, typename... S,
          std::enable_if_t<!common::detail::IsSpan<Container>::value &&
                           !std::is_pointer_v<Container>> * = nullptr>
auto MakeTensorView(Context const *ctx, Container &data, S &&...shape) {  // NOLINT
  using T = std::conditional_t<std::is_const_v<Container>,
                               std::add_const_t<typename Container::value_type>,
                               typename Container::value_type>;
  std::size_t in_shape[sizeof...(S)];
  detail::IndexToArr(in_shape, std::forward<S>(shape)...);
  return TensorView<T, sizeof...(S)>{data, in_shape, ctx->Device()};
}

template <typename T, decltype(common::dynamic_extent) ext, typename... S>
LINALG_HD auto MakeTensorView(DeviceOrd device, common::Span<T, ext> data, S &&...shape) {
  std::size_t in_shape[sizeof...(S)];
  detail::IndexToArr(in_shape, std::forward<S>(shape)...);
  return TensorView<T, sizeof...(S)>{data, in_shape, device};
}

template <typename T, decltype(common::dynamic_extent) ext, typename... S>
auto MakeTensorView(Context const *ctx, common::Span<T, ext> data, S &&...shape) {
  return MakeTensorView(ctx->Device(), data, std::forward<S>(shape)...);
}

template <typename T, decltype(common::dynamic_extent) ext, typename... S>
auto MakeTensorView(Context const *ctx, Order order, common::Span<T, ext> data, S &&...shape) {
  std::size_t in_shape[sizeof...(S)];
  detail::IndexToArr(in_shape, std::forward<S>(shape)...);
  return TensorView<T, sizeof...(S)>{data, in_shape, ctx->Device(), order};
}

template <typename T, typename... S>
auto MakeTensorView(Context const *ctx, HostDeviceVector<T> *data, S &&...shape) {
  auto span = ctx->IsCPU() ? data->HostSpan() : data->DeviceSpan();
  return MakeTensorView(ctx->Device(), span, std::forward<S>(shape)...);
}

template <typename T, typename... S>
auto MakeTensorView(Context const *ctx, HostDeviceVector<T> const *data, S &&...shape) {
  auto span = ctx->IsCPU() ? data->ConstHostSpan() : data->ConstDeviceSpan();
  return MakeTensorView(ctx->Device(), span, std::forward<S>(shape)...);
}
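
// Usage sketch: the Context overloads deduce the element type and the number
// of dimensions, here a TensorView<double, 2> over a std::vector:
//
//   Context ctx;  // CPU by default
//   std::vector<double> data(6, 1.0);
//   auto m = MakeTensorView(&ctx, data, 2, 3);
//   // m.Shape(0) == 2, m.Shape(1) == 3, m(1, 2) == 1.0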

template <size_t D>
LINALG_HD auto UnravelIndex(size_t idx, common::Span<size_t const, D> shape) {
  if (idx > std::numeric_limits<uint32_t>::max()) {
    return detail::UnravelImpl<uint64_t, D>(static_cast<uint64_t>(idx), shape);
  } else {
    return detail::UnravelImpl<uint32_t, D>(static_cast<uint32_t>(idx), shape);
  }
}

template <size_t D>
LINALG_HD auto UnravelIndex(size_t idx, std::size_t const (&shape)[D]) {
  return UnravelIndex(idx, common::Span<std::size_t const, D>{shape});
}

template <typename... S>
LINALG_HD auto UnravelIndex(std::size_t idx, S... shape) {
  std::size_t s[sizeof...(S)];
  detail::IndexToArr(s, shape...);
  return UnravelIndex(idx, common::Span<std::size_t const, sizeof...(S)>(s));
}
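
// Usage sketch: the inverse of row-major linear indexing, similar to
// numpy.unravel_index. The result is a std::tuple, so structured bindings
// work:
//
//   auto [i, j] = UnravelIndex(11, 3, 5);  // i == 2, j == 1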

template <typename T>
using VectorView = TensorView<T, 1>;

template <typename T>
auto MakeVec(T *ptr, size_t s, DeviceOrd device = DeviceOrd::CPU()) {
  return linalg::TensorView<T, 1>{{ptr, s}, {s}, device};
}

template <typename T>
auto MakeVec(HostDeviceVector<T> *data) {
  return MakeVec(data->Device().IsCPU() ? data->HostPointer() : data->DevicePointer(),
                 data->Size(), data->Device());
}

template <typename T>
auto MakeVec(HostDeviceVector<T> const *data) {
  return MakeVec(data->Device().IsCPU() ? data->ConstHostPointer() : data->ConstDevicePointer(),
                 data->Size(), data->Device());
}
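
// Usage sketch: a 1-d view over contiguous host memory.
//
//   std::vector<float> raw{1.0f, 2.0f, 3.0f};
//   auto v = MakeVec(raw.data(), raw.size());
//   v(1) += 1.0f;  // raw[1] is now 3.0f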

template <typename T>
using MatrixView = TensorView<T, 2>;

template <typename T, std::int32_t D>
Json ArrayInterface(TensorView<T const, D> const &t) {
  Json array_interface{Object{}};
  array_interface["data"] = std::vector<Json>(2);
  array_interface["data"][0] = Integer{reinterpret_cast<int64_t>(t.Values().data())};
  array_interface["data"][1] = Boolean{true};
  if (t.Device().IsCUDA()) {
    // Change this once we have different CUDA stream.
    array_interface["stream"] = Integer{2};
  }
  std::vector<Json> shape(t.Shape().size());
  std::vector<Json> stride(t.Stride().size());
  for (size_t i = 0; i < t.Shape().size(); ++i) {
    shape[i] = Integer(t.Shape(i));
    stride[i] = Integer(t.Stride(i) * sizeof(T));
  }
  array_interface["shape"] = Array{shape};
  array_interface["strides"] = Array{stride};
  array_interface["version"] = 3;

  char constexpr kT = detail::ArrayInterfaceHandler::TypeChar<T>();
  static_assert(kT != '\0');
  if (DMLC_LITTLE_ENDIAN) {
    array_interface["typestr"] = String{"<" + (kT + std::to_string(sizeof(T)))};
  } else {
    array_interface["typestr"] = String{">" + (kT + std::to_string(sizeof(T)))};
  }
  return array_interface;
}

template <typename T, int32_t D>
Json ArrayInterface(TensorView<T, D> const &t) {
  TensorView<T const, D> const &as_const = t;
  auto res = ArrayInterface(as_const);
  res["data"][1] = Boolean{false};
  return res;
}

template <typename T, int32_t D>
auto ArrayInterfaceStr(TensorView<T const, D> const &t) {
  std::string str;
  Json::Dump(ArrayInterface(t), &str);
  return str;
}

template <typename T, int32_t D>
auto ArrayInterfaceStr(TensorView<T, D> const &t) {
  std::string str;
  Json::Dump(ArrayInterface(t), &str);
  return str;
}

template <typename T>
auto Make1dInterface(T const *vec, std::size_t len) {
  Context ctx;
  auto t = linalg::MakeTensorView(&ctx, common::Span{vec, len}, len);
  auto str = linalg::ArrayInterfaceStr(t);
  return str;
}
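
// Usage sketch: for two floats on a little-endian machine the returned
// document looks roughly like (member order depends on the JSON writer):
//
//   std::vector<float> x{1.0f, 2.0f};
//   auto str = Make1dInterface(x.data(), x.size());
//   // {"data": [<address>, true], "shape": [2], "strides": [4],
//   //  "typestr": "<f4", "version": 3}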

template <typename T, int32_t kDim = 5>
class Tensor {
 public:
  using ShapeT = std::size_t[kDim];
  using StrideT = ShapeT;

 private:
  HostDeviceVector<T> data_;
  ShapeT shape_{0};
  Order order_{Order::kC};

  template <typename I, std::int32_t D>
  void Initialize(I const (&shape)[D], DeviceOrd device) {
    static_assert(D <= kDim, "Invalid shape.");
    std::copy(shape, shape + D, shape_);
    for (auto i = D; i < kDim; ++i) {
      shape_[i] = 1;
    }
    if (!device.IsCPU()) {
      data_.SetDevice(device);
      data_.ConstDevicePointer();  // Pull to device;
    }
    CHECK_EQ(data_.Size(), detail::CalcSize(shape_));
  }

 public:
  Tensor() = default;

  template <typename I, int32_t D>
  explicit Tensor(I const (&shape)[D], DeviceOrd device, Order order = kC)
      : Tensor{common::Span<I const, D>{shape}, device, order} {}

  template <typename I, size_t D>
  explicit Tensor(common::Span<I const, D> shape, DeviceOrd device, Order order = kC)
      : order_{order} {
    // No device unroll as this is a host only function.
    std::copy(shape.data(), shape.data() + D, shape_);
    for (auto i = D; i < kDim; ++i) {
      shape_[i] = 1;
    }
    auto size = detail::CalcSize(shape_);
    if (!device.IsCPU()) {
      data_.SetDevice(device);
    }
    data_.Resize(size);
    if (!device.IsCPU()) {
      data_.DevicePointer();  // Pull to device
    }
  }

  template <typename It, typename I, int32_t D>
  explicit Tensor(It begin, It end, I const (&shape)[D], DeviceOrd device, Order order = kC)
      : order_{order} {
    auto &h_vec = data_.HostVector();
    h_vec.insert(h_vec.begin(), begin, end);
    // shape
    this->Initialize(shape, device);
  }

  template <typename I, int32_t D>
  explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], DeviceOrd device,
                  Order order = kC)
      : order_{order} {
    auto &h_vec = data_.HostVector();
    h_vec = data;
    // shape
    this->Initialize(shape, device);
  }

  template <typename... Index>
  T &operator()(Index &&...idx) {
    return this->HostView()(std::forward<Index>(idx)...);
  }

  template <typename... Index>
  T const &operator()(Index &&...idx) const {
    return this->HostView()(std::forward<Index>(idx)...);
  }

  auto View(DeviceOrd device) {
    if (device.IsCPU()) {
      auto span = data_.HostSpan();
      return TensorView<T, kDim>{span, shape_, device, order_};
    } else {
      data_.SetDevice(device);
      auto span = data_.DeviceSpan();
      return TensorView<T, kDim>{span, shape_, device, order_};
    }
  }
  auto View(DeviceOrd device) const {
    if (device.IsCPU()) {
      auto span = data_.ConstHostSpan();
      return TensorView<T const, kDim>{span, shape_, device, order_};
    } else {
      data_.SetDevice(device);
      auto span = data_.ConstDeviceSpan();
      return TensorView<T const, kDim>{span, shape_, device, order_};
    }
  }

  auto HostView() { return this->View(DeviceOrd::CPU()); }
  auto HostView() const { return this->View(DeviceOrd::CPU()); }

  [[nodiscard]] std::size_t Size() const { return data_.Size(); }
  [[nodiscard]] bool Empty() const { return Size() == 0; }

  auto Shape() const { return common::Span<size_t const, kDim>{shape_}; }
  auto Shape(size_t i) const { return shape_[i]; }

  HostDeviceVector<T> *Data() { return &data_; }
  HostDeviceVector<T> const *Data() const { return &data_; }

  template <typename Fn>
  void ModifyInplace(Fn &&fn) {
    fn(this->Data(), common::Span<size_t, kDim>{this->shape_});
    CHECK_EQ(this->Data()->Size(), detail::CalcSize(this->shape_))
        << "Inconsistent size after modification.";
  }

  template <typename... S, detail::EnableIfIntegral<S...> * = nullptr>
  void Reshape(S &&...s) {
    static_assert(sizeof...(S) <= kDim, "Invalid shape.");
    detail::ReshapeImpl<0>(shape_, std::forward<S>(s)...);
    auto constexpr kEnd = sizeof...(S);
    static_assert(kEnd <= kDim, "Invalid shape.");
    std::fill(shape_ + kEnd, shape_ + kDim, 1);
    auto n = detail::CalcSize(shape_);
    data_.Resize(n);
  }

  template <size_t D>
  void Reshape(common::Span<size_t const, D> shape) {
    static_assert(D <= kDim, "Invalid shape.");
    std::copy(shape.data(), shape.data() + D, this->shape_);
    std::fill(shape_ + D, shape_ + kDim, 1);
    auto n = detail::CalcSize(shape_);
    data_.Resize(n);
  }

  template <size_t D>
  void Reshape(size_t (&shape)[D]) {
    this->Reshape(common::Span<size_t const, D>{shape});
  }

  template <typename... S>
  auto Slice(S &&...slices) const {
    return this->HostView().Slice(std::forward<S>(slices)...);
  }

  template <typename... S>
  auto Slice(S &&...slices) {
    return this->HostView().Slice(std::forward<S>(slices)...);
  }

  void SetDevice(DeviceOrd device) const { data_.SetDevice(device); }
  [[nodiscard]] DeviceOrd Device() const { return data_.Device(); }
};
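
// Usage sketch: Tensor owns its storage; indexing and slicing go through a
// TensorView obtained from View()/HostView().
//
//   Tensor<float, 2> t{{3, 4}, DeviceOrd::CPU()};
//   auto h = t.HostView();
//   h(0, 1) = 1.5f;
//   t.Reshape(2, 6);  // still 12 elements, new shape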

template <typename T>
using Vector = Tensor<T, 1>;

template <typename T>
using Matrix = Tensor<T, 2>;

template <typename T, typename... Index>
auto Empty(Context const *ctx, Index &&...index) {
  Tensor<T, sizeof...(Index)> t;
  t.SetDevice(ctx->Device());
  t.Reshape(index...);
  return t;
}

template <typename T, typename... Index>
auto Constant(Context const *ctx, T v, Index &&...index) {
  Tensor<T, sizeof...(Index)> t;
  t.SetDevice(ctx->Device());
  t.Reshape(index...);
  t.Data()->Fill(std::move(v));
  return t;
}

template <typename T, typename... Index>
auto Zeros(Context const *ctx, Index &&...index) {
  return Constant(ctx, static_cast<T>(0), index...);
}
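
// Usage sketch:
//
//   Context ctx;
//   auto z = Zeros<float>(&ctx, 2, 3);  // Tensor<float, 2>, filled with 0
//   auto c = Constant(&ctx, 1.5, 4);    // Tensor<double, 1>, filled with 1.5
//   auto e = Empty<int>(&ctx, 8);       // allocated but uninitialized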

// Only the first axis is supported for now.
template <typename T, int32_t D>
void Stack(Tensor<T, D> *l, Tensor<T, D> const &r) {
  if (r.Device().IsCUDA()) {
    l->SetDevice(r.Device());
  }
  l->ModifyInplace([&](HostDeviceVector<T> *data, common::Span<size_t, D> shape) {
    for (size_t i = 1; i < D; ++i) {
      if (shape[i] == 0) {
        shape[i] = r.Shape(i);
      } else {
        CHECK_EQ(shape[i], r.Shape(i));
      }
    }
    data->Extend(*r.Data());
    shape[0] = l->Shape(0) + r.Shape(0);
  });
}
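
// Usage sketch: concatenation along the first axis; the trailing dimensions
// must match.
//
//   Context ctx;
//   auto a = Zeros<float>(&ctx, 2, 3);
//   auto b = Zeros<float>(&ctx, 4, 3);
//   Stack(&a, b);  // a now has shape (6, 3)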
}  // namespace xgboost::linalg

#if defined(LINALG_HD)
#undef LINALG_HD
#endif  // defined(LINALG_HD)
#endif  // XGBOOST_LINALG_H_