xgboost
linalg.h
#ifndef XGBOOST_LINALG_H_
#define XGBOOST_LINALG_H_

#include <dmlc/endian.h>
#include <xgboost/base.h>
#include <xgboost/context.h>
#include <xgboost/json.h>
#include <xgboost/span.h>

#include <algorithm>
#include <cassert>
#include <cstddef>  // for size_t
#include <cstdint>  // for int32_t
#include <limits>
#include <string>
#include <tuple>  // for make_tuple
#include <type_traits>
#include <utility>
#include <vector>

#if defined(_MSC_VER)
#include <intrin.h>
#endif  // defined(_MSC_VER)

// Decouple it from xgboost.
#ifndef LINALG_HD
#if defined(__CUDA__) || defined(__NVCC__)
#define LINALG_HD __host__ __device__
#else
#define LINALG_HD
#endif  // defined (__CUDA__) || defined(__NVCC__)
#endif  // LINALG_HD

namespace xgboost::linalg {
namespace detail {

struct ArrayInterfaceHandler {
  template <typename T>
  static constexpr char TypeChar() {
    return (std::is_floating_point_v<T>
                ? 'f'
                : (std::is_integral_v<T> ? (std::is_signed_v<T> ? 'i' : 'u') : '\0'));
  }
};

template <size_t dim, typename S, typename Head, size_t D>
constexpr size_t Offset(S (&strides)[D], size_t n, Head head) {
  static_assert(dim < D);
  return n + head * strides[dim];
}

template <size_t dim, typename S, size_t D, typename Head, typename... Tail>
constexpr std::enable_if_t<sizeof...(Tail) != 0, size_t> Offset(S (&strides)[D], size_t n,
                                                                Head head, Tail &&...rest) {
  static_assert(dim < D);
  return Offset<dim + 1>(strides, n + (head * strides[dim]), std::forward<Tail>(rest)...);
}

template <int32_t D, bool f_array = false>
constexpr void CalcStride(size_t const (&shape)[D], size_t (&stride)[D]) {
  if (f_array) {
    stride[0] = 1;
    for (int32_t s = 1; s < D; ++s) {
      stride[s] = shape[s - 1] * stride[s - 1];
    }
  } else {
    stride[D - 1] = 1;
    for (int32_t s = D - 2; s >= 0; --s) {
      stride[s] = shape[s + 1] * stride[s + 1];
    }
  }
}
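
// Usage sketch: for a shape of {2, 3, 4}, C order (row major) and F order
// (column major) produce the following strides.
//
//   std::size_t shape[3] = {2, 3, 4};
//   std::size_t stride[3];
//   CalcStride(shape, stride);           // row major:    {12, 4, 1}
//   CalcStride<3, true>(shape, stride);  // column major: {1, 2, 6}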

struct AllTag {};

struct IntTag {};

template <typename I>
struct RangeTag {
  I beg;
  I end;
  [[nodiscard]] constexpr size_t Size() const { return end - beg; }
};

template <typename T>
constexpr int32_t CalcSliceDim() {
  return std::is_same_v<T, IntTag> ? 0 : 1;
}

template <typename T, typename... S>
constexpr std::enable_if_t<sizeof...(S) != 0, int32_t> CalcSliceDim() {
  return CalcSliceDim<T>() + CalcSliceDim<S...>();
}

template <int32_t D>
constexpr size_t CalcSize(size_t (&shape)[D]) {
  size_t size = 1;
  for (auto d : shape) {
    size *= d;
  }
  return size;
}

template <typename S>
using RemoveCRType = std::remove_const_t<std::remove_reference_t<S>>;

template <typename S>
using IndexToTag = std::conditional_t<std::is_integral_v<RemoveCRType<S>>, IntTag, S>;

template <int32_t n, typename Fn>
LINALG_HD constexpr auto UnrollLoop(Fn fn) {
#if defined __CUDA_ARCH__
#pragma unroll n
#endif  // defined __CUDA_ARCH__
  for (int32_t i = 0; i < n; ++i) {
    fn(i);
  }
}

template <typename T>
int32_t NativePopc(T v) {
  int c = 0;
  for (; v != 0; v &= v - 1) c++;
  return c;
}

inline LINALG_HD int Popc(uint32_t v) {
#if defined(__CUDA_ARCH__)
  return __popc(v);
#elif defined(__GNUC__) || defined(__clang__)
  return __builtin_popcount(v);
#elif defined(_MSC_VER)
  return __popcnt(v);
#else
  return NativePopc(v);
#endif  // compiler
}

inline LINALG_HD int Popc(uint64_t v) {
#if defined(__CUDA_ARCH__)
  return __popcll(v);
#elif defined(__GNUC__) || defined(__clang__)
  return __builtin_popcountll(v);
#elif defined(_MSC_VER) && defined(_M_X64)
  return __popcnt64(v);
#else
  return NativePopc(v);
#endif  // compiler
}
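
// Usage sketch: Popc returns the number of set bits, e.g. Popc(0b1011u) == 3.
// For a power-of-two extent s, Popc(s - 1) equals log2(s), which lets
// UnravelImpl below replace a division with a shift.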

template <std::size_t D, typename Head>
LINALG_HD void IndexToArr(std::size_t (&arr)[D], Head head) {
  static_assert(std::is_integral_v<std::remove_reference_t<Head>>, "Invalid index type.");
  arr[D - 1] = head;
}

template <std::size_t D, typename Head, typename... Rest>
LINALG_HD void IndexToArr(std::size_t (&arr)[D], Head head, Rest &&...index) {
  static_assert(sizeof...(Rest) < D, "Index overflow.");
  static_assert(std::is_integral_v<std::remove_reference_t<Head>>, "Invalid index type.");
  arr[D - sizeof...(Rest) - 1] = head;
  IndexToArr(arr, std::forward<Rest>(index)...);
}

template <class T, std::size_t N, std::size_t... Idx>
constexpr auto ArrToTuple(T (&arr)[N], std::index_sequence<Idx...>) {
  return std::make_tuple(arr[Idx]...);
}

template <class T, std::size_t N>
constexpr auto ArrToTuple(T (&arr)[N]) {
  return ArrToTuple(arr, std::make_index_sequence<N>{});
}

// Uint division optimization inspired by the CIndexer in cupy. Division is slow on both
// CPU and GPU, especially for 64 bit integers. So here we first try to avoid 64 bit when
// the index is small, then try to avoid division when the extent is a power of 2.
template <typename I, std::int32_t D>
LINALG_HD auto UnravelImpl(I idx, common::Span<size_t const, D> shape) {
  std::size_t index[D]{0};
  static_assert(std::is_signed_v<decltype(D)>,
                "Don't change the type without changing the for loop.");
  auto const sptr = shape.data();
  for (int32_t dim = D; --dim > 0;) {
    auto s = static_cast<std::remove_const_t<std::remove_reference_t<I>>>(sptr[dim]);
    if (s & (s - 1)) {
      auto t = idx / s;
      index[dim] = idx - t * s;
      idx = t;
    } else {  // power of 2
      index[dim] = idx & (s - 1);
      idx >>= Popc(s - 1);
    }
  }
  index[0] = idx;
  return ArrToTuple(index);
}
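
// Worked example: for shape (3, 4) and idx = 11, the extent 4 is a power of
// two, so index[1] = 11 & 3 = 3 and idx >>= Popc(3) leaves idx = 2, giving
// (2, 3). For a non-power-of-two extent such as 5, the general branch runs:
// t = 11 / 5 = 2, index[1] = 11 - 2 * 5 = 1, giving (2, 1).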

template <size_t dim, typename I, int32_t D>
void ReshapeImpl(size_t (&out_shape)[D], I s) {
  static_assert(dim < D);
  out_shape[dim] = s;
}

template <size_t dim, int32_t D, typename... S, typename I,
          std::enable_if_t<sizeof...(S) != 0> * = nullptr>
void ReshapeImpl(size_t (&out_shape)[D], I &&s, S &&...rest) {
  static_assert(dim < D);
  out_shape[dim] = s;
  ReshapeImpl<dim + 1>(out_shape, std::forward<S>(rest)...);
}

template <class...>
struct Conjunction : std::true_type {};
template <class B1>
struct Conjunction<B1> : B1 {};
template <class B1, class... Bn>
struct Conjunction<B1, Bn...>
    : std::conditional_t<static_cast<bool>(B1::value), Conjunction<Bn...>, B1> {};

template <typename... Index>
using IsAllIntegral = Conjunction<std::is_integral<std::remove_reference_t<Index>>...>;

template <typename... Index>
using EnableIfIntegral = std::enable_if_t<IsAllIntegral<Index...>::value>;
}  // namespace detail

constexpr detail::AllTag All() { return {}; }

template <typename I>
constexpr detail::RangeTag<I> Range(I beg, I end) {
  return {beg, end};
}

enum Order : std::uint8_t {
  kC,  // Row major.
  kF,  // Column major.
};

template <typename T, int32_t kDim>
class TensorView {
 public:
  using ShapeT = std::size_t[kDim];
  using StrideT = ShapeT;

  using element_type = T;                  // NOLINT
  using value_type = std::remove_cv_t<T>;  // NOLINT

 private:
  StrideT stride_{1};
  ShapeT shape_{0};
  common::Span<T> data_;
  T *ptr_{nullptr};  // pointer into data_ to avoid bound checks.

  size_t size_{0};
  DeviceOrd device_;

  // Unlike `Tensor`, data_ can have arbitrary size since this is just a view.
  LINALG_HD void CalcSize() {
    if (data_.empty()) {
      size_ = 0;
    } else {
      size_ = detail::CalcSize(shape_);
    }
  }

  template <size_t old_dim, size_t new_dim, int32_t D, typename I>
  LINALG_HD size_t MakeSliceDim(std::size_t new_shape[D], std::size_t new_stride[D],
                                detail::RangeTag<I> &&range) const {
    static_assert(new_dim < D);
    static_assert(old_dim < kDim);
    new_stride[new_dim] = stride_[old_dim];
    new_shape[new_dim] = range.Size();
    assert(static_cast<decltype(shape_[old_dim])>(range.end) <= shape_[old_dim]);

    auto offset = stride_[old_dim] * range.beg;
    return offset;
  }

  template <size_t old_dim, size_t new_dim, int32_t D, typename I, typename... S>
  LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D],
                                detail::RangeTag<I> &&range, S &&...slices) const {
    static_assert(new_dim < D);
    static_assert(old_dim < kDim);
    new_stride[new_dim] = stride_[old_dim];
    new_shape[new_dim] = range.Size();
    assert(static_cast<decltype(shape_[old_dim])>(range.end) <= shape_[old_dim]);

    auto offset = stride_[old_dim] * range.beg;
    return MakeSliceDim<old_dim + 1, new_dim + 1, D>(new_shape, new_stride,
                                                     std::forward<S>(slices)...) +
           offset;
  }

  template <size_t old_dim, size_t new_dim, int32_t D>
  LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], detail::AllTag) const {
    static_assert(new_dim < D);
    static_assert(old_dim < kDim);
    new_stride[new_dim] = stride_[old_dim];
    new_shape[new_dim] = shape_[old_dim];
    return 0;
  }

  template <size_t old_dim, size_t new_dim, int32_t D, typename... S>
  LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], detail::AllTag,
                                S &&...slices) const {
    static_assert(new_dim < D);
    static_assert(old_dim < kDim);
    new_stride[new_dim] = stride_[old_dim];
    new_shape[new_dim] = shape_[old_dim];
    return MakeSliceDim<old_dim + 1, new_dim + 1, D>(new_shape, new_stride,
                                                     std::forward<S>(slices)...);
  }

  template <size_t old_dim, size_t new_dim, int32_t D, typename Index>
  LINALG_HD size_t MakeSliceDim(DMLC_ATTRIBUTE_UNUSED size_t new_shape[D],
                                DMLC_ATTRIBUTE_UNUSED size_t new_stride[D], Index i) const {
    static_assert(old_dim < kDim);
    return stride_[old_dim] * i;
  }

  template <size_t old_dim, size_t new_dim, int32_t D, typename Index, typename... S>
  LINALG_HD std::enable_if_t<std::is_integral_v<Index>, size_t> MakeSliceDim(
      size_t new_shape[D], size_t new_stride[D], Index i, S &&...slices) const {
    static_assert(old_dim < kDim);
    auto offset = stride_[old_dim] * i;
    auto res =
        MakeSliceDim<old_dim + 1, new_dim, D>(new_shape, new_stride, std::forward<S>(slices)...);
    return res + offset;
  }

 public:
  size_t constexpr static kValueSize = sizeof(T);
  size_t constexpr static kDimension = kDim;

 public:
  template <typename I, std::int32_t D>
  LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], DeviceOrd device)
      : TensorView{data, shape, device, Order::kC} {}

  template <typename I, int32_t D>
  LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], DeviceOrd device, Order order)
      : data_{data}, ptr_{data_.data()}, device_{device} {
    static_assert(D > 0 && D <= kDim, "Invalid shape.");
    // shape
    detail::UnrollLoop<D>([&](auto i) { shape_[i] = shape[i]; });
    for (auto i = D; i < kDim; ++i) {
      shape_[i] = 1;
    }
    // stride
    switch (order) {
      case Order::kC: {
        detail::CalcStride(shape_, stride_);
        break;
      }
      case Order::kF: {
        detail::CalcStride<kDim, true>(shape_, stride_);
        break;
      }
      default: {
        SPAN_CHECK(false);
      }
    }
    // size
    this->CalcSize();
  }

  template <typename I, std::int32_t D>
  LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], I const (&stride)[D],
                       DeviceOrd device)
      : data_{data}, ptr_{data_.data()}, device_{device} {
    static_assert(D == kDim, "Invalid shape & stride.");
    detail::UnrollLoop<D>([&](auto i) {
      shape_[i] = shape[i];
      stride_[i] = stride[i];
    });
    this->CalcSize();
  }

  template <
      typename U,
      std::enable_if_t<common::detail::IsAllowedElementTypeConversion<U, T>::value> * = nullptr>
  LINALG_HD TensorView(TensorView<U, kDim> const &that)  // NOLINT
      : data_{that.Values()}, ptr_{data_.data()}, size_{that.Size()}, device_{that.Device()} {
    detail::UnrollLoop<kDim>([&](auto i) {
      stride_[i] = that.Stride(i);
      shape_[i] = that.Shape(i);
    });
  }

  template <typename... Index, detail::EnableIfIntegral<Index...> * = nullptr>
  LINALG_HD T &operator()(Index &&...index) {
    static_assert(sizeof...(index) <= kDim, "Invalid index.");
    size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward<Index>(index)...);
    assert(offset < data_.size() && "Out of bound access.");
    return ptr_[offset];
  }

  template <typename... Index, detail::EnableIfIntegral<Index...> * = nullptr>
  LINALG_HD T const &operator()(Index &&...index) const {
    static_assert(sizeof...(index) <= kDim, "Invalid index.");
    size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward<Index>(index)...);
    assert(offset < data_.size() && "Out of bound access.");
    return ptr_[offset];
  }

  template <typename... S>
  LINALG_HD auto Slice(S &&...slices) const {
    static_assert(sizeof...(slices) <= kDim, "Invalid slice.");
    int32_t constexpr kNewDim{detail::CalcSliceDim<detail::IndexToTag<S>...>()};
    size_t new_shape[kNewDim];
    size_t new_stride[kNewDim];
    auto offset = MakeSliceDim<0, 0, kNewDim>(new_shape, new_stride, std::forward<S>(slices)...);
    // ret is a different type since the dimension changes, so we cannot access its
    // private fields.
    TensorView<T, kNewDim> ret{data_.subspan(data_.empty() ? 0 : offset), new_shape, new_stride,
                               device_};
    return ret;
  }

  LINALG_HD auto Shape() const { return common::Span<size_t const, kDim>{shape_}; }
  LINALG_HD auto Shape(size_t i) const { return shape_[i]; }
  LINALG_HD auto Stride() const { return common::Span<size_t const, kDim>{stride_}; }
  LINALG_HD auto Stride(size_t i) const { return stride_[i]; }

  [[nodiscard]] LINALG_HD std::size_t Size() const { return size_; }
  [[nodiscard]] bool Empty() const { return Size() == 0; }
  [[nodiscard]] LINALG_HD bool Contiguous() const {
    return data_.size() == this->Size() || this->CContiguous() || this->FContiguous();
  }
  [[nodiscard]] LINALG_HD bool CContiguous() const {
    StrideT stride;
    static_assert(std::is_same_v<decltype(stride), decltype(stride_)>);
    // It's contiguous if the stride can be calculated from the shape.
    detail::CalcStride(shape_, stride);
    return common::Span<size_t const, kDim>{stride_} == common::Span<size_t const, kDim>{stride};
  }
  [[nodiscard]] LINALG_HD bool FContiguous() const {
    StrideT stride;
    static_assert(std::is_same_v<decltype(stride), decltype(stride_)>);
    // It's contiguous if the stride can be calculated from the shape.
    detail::CalcStride<kDim, true>(shape_, stride);
    return common::Span<size_t const, kDim>{stride_} == common::Span<size_t const, kDim>{stride};
  }
  LINALG_HD auto Values() const -> decltype(data_) const & { return data_; }
  LINALG_HD auto Device() const { return device_; }
};
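
// Usage sketch (host data; the view makes no copy):
//
//   std::vector<float> storage(2 * 3 * 4, 0.0f);
//   auto t = TensorView<float, 3>{
//       {storage.data(), storage.size()}, {2, 3, 4}, DeviceOrd::CPU()};
//   t(0, 1, 2) = 42.0f;
//   auto s = t.Slice(0, All(), Range(1, 3));
//   // s is a TensorView<float, 2> with shape (3, 2), and s(1, 1) == 42.0f.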

template <typename Container, typename... S,
          std::enable_if_t<!common::detail::IsSpan<Container>::value &&
                           !std::is_pointer_v<Container>> * = nullptr>
auto MakeTensorView(Context const *ctx, Container &data, S &&...shape) {  // NOLINT
  using T = std::conditional_t<std::is_const_v<Container>,
                               std::add_const_t<typename Container::value_type>,
                               typename Container::value_type>;
  std::size_t in_shape[sizeof...(S)];
  detail::IndexToArr(in_shape, std::forward<S>(shape)...);
  return TensorView<T, sizeof...(S)>{data, in_shape, ctx->Device()};
}

template <typename T, decltype(common::dynamic_extent) ext, typename... S>
LINALG_HD auto MakeTensorView(DeviceOrd device, common::Span<T, ext> data, S &&...shape) {
  std::size_t in_shape[sizeof...(S)];
  detail::IndexToArr(in_shape, std::forward<S>(shape)...);
  return TensorView<T, sizeof...(S)>{data, in_shape, device};
}

template <typename T, decltype(common::dynamic_extent) ext, typename... S>
auto MakeTensorView(Context const *ctx, common::Span<T, ext> data, S &&...shape) {
  return MakeTensorView(ctx->Device(), data, std::forward<S>(shape)...);
}

template <typename T, decltype(common::dynamic_extent) ext, typename... S>
auto MakeTensorView(Context const *ctx, Order order, common::Span<T, ext> data, S &&...shape) {
  std::size_t in_shape[sizeof...(S)];
  detail::IndexToArr(in_shape, std::forward<S>(shape)...);
  return TensorView<T, sizeof...(S)>{data, in_shape, ctx->Device(), order};
}

template <typename T, typename... S>
auto MakeTensorView(Context const *ctx, HostDeviceVector<T> *data, S &&...shape) {
  auto span = ctx->IsCPU() ? data->HostSpan() : data->DeviceSpan();
  return MakeTensorView(ctx->Device(), span, std::forward<S>(shape)...);
}

template <typename T, typename... S>
auto MakeTensorView(Context const *ctx, HostDeviceVector<T> const *data, S &&...shape) {
  auto span = ctx->IsCPU() ? data->ConstHostSpan() : data->ConstDeviceSpan();
  return MakeTensorView(ctx->Device(), span, std::forward<S>(shape)...);
}
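
// Usage sketch: the Context overloads deduce the element type and the number
// of dimensions, here a TensorView<double, 2> over a std::vector:
//
//   Context ctx;  // CPU by default
//   std::vector<double> data(6, 1.0);
//   auto m = MakeTensorView(&ctx, data, 2, 3);
//   // m.Shape(0) == 2, m.Shape(1) == 3, m(1, 2) == 1.0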

template <size_t D>
LINALG_HD auto UnravelIndex(size_t idx, common::Span<size_t const, D> shape) {
  if (idx > std::numeric_limits<uint32_t>::max()) {
    return detail::UnravelImpl<uint64_t, D>(static_cast<uint64_t>(idx), shape);
  } else {
    return detail::UnravelImpl<uint32_t, D>(static_cast<uint32_t>(idx), shape);
  }
}

template <size_t D>
LINALG_HD auto UnravelIndex(size_t idx, std::size_t const (&shape)[D]) {
  return UnravelIndex(idx, common::Span<std::size_t const, D>{shape});
}

template <typename... S>
LINALG_HD auto UnravelIndex(std::size_t idx, S... shape) {
  std::size_t s[sizeof...(S)];
  detail::IndexToArr(s, shape...);
  return UnravelIndex(idx, common::Span<std::size_t const, sizeof...(S)>(s));
}
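
// Usage sketch: the inverse of row-major linear indexing, similar to
// numpy.unravel_index. The result is a std::tuple, so structured bindings
// work:
//
//   auto [i, j] = UnravelIndex(11, 3, 5);  // i == 2, j == 1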

template <typename T>
using VectorView = TensorView<T, 1>;

template <typename T>
auto MakeVec(T *ptr, size_t s, DeviceOrd device = DeviceOrd::CPU()) {
  return linalg::TensorView<T, 1>{{ptr, s}, {s}, device};
}

template <typename T>
auto MakeVec(HostDeviceVector<T> *data) {
  return MakeVec(data->Device().IsCPU() ? data->HostPointer() : data->DevicePointer(),
                 data->Size(), data->Device());
}

template <typename T>
auto MakeVec(HostDeviceVector<T> const *data) {
  return MakeVec(data->Device().IsCPU() ? data->ConstHostPointer() : data->ConstDevicePointer(),
                 data->Size(), data->Device());
}
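
// Usage sketch: a 1-d view over contiguous host memory.
//
//   std::vector<float> raw{1.0f, 2.0f, 3.0f};
//   auto v = MakeVec(raw.data(), raw.size());
//   v(1) += 1.0f;  // raw[1] is now 3.0f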

template <typename T>
using MatrixView = TensorView<T, 2>;

template <typename T, std::int32_t D>
Json ArrayInterface(TensorView<T const, D> const &t) {
  Json array_interface{Object{}};
  array_interface["data"] = std::vector<Json>(2);
  array_interface["data"][0] = Integer{reinterpret_cast<int64_t>(t.Values().data())};
  array_interface["data"][1] = Boolean{true};
  if (t.Device().IsCUDA()) {
    // Change this once we have different CUDA stream.
    array_interface["stream"] = Integer{2};
  }
  std::vector<Json> shape(t.Shape().size());
  std::vector<Json> stride(t.Stride().size());
  for (size_t i = 0; i < t.Shape().size(); ++i) {
    shape[i] = Integer(t.Shape(i));
    stride[i] = Integer(t.Stride(i) * sizeof(T));
  }
  array_interface["shape"] = Array{shape};
  array_interface["strides"] = Array{stride};
  array_interface["version"] = 3;

  char constexpr kT = detail::ArrayInterfaceHandler::TypeChar<T>();
  static_assert(kT != '\0');
  if (DMLC_LITTLE_ENDIAN) {
    array_interface["typestr"] = String{"<" + (kT + std::to_string(sizeof(T)))};
  } else {
    array_interface["typestr"] = String{">" + (kT + std::to_string(sizeof(T)))};
  }
  return array_interface;
}

template <typename T, int32_t D>
Json ArrayInterface(TensorView<T, D> const &t) {
  TensorView<T const, D> const &as_const = t;
  auto res = ArrayInterface(as_const);
  res["data"][1] = Boolean{false};
  return res;
}

template <typename T, int32_t D>
auto ArrayInterfaceStr(TensorView<T const, D> const &t) {
  std::string str;
  Json::Dump(ArrayInterface(t), &str);
  return str;
}

template <typename T, int32_t D>
auto ArrayInterfaceStr(TensorView<T, D> const &t) {
  std::string str;
  Json::Dump(ArrayInterface(t), &str);
  return str;
}

template <typename T>
auto Make1dInterface(T const *vec, std::size_t len) {
  Context ctx;
  auto t = linalg::MakeTensorView(&ctx, common::Span{vec, len}, len);
  auto str = linalg::ArrayInterfaceStr(t);
  return str;
}
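
// Usage sketch: for two floats on a little-endian machine the returned
// document looks roughly like (member order depends on the JSON writer):
//
//   std::vector<float> x{1.0f, 2.0f};
//   auto str = Make1dInterface(x.data(), x.size());
//   // {"data": [<address>, true], "shape": [2], "strides": [4],
//   //  "typestr": "<f4", "version": 3}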

template <typename T, int32_t kDim = 5>
class Tensor {
 public:
  using ShapeT = std::size_t[kDim];
  using StrideT = ShapeT;

 private:
  HostDeviceVector<T> data_;
  ShapeT shape_{0};
  Order order_{Order::kC};

  template <typename I, std::int32_t D>
  void Initialize(I const (&shape)[D], DeviceOrd device) {
    static_assert(D <= kDim, "Invalid shape.");
    std::copy(shape, shape + D, shape_);
    for (auto i = D; i < kDim; ++i) {
      shape_[i] = 1;
    }
    if (!device.IsCPU()) {
      data_.SetDevice(device);
      data_.ConstDevicePointer();  // Pull to device;
    }
    CHECK_EQ(data_.Size(), detail::CalcSize(shape_));
  }

 public:
  Tensor() = default;

  template <typename I, int32_t D>
  explicit Tensor(I const (&shape)[D], DeviceOrd device, Order order = kC)
      : Tensor{common::Span<I const, D>{shape}, device, order} {}

  template <typename I, size_t D>
  explicit Tensor(common::Span<I const, D> shape, DeviceOrd device, Order order = kC)
      : order_{order} {
    // No device unroll as this is a host only function.
    std::copy(shape.data(), shape.data() + D, shape_);
    for (auto i = D; i < kDim; ++i) {
      shape_[i] = 1;
    }
    auto size = detail::CalcSize(shape_);
    if (!device.IsCPU()) {
      data_.SetDevice(device);
    }
    data_.Resize(size);
    if (!device.IsCPU()) {
      data_.DevicePointer();  // Pull to device
    }
  }

  template <typename It, typename I, int32_t D>
  explicit Tensor(It begin, It end, I const (&shape)[D], DeviceOrd device, Order order = kC)
      : order_{order} {
    auto &h_vec = data_.HostVector();
    h_vec.insert(h_vec.begin(), begin, end);
    // shape
    this->Initialize(shape, device);
  }

  template <typename I, int32_t D>
  explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], DeviceOrd device,
                  Order order = kC)
      : order_{order} {
    auto &h_vec = data_.HostVector();
    h_vec = data;
    // shape
    this->Initialize(shape, device);
  }

  template <typename... Index>
  T &operator()(Index &&...idx) {
    return this->HostView()(std::forward<Index>(idx)...);
  }

  template <typename... Index>
  T const &operator()(Index &&...idx) const {
    return this->HostView()(std::forward<Index>(idx)...);
  }

  auto View(DeviceOrd device) {
    if (device.IsCPU()) {
      auto span = data_.HostSpan();
      return TensorView<T, kDim>{span, shape_, device, order_};
    } else {
      data_.SetDevice(device);
      auto span = data_.DeviceSpan();
      return TensorView<T, kDim>{span, shape_, device, order_};
    }
  }
  auto View(DeviceOrd device) const {
    if (device.IsCPU()) {
      auto span = data_.ConstHostSpan();
      return TensorView<T const, kDim>{span, shape_, device, order_};
    } else {
      data_.SetDevice(device);
      auto span = data_.ConstDeviceSpan();
      return TensorView<T const, kDim>{span, shape_, device, order_};
    }
  }

  auto HostView() { return this->View(DeviceOrd::CPU()); }
  auto HostView() const { return this->View(DeviceOrd::CPU()); }

  [[nodiscard]] std::size_t Size() const { return data_.Size(); }
  [[nodiscard]] bool Empty() const { return Size() == 0; }

  auto Shape() const { return common::Span<size_t const, kDim>{shape_}; }
  auto Shape(size_t i) const { return shape_[i]; }

  HostDeviceVector<T> *Data() { return &data_; }
  HostDeviceVector<T> const *Data() const { return &data_; }

  template <typename Fn>
  void ModifyInplace(Fn &&fn) {
    fn(this->Data(), common::Span<size_t, kDim>{this->shape_});
    CHECK_EQ(this->Data()->Size(), detail::CalcSize(this->shape_))
        << "Inconsistent size after modification.";
  }

  template <typename... S, detail::EnableIfIntegral<S...> * = nullptr>
  void Reshape(S &&...s) {
    static_assert(sizeof...(S) <= kDim, "Invalid shape.");
    detail::ReshapeImpl<0>(shape_, std::forward<S>(s)...);
    auto constexpr kEnd = sizeof...(S);
    static_assert(kEnd <= kDim, "Invalid shape.");
    std::fill(shape_ + kEnd, shape_ + kDim, 1);
    auto n = detail::CalcSize(shape_);
    data_.Resize(n);
  }

  template <size_t D>
  void Reshape(common::Span<size_t const, D> shape) {
    static_assert(D <= kDim, "Invalid shape.");
    std::copy(shape.data(), shape.data() + D, this->shape_);
    std::fill(shape_ + D, shape_ + kDim, 1);
    auto n = detail::CalcSize(shape_);
    data_.Resize(n);
  }

  template <size_t D>
  void Reshape(size_t (&shape)[D]) {
    this->Reshape(common::Span<size_t const, D>{shape});
  }

  template <typename... S>
  auto Slice(S &&...slices) const {
    return this->HostView().Slice(std::forward<S>(slices)...);
  }

  template <typename... S>
  auto Slice(S &&...slices) {
    return this->HostView().Slice(std::forward<S>(slices)...);
  }

  void SetDevice(DeviceOrd device) const { data_.SetDevice(device); }
  [[nodiscard]] DeviceOrd Device() const { return data_.Device(); }
};
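
// Usage sketch: Tensor owns its storage; indexing and slicing go through a
// TensorView obtained from View()/HostView().
//
//   Tensor<float, 2> t{{3, 4}, DeviceOrd::CPU()};
//   auto h = t.HostView();
//   h(0, 1) = 1.5f;
//   t.Reshape(2, 6);  // still 12 elements, new shape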

template <typename T>
using Vector = Tensor<T, 1>;

template <typename T>
using Matrix = Tensor<T, 2>;

template <typename T, typename... Index>
auto Empty(Context const *ctx, Index &&...index) {
  Tensor<T, sizeof...(Index)> t;
  t.SetDevice(ctx->Device());
  t.Reshape(index...);
  return t;
}

template <typename T, typename... Index>
auto Constant(Context const *ctx, T v, Index &&...index) {
  Tensor<T, sizeof...(Index)> t;
  t.SetDevice(ctx->Device());
  t.Reshape(index...);
  t.Data()->Fill(std::move(v));
  return t;
}

template <typename T, typename... Index>
auto Zeros(Context const *ctx, Index &&...index) {
  return Constant(ctx, static_cast<T>(0), index...);
}
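
// Usage sketch:
//
//   Context ctx;
//   auto z = Zeros<float>(&ctx, 2, 3);  // Tensor<float, 2>, filled with 0
//   auto c = Constant(&ctx, 1.5, 4);    // Tensor<double, 1>, filled with 1.5
//   auto e = Empty<int>(&ctx, 8);       // allocated but uninitialized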

// Only the first axis is supported for now.
template <typename T, int32_t D>
void Stack(Tensor<T, D> *l, Tensor<T, D> const &r) {
  if (r.Device().IsCUDA()) {
    l->SetDevice(r.Device());
  }
  l->ModifyInplace([&](HostDeviceVector<T> *data, common::Span<size_t, D> shape) {
    for (size_t i = 1; i < D; ++i) {
      if (shape[i] == 0) {
        shape[i] = r.Shape(i);
      } else {
        CHECK_EQ(shape[i], r.Shape(i));
      }
    }
    data->Extend(*r.Data());
    shape[0] = l->Shape(0) + r.Shape(0);
  });
}
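
// Usage sketch: concatenation along the first axis; the trailing dimensions
// must match.
//
//   Context ctx;
//   auto a = Zeros<float>(&ctx, 2, 3);
//   auto b = Zeros<float>(&ctx, 4, 3);
//   Stack(&a, b);  // a now has shape (6, 3)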
}  // namespace xgboost::linalg

#if defined(LINALG_HD)
#undef LINALG_HD
#endif  // defined(LINALG_HD)
#endif  // XGBOOST_LINALG_H_