关于数据集的元信息，始终驻留在内存中。更多...

#include <data.h>

xgboost::MetaInfo 的协作图

公共成员函数
	MetaInfo ()=default
	默认构造函数更多...

	MetaInfo (MetaInfo &&that)=default

MetaInfo &	operator= (MetaInfo &&that)=default

MetaInfo &	operator= (MetaInfo const &that)=delete

void	Validate (DeviceOrd device) const
	验证所有元信息。更多...

MetaInfo	Slice (Context const *ctx, common::Span< bst_idx_t const > ridxs, bst_idx_t nnz) const
	切片元信息。更多...

MetaInfo	Copy () const

bool	IsDense () const
	矩阵是否为密集型。更多...

bst_float	GetWeight (size_t i) const
	获取每个实例的权重。更多...

const std::vector< size_t > &	LabelAbsSort (Context const *ctx) const
	按绝对值获取标签的排序索引 (argsort)（由 cox loss 使用）更多...

void	Clear ()
	清除所有信息更多...

void	LoadBinary (dmlc::Stream *fi)
	从二进制流加载元信息。更多...

void	SaveBinary (dmlc::Stream *fo) const
	将元信息保存到二进制流。更多...

void	SetInfo (Context const &ctx, StringView key, StringView interface_str)
	使用数组接口设置元信息中的信息。更多...

void	GetInfo (char const key, bst_ulong out_len, DataType dtype, const void **out_dptr) const

void	SetFeatureInfo (const char key, const char *info, const bst_ulong size)

void	GetFeatureInfo (const char field, std::vector< std::string > out_str_vecs) const

void	Extend (MetaInfo const &that, bool accumulate_rows, bool check_column)
	用其他 MetaInfo 扩展。更多...

void	SynchronizeNumberOfColumns (Context const *ctx, DataSplitMode split_mode)
	在所有 worker 中同步列数。更多...

bool	IsRowSplit () const
	数据是否按行分割。更多...

bool	IsColumnSplit () const
	数据是否按列分割。更多...

bool	IsRanking () const
	这是否是排序学习数据。更多...

bool	IsVerticalFederated () const
	检查我们是否正在进行垂直联邦学习的便捷方法，这需要一些特殊的处理。更多...

bool	ShouldHaveLabels () const
	检查 MetaInfo 是否应包含标签的便捷方法。更多...

bool	HasCategorical () const
	标记 DMatrix 是否包含类别特征。更多...

公共属性
bst_idx_t	num_row_ {0}
	数据中的行数更多...

uint64_t	num_col_ {0}
	数据中的列数更多...

uint64_t	num_nonzero_ {0}
	数据中的非零项数更多...

linalg::Tensor< float, 2 >	labels
	每个实例的标签更多...

DataSplitMode	data_split_mode {DataSplitMode::kRow}
	数据分割模式更多...

std::vector< bst_group_t >	group_ptr_
	当学习任务是排序时所需的组的开始和结束索引。更多...

HostDeviceVector< bst_float >	weights_
	每个实例的权重，可选更多...

linalg::Matrix< float >	base_margin_
	初始化的 margin，如果指定，xgboost 将从此初始 margin 开始。可用于指定要进行boosting的初始预测。更多...

HostDeviceVector< bst_float >	labels_lower_bound_
	标签的下界，用于生存分析（删失回归）更多...

HostDeviceVector< bst_float >	labels_upper_bound_
	标签的上界，用于生存分析（删失回归）更多...

std::vector< std::string >	feature_type_names
	用户为每个特征提供的类型名称。例如："int"/"float"/"i"/"q"。更多...

std::vector< std::string >	feature_names
	每个特征的名称。更多...

HostDeviceVector< FeatureType >	feature_types

HostDeviceVector< float >	feature_weights

静态公共属性
static constexpr uint64_t	kNumField = 12
	MetaInfo 中的数据字段数更多...

详细描述

关于数据集的元信息，始终驻留在内存中。

构造函数与析构函数文档

◆ MetaInfo() [1/2]

xgboost::MetaInfo::MetaInfo ( )

default

默认构造函数

◆ MetaInfo() [2/2]

xgboost::MetaInfo::MetaInfo ( MetaInfo && that )

default

成员函数文档

◆ Clear()

void xgboost::MetaInfo::Clear ( )

清除所有信息

◆ Copy()

MetaInfo xgboost::MetaInfo::Copy ( ) const

◆ Extend()

void xgboost::MetaInfo::Extend	(	MetaInfo const &	that,
		bool	accumulate_rows,
		bool	check_column
	)

用其他 MetaInfo 扩展。

参数

that	另一个 MetaInfo 对象。
accumulate_rows	在此函数中是否需要累积行数。如果客户端代码提前知道行数，请将此参数设置为 false。
check_column	扩展方法是否应检查列的一致性。

◆ GetFeatureInfo()

void xgboost::MetaInfo::GetFeatureInfo	(	const char *	field,
		std::vector< std::string > *	out_str_vecs
	)		const

◆ GetInfo()

void xgboost::MetaInfo::GetInfo	(	char const *	key,
		bst_ulong *	out_len,
		DataType	dtype,
		const void **	out_dptr
	)		const

◆ GetWeight()

bst_float xgboost::MetaInfo::GetWeight ( size_t i ) const

inline

获取每个实例的权重。

参数

i 实例索引。

返回: 权重。

◆ HasCategorical()

bool xgboost::MetaInfo::HasCategorical ( ) const

inline

标记 DMatrix 是否包含类别特征。

◆ IsColumnSplit()

bool xgboost::MetaInfo::IsColumnSplit ( ) const

inline

数据是否按列分割。

◆ IsDense()

bool xgboost::MetaInfo::IsDense ( ) const

inline

矩阵是否为密集型。

◆ IsRanking()

bool xgboost::MetaInfo::IsRanking ( ) const

inline

这是否是排序学习数据。

◆ IsRowSplit()

bool xgboost::MetaInfo::IsRowSplit ( ) const

inline

数据是否按行分割。

◆ IsVerticalFederated()

bool xgboost::MetaInfo::IsVerticalFederated ( ) const

检查我们是否正在进行垂直联邦学习的便捷方法，这需要一些特殊的处理。

◆ LabelAbsSort()

const std::vector<size_t>& xgboost::MetaInfo::LabelAbsSort ( Context const * ctx ) const

按绝对值获取标签的排序索引 (argsort)（由 cox loss 使用）

◆ LoadBinary()

void xgboost::MetaInfo::LoadBinary ( dmlc::Stream * fi )

从二进制流加载元信息。

参数

fi 输入流

◆ operator=() [1/2]

MetaInfo& xgboost::MetaInfo::operator= ( MetaInfo && that )

default

◆ operator=() [2/2]

MetaInfo& xgboost::MetaInfo::operator= ( MetaInfo const & that )

delete

◆ SaveBinary()

void xgboost::MetaInfo::SaveBinary ( dmlc::Stream * fo ) const

将元信息保存到二进制流。

参数

fo 输出流。

◆ SetFeatureInfo()

void xgboost::MetaInfo::SetFeatureInfo	(	const char *	key,
		const char **	info,
		const bst_ulong	size
	)

◆ SetInfo()

void xgboost::MetaInfo::SetInfo	(	Context const &	ctx,
		StringView	key,
		StringView	interface_str
	)

使用数组接口设置元信息中的信息。

参数

key	信息的键。
interface_str	json 格式数组接口的字符串表示。

◆ ShouldHaveLabels()

bool xgboost::MetaInfo::ShouldHaveLabels ( ) const

检查 MetaInfo 是否应包含标签的便捷方法。

通常我们假设标签在任何地方都可用。唯一的例外是垂直联邦学习，其中标签仅在 worker 0 上可用。

◆ Slice()

MetaInfo xgboost::MetaInfo::Slice	(	Context const *	ctx,
		common::Span< bst_idx_t const >	ridxs,
		bst_idx_t	nnz
	)		const

切片元信息。

ridxs 的设备由 ctx 对象指定。

参数

ridxs	选定行的索引。
nnz	非缺失值的数量。

◆ SynchronizeNumberOfColumns()

void xgboost::MetaInfo::SynchronizeNumberOfColumns	(	Context const *	ctx,
		DataSplitMode	split_mode
	)

在所有 worker 中同步列数。

通常我们只需要在所有 worker 中找到最大列数，但在垂直联邦学习中，由于每个 worker 加载自己的列列表，我们需要将它们相加。

◆ Validate()

void xgboost::MetaInfo::Validate ( DeviceOrd device ) const

验证所有元信息。

成员数据文档

◆ base_margin_

linalg::Matrix<float> xgboost::MetaInfo::base_margin_

初始化的 margin，如果指定，xgboost 将从此初始 margin 开始。可用于指定要进行boosting的初始预测。

◆ data_split_mode

DataSplitMode xgboost::MetaInfo::data_split_mode {DataSplitMode::kRow}

数据分割模式

◆ feature_names

std::vector<std::string> xgboost::MetaInfo::feature_names

每个特征的名称。

◆ feature_type_names

std::vector<std::string> xgboost::MetaInfo::feature_type_names

用户为每个特征提供的类型名称。例如："int"/"float"/"i"/"q"。

◆ feature_types

HostDeviceVector<FeatureType> xgboost::MetaInfo::feature_types

◆ feature_weights

HostDeviceVector<float> xgboost::MetaInfo::feature_weights

◆ group_ptr_

std::vector<bst_group_t> xgboost::MetaInfo::group_ptr_

当学习任务是排序时所需的组的开始和结束索引。

◆ kNumField

constexpr uint64_t xgboost::MetaInfo::kNumField = 12

staticconstexpr

MetaInfo 中的数据字段数

◆ labels

linalg::Tensor<float, 2> xgboost::MetaInfo::labels

每个实例的标签

◆ labels_lower_bound_

HostDeviceVector<bst_float> xgboost::MetaInfo::labels_lower_bound_

标签的下界，用于生存分析（删失回归）

◆ labels_upper_bound_

HostDeviceVector<bst_float> xgboost::MetaInfo::labels_upper_bound_

标签的上界，用于生存分析（删失回归）

◆ num_col_

uint64_t xgboost::MetaInfo::num_col_ {0}

数据中的列数

◆ num_nonzero_

uint64_t xgboost::MetaInfo::num_nonzero_ {0}

数据中的非零项数

◆ num_row_

bst_idx_t xgboost::MetaInfo::num_row_ {0}

数据中的行数

◆ weights_

HostDeviceVector<bst_float> xgboost::MetaInfo::weights_

每个实例的权重，可选

本类的文档生成自以下文件

/home/docs/checkouts/readthedocs.org/user_builds/xgboost/checkouts/release_3.0.0/include/xgboost/data.h

公共成员函数

公共属性

静态公共属性

详细描述

构造函数与析构函数文档

◆ MetaInfo() [1/2]

◆ MetaInfo() [2/2]

成员函数文档

◆ Clear()

◆ Copy()

◆ Extend()

◆ GetFeatureInfo()

◆ GetInfo()

◆ GetWeight()

◆ HasCategorical()

◆ IsColumnSplit()

◆ IsDense()

◆ IsRanking()

◆ IsRowSplit()

◆ IsVerticalFederated()

◆ LabelAbsSort()

◆ LoadBinary()

◆ operator=() [1/2]

◆ operator=() [2/2]

◆ SaveBinary()

◆ SetFeatureInfo()

◆ SetInfo()

◆ ShouldHaveLabels()

◆ Slice()

◆ SynchronizeNumberOfColumns()

◆ Validate()

成员数据文档

◆ base_margin_

◆ data_split_mode

◆ feature_names

◆ feature_type_names

◆ feature_types

◆ feature_weights

◆ group_ptr_

◆ kNumField

◆ labels

◆ labels_lower_bound_

◆ labels_upper_bound_

◆ num_col_

◆ num_nonzero_

◆ num_row_

◆ weights_