[feature-wip][array-type] Refactor type info for nested array. (#8279)

This commit is contained in:
Adonis Ling
2022-03-02 14:20:39 +08:00
committed by GitHub
parent b241bc4e9d
commit b40e9144cb
35 changed files with 138 additions and 122 deletions

View File

@ -99,10 +99,10 @@ struct BaseAggregateFuncs {
return;
}
if constexpr (field_type == OLAP_FIELD_TYPE_ARRAY) {
const TypeInfo* _type_info = get_collection_type_info(sub_type);
auto _type_info = get_collection_type_info(sub_type);
_type_info->deep_copy(dst->mutable_cell_ptr(), src, mem_pool);
} else {
const TypeInfo* _type_info = get_type_info(field_type);
auto _type_info = get_type_info(field_type);
_type_info->deep_copy(dst->mutable_cell_ptr(), src, mem_pool);
}
}

View File

@ -36,7 +36,7 @@ class ColumnBlock {
public:
ColumnBlock(ColumnVectorBatch* batch, MemPool* pool) : _batch(batch), _pool(pool) {}
const TypeInfo* type_info() const { return _batch->type_info(); }
const TypeInfo* type_info() const { return _batch->type_info().get(); }
uint8_t* data() const { return _batch->data(); }
bool is_nullable() const { return _batch->is_nullable(); }
MemPool* pool() const { return _pool; }

View File

@ -31,7 +31,7 @@ Status ColumnVectorBatch::resize(size_t new_cap) {
return Status::OK();
}
Status ColumnVectorBatch::create(size_t init_capacity, bool is_nullable, const TypeInfo* type_info,
Status ColumnVectorBatch::create(size_t init_capacity, bool is_nullable, std::shared_ptr<const TypeInfo> type_info,
Field* field,
std::unique_ptr<ColumnVectorBatch>* column_vector_batch) {
if (is_scalar_type(type_info->type())) {
@ -133,13 +133,13 @@ Status ColumnVectorBatch::create(size_t init_capacity, bool is_nullable, const T
}
std::unique_ptr<ColumnVectorBatch> elements;
auto array_type_info = reinterpret_cast<const ArrayTypeInfo*>(type_info);
auto array_type_info = dynamic_cast<const ArrayTypeInfo*>(type_info.get());
RETURN_IF_ERROR(ColumnVectorBatch::create(
init_capacity * 2, field->get_sub_field(0)->is_nullable(),
array_type_info->item_type_info(), field->get_sub_field(0), &elements));
std::unique_ptr<ColumnVectorBatch> offsets;
TypeInfo* offsets_type_info =
auto offsets_type_info =
get_scalar_type_info(FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT);
RETURN_IF_ERROR(ColumnVectorBatch::create(init_capacity + 1, false, offsets_type_info,
nullptr, &offsets));
@ -160,7 +160,7 @@ Status ColumnVectorBatch::create(size_t init_capacity, bool is_nullable, const T
}
template <class ScalarType>
ScalarColumnVectorBatch<ScalarType>::ScalarColumnVectorBatch(const TypeInfo* type_info,
ScalarColumnVectorBatch<ScalarType>::ScalarColumnVectorBatch(std::shared_ptr<const TypeInfo> type_info,
bool is_nullable)
: ColumnVectorBatch(type_info, is_nullable), _data(0) {}
@ -176,7 +176,7 @@ Status ScalarColumnVectorBatch<ScalarType>::resize(size_t new_cap) {
return Status::OK();
}
ArrayColumnVectorBatch::ArrayColumnVectorBatch(const TypeInfo* type_info, bool is_nullable,
ArrayColumnVectorBatch::ArrayColumnVectorBatch(std::shared_ptr<const TypeInfo> type_info, bool is_nullable,
ScalarColumnVectorBatch<uint32_t>* offsets,
ColumnVectorBatch* elements)
: ColumnVectorBatch(type_info, is_nullable), _data(0) {

View File

@ -54,7 +54,7 @@ public:
// struct that contains column data(null bitmap), data array in sub class.
class ColumnVectorBatch {
public:
explicit ColumnVectorBatch(const TypeInfo* type_info, bool is_nullable)
explicit ColumnVectorBatch(std::shared_ptr<const TypeInfo> type_info, bool is_nullable)
: _type_info(type_info),
_capacity(0),
_delete_state(DEL_NOT_SATISFIED),
@ -63,7 +63,7 @@ public:
virtual ~ColumnVectorBatch();
const TypeInfo* type_info() const { return _type_info; }
std::shared_ptr<const TypeInfo> type_info() const { return _type_info; }
size_t capacity() const { return _capacity; }
@ -105,11 +105,11 @@ public:
// Get the idx's cell_ptr for write
virtual uint8_t* mutable_cell_ptr(size_t idx) = 0;
static Status create(size_t init_capacity, bool is_nullable, const TypeInfo* type_info,
static Status create(size_t init_capacity, bool is_nullable, std::shared_ptr<const TypeInfo> type_info,
Field* field, std::unique_ptr<ColumnVectorBatch>* column_vector_batch);
private:
const TypeInfo* _type_info;
std::shared_ptr<const TypeInfo> _type_info;
size_t _capacity;
DelCondSatisfied _delete_state;
const bool _nullable;
@ -119,7 +119,7 @@ private:
template <class ScalarCppType>
class ScalarColumnVectorBatch : public ColumnVectorBatch {
public:
explicit ScalarColumnVectorBatch(const TypeInfo* type_info, bool is_nullable);
explicit ScalarColumnVectorBatch(std::shared_ptr<const TypeInfo> type_info, bool is_nullable);
~ScalarColumnVectorBatch() override;
@ -177,7 +177,7 @@ private:
class ArrayColumnVectorBatch : public ColumnVectorBatch {
public:
explicit ArrayColumnVectorBatch(const TypeInfo* type_info, bool is_nullable,
explicit ArrayColumnVectorBatch(std::shared_ptr<const TypeInfo> type_info, bool is_nullable,
ScalarColumnVectorBatch<uint32_t>* offsets,
ColumnVectorBatch* elements);
~ArrayColumnVectorBatch() override;

View File

@ -278,7 +278,7 @@ public:
FieldType type() const { return _type_info->type(); }
FieldAggregationMethod aggregation() const { return _agg_info->agg_method(); }
const TypeInfo* type_info() const { return _type_info; }
std::shared_ptr<const TypeInfo> type_info() const { return _type_info; }
bool is_nullable() const { return _is_nullable; }
// similar to `full_encode_ascending`, but only encode part (the first `index_size` bytes) of the value.
@ -301,7 +301,7 @@ public:
Field* get_sub_field(int i) { return _sub_fields[i].get(); }
protected:
const TypeInfo* _type_info;
std::shared_ptr<const TypeInfo> _type_info;
const AggregateInfo* _agg_info;
// unit : byte
// except for strings, other types have fixed lengths

View File

@ -1002,7 +1002,7 @@ OLAPStatus PushBrokerReader::fill_field_row(RowCursorCell* dst, const char* src,
if (src_null) {
break;
}
const TypeInfo* type_info = get_type_info(type);
auto type_info = get_type_info(type);
type_info->deep_copy(dst->mutable_cell_ptr(), src, mem_pool);
break;
}

View File

@ -206,7 +206,7 @@ Status BinaryDictPageDecoder::init() {
if (_encoding_type == DICT_ENCODING) {
// copy the codewords into a temporary buffer first
// And then copy the strings corresponding to the codewords to the destination buffer
TypeInfo* type_info = get_scalar_type_info(OLAP_FIELD_TYPE_INT);
auto type_info = get_scalar_type_info(OLAP_FIELD_TYPE_INT);
RETURN_IF_ERROR(ColumnVectorBatch::create(0, false, type_info, nullptr, &_batch));
_data_page_decoder.reset(_bit_shuffle_ptr = new BitShufflePageDecoder<OLAP_FIELD_TYPE_INT>(_data, _options));

View File

@ -51,13 +51,13 @@ public:
int64_t bitmap_nums() { return _bitmap_column_reader->num_values(); }
const TypeInfo* type_info() { return _typeinfo; }
std::shared_ptr<const TypeInfo> type_info() { return _typeinfo; }
private:
friend class BitmapIndexIterator;
FilePathDesc _path_desc;
const TypeInfo* _typeinfo;
std::shared_ptr<const TypeInfo> _typeinfo;
const BitmapIndexPB* _bitmap_index_meta;
bool _has_null = false;
std::unique_ptr<IndexedColumnReader> _dict_column_reader;

View File

@ -64,7 +64,7 @@ public:
using CppType = typename CppTypeTraits<field_type>::CppType;
using MemoryIndexType = typename BitmapIndexTraits<CppType>::MemoryIndexType;
explicit BitmapIndexWriterImpl(const TypeInfo* typeinfo)
explicit BitmapIndexWriterImpl(std::shared_ptr<const TypeInfo> typeinfo)
: _typeinfo(typeinfo),
_reverted_index_size(0),
_tracker(new MemTracker()),
@ -114,7 +114,7 @@ public:
IndexedColumnWriterOptions options;
options.write_ordinal_index = false;
options.write_value_index = true;
options.encoding = EncodingInfo::get_default_encoding(_typeinfo, true);
options.encoding = EncodingInfo::get_default_encoding(_typeinfo.get(), true);
options.compression = LZ4F;
IndexedColumnWriter dict_column_writer(options, _typeinfo, wblock);
@ -144,12 +144,12 @@ public:
bitmap_sizes.push_back(bitmap_size);
}
const TypeInfo* bitmap_typeinfo = get_type_info(OLAP_FIELD_TYPE_OBJECT);
auto bitmap_typeinfo = get_type_info(OLAP_FIELD_TYPE_OBJECT);
IndexedColumnWriterOptions options;
options.write_ordinal_index = true;
options.write_value_index = false;
options.encoding = EncodingInfo::get_default_encoding(bitmap_typeinfo, false);
options.encoding = EncodingInfo::get_default_encoding(bitmap_typeinfo.get(), false);
// we already store compressed bitmap, use NO_COMPRESSION to save some cpu
options.compression = NO_COMPRESSION;
@ -179,7 +179,7 @@ public:
}
private:
const TypeInfo* _typeinfo;
std::shared_ptr<const TypeInfo> _typeinfo;
uint64_t _reverted_index_size;
rowid_t _rid = 0;
// row id list for null value
@ -192,7 +192,7 @@ private:
} // namespace
Status BitmapIndexWriter::create(const TypeInfo* typeinfo,
Status BitmapIndexWriter::create(std::shared_ptr<const TypeInfo> typeinfo,
std::unique_ptr<BitmapIndexWriter>* res) {
FieldType type = typeinfo->type();
switch (type) {

View File

@ -36,7 +36,7 @@ namespace segment_v2 {
class BitmapIndexWriter {
public:
static Status create(const TypeInfo* typeinfo, std::unique_ptr<BitmapIndexWriter>* res);
static Status create(std::shared_ptr<const TypeInfo> typeinfo, std::unique_ptr<BitmapIndexWriter>* res);
BitmapIndexWriter() = default;
virtual ~BitmapIndexWriter() = default;

View File

@ -53,13 +53,13 @@ public:
// create a new column iterator.
Status new_iterator(std::unique_ptr<BloomFilterIndexIterator>* iterator);
const TypeInfo* type_info() const { return _typeinfo; }
std::shared_ptr<const TypeInfo> type_info() const { return _typeinfo; }
private:
friend class BloomFilterIndexIterator;
FilePathDesc _path_desc;
const TypeInfo* _typeinfo;
std::shared_ptr<const TypeInfo> _typeinfo;
const BloomFilterIndexPB* _bloom_filter_index_meta;
std::unique_ptr<IndexedColumnReader> _bloom_filter_reader;
};

View File

@ -69,7 +69,7 @@ public:
using ValueDict = typename BloomFilterTraits<CppType>::ValueDict;
explicit BloomFilterIndexWriterImpl(const BloomFilterOptions& bf_options,
const TypeInfo* typeinfo)
std::shared_ptr<const TypeInfo> typeinfo)
: _bf_options(bf_options),
_typeinfo(typeinfo),
_tracker(new MemTracker(-1, "BloomFilterIndexWriterImpl")),
@ -131,7 +131,7 @@ public:
meta->set_algorithm(BLOCK_BLOOM_FILTER);
// write bloom filters
const TypeInfo* bf_typeinfo = get_scalar_type_info(OLAP_FIELD_TYPE_VARCHAR);
auto bf_typeinfo = get_scalar_type_info(OLAP_FIELD_TYPE_VARCHAR);
IndexedColumnWriterOptions options;
options.write_ordinal_index = true;
options.write_value_index = false;
@ -162,7 +162,7 @@ private:
private:
BloomFilterOptions _bf_options;
const TypeInfo* _typeinfo;
std::shared_ptr<const TypeInfo> _typeinfo;
std::shared_ptr<MemTracker> _tracker;
MemPool _pool;
bool _has_null;
@ -176,7 +176,7 @@ private:
// TODO currently we don't support bloom filter index for tinyint/hll/float/double
Status BloomFilterIndexWriter::create(const BloomFilterOptions& bf_options,
const TypeInfo* typeinfo,
std::shared_ptr<const TypeInfo> typeinfo,
std::unique_ptr<BloomFilterIndexWriter>* res) {
FieldType type = typeinfo->type();
switch (type) {

View File

@ -38,7 +38,7 @@ struct BloomFilterOptions;
class BloomFilterIndexWriter {
public:
static Status create(const BloomFilterOptions& bf_options, const TypeInfo* typeinfo,
static Status create(const BloomFilterOptions& bf_options, std::shared_ptr<const TypeInfo> typeinfo,
std::unique_ptr<BloomFilterIndexWriter>* res);
BloomFilterIndexWriter() = default;

View File

@ -104,7 +104,7 @@ Status ColumnReader::init() {
return Status::NotSupported(
strings::Substitute("unsupported typeinfo, type=$0", _meta.type()));
}
RETURN_IF_ERROR(EncodingInfo::get(_type_info, _meta.encoding(), &_encoding_info));
RETURN_IF_ERROR(EncodingInfo::get(_type_info.get(), _meta.encoding(), &_encoding_info));
RETURN_IF_ERROR(get_block_compression_codec(_meta.compression(), &_compress_codec));
for (int i = 0; i < _meta.indexes_size(); i++) {
@ -388,7 +388,7 @@ Status ArrayFileColumnIterator::init(const ColumnIteratorOptions& opts) {
if (_array_reader->is_nullable()) {
RETURN_IF_ERROR(_null_iterator->init(opts));
}
TypeInfo* offset_type_info = get_scalar_type_info(FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT);
auto offset_type_info = get_scalar_type_info(FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT);
RETURN_IF_ERROR(
ColumnVectorBatch::create(1024, false, offset_type_info, nullptr, &_length_batch));
return Status::OK();

View File

@ -171,7 +171,7 @@ private:
uint64_t _num_rows;
FilePathDesc _path_desc;
const TypeInfo* _type_info = nullptr; // initialized in init(), may changed by subclasses.
std::shared_ptr<const TypeInfo> _type_info = nullptr; // initialized in init(), may changed by subclasses.
const EncodingInfo* _encoding_info =
nullptr; // initialized in init(), used for create PageDecoder
const BlockCompressionCodec* _compress_codec = nullptr; // initialized in init()
@ -376,7 +376,8 @@ private:
class DefaultValueColumnIterator : public ColumnIterator {
public:
DefaultValueColumnIterator(bool has_default_value, const std::string& default_value,
bool is_nullable, TypeInfo* type_info, size_t schema_length)
bool is_nullable, std::shared_ptr<const TypeInfo> type_info,
size_t schema_length)
: _has_default_value(has_default_value),
_default_value(default_value),
_is_nullable(is_nullable),
@ -416,7 +417,7 @@ private:
bool _has_default_value;
std::string _default_value;
bool _is_nullable;
TypeInfo* _type_info;
std::shared_ptr<const TypeInfo> _type_info;
size_t _schema_length;
bool _is_default_value_null;
size_t _type_size;

View File

@ -226,7 +226,7 @@ Status ScalarColumnWriter::init() {
PageBuilder* page_builder = nullptr;
RETURN_IF_ERROR(
EncodingInfo::get(get_field()->type_info(), _opts.meta->encoding(), &_encoding_info));
EncodingInfo::get(get_field()->type_info().get(), _opts.meta->encoding(), &_encoding_info));
_opts.meta->set_encoding(_encoding_info->encoding());
// create page builder
PageBuilderOptions opts;

View File

@ -36,7 +36,7 @@ Status IndexedColumnReader::load(bool use_page_cache, bool kept_in_memory) {
return Status::NotSupported(
strings::Substitute("unsupported typeinfo, type=$0", _meta.data_type()));
}
RETURN_IF_ERROR(EncodingInfo::get(_type_info, _meta.encoding(), &_encoding_info));
RETURN_IF_ERROR(EncodingInfo::get(_type_info.get(), _meta.encoding(), &_encoding_info));
RETURN_IF_ERROR(get_block_compression_codec(_meta.compression(), &_compress_codec));
_value_key_coder = get_key_coder(_type_info->type());

View File

@ -56,7 +56,7 @@ public:
int64_t num_values() const { return _num_values; }
const EncodingInfo* encoding_info() const { return _encoding_info; }
const TypeInfo* type_info() const { return _type_info; }
std::shared_ptr<const TypeInfo> type_info() const { return _type_info; }
bool support_ordinal_seek() const { return _meta.has_ordinal_index_meta(); }
bool support_value_seek() const { return _meta.has_value_index_meta(); }
@ -82,7 +82,7 @@ private:
PageHandle _ordinal_index_page_handle;
PageHandle _value_index_page_handle;
const TypeInfo* _type_info = nullptr;
std::shared_ptr<const TypeInfo> _type_info = nullptr;
const EncodingInfo* _encoding_info = nullptr;
const BlockCompressionCodec* _compress_codec = nullptr;
const KeyCoder* _value_key_coder = nullptr;

View File

@ -37,7 +37,7 @@ namespace doris {
namespace segment_v2 {
IndexedColumnWriter::IndexedColumnWriter(const IndexedColumnWriterOptions& options,
const TypeInfo* typeinfo, fs::WritableBlock* wblock)
std::shared_ptr<const TypeInfo> typeinfo, fs::WritableBlock* wblock)
: _options(options),
_typeinfo(typeinfo),
_wblock(wblock),
@ -54,7 +54,7 @@ IndexedColumnWriter::~IndexedColumnWriter() = default;
Status IndexedColumnWriter::init() {
const EncodingInfo* encoding_info;
RETURN_IF_ERROR(EncodingInfo::get(_typeinfo, _options.encoding, &encoding_info));
RETURN_IF_ERROR(EncodingInfo::get(_typeinfo.get(), _options.encoding, &encoding_info));
_options.encoding = encoding_info->encoding();
// should store more concrete encoding type instead of DEFAULT_ENCODING
// because the default encoding of a data type can be changed in the future

View File

@ -71,7 +71,7 @@ struct IndexedColumnWriterOptions {
class IndexedColumnWriter {
public:
explicit IndexedColumnWriter(const IndexedColumnWriterOptions& options,
const TypeInfo* typeinfo, fs::WritableBlock* wblock);
std::shared_ptr<const TypeInfo> typeinfo, fs::WritableBlock* wblock);
~IndexedColumnWriter();
@ -88,7 +88,7 @@ private:
Status _flush_index(IndexPageBuilder* index_builder, BTreeMetaPB* meta);
IndexedColumnWriterOptions _options;
const TypeInfo* _typeinfo;
std::shared_ptr<const TypeInfo> _typeinfo;
fs::WritableBlock* _wblock;
// only used for `_first_value`
std::shared_ptr<MemTracker> _mem_tracker;

View File

@ -210,7 +210,7 @@ Status Segment::new_column_iterator(uint32_t cid, std::shared_ptr<MemTracker> pa
if (!tablet_column.has_default_value() && !tablet_column.is_nullable()) {
return Status::InternalError("invalid nonexistent column without default value.");
}
TypeInfo* type_info = get_type_info(&tablet_column);
auto type_info = get_type_info(&tablet_column);
std::unique_ptr<DefaultValueColumnIterator> default_value_iter(
new DefaultValueColumnIterator(
tablet_column.has_default_value(), tablet_column.default_value(),

View File

@ -107,11 +107,11 @@ Status ZoneMapIndexWriter::finish(fs::WritableBlock* wblock, ColumnIndexMetaPB*
_segment_zone_map.to_proto(meta->mutable_segment_zone_map(), _field);
// write out zone map for each data pages
const TypeInfo* typeinfo = get_type_info(OLAP_FIELD_TYPE_OBJECT);
auto typeinfo = get_type_info(OLAP_FIELD_TYPE_OBJECT);
IndexedColumnWriterOptions options;
options.write_ordinal_index = true;
options.write_value_index = false;
options.encoding = EncodingInfo::get_default_encoding(typeinfo, false);
options.encoding = EncodingInfo::get_default_encoding(typeinfo.get(), false);
options.compression = NO_COMPRESSION; // currently not compressed
IndexedColumnWriter writer(options, typeinfo, wblock);

View File

@ -590,7 +590,7 @@ OLAPStatus RowBlockChanger::change_row_block(const RowBlock* ref_block, int32_t
const Field* ref_field = read_helper.column_schema(ref_column);
char* ref_value = read_helper.cell_ptr(ref_column);
OLAPStatus st = write_helper.convert_from(i, ref_value,
ref_field->type_info(), mem_pool);
ref_field->type_info().get(), mem_pool);
if (st != OLAPStatus::OLAP_SUCCESS) {
LOG(WARNING)
<< "the column type which was altered from was unsupported."

View File

@ -16,6 +16,7 @@
// under the License.
#include "olap/types.h"
#include <memory>
namespace doris {
@ -43,10 +44,10 @@ class ScalarTypeInfoResolver {
DECLARE_SINGLETON(ScalarTypeInfoResolver);
public:
TypeInfo* get_type_info(const FieldType t) {
std::shared_ptr<const TypeInfo> get_type_info(const FieldType t) {
auto pair = _scalar_type_mapping.find(t);
DCHECK(pair != _scalar_type_mapping.end()) << "Bad field type: " << t;
return pair->second.get();
return pair->second;
}
private:
@ -54,10 +55,10 @@ private:
void add_mapping() {
TypeTraits<field_type> traits;
_scalar_type_mapping.emplace(field_type,
std::shared_ptr<TypeInfo>(new ScalarTypeInfo(traits)));
std::shared_ptr<const TypeInfo>(new ScalarTypeInfo(traits)));
}
std::unordered_map<FieldType, std::shared_ptr<TypeInfo>, std::hash<size_t>>
std::unordered_map<FieldType, std::shared_ptr<const TypeInfo>, std::hash<size_t>>
_scalar_type_mapping;
DISALLOW_COPY_AND_ASSIGN(ScalarTypeInfoResolver);
@ -97,7 +98,7 @@ bool is_scalar_type(FieldType field_type) {
}
}
TypeInfo* get_scalar_type_info(FieldType field_type) {
std::shared_ptr<const TypeInfo> get_scalar_type_info(FieldType field_type) {
return ScalarTypeInfoResolver::instance()->get_type_info(field_type);
}
@ -105,10 +106,31 @@ class ArrayTypeInfoResolver {
DECLARE_SINGLETON(ArrayTypeInfoResolver);
public:
TypeInfo* get_type_info(const FieldType t) {
std::shared_ptr<const TypeInfo> get_type_info(const FieldType t) {
auto pair = _type_mapping.find(t);
DCHECK(pair != _type_mapping.end()) << "Bad field type: list<" << t << ">";
return pair->second.get();
return pair->second;
}
// Resolves the type info of an array column described by a TabletColumn.
// The column is expected to carry exactly one sub column (the item column).
// A scalar item is resolved through the FieldType overload; a non-scalar item is
// resolved recursively and wrapped in a newly created ArrayTypeInfo.
std::shared_ptr<const TypeInfo> get_type_info(const TabletColumn& column) {
    DCHECK(column.get_subtype_count() == 1) << "more than 1 child type.";
    const auto& item_column = column.get_sub_column(0);
    if (!is_scalar_type(item_column.type())) {
        // Nested item: build the item's type info first, then wrap it.
        return std::make_shared<const ArrayTypeInfo>(get_type_info(item_column));
    }
    return get_type_info(item_column.type());
}
// Resolves the type info of an array column described by a segment ColumnMetaPB.
// Only the first child (children_columns(0)) is consulted for the item type.
// A scalar item is resolved through the FieldType overload; a non-scalar item is
// resolved recursively and wrapped in a newly created ArrayTypeInfo.
std::shared_ptr<const TypeInfo> get_type_info(const segment_v2::ColumnMetaPB& column_meta_pb) {
    DCHECK(column_meta_pb.children_columns_size() >= 1 && column_meta_pb.children_columns_size() <= 3)
            << "more than 3 children or no children.";
    const auto& item_meta = column_meta_pb.children_columns(0);
    const auto item_type = static_cast<FieldType>(item_meta.type());
    if (!is_scalar_type(item_type)) {
        // Nested item: build the item's type info first, then wrap it.
        return std::make_shared<const ArrayTypeInfo>(get_type_info(item_meta));
    }
    return get_type_info(item_type);
}
private:
@ -116,11 +138,11 @@ private:
void add_mapping() {
_type_mapping.emplace(
item_type,
std::shared_ptr<TypeInfo>(new ArrayTypeInfo(get_scalar_type_info(item_type))));
std::shared_ptr<const TypeInfo>(new ArrayTypeInfo(get_scalar_type_info(item_type))));
}
// item_type_info -> list_type_info
std::unordered_map<FieldType, std::shared_ptr<TypeInfo>, std::hash<size_t>> _type_mapping;
std::unordered_map<FieldType, std::shared_ptr<const TypeInfo>, std::hash<size_t>> _type_mapping;
};
ArrayTypeInfoResolver::~ArrayTypeInfoResolver() = default;
@ -144,27 +166,23 @@ ArrayTypeInfoResolver::ArrayTypeInfoResolver() {
}
// equal to get_scalar_type_info
TypeInfo* get_type_info(FieldType field_type) {
std::shared_ptr<const TypeInfo> get_type_info(FieldType field_type) {
return get_scalar_type_info(field_type);
}
// get array type info
TypeInfo* get_collection_type_info(FieldType sub_type) {
std::shared_ptr<const TypeInfo> get_collection_type_info(FieldType sub_type) {
return ArrayTypeInfoResolver::instance()->get_type_info(sub_type);
}
TypeInfo* get_type_info(segment_v2::ColumnMetaPB* column_meta_pb) {
std::shared_ptr<const TypeInfo> get_type_info(segment_v2::ColumnMetaPB* column_meta_pb) {
FieldType type = (FieldType)column_meta_pb->type();
if (is_scalar_type(type)) {
return get_scalar_type_info(type);
} else {
switch (type) {
case OLAP_FIELD_TYPE_ARRAY: {
DCHECK(column_meta_pb->children_columns_size() >= 1 &&
column_meta_pb->children_columns_size() <= 3)
<< "more than 3 children or no children.";
auto child_type = (FieldType)column_meta_pb->children_columns(0).type();
return ArrayTypeInfoResolver::instance()->get_type_info(child_type);
return ArrayTypeInfoResolver::instance()->get_type_info(*column_meta_pb);
}
default:
DCHECK(false) << "Bad field type: " << type;
@ -173,14 +191,13 @@ TypeInfo* get_type_info(segment_v2::ColumnMetaPB* column_meta_pb) {
}
}
TypeInfo* get_type_info(const TabletColumn* col) {
std::shared_ptr<const TypeInfo> get_type_info(const TabletColumn* col) {
if (is_scalar_type(col->type())) {
return get_scalar_type_info(col->type());
} else {
switch (col->type()) {
case OLAP_FIELD_TYPE_ARRAY:
DCHECK(col->get_subtype_count() == 1) << "more than 1 child type.";
return ArrayTypeInfoResolver::instance()->get_type_info(col->get_sub_column(0).type());
return ArrayTypeInfoResolver::instance()->get_type_info(*col);
default:
DCHECK(false) << "Bad field type: " << col->type();
return nullptr;

View File

@ -161,7 +161,7 @@ private:
class ArrayTypeInfo : public TypeInfo {
public:
explicit ArrayTypeInfo(TypeInfo* item_type_info)
explicit ArrayTypeInfo(std::shared_ptr<const TypeInfo> item_type_info)
: _item_type_info(item_type_info), _item_size(item_type_info->size()) {}
~ArrayTypeInfo() = default;
inline bool equal(const void* left, const void* right) const override {
@ -357,24 +357,24 @@ public:
inline FieldType type() const override { return OLAP_FIELD_TYPE_ARRAY; }
inline const TypeInfo* item_type_info() const { return _item_type_info; }
inline std::shared_ptr<const TypeInfo> item_type_info() const { return _item_type_info; }
private:
const TypeInfo* _item_type_info;
std::shared_ptr<const TypeInfo> _item_type_info;
const size_t _item_size;
};
extern bool is_scalar_type(FieldType field_type);
extern TypeInfo* get_scalar_type_info(FieldType field_type);
extern std::shared_ptr<const TypeInfo> get_scalar_type_info(FieldType field_type);
extern TypeInfo* get_collection_type_info(FieldType sub_type);
extern std::shared_ptr<const TypeInfo> get_collection_type_info(FieldType sub_type);
extern TypeInfo* get_type_info(FieldType field_type);
extern std::shared_ptr<const TypeInfo> get_type_info(FieldType field_type);
extern TypeInfo* get_type_info(segment_v2::ColumnMetaPB* column_meta_pb);
extern std::shared_ptr<const TypeInfo> get_type_info(segment_v2::ColumnMetaPB* column_meta_pb);
extern TypeInfo* get_type_info(const TabletColumn* col);
extern std::shared_ptr<const TypeInfo> get_type_info(const TabletColumn* col);
// support following formats when convert varchar to date
static const std::vector<std::string> DATE_FORMATS {

View File

@ -42,7 +42,7 @@ private:
};
template <FieldType type>
void test_read_write_scalar_column_vector(const TypeInfo* type_info, const uint8_t* src_data,
void test_read_write_scalar_column_vector(std::shared_ptr<const TypeInfo> type_info, const uint8_t* src_data,
size_t data_size) {
using Type = typename TypeTraits<type>::CppType;
Type* src = (Type*)src_data;
@ -72,7 +72,7 @@ void test_read_write_scalar_column_vector(const TypeInfo* type_info, const uint8
}
template <FieldType item_type>
void test_read_write_array_column_vector(const ArrayTypeInfo* array_type_info, size_t array_size,
void test_read_write_array_column_vector(std::shared_ptr<const TypeInfo> array_type_info, size_t array_size,
CollectionValue* result) {
DCHECK(array_size > 1);
@ -145,7 +145,7 @@ TEST_F(ColumnVectorTest, scalar_column_vector_test) {
for (int i = 0; i < size; ++i) {
val[i] = i;
}
const TypeInfo* ti = get_scalar_type_info(OLAP_FIELD_TYPE_TINYINT);
auto ti = get_scalar_type_info(OLAP_FIELD_TYPE_TINYINT);
test_read_write_scalar_column_vector<OLAP_FIELD_TYPE_TINYINT>(ti, val, size);
delete[] val;
}
@ -155,7 +155,7 @@ TEST_F(ColumnVectorTest, scalar_column_vector_test) {
for (int i = 0; i < size; ++i) {
set_column_value_by_type(OLAP_FIELD_TYPE_CHAR, i, (char*)&char_vals[i], &_pool, 8);
}
const TypeInfo* ti = get_scalar_type_info(OLAP_FIELD_TYPE_CHAR);
auto ti = get_scalar_type_info(OLAP_FIELD_TYPE_CHAR);
test_read_write_scalar_column_vector<OLAP_FIELD_TYPE_CHAR>(ti, (uint8_t*)char_vals, size);
delete[] char_vals;
}
@ -179,8 +179,7 @@ TEST_F(ColumnVectorTest, array_column_vector_test) {
array_val[array_index].set_length(3);
}
}
auto type_info = reinterpret_cast<ArrayTypeInfo*>(
ArrayTypeInfoResolver::instance()->get_type_info(OLAP_FIELD_TYPE_TINYINT));
auto type_info = ArrayTypeInfoResolver::instance()->get_type_info(OLAP_FIELD_TYPE_TINYINT);
test_read_write_array_column_vector<OLAP_FIELD_TYPE_TINYINT>(type_info, num_array,
array_val);

View File

@ -90,7 +90,7 @@ public:
//check values
auto tracker = std::make_shared<MemTracker>();
MemPool pool(tracker.get());
TypeInfo* type_info = get_scalar_type_info(OLAP_FIELD_TYPE_VARCHAR);
auto type_info = get_scalar_type_info(OLAP_FIELD_TYPE_VARCHAR);
size_t size = slices.size();
std::unique_ptr<ColumnVectorBatch> cvb;
ColumnVectorBatch::create(size, false, type_info, nullptr, &cvb);
@ -187,7 +187,7 @@ public:
//check values
auto tracker = std::make_shared<MemTracker>();
MemPool pool(tracker.get());
TypeInfo* type_info = get_scalar_type_info(OLAP_FIELD_TYPE_VARCHAR);
auto type_info = get_scalar_type_info(OLAP_FIELD_TYPE_VARCHAR);
std::unique_ptr<ColumnVectorBatch> cvb;
ColumnVectorBatch::create(1, false, type_info, nullptr, &cvb);
ColumnBlock column_block(cvb.get(), &pool);

View File

@ -76,7 +76,7 @@ public:
//check values
auto tracker = std::make_shared<MemTracker>();
MemPool pool(tracker.get());
TypeInfo* type_info = get_scalar_type_info(OLAP_FIELD_TYPE_VARCHAR);
auto type_info = get_scalar_type_info(OLAP_FIELD_TYPE_VARCHAR);
size_t size = slices.size();
std::unique_ptr<ColumnVectorBatch> cvb;
ColumnVectorBatch::create(size, false, type_info, nullptr, &cvb);

View File

@ -62,7 +62,7 @@ private:
template <FieldType type>
void write_index_file(std::string& filename, const void* values, size_t value_count,
size_t null_count, ColumnIndexMetaPB* meta) {
const TypeInfo* type_info = get_type_info(type);
auto type_info = get_type_info(type);
{
std::unique_ptr<fs::WritableBlock> wblock;
fs::CreateBlockOptions opts(filename);

View File

@ -53,7 +53,7 @@ template <FieldType type>
void write_bloom_filter_index_file(const std::string& file_name, const void* values,
size_t value_count, size_t null_count,
ColumnIndexMetaPB* index_meta) {
const TypeInfo* type_info = get_type_info(type);
auto type_info = get_type_info(type);
using CppType = typename CppTypeTraits<type>::CppType;
FileUtils::create_dir(dname);
std::string fname = dname + "/" + file_name;

View File

@ -123,7 +123,7 @@ void test_nullable_data(uint8_t* src_data, uint8_t* src_is_null, int num_rows,
// close the file
ASSERT_TRUE(wblock->close().ok());
}
const TypeInfo* type_info = get_scalar_type_info(type);
auto type_info = get_scalar_type_info(type);
// read and check
{
// sequence read
@ -312,7 +312,7 @@ void test_array_nullable_data(CollectionValue* src_data, uint8_t* src_is_null, i
// close the file
ASSERT_TRUE(wblock->close().ok());
}
TypeInfo* type_info = get_type_info(&meta);
auto type_info = get_type_info(&meta);
// read and check
{
@ -462,7 +462,7 @@ TEST_F(ColumnReaderWriterTest, test_array_type) {
template <FieldType type>
void test_read_default_value(string value, void* result) {
using Type = typename TypeTraits<type>::CppType;
TypeInfo* type_info = get_type_info(type);
auto type_info = get_type_info(type);
// read and check
{
TabletColumn tablet_column = create_with_default_value<type>(value);
@ -574,7 +574,7 @@ static vectorized::MutableColumnPtr create_vectorized_column_ptr(FieldType type)
template <FieldType type>
void test_v_read_default_value(string value, void* result) {
using Type = typename TypeTraits<type>::CppType;
TypeInfo* type_info = get_type_info(type);
auto type_info = get_type_info(type);
// read and check
{
TabletColumn tablet_column = create_with_default_value<type>(value);

View File

@ -37,7 +37,7 @@ public:
TEST_F(EncodingInfoTest, normal) {
auto type_info = get_scalar_type_info(OLAP_FIELD_TYPE_BIGINT);
const EncodingInfo* encoding_info = nullptr;
auto status = EncodingInfo::get(type_info, PLAIN_ENCODING, &encoding_info);
auto status = EncodingInfo::get(type_info.get(), PLAIN_ENCODING, &encoding_info);
ASSERT_TRUE(status.ok());
ASSERT_NE(nullptr, encoding_info);
}
@ -45,7 +45,7 @@ TEST_F(EncodingInfoTest, normal) {
TEST_F(EncodingInfoTest, no_encoding) {
auto type_info = get_scalar_type_info(OLAP_FIELD_TYPE_BIGINT);
const EncodingInfo* encoding_info = nullptr;
auto status = EncodingInfo::get(type_info, DICT_ENCODING, &encoding_info);
auto status = EncodingInfo::get(type_info.get(), DICT_ENCODING, &encoding_info);
ASSERT_FALSE(status.ok());
}

View File

@ -231,7 +231,7 @@ public:
_col_vector.reset(new ColumnVector());
ASSERT_EQ(_column_reader->next_vector(_col_vector.get(), 1, _mem_pool.get()), OLAP_SUCCESS);
char* data = reinterpret_cast<char*>(_col_vector->col_data());
auto st = read_row.convert_from(0, data, write_row.column_schema(0)->type_info(),
auto st = read_row.convert_from(0, data, write_row.column_schema(0)->type_info().get(),
_mem_pool.get());
ASSERT_EQ(st, expected_st);
if (st == OLAP_SUCCESS) {
@ -239,8 +239,8 @@ public:
ASSERT_TRUE(dst_str.compare(0, expected_val.size(), expected_val) == 0);
}
TypeInfo* tp = get_type_info(OLAP_FIELD_TYPE_HLL);
st = read_row.convert_from(0, read_row.cell_ptr(0), tp, _mem_pool.get());
auto tp = get_type_info(OLAP_FIELD_TYPE_HLL);
st = read_row.convert_from(0, read_row.cell_ptr(0), tp.get(), _mem_pool.get());
ASSERT_EQ(st, OLAP_ERR_INVALID_SCHEMA);
}
@ -275,7 +275,7 @@ public:
_col_vector.reset(new ColumnVector());
ASSERT_EQ(_column_reader->next_vector(_col_vector.get(), 1, _mem_pool.get()), OLAP_SUCCESS);
char* data = reinterpret_cast<char*>(_col_vector->col_data());
auto st = read_row.convert_from(0, data, write_row.column_schema(0)->type_info(),
auto st = read_row.convert_from(0, data, write_row.column_schema(0)->type_info().get(),
_mem_pool.get());
ASSERT_EQ(st, expected_st);
if (st == OLAP_SUCCESS) {
@ -283,8 +283,8 @@ public:
ASSERT_TRUE(dst_str.compare(0, value.size(), value) == 0);
}
TypeInfo* tp = get_scalar_type_info(OLAP_FIELD_TYPE_HLL);
st = read_row.convert_from(0, read_row.cell_ptr(0), tp, _mem_pool.get());
auto tp = get_scalar_type_info(OLAP_FIELD_TYPE_HLL);
st = read_row.convert_from(0, read_row.cell_ptr(0), tp.get(), _mem_pool.get());
ASSERT_EQ(st, OLAP_ERR_INVALID_SCHEMA);
}
@ -346,7 +346,7 @@ TEST_F(TestColumn, ConvertFloatToDouble) {
_col_vector.reset(new ColumnVector());
ASSERT_EQ(_column_reader->next_vector(_col_vector.get(), 2, _mem_pool.get()), OLAP_SUCCESS);
char* data = reinterpret_cast<char*>(_col_vector->col_data());
read_row.convert_from(0, data, write_row.column_schema(0)->type_info(), _mem_pool.get());
read_row.convert_from(0, data, write_row.column_schema(0)->type_info().get(), _mem_pool.get());
//float val1 = *reinterpret_cast<float*>(read_row.cell_ptr(0));
double val2 = *reinterpret_cast<double*>(read_row.cell_ptr(0));
@ -358,8 +358,8 @@ TEST_F(TestColumn, ConvertFloatToDouble) {
ASSERT_EQ(v2, 1.234);
//test not support type
TypeInfo* tp = get_scalar_type_info(OLAP_FIELD_TYPE_HLL);
OLAPStatus st = read_row.convert_from(0, data, tp, _mem_pool.get());
auto tp = get_scalar_type_info(OLAP_FIELD_TYPE_HLL);
OLAPStatus st = read_row.convert_from(0, data, tp.get(), _mem_pool.get());
ASSERT_TRUE(st == OLAP_ERR_INVALID_SCHEMA);
}
@ -397,13 +397,13 @@ TEST_F(TestColumn, ConvertDatetimeToDate) {
_col_vector.reset(new ColumnVector());
ASSERT_EQ(_column_reader->next_vector(_col_vector.get(), 1, _mem_pool.get()), OLAP_SUCCESS);
char* data = reinterpret_cast<char*>(_col_vector->col_data());
read_row.convert_from(0, data, write_row.column_schema(0)->type_info(), _mem_pool.get());
read_row.convert_from(0, data, write_row.column_schema(0)->type_info().get(), _mem_pool.get());
std::string dest_string = read_row.column_schema(0)->to_string(read_row.cell_ptr(0));
ASSERT_TRUE(strncmp(dest_string.c_str(), "2019-11-25", strlen("2019-11-25")) == 0);
//test not support type
TypeInfo* tp = get_type_info(OLAP_FIELD_TYPE_HLL);
OLAPStatus st = read_row.convert_from(0, data, tp, _mem_pool.get());
auto tp = get_type_info(OLAP_FIELD_TYPE_HLL);
OLAPStatus st = read_row.convert_from(0, data, tp.get(), _mem_pool.get());
ASSERT_TRUE(st == OLAP_ERR_INVALID_SCHEMA);
}
@ -442,13 +442,13 @@ TEST_F(TestColumn, ConvertDateToDatetime) {
ASSERT_EQ(_column_reader->next_vector(_col_vector.get(), 1, _mem_pool.get()), OLAP_SUCCESS);
char* data = reinterpret_cast<char*>(_col_vector->col_data());
read_row.set_field_content(0, data, _mem_pool.get());
read_row.convert_from(0, data, write_row.column_schema(0)->type_info(), _mem_pool.get());
read_row.convert_from(0, data, write_row.column_schema(0)->type_info().get(), _mem_pool.get());
std::string dest_string = read_row.column_schema(0)->to_string(read_row.cell_ptr(0));
ASSERT_TRUE(dest_string.compare("2019-12-04 00:00:00") == 0);
//test not support type
TypeInfo* tp = get_type_info(OLAP_FIELD_TYPE_HLL);
OLAPStatus st = read_row.convert_from(0, data, tp, _mem_pool.get());
auto tp = get_type_info(OLAP_FIELD_TYPE_HLL);
OLAPStatus st = read_row.convert_from(0, data, tp.get(), _mem_pool.get());
ASSERT_TRUE(st == OLAP_ERR_INVALID_SCHEMA);
}
@ -484,13 +484,13 @@ TEST_F(TestColumn, ConvertIntToDate) {
_col_vector.reset(new ColumnVector());
ASSERT_EQ(_column_reader->next_vector(_col_vector.get(), 1, _mem_pool.get()), OLAP_SUCCESS);
char* data = reinterpret_cast<char*>(_col_vector->col_data());
read_row.convert_from(0, data, write_row.column_schema(0)->type_info(), _mem_pool.get());
read_row.convert_from(0, data, write_row.column_schema(0)->type_info().get(), _mem_pool.get());
std::string dest_string = read_row.column_schema(0)->to_string(read_row.cell_ptr(0));
ASSERT_TRUE(strncmp(dest_string.c_str(), "2019-12-05", strlen("2019-12-05")) == 0);
//test not support type
TypeInfo* tp = get_type_info(OLAP_FIELD_TYPE_HLL);
OLAPStatus st = read_row.convert_from(0, read_row.cell_ptr(0), tp, _mem_pool.get());
auto tp = get_type_info(OLAP_FIELD_TYPE_HLL);
OLAPStatus st = read_row.convert_from(0, read_row.cell_ptr(0), tp.get(), _mem_pool.get());
ASSERT_TRUE(st == OLAP_ERR_INVALID_SCHEMA);
}
@ -532,7 +532,7 @@ TEST_F(TestColumn, ConvertVarcharToDate) {
_col_vector.reset(new ColumnVector());
ASSERT_EQ(_column_reader->next_vector(_col_vector.get(), 1, _mem_pool.get()), OLAP_SUCCESS);
char* data = reinterpret_cast<char*>(_col_vector->col_data());
read_row.convert_from(0, data, write_row.column_schema(0)->type_info(), _mem_pool.get());
read_row.convert_from(0, data, write_row.column_schema(0)->type_info().get(), _mem_pool.get());
std::string dst_str = read_row.column_schema(0)->to_string(read_row.cell_ptr(0));
ASSERT_EQ(expected_val, dst_str);
}
@ -544,8 +544,8 @@ TEST_F(TestColumn, ConvertVarcharToDate) {
read_row.init(convert_tablet_schema);
//test not support type
TypeInfo* tp = get_type_info(OLAP_FIELD_TYPE_HLL);
OLAPStatus st = read_row.convert_from(0, read_row.cell_ptr(0), tp, _mem_pool.get());
auto tp = get_type_info(OLAP_FIELD_TYPE_HLL);
OLAPStatus st = read_row.convert_from(0, read_row.cell_ptr(0), tp.get(), _mem_pool.get());
ASSERT_EQ(st, OLAP_ERR_INVALID_SCHEMA);
}

View File

@ -33,7 +33,7 @@ public:
template <FieldType field_type>
void common_test(typename TypeTraits<field_type>::CppType src_val) {
TypeInfo* type = get_scalar_type_info(field_type);
auto type = get_scalar_type_info(field_type);
ASSERT_EQ(field_type, type->type());
ASSERT_EQ(sizeof(src_val), type->size());
@ -72,7 +72,7 @@ template <FieldType fieldType>
void test_char(Slice src_val) {
Field* field = FieldFactory::create_by_type(fieldType);
field->_length = src_val.size;
const TypeInfo* type = field->type_info();
auto type = field->type_info();
ASSERT_EQ(field->type(), fieldType);
ASSERT_EQ(sizeof(src_val), type->size());
@ -155,8 +155,7 @@ void common_test_array(CollectionValue src_val) {
TabletColumn item_column(OLAP_FIELD_AGGREGATION_NONE, item_type, true, 0, item_length);
list_column.add_sub_column(item_column);
auto* array_type = dynamic_cast<ArrayTypeInfo*>(get_type_info(&list_column));
auto array_type = dynamic_cast<const ArrayTypeInfo*>(get_type_info(&list_column).get());
ASSERT_EQ(item_type, array_type->item_type_info()->type());
{ // test deep copy

View File

@ -190,7 +190,7 @@ public:
auto tracker = std::make_shared<MemTracker>();
MemPool pool(tracker.get());
TypeInfo* type_info = get_scalar_type_info(OLAP_FIELD_TYPE_VARCHAR);
auto type_info = get_scalar_type_info(OLAP_FIELD_TYPE_VARCHAR);
std::unique_ptr<ColumnVectorBatch> cvb;
ColumnVectorBatch::create(num, false, type_info, nullptr, &cvb);
ColumnBlock column_block(cvb.get(), &pool);