[pick](Variant) casting to decimal type may lose precision (#39843)
#39650
This commit is contained in:
@ -118,7 +118,7 @@ public:
|
||||
Version version;
|
||||
int64_t tablet_id = 0;
|
||||
// slots that cast may be eliminated in storage layer
|
||||
std::map<std::string, PrimitiveType> target_cast_type_for_variants;
|
||||
std::map<std::string, TypeDescriptor> target_cast_type_for_variants;
|
||||
RowRanges row_ranges;
|
||||
size_t topn_limit = 0;
|
||||
};
|
||||
|
||||
@ -82,7 +82,7 @@ struct RowsetReaderContext {
|
||||
const std::set<int32_t>* output_columns = nullptr;
|
||||
RowsetId rowset_id;
|
||||
// slots that cast may be eliminated in storage layer
|
||||
std::map<std::string, PrimitiveType> target_cast_type_for_variants;
|
||||
std::map<std::string, TypeDescriptor> target_cast_type_for_variants;
|
||||
size_t topn_limit = 0;
|
||||
};
|
||||
|
||||
|
||||
@ -1816,7 +1816,7 @@ bool SegmentIterator::_can_evaluated_by_vectorized(ColumnPredicate* predicate) {
|
||||
if (field_type == FieldType::OLAP_FIELD_TYPE_VARIANT) {
|
||||
// Use variant cast dst type
|
||||
field_type = TabletColumn::get_field_type_by_type(
|
||||
_opts.target_cast_type_for_variants[_schema->column(cid)->name()]);
|
||||
_opts.target_cast_type_for_variants[_schema->column(cid)->name()].type);
|
||||
}
|
||||
switch (predicate->type()) {
|
||||
case PredicateType::EQ:
|
||||
|
||||
@ -272,7 +272,8 @@ TabletColumn TabletReader::materialize_column(const TabletColumn& orig) {
|
||||
}
|
||||
TabletColumn column_with_cast_type = orig;
|
||||
auto cast_type = _reader_context.target_cast_type_for_variants.at(orig.name());
|
||||
column_with_cast_type.set_type(TabletColumn::get_field_type_by_type(cast_type));
|
||||
FieldType filed_type = TabletColumn::get_field_type_by_type(cast_type.type);
|
||||
column_with_cast_type.set_type(filed_type);
|
||||
return column_with_cast_type;
|
||||
}
|
||||
|
||||
|
||||
@ -136,7 +136,7 @@ public:
|
||||
std::vector<FunctionFilter> function_filters;
|
||||
std::vector<RowsetMetaSharedPtr> delete_predicates;
|
||||
// slots that cast may be eliminated in storage layer
|
||||
std::map<std::string, PrimitiveType> target_cast_type_for_variants;
|
||||
std::map<std::string, TypeDescriptor> target_cast_type_for_variants;
|
||||
|
||||
std::vector<RowSetSplits> rs_splits;
|
||||
// For unique key table with merge-on-write
|
||||
|
||||
@ -28,6 +28,7 @@
|
||||
#include "pipeline/exec/meta_scan_operator.h"
|
||||
#include "pipeline/exec/olap_scan_operator.h"
|
||||
#include "pipeline/exec/operator.h"
|
||||
#include "runtime/types.h"
|
||||
#include "util/runtime_profile.h"
|
||||
#include "vec/exec/runtime_filter_consumer.h"
|
||||
#include "vec/exec/scan/pip_scanner_context.h"
|
||||
@ -168,14 +169,14 @@ Status ScanLocalState<Derived>::_normalize_conjuncts() {
|
||||
// The conjuncts is always on output tuple, so use _output_tuple_desc;
|
||||
std::vector<SlotDescriptor*> slots = p._output_tuple_desc->slots();
|
||||
|
||||
auto init_value_range = [&](SlotDescriptor* slot, PrimitiveType type) {
|
||||
switch (type) {
|
||||
#define M(NAME) \
|
||||
case TYPE_##NAME: { \
|
||||
ColumnValueRange<TYPE_##NAME> range(slot->col_name(), slot->is_nullable(), \
|
||||
slot->type().precision, slot->type().scale); \
|
||||
_slot_id_to_value_range[slot->id()] = std::pair {slot, range}; \
|
||||
break; \
|
||||
auto init_value_range = [&](SlotDescriptor* slot, const TypeDescriptor& type_desc) {
|
||||
switch (type_desc.type) {
|
||||
#define M(NAME) \
|
||||
case TYPE_##NAME: { \
|
||||
ColumnValueRange<TYPE_##NAME> range(slot->col_name(), slot->is_nullable(), \
|
||||
type_desc.precision, type_desc.scale); \
|
||||
_slot_id_to_value_range[slot->id()] = std::pair {slot, range}; \
|
||||
break; \
|
||||
}
|
||||
#define APPLY_FOR_PRIMITIVE_TYPE(M) \
|
||||
M(TINYINT) \
|
||||
@ -219,7 +220,7 @@ Status ScanLocalState<Derived>::_normalize_conjuncts() {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
init_value_range(slots[slot_idx], slots[slot_idx]->type().type);
|
||||
init_value_range(slots[slot_idx], slots[slot_idx]->type());
|
||||
}
|
||||
|
||||
get_cast_types_for_variants();
|
||||
@ -631,7 +632,7 @@ Status ScanLocalState<Derived>::_normalize_in_and_eq_predicate(
|
||||
vectorized::VExpr* expr, vectorized::VExprContext* expr_ctx, SlotDescriptor* slot,
|
||||
ColumnValueRange<T>& range, vectorized::VScanNode::PushDownType* pdt) {
|
||||
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range(
|
||||
slot->is_nullable(), slot->type().precision, slot->type().scale);
|
||||
slot->is_nullable(), range.precision(), range.scale());
|
||||
// 1. Normalize in conjuncts like 'where col in (v1, v2, v3)'
|
||||
if (TExprNodeType::IN_PRED == expr->node_type()) {
|
||||
HybridSetBase::IteratorBase* iter = nullptr;
|
||||
@ -787,7 +788,7 @@ Status ScanLocalState<Derived>::_normalize_not_in_and_not_eq_predicate(
|
||||
ColumnValueRange<T>& range, vectorized::VScanNode::PushDownType* pdt) {
|
||||
bool is_fixed_range = range.is_fixed_value_range();
|
||||
auto not_in_range = ColumnValueRange<T>::create_empty_column_value_range(
|
||||
range.column_name(), slot->is_nullable(), slot->type().precision, slot->type().scale);
|
||||
range.column_name(), slot->is_nullable(), range.precision(), range.scale());
|
||||
vectorized::VScanNode::PushDownType temp_pdt =
|
||||
vectorized::VScanNode::PushDownType::UNACCEPTABLE;
|
||||
// 1. Normalize in conjuncts like 'where col in (v1, v2, v3)'
|
||||
@ -969,14 +970,14 @@ Status ScanLocalState<Derived>::_normalize_is_null_predicate(
|
||||
if (reinterpret_cast<vectorized::VectorizedFnCall*>(expr)->fn().name.function_name ==
|
||||
"is_null_pred") {
|
||||
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range(
|
||||
slot->is_nullable(), slot->type().precision, slot->type().scale);
|
||||
slot->is_nullable(), range.precision(), range.scale());
|
||||
temp_range.set_contain_null(true);
|
||||
range.intersection(temp_range);
|
||||
*pdt = temp_pdt;
|
||||
} else if (reinterpret_cast<vectorized::VectorizedFnCall*>(expr)->fn().name.function_name ==
|
||||
"is_not_null_pred") {
|
||||
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range(
|
||||
slot->is_nullable(), slot->type().precision, slot->type().scale);
|
||||
slot->is_nullable(), range.precision(), range.scale());
|
||||
temp_range.set_contain_null(false);
|
||||
range.intersection(temp_range);
|
||||
*pdt = temp_pdt;
|
||||
@ -1216,7 +1217,7 @@ Status ScanLocalState<Derived>::_normalize_match_predicate(
|
||||
|
||||
// create empty range as temp range, temp range should do intersection on range
|
||||
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range(
|
||||
slot->is_nullable(), slot->type().precision, slot->type().scale);
|
||||
slot->is_nullable(), range.precision(), range.scale());
|
||||
// Normalize match conjuncts like 'where col match value'
|
||||
|
||||
auto match_checker = [](const std::string& fn_name) { return is_match_condition(fn_name); };
|
||||
@ -1361,7 +1362,7 @@ Status ScanLocalState<Derived>::_init_profile() {
|
||||
template <typename Derived>
|
||||
void ScanLocalState<Derived>::_filter_and_collect_cast_type_for_variant(
|
||||
const vectorized::VExpr* expr,
|
||||
phmap::flat_hash_map<std::string, std::vector<PrimitiveType>>& colname_to_cast_types) {
|
||||
std::unordered_map<std::string, std::vector<TypeDescriptor>>& colname_to_cast_types) {
|
||||
const auto* cast_expr = dynamic_cast<const vectorized::VCastExpr*>(expr);
|
||||
if (cast_expr != nullptr) {
|
||||
const auto* src_slot =
|
||||
@ -1373,10 +1374,9 @@ void ScanLocalState<Derived>::_filter_and_collect_cast_type_for_variant(
|
||||
}
|
||||
std::vector<SlotDescriptor*> slots = output_tuple_desc()->slots();
|
||||
SlotDescriptor* src_slot_desc = _slot_id_to_slot_desc[src_slot->slot_id()];
|
||||
PrimitiveType cast_dst_type =
|
||||
cast_expr->get_target_type()->get_type_as_type_descriptor().type;
|
||||
TypeDescriptor type_desc = cast_expr->get_target_type()->get_type_as_type_descriptor();
|
||||
if (src_slot_desc->type().is_variant_type()) {
|
||||
colname_to_cast_types[src_slot_desc->col_name()].push_back(cast_dst_type);
|
||||
colname_to_cast_types[src_slot_desc->col_name()].push_back(type_desc);
|
||||
}
|
||||
}
|
||||
for (const auto& child : expr->children()) {
|
||||
@ -1386,7 +1386,7 @@ void ScanLocalState<Derived>::_filter_and_collect_cast_type_for_variant(
|
||||
|
||||
template <typename Derived>
|
||||
void ScanLocalState<Derived>::get_cast_types_for_variants() {
|
||||
phmap::flat_hash_map<std::string, std::vector<PrimitiveType>> colname_to_cast_types;
|
||||
std::unordered_map<std::string, std::vector<TypeDescriptor>> colname_to_cast_types;
|
||||
for (auto it = _conjuncts.begin(); it != _conjuncts.end();) {
|
||||
auto& conjunct = *it;
|
||||
if (conjunct->root()) {
|
||||
|
||||
@ -27,6 +27,7 @@
|
||||
#include "pipeline/pipeline_x/dependency.h"
|
||||
#include "pipeline/pipeline_x/operator.h"
|
||||
#include "runtime/descriptors.h"
|
||||
#include "runtime/types.h"
|
||||
#include "vec/exec/scan/vscan_node.h"
|
||||
|
||||
namespace doris {
|
||||
@ -329,7 +330,7 @@ protected:
|
||||
void get_cast_types_for_variants();
|
||||
void _filter_and_collect_cast_type_for_variant(
|
||||
const vectorized::VExpr* expr,
|
||||
phmap::flat_hash_map<std::string, std::vector<PrimitiveType>>& colname_to_cast_types);
|
||||
std::unordered_map<std::string, std::vector<TypeDescriptor>>& colname_to_cast_types);
|
||||
|
||||
// Every time vconjunct_ctx_ptr is updated, the old ctx will be stored in this vector
|
||||
// so that it will be destroyed uniformly at the end of the query.
|
||||
@ -344,7 +345,7 @@ protected:
|
||||
std::vector<FunctionFilter> _push_down_functions;
|
||||
|
||||
// colname -> cast dst type
|
||||
std::map<std::string, PrimitiveType> _cast_types_for_variants;
|
||||
std::map<std::string, TypeDescriptor> _cast_types_for_variants;
|
||||
|
||||
// slot id -> SlotDescriptor
|
||||
phmap::flat_hash_map<int, SlotDescriptor*> _slot_id_to_slot_desc;
|
||||
|
||||
@ -423,7 +423,7 @@ std::string NewOlapScanNode::get_name() {
|
||||
|
||||
void NewOlapScanNode::_filter_and_collect_cast_type_for_variant(
|
||||
const VExpr* expr,
|
||||
phmap::flat_hash_map<std::string, std::vector<PrimitiveType>>& colname_to_cast_types) {
|
||||
phmap::flat_hash_map<std::string, std::vector<TypeDescriptor>>& colname_to_cast_types) {
|
||||
auto* cast_expr = dynamic_cast<const VCastExpr*>(expr);
|
||||
if (cast_expr != nullptr) {
|
||||
auto* src_slot = cast_expr->get_child(0)->node_type() == TExprNodeType::SLOT_REF
|
||||
@ -446,7 +446,7 @@ void NewOlapScanNode::_filter_and_collect_cast_type_for_variant(
|
||||
}
|
||||
|
||||
void NewOlapScanNode::get_cast_types_for_variants() {
|
||||
phmap::flat_hash_map<std::string, std::vector<PrimitiveType>> colname_to_cast_types;
|
||||
phmap::flat_hash_map<std::string, std::vector<TypeDescriptor>> colname_to_cast_types;
|
||||
for (auto it = _conjuncts.begin(); it != _conjuncts.end();) {
|
||||
auto& conjunct = *it;
|
||||
if (conjunct->root()) {
|
||||
|
||||
@ -106,7 +106,7 @@ protected:
|
||||
void get_cast_types_for_variants() override;
|
||||
void _filter_and_collect_cast_type_for_variant(
|
||||
const VExpr* expr,
|
||||
phmap::flat_hash_map<std::string, std::vector<PrimitiveType>>& colname_to_cast_types);
|
||||
phmap::flat_hash_map<std::string, std::vector<TypeDescriptor>>& colname_to_cast_types);
|
||||
|
||||
private:
|
||||
Status _build_key_ranges_and_filters();
|
||||
|
||||
@ -342,14 +342,14 @@ Status VScanNode::_normalize_conjuncts() {
|
||||
// The conjuncts is always on output tuple, so use _output_tuple_desc;
|
||||
std::vector<SlotDescriptor*> slots = _output_tuple_desc->slots();
|
||||
|
||||
auto init_value_range = [&](SlotDescriptor* slot, PrimitiveType type) {
|
||||
switch (type) {
|
||||
#define M(NAME) \
|
||||
case TYPE_##NAME: { \
|
||||
ColumnValueRange<TYPE_##NAME> range(slot->col_name(), slot->is_nullable(), \
|
||||
slot->type().precision, slot->type().scale); \
|
||||
_slot_id_to_value_range[slot->id()] = std::pair {slot, range}; \
|
||||
break; \
|
||||
auto init_value_range = [&](SlotDescriptor* slot, TypeDescriptor type) {
|
||||
switch (type.type) {
|
||||
#define M(NAME) \
|
||||
case TYPE_##NAME: { \
|
||||
ColumnValueRange<TYPE_##NAME> range(slot->col_name(), slot->is_nullable(), type.precision, \
|
||||
type.scale); \
|
||||
_slot_id_to_value_range[slot->id()] = std::pair {slot, range}; \
|
||||
break; \
|
||||
}
|
||||
#define APPLY_FOR_PRIMITIVE_TYPE(M) \
|
||||
M(TINYINT) \
|
||||
|
||||
@ -41,6 +41,7 @@
|
||||
#include "runtime/define_primitive_type.h"
|
||||
#include "runtime/query_context.h"
|
||||
#include "runtime/runtime_state.h"
|
||||
#include "runtime/types.h"
|
||||
#include "util/runtime_profile.h"
|
||||
#include "vec/exec/runtime_filter_consumer.h"
|
||||
#include "vec/exec/scan/scanner_context.h"
|
||||
@ -308,7 +309,7 @@ protected:
|
||||
std::vector<FunctionFilter> _push_down_functions;
|
||||
|
||||
// colname -> cast dst type
|
||||
std::map<std::string, PrimitiveType> _cast_types_for_variants;
|
||||
std::map<std::string, TypeDescriptor> _cast_types_for_variants;
|
||||
|
||||
// slot id -> ColumnValueRange
|
||||
// Parsed from conjuncts
|
||||
|
||||
@ -78,3 +78,15 @@ user
|
||||
user
|
||||
user
|
||||
|
||||
-- !implicit_cast_14 --
|
||||
14690746673
|
||||
14690746676
|
||||
14690746679
|
||||
14690746680
|
||||
14690746681
|
||||
14690746684
|
||||
14690746685
|
||||
14690746687
|
||||
14690746688
|
||||
14690746689
|
||||
|
||||
|
||||
@ -12,4 +12,6 @@ SELECT v["payload"]["member"]["id"] FROM ghdata where v["payload"]["member"]["id
|
||||
select k, json_extract(v, '$.repo') from ghdata WHERE v["type"] = 'WatchEvent' order by k limit 10;
|
||||
-- SELECT v["payload"]["member"]["id"], count() FROM ghdata where v["payload"]["member"]["id"] is not null group by v["payload"]["member"]["id"] order by 1, 2 desc LIMIT 10;
|
||||
select k, v["id"], v["type"], v["repo"]["name"] from ghdata WHERE v["type"] = 'WatchEvent' order by k limit 10;
|
||||
SELECT v["payload"]["pusher_type"] FROM ghdata where v["payload"]["pusher_type"] is not null ORDER BY k LIMIT 10;
|
||||
SELECT v["payload"]["pusher_type"] FROM ghdata where v["payload"]["pusher_type"] is not null ORDER BY k LIMIT 10;
|
||||
-- implicit cast to decimal type
|
||||
SELECT v["id"] FROM ghdata where v["id"] not in (7273, 10.118626, -69352) order by cast(v["id"] as decimal) limit 10;
|
||||
Reference in New Issue
Block a user