[feature-wip](multi-catalog) implement predicate pushdown in native OrcReader (#13453)

# Proposed changes
Implement predicate pushdown in `OrcReader` by converting doris `ColumnValueRange` to orc `SearchArgument`.

## Remaining problems
1. Orc support `not in`, which may have effect on bloom filter. However, doris `ScanNode` has not push down `not in` to file scanner.
2. Orc support `is null`, and row range has `hasNull` identifier. However,  `_contain_null` in `ColumnValueRange` is ambiguous. `_contain_null = true` only means that the value can be nullable, not equal to null.
3. `DateTimeV2` has lost microsecond precision in `ColumnValueRange`, which may cause filtering error when a min-max value equals to the predicate value.
4. `DateTimeV1`  is not accurate enough, and only saved to seconds.
5. Orc support the predicate pushdown of `float&double` type, but doris has not push down `float&double` type for precision reason.
This commit is contained in:
Ashin Gau
2022-10-20 10:07:36 +08:00
committed by GitHub
parent 8637ac1ca3
commit f7c69ade18
3 changed files with 282 additions and 3 deletions

View File

@ -254,6 +254,8 @@ public:
_contain_null = contain_null;
};
int precision() const { return _precision; }
int scale() const { return _scale; }
static void add_fixed_value_range(ColumnValueRange<primitive_type>& range, CppType* value) {

View File

@ -17,6 +17,10 @@
#include "vorc_reader.h"
#include <tuple>
#include "cctz/civil_time.h"
#include "cctz/time_zone.h"
#include "gutil/strings/substitute.h"
#include "io/file_factory.h"
#include "vec/columns/column_array.h"
@ -145,7 +149,7 @@ Status OrcReader::init_reader(
_row_reader_options.range(_range_start_offset, _range_size);
_row_reader_options.setTimezoneName(_ctz);
RETURN_IF_ERROR(_init_read_columns());
// _init_search_argument(colname_to_value_range);
_init_search_argument(colname_to_value_range);
_row_reader_options.include(_read_cols);
try {
_row_reader = _reader->createRowReader(_row_reader_options);
@ -181,11 +185,282 @@ Status OrcReader::_init_read_columns() {
return Status::OK();
}
struct OrcPredicate {
std::string col_name;
orc::PredicateDataType data_type;
std::vector<orc::Literal> literals;
SQLFilterOp op;
};
// orc only support LONG, FLOAT, STRING, DATE, DECIMAL, TIMESTAMP, BOOLEAN to push down predicates
static std::unordered_map<orc::TypeKind, orc::PredicateDataType> TYPEKIND_TO_PREDICATE_TYPE = {
{orc::TypeKind::BYTE, orc::PredicateDataType::LONG},
{orc::TypeKind::SHORT, orc::PredicateDataType::LONG},
{orc::TypeKind::INT, orc::PredicateDataType::LONG},
{orc::TypeKind::LONG, orc::PredicateDataType::LONG},
{orc::TypeKind::FLOAT, orc::PredicateDataType::FLOAT},
{orc::TypeKind::DOUBLE, orc::PredicateDataType::FLOAT},
{orc::TypeKind::STRING, orc::PredicateDataType::STRING},
{orc::TypeKind::BINARY, orc::PredicateDataType::STRING},
{orc::TypeKind::CHAR, orc::PredicateDataType::STRING},
{orc::TypeKind::VARCHAR, orc::PredicateDataType::STRING},
{orc::TypeKind::DATE, orc::PredicateDataType::DATE},
{orc::TypeKind::DECIMAL, orc::PredicateDataType::DECIMAL},
{orc::TypeKind::TIMESTAMP, orc::PredicateDataType::TIMESTAMP},
{orc::TypeKind::BOOLEAN, orc::PredicateDataType::BOOLEAN}};
template <typename CppType>
static std::tuple<bool, orc::Literal> convert_to_orc_literal(const orc::Type* type,
const void* value, int precision,
int scale) {
try {
switch (type->getKind()) {
case orc::TypeKind::BOOLEAN:
return std::make_tuple(true, orc::Literal(bool(*((uint8_t*)value))));
case orc::TypeKind::BYTE:
return std::make_tuple(true, orc::Literal(int64_t(*((int8_t*)value))));
case orc::TypeKind::SHORT:
return std::make_tuple(true, orc::Literal(int64_t(*((int16_t*)value))));
case orc::TypeKind::INT:
return std::make_tuple(true, orc::Literal(int64_t(*((int32_t*)value))));
case orc::TypeKind::LONG:
return std::make_tuple(true, orc::Literal(*((int64_t*)value)));
case orc::TypeKind::FLOAT:
return std::make_tuple(true, orc::Literal(double(*((float*)value))));
case orc::TypeKind::DOUBLE:
return std::make_tuple(true, orc::Literal(*((double*)value)));
case orc::TypeKind::STRING:
case orc::TypeKind::BINARY:
case orc::TypeKind::CHAR:
case orc::TypeKind::VARCHAR: {
StringValue* string_value = (StringValue*)value;
return std::make_tuple(true, orc::Literal(string_value->ptr, string_value->len));
}
case orc::TypeKind::DECIMAL: {
int128_t decimal_value;
if constexpr (std::is_same_v<CppType, DecimalV2Value>) {
decimal_value = *reinterpret_cast<const int128_t*>(value);
precision = DecimalV2Value::PRECISION;
scale = DecimalV2Value::SCALE;
} else if constexpr (std::is_same_v<CppType, int32_t>) {
decimal_value = *((int32_t*)value);
} else if constexpr (std::is_same_v<CppType, int64_t>) {
decimal_value = *((int64_t*)value);
} else {
decimal_value = *((int128_t*)value);
}
return std::make_tuple(true, orc::Literal(orc::Int128(uint64_t(decimal_value >> 64),
uint64_t(decimal_value)),
precision, scale));
}
case orc::TypeKind::DATE: {
int64_t day_offset;
static const cctz::time_zone utc0 = cctz::utc_time_zone();
if constexpr (std::is_same_v<CppType, DateTimeValue>) {
const DateTimeValue date_v1 = *reinterpret_cast<const DateTimeValue*>(value);
cctz::civil_day civil_date(date_v1.year(), date_v1.month(), date_v1.day());
day_offset =
cctz::convert(civil_date, utc0).time_since_epoch().count() / (24 * 60 * 60);
} else {
const DateV2Value<DateV2ValueType> date_v2 =
*reinterpret_cast<const DateV2Value<DateV2ValueType>*>(value);
cctz::civil_day civil_date(date_v2.year(), date_v2.month(), date_v2.day());
day_offset =
cctz::convert(civil_date, utc0).time_since_epoch().count() / (24 * 60 * 60);
}
return std::make_tuple(true, orc::Literal(orc::PredicateDataType::DATE, day_offset));
}
case orc::TypeKind::TIMESTAMP: {
int64_t seconds;
int32_t nanos;
static const cctz::time_zone utc0 = cctz::utc_time_zone();
// TODO: ColumnValueRange has lost the precision of microsecond
if constexpr (std::is_same_v<CppType, DateTimeValue>) {
const DateTimeValue datetime_v1 = *reinterpret_cast<const DateTimeValue*>(value);
cctz::civil_second civil_seconds(datetime_v1.year(), datetime_v1.month(),
datetime_v1.day(), datetime_v1.hour(),
datetime_v1.minute(), datetime_v1.second());
seconds = cctz::convert(civil_seconds, utc0).time_since_epoch().count();
nanos = datetime_v1.microsecond() * 1000;
} else {
const DateV2Value<DateTimeV2ValueType> datetime_v2 =
*reinterpret_cast<const DateV2Value<DateTimeV2ValueType>*>(value);
cctz::civil_second civil_seconds(datetime_v2.year(), datetime_v2.month(),
datetime_v2.day(), datetime_v2.hour(),
datetime_v2.minute(), datetime_v2.second());
seconds = cctz::convert(civil_seconds, utc0).time_since_epoch().count();
nanos = datetime_v2.microsecond() * 1000;
}
return std::make_tuple(true, orc::Literal(seconds, nanos));
}
default:
return std::make_tuple(false, orc::Literal(false));
}
} catch (Exception& e) {
// When table schema changed, and using new schema to read old data.
LOG(WARNING) << "Failed to convert doris value to orc predicate literal, error = "
<< e.what();
return std::make_tuple(false, orc::Literal(false));
}
}
template <PrimitiveType primitive_type>
static std::vector<OrcPredicate> value_range_to_predicate(
const ColumnValueRange<primitive_type>& col_val_range, const orc::Type* type) {
using CppType = typename PrimitiveTypeTraits<primitive_type>::CppType;
std::vector<OrcPredicate> predicates;
orc::PredicateDataType predicate_data_type;
auto type_it = TYPEKIND_TO_PREDICATE_TYPE.find(type->getKind());
if (type_it == TYPEKIND_TO_PREDICATE_TYPE.end()) {
// Unsupported type
return predicates;
} else {
predicate_data_type = type_it->second;
}
if (col_val_range.is_fixed_value_range()) {
OrcPredicate in_predicate;
in_predicate.col_name = col_val_range.column_name();
in_predicate.data_type = predicate_data_type;
in_predicate.op = SQLFilterOp::FILTER_IN;
for (const auto& value : col_val_range.get_fixed_value_set()) {
auto [valid, literal] = convert_to_orc_literal<CppType>(
type, &value, col_val_range.precision(), col_val_range.scale());
if (valid) {
in_predicate.literals.push_back(literal);
}
}
if (!in_predicate.literals.empty()) {
predicates.emplace_back(in_predicate);
}
return predicates;
}
const auto& high_value = col_val_range.get_range_max_value();
const auto& low_value = col_val_range.get_range_min_value();
const auto& high_op = col_val_range.get_range_high_op();
const auto& low_op = col_val_range.get_range_low_op();
// orc can only push down is_null. When col_value_range._contain_null = true, only indicating that
// value can be null, not equals null, so ignore _contain_null in col_value_range
if (col_val_range.is_high_value_maximum() && high_op == SQLFilterOp::FILTER_LESS_OR_EQUAL &&
col_val_range.is_low_value_mininum() && low_op == SQLFilterOp::FILTER_LARGER_OR_EQUAL) {
return predicates;
}
if (low_value < high_value) {
if (!col_val_range.is_low_value_mininum() ||
SQLFilterOp::FILTER_LARGER_OR_EQUAL != low_op) {
auto [valid, low_literal] = convert_to_orc_literal<CppType>(
type, &low_value, col_val_range.precision(), col_val_range.scale());
if (valid) {
OrcPredicate low_predicate;
low_predicate.col_name = col_val_range.column_name();
low_predicate.data_type = predicate_data_type;
low_predicate.op = low_op;
low_predicate.literals.emplace_back(low_literal);
predicates.emplace_back(low_predicate);
}
}
if (!col_val_range.is_high_value_maximum() ||
SQLFilterOp::FILTER_LESS_OR_EQUAL != high_op) {
auto [valid, high_literal] = convert_to_orc_literal<CppType>(
type, &high_value, col_val_range.precision(), col_val_range.scale());
if (valid) {
OrcPredicate high_predicate;
high_predicate.col_name = col_val_range.column_name();
high_predicate.data_type = predicate_data_type;
high_predicate.op = high_op;
high_predicate.literals.emplace_back(high_literal);
predicates.emplace_back(high_predicate);
}
}
}
return predicates;
}
bool static build_search_argument(std::vector<OrcPredicate>& predicates, int index,
std::unique_ptr<orc::SearchArgumentBuilder>& builder) {
if (index >= predicates.size()) {
return false;
}
if (index < predicates.size() - 1) {
builder->startAnd();
}
OrcPredicate& predicate = predicates[index];
switch (predicate.op) {
case SQLFilterOp::FILTER_IN: {
if (predicate.literals.size() == 1) {
builder->equals(predicate.col_name, predicate.data_type, predicate.literals[0]);
} else {
builder->in(predicate.col_name, predicate.data_type, predicate.literals);
}
break;
}
case SQLFilterOp::FILTER_LESS:
builder->lessThan(predicate.col_name, predicate.data_type, predicate.literals[0]);
break;
case SQLFilterOp::FILTER_LESS_OR_EQUAL:
builder->lessThanEquals(predicate.col_name, predicate.data_type, predicate.literals[0]);
break;
case SQLFilterOp::FILTER_LARGER: {
builder->startNot();
builder->lessThanEquals(predicate.col_name, predicate.data_type, predicate.literals[0]);
builder->end();
break;
}
case SQLFilterOp::FILTER_LARGER_OR_EQUAL: {
builder->startNot();
builder->lessThan(predicate.col_name, predicate.data_type, predicate.literals[0]);
builder->end();
break;
}
default:
return false;
}
if (index < predicates.size() - 1) {
bool can_build = build_search_argument(predicates, index + 1, builder);
if (!can_build) {
return false;
}
builder->end();
}
return true;
}
void OrcReader::_init_search_argument(
std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range) {
if (colname_to_value_range->empty()) {
return;
}
std::vector<OrcPredicate> predicates;
auto& root_type = _reader->getType();
std::unordered_map<std::string, const orc::Type*> type_map;
for (int i = 0; i < root_type.getSubtypeCount(); ++i) {
type_map.emplace(root_type.getFieldName(i), root_type.getSubtype(i));
}
for (auto it = colname_to_value_range->begin(); it != colname_to_value_range->end(); ++it) {
auto type_it = type_map.find(it->first);
if (type_it != type_map.end()) {
std::visit(
[&](auto& range) {
std::vector<OrcPredicate> value_predicates =
value_range_to_predicate(range, type_it->second);
for (auto& range_predicate : value_predicates) {
predicates.emplace_back(range_predicate);
}
},
it->second);
}
}
if (predicates.empty()) {
return;
}
std::unique_ptr<orc::SearchArgumentBuilder> builder = orc::SearchArgumentFactory::newBuilder();
// generate min-max search argument
_row_reader_options.searchArgument(builder->build());
if (build_search_argument(predicates, 0, builder)) {
std::unique_ptr<orc::SearchArgument> sargs = builder->build();
_row_reader_options.searchArgument(std::move(sargs));
}
}
void OrcReader::_init_bloom_filter(
@ -217,6 +492,7 @@ TypeDescriptor OrcReader::_convert_to_doris_type(const orc::Type* orc_type) {
case orc::TypeKind::TIMESTAMP:
return TypeDescriptor(PrimitiveType::TYPE_DATETIMEV2);
case orc::TypeKind::DECIMAL:
// TODO: using decimal v3 instead
return TypeDescriptor(PrimitiveType::TYPE_DECIMALV2);
case orc::TypeKind::DATE:
return TypeDescriptor(PrimitiveType::TYPE_DATEV2);

View File

@ -266,6 +266,7 @@ Status FixLengthDecoder::_decode_datetime64(MutableColumnPtr& doris_column, Type
if constexpr (std::is_same_v<CppType, DateV2Value<DateTimeV2ValueType>>) {
// nanoseconds will be ignored.
v.set_microsecond((date_value % _decode_params->second_mask) * scale_to_micro);
// TODO: the precision of datetime v1
}
_FIXED_SHIFT_DATA_OFFSET();
}