[fix](OrcReader) fix the issue that orc_reader can not read DECIMAL(0,0) type of orc file #41795 (#42220)
cherry pick from #41795 Co-authored-by: Tiewei Fang <43782773+BePPPower@users.noreply.github.com>
This commit is contained in:
@@ -95,6 +95,11 @@ namespace doris::vectorized {
// TODO: we need to determine it by test.
static constexpr uint32_t MAX_DICT_CODE_PREDICATE_TO_REWRITE = std::numeric_limits<uint32_t>::max();
static constexpr char EMPTY_STRING_FOR_OVERFLOW[ColumnString::MAX_STRINGS_OVERFLOW_SIZE] = "";
// Because HIVE 0.11 & 0.12 does not support precision and scale for decimal
// The decimal type of orc file produced by HIVE 0.11 & 0.12 are DECIMAL(0,0)
// We should set a default precision and scale for these orc files.
static constexpr int decimal_precision_for_hive11 = BeConsts::MAX_DECIMAL128_PRECISION;
static constexpr int decimal_scale_for_hive11 = 10;

#define FOR_FLAT_ORC_COLUMNS(M) \
    M(TypeIndex::Int8, Int8, orc::LongVectorBatch) \
@@ -1050,6 +1055,10 @@ TypeDescriptor OrcReader::convert_to_doris_type(const orc::Type* orc_type) {
    case orc::TypeKind::TIMESTAMP:
        return TypeDescriptor(PrimitiveType::TYPE_DATETIMEV2);
    case orc::TypeKind::DECIMAL:
        if (orc_type->getPrecision() == 0) {
            return TypeDescriptor::create_decimalv3_type(decimal_precision_for_hive11,
                                                         decimal_scale_for_hive11);
        }
        return TypeDescriptor::create_decimalv3_type(orc_type->getPrecision(),
                                                     orc_type->getScale());
    case orc::TypeKind::DATE:
@@ -587,7 +587,6 @@ private:
    std::unique_ptr<orc::Reader> _reader;
    std::unique_ptr<orc::RowReader> _row_reader;
    std::unique_ptr<ORCFilterImpl> _orc_filter;
    orc::ReaderOptions _reader_options;
    orc::RowReaderOptions _row_reader_options;

    std::shared_ptr<io::FileSystem> _file_system;
@@ -9,6 +9,13 @@
2014-02-11
8200-02-11

-- !test_2 --
12345678.6547450000
12345678.6547450000
12345678.6547450000
12345678.6547450000
12345678.6547450000

-- !test_3 --
2 foo 0.8 1 1969-12-31T16:00
5 eat 0.8 6 1969-12-31T16:00:20
@@ -21,3 +21,15 @@ row 000009
Alyssa \N [3, 9, 15, 20]
Ben red []

-- !test_4 --
2 foo 0.8 1.2000000000 1969-12-31T16:00
5 eat 0.8 5.5000000000 1969-12-31T16:00:20
13 bar 80.0 2.2000000000 1969-12-31T16:00:05
29 cat 8.0 3.3000000000 1969-12-31T16:00:10
70 dog 1.8 4.4000000000 1969-12-31T16:00:15
100 zebra 8.0 0E-10 1969-12-31T16:04:10
100 zebra 8.0 0E-10 1969-12-31T16:04:10
100 zebra 8.0 0E-10 1969-12-31T16:04:10
100 zebra 8.0 0E-10 1969-12-31T16:04:10
100 zebra 8.0 0E-10 1969-12-31T16:04:10
@@ -41,11 +41,11 @@ suite("test_hdfs_orc_group1_orc_files","external,hive,tvf,external_docker") {

// Doris cannot read this ORC file because of a NOT_IMPLEMENT error.

// uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_orc/group1/orc-file-11-format.orc"
// order_qt_test_2 """ select * from HDFS(
//     "uri" = "${uri}",
//     "hadoop.username" = "${hdfsUserName}",
//     "format" = "orc"); """
uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_orc/group1/orc-file-11-format.orc"
order_qt_test_2 """ select decimal1 from HDFS(
    "uri" = "${uri}",
    "hadoop.username" = "${hdfsUserName}",
    "format" = "orc") limit 5; """


uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_orc/group1/orc_split_elim.orc"
@@ -49,6 +49,12 @@ suite("test_hdfs_orc_group2_orc_files","external,hive,tvf,external_docker") {
    "uri" = "${uri}",
    "hadoop.username" = "${hdfsUserName}",
    "format" = "orc"); """

uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_orc/group2/orc_split_elim.orc"
qt_test_4 """ select * from HDFS(
    "uri" = "${uri}",
    "hadoop.username" = "${hdfsUserName}",
    "format" = "orc") order by userid limit 10; """
} finally {
}
}
Reference in New Issue
Block a user