From d1e91320ec0bc077ffb9d8ad3412d2fe59bdfc4e Mon Sep 17 00:00:00 2001 From: wjhh2008 Date: Mon, 24 Jun 2024 06:19:18 +0000 Subject: [PATCH] [FEAT MERGE] Support Parquet Format for External Table --- deps/init/oceanbase.el7.aarch64.deps | 1 + deps/init/oceanbase.el7.x86_64.deps | 1 + deps/init/oceanbase.el8.aarch64.deps | 1 + deps/init/oceanbase.el8.x86_64.deps | 1 + deps/init/oceanbase.el9.x86_64.deps | 1 + deps/oblib/src/CMakeLists.txt | 7 + deps/oblib/src/lib/ob_name_def.h | 3 + deps/oblib/unittest/lib/CMakeLists.txt | 1 + .../unittest/lib/parquet/test_parquet.cpp | 717 +++++++++ .../ob_external_table_utils.cpp | 4 +- .../external_table/ob_external_table_utils.h | 1 + .../ob_inner_table_schema.21401_21450.cpp | 6 +- .../ob_inner_table_schema.25201_25250.cpp | 4 +- .../inner_table/ob_inner_table_schema_def.py | 15 +- src/share/schema/ob_schema_printer.cpp | 28 +- src/sql/CMakeLists.txt | 2 + .../ob_static_engine_expr_cg.cpp | 20 + src/sql/engine/cmd/ob_load_data_parser.cpp | 51 +- src/sql/engine/cmd/ob_load_data_parser.h | 16 +- src/sql/engine/expr/ob_expr_cast.cpp | 2 +- src/sql/engine/expr/ob_expr_cast.h | 8 +- .../expr/ob_expr_extra_info_factory.cpp | 8 +- .../engine/expr/ob_expr_extra_info_factory.h | 9 +- src/sql/engine/expr/ob_expr_get_path.cpp | 45 + src/sql/engine/expr/ob_expr_get_path.h | 67 + src/sql/engine/expr/ob_expr_inner_trim.cpp | 1 + .../engine/expr/ob_expr_operator_factory.cpp | 3 + src/sql/engine/expr/ob_expr_trim.cpp | 1 + .../engine/expr/vector_cast/vector_cast.cpp | 3 +- .../ob_external_table_access_service.cpp | 127 +- .../table/ob_external_table_access_service.h | 37 +- .../table/ob_parquet_table_row_iter.cpp | 1409 +++++++++++++++++ .../engine/table/ob_parquet_table_row_iter.h | 236 +++ src/sql/printer/ob_raw_expr_printer.cpp | 1 + .../resolver/ddl/ob_alter_table_resolver.cpp | 3 +- .../resolver/ddl/ob_create_table_resolver.cpp | 48 +- .../resolver/ddl/ob_create_table_resolver.h | 1 + src/sql/resolver/ddl/ob_ddl_resolver.cpp | 28 +- 
src/sql/resolver/ddl/ob_ddl_resolver.h | 3 +- src/sql/resolver/dml/ob_dml_resolver.cpp | 160 +- src/sql/resolver/dml/ob_dml_resolver.h | 7 + src/sql/resolver/expr/ob_raw_expr.cpp | 5 +- src/sql/resolver/expr/ob_raw_expr.h | 6 +- src/sql/resolver/expr/ob_raw_expr_util.cpp | 2 +- src/sql/resolver/ob_resolver_utils.cpp | 373 ++++- src/sql/resolver/ob_resolver_utils.h | 55 +- .../r/mysql/desc_sys_views_in_mysql.result | 4 +- .../r/mysql/desc_sys_views_in_sys.result | 6 +- 48 files changed, 3321 insertions(+), 217 deletions(-) create mode 100644 deps/oblib/unittest/lib/parquet/test_parquet.cpp create mode 100644 src/sql/engine/expr/ob_expr_get_path.cpp create mode 100644 src/sql/engine/expr/ob_expr_get_path.h create mode 100644 src/sql/engine/table/ob_parquet_table_row_iter.cpp create mode 100644 src/sql/engine/table/ob_parquet_table_row_iter.h diff --git a/deps/init/oceanbase.el7.aarch64.deps b/deps/init/oceanbase.el7.aarch64.deps index ea49e7f8d..d43df008e 100644 --- a/deps/init/oceanbase.el7.aarch64.deps +++ b/deps/init/oceanbase.el7.aarch64.deps @@ -32,6 +32,7 @@ devdeps-cos-c-sdk-5.0.16-52023070517.el7.aarch64.rpm devdeps-s3-cpp-sdk-1.11.156-102023122011.el7.aarch64.rpm devdeps-protobuf-c-1.4.1-100000072023102410.el7.aarch64.rpm devdeps-roaringbitmap-croaring-3.0.0-42024042816.el7.aarch64.rpm +devdeps-apache-arrow-9.0.0-302024052920.el7.aarch64.rpm [tools] obdevtools-binutils-2.30-12022100413.el7.aarch64.rpm diff --git a/deps/init/oceanbase.el7.x86_64.deps b/deps/init/oceanbase.el7.x86_64.deps index 619266d95..c2c09000e 100644 --- a/deps/init/oceanbase.el7.x86_64.deps +++ b/deps/init/oceanbase.el7.x86_64.deps @@ -35,6 +35,7 @@ devdeps-cloud-qpl-1.1.0-272023061419.el7.x86_64.rpm devdeps-s3-cpp-sdk-1.11.156-102023122011.el7.x86_64.rpm devdeps-protobuf-c-1.4.1-100000062023102016.el7.x86_64.rpm devdeps-roaringbitmap-croaring-3.0.0-42024042816.el7.x86_64.rpm +devdeps-apache-arrow-9.0.0-222024052223.el7.x86_64.rpm [tools] 
obdevtools-binutils-2.30-12022100413.el7.x86_64.rpm diff --git a/deps/init/oceanbase.el8.aarch64.deps b/deps/init/oceanbase.el8.aarch64.deps index 6600a57e3..22f83d345 100644 --- a/deps/init/oceanbase.el8.aarch64.deps +++ b/deps/init/oceanbase.el8.aarch64.deps @@ -32,6 +32,7 @@ devdeps-cos-c-sdk-5.0.16-52023070517.el8.aarch64.rpm devdeps-s3-cpp-sdk-1.11.156-102023122011.el8.aarch64.rpm devdeps-protobuf-c-1.4.1-100000072023102410.el8.aarch64.rpm devdeps-roaringbitmap-croaring-3.0.0-42024042816.el8.aarch64.rpm +devdeps-apache-arrow-9.0.0-322024052923.el8.aarch64.rpm [tools] obdevtools-binutils-2.30-12022100413.el8.aarch64.rpm diff --git a/deps/init/oceanbase.el8.x86_64.deps b/deps/init/oceanbase.el8.x86_64.deps index 9a231238b..4547efcef 100644 --- a/deps/init/oceanbase.el8.x86_64.deps +++ b/deps/init/oceanbase.el8.x86_64.deps @@ -34,6 +34,7 @@ devdeps-cloud-qpl-1.1.0-272023061419.el8.x86_64.rpm devdeps-s3-cpp-sdk-1.11.156-102023122011.el8.x86_64.rpm devdeps-protobuf-c-1.4.1-100000062023102016.el8.x86_64.rpm devdeps-roaringbitmap-croaring-3.0.0-42024042816.el8.x86_64.rpm +devdeps-apache-arrow-9.0.0-172024052218.el8.x86_64.rpm [tools] obdevtools-binutils-2.30-12022100413.el8.x86_64.rpm diff --git a/deps/init/oceanbase.el9.x86_64.deps b/deps/init/oceanbase.el9.x86_64.deps index 6ab1c796b..e6fca5466 100644 --- a/deps/init/oceanbase.el9.x86_64.deps +++ b/deps/init/oceanbase.el9.x86_64.deps @@ -37,6 +37,7 @@ devdeps-cos-c-sdk-5.0.16-52023070517.el8.x86_64.rpm devdeps-cloud-qpl-1.1.0-272023061419.el8.x86_64.rpm devdeps-s3-cpp-sdk-1.11.156-102023122011.el8.x86_64.rpm devdeps-protobuf-c-1.4.1-100000062023102016.el8.x86_64.rpm +devdeps-apache-arrow-9.0.0-172024052218.el8.x86_64.rpm [deps-el9] devdeps-apr-1.6.5-232023090616.el9.x86_64.rpm target=el9 diff --git a/deps/oblib/src/CMakeLists.txt b/deps/oblib/src/CMakeLists.txt index 8af7aa6fb..2f3d48e07 100644 --- a/deps/oblib/src/CMakeLists.txt +++ b/deps/oblib/src/CMakeLists.txt @@ -22,6 +22,7 @@ target_include_directories( 
${DEP_3RD_DIR}/usr/include/ ${DEP_DIR}/include/apr-1/ ${DEP_DIR}/include/icu/common + ${DEP_DIR}/include/apache-arrow ${USSL_INCLUDE_DIRS} ) @@ -200,6 +201,9 @@ target_link_libraries(oblib_base_base_base ${DEP_DIR}/lib/libicustubdata.a ${DEP_DIR}/lib/libicuuc.a ${DEP_DIR}/lib/libprotobuf-c.a + ${DEP_DIR}/lib64/libarrow.a + ${DEP_DIR}/lib64/libparquet.a + ${DEP_DIR}/lib64/libarrow_bundled_dependencies.a -L${DEP_DIR}/var/usr/lib64 -L${DEP_DIR}/var/usr/lib -L${DEP_3RD_DIR}/usr/lib @@ -226,6 +230,9 @@ target_link_libraries(oblib_base_base_base ${DEP_DIR}/lib/libicustubdata.a ${DEP_DIR}/lib/libicuuc.a ${DEP_DIR}/lib/libprotobuf-c.a + ${DEP_DIR}/lib64/libarrow.a + ${DEP_DIR}/lib64/libparquet.a + ${DEP_DIR}/lib64/libarrow_bundled_dependencies.a -L${DEP_DIR}/var/usr/lib64 -L${DEP_DIR}/var/usr/lib -L${DEP_3RD_DIR}/usr/lib diff --git a/deps/oblib/src/lib/ob_name_def.h b/deps/oblib/src/lib/ob_name_def.h index e4359c341..19f34f529 100644 --- a/deps/oblib/src/lib/ob_name_def.h +++ b/deps/oblib/src/lib/ob_name_def.h @@ -1068,6 +1068,8 @@ #define N_EXTERNAL_FILE_COLUMN_PREFIX "metadata$filecol" #define N_PARTITION_LIST_COL "metadata$partition_list_col" #define N_EXTERNAL_FILE_URL "metadata$fileurl" +#define N_EXTERNAL_FILE_ROW "external$filerow" + #define N_PREFIX_PATTERN "prefix_pattern" #define N_PRIV_XML_BINARY "_make_xml_binary" #define N_SYS_MAKEXML "sys_makexml" @@ -1148,4 +1150,5 @@ #define N_RB_ANDNOT_NULL2EMPTY "rb_andnot_null2empty" #define N_RB_TO_STRING "rb_to_string" #define N_RB_FROM_STRING "rb_from_string" +#define N_GET_PATH "get_path" #endif //OCEANBASE_LIB_OB_NAME_DEF_H_ diff --git a/deps/oblib/unittest/lib/CMakeLists.txt b/deps/oblib/unittest/lib/CMakeLists.txt index 1d9b22949..f7bfd2a08 100644 --- a/deps/oblib/unittest/lib/CMakeLists.txt +++ b/deps/oblib/unittest/lib/CMakeLists.txt @@ -5,6 +5,7 @@ # oblib_addtest(thread_local/test_itid.cpp) # oblib_addtest(time/test_ob_time_utility.cpp) # oblib_addtest(timezone/test_ob_timezone_utils.cpp) 
+oblib_addtest(parquet/test_parquet.cpp) oblib_addtest(alloc/test_alloc_struct.cpp) oblib_addtest(alloc/test_block_set.cpp) oblib_addtest(alloc/test_chunk_mgr.cpp) diff --git a/deps/oblib/unittest/lib/parquet/test_parquet.cpp b/deps/oblib/unittest/lib/parquet/test_parquet.cpp new file mode 100644 index 000000000..c2d23a718 --- /dev/null +++ b/deps/oblib/unittest/lib/parquet/test_parquet.cpp @@ -0,0 +1,717 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX SQL + +#include "gtest/gtest.h" +#include "lib/oblog/ob_log.h" +#include "lib/oblog/ob_log_module.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "lib/allocator/page_arena.h" +#include "lib/file/ob_file.h" +#include "lib/file/file_directory_utils.h" +#include "lib/charset/ob_template_helper.h" +#include "lib/net/ob_net_util.h" + +#define USING_LOG_PREFIX SQL + +using namespace oceanbase::common; + +class TestParquet: public ::testing::Test +{ +public: + TestParquet(); + virtual ~TestParquet(); + virtual void SetUp(); + virtual void TearDown(); +}; + + +TestParquet::TestParquet() +{ +} + +TestParquet::~TestParquet() +{ +} + +void TestParquet::SetUp() +{ +} + +void TestParquet::TearDown() +{ +} + +constexpr int NUM_ROWS_PER_ROW_GROUP = 500; +const char PARQUET_FILENAME[] = "parquet_cpp_example.parquet"; + + +// #0 Build dummy data to pass around +// To have some input data, we first create an Arrow Table that holds +// some data. 
+std::shared_ptr generate_table() { + arrow::Int64Builder i64builder; + PARQUET_THROW_NOT_OK(i64builder.AppendValues({1, 2, 3, 4, 5})); + std::shared_ptr i64array; + PARQUET_THROW_NOT_OK(i64builder.Finish(&i64array)); + + arrow::StringBuilder strbuilder; + PARQUET_THROW_NOT_OK(strbuilder.Append("some")); + PARQUET_THROW_NOT_OK(strbuilder.Append("string")); + PARQUET_THROW_NOT_OK(strbuilder.Append("content")); + PARQUET_THROW_NOT_OK(strbuilder.Append("in")); + PARQUET_THROW_NOT_OK(strbuilder.Append("rows")); + std::shared_ptr strarray; + PARQUET_THROW_NOT_OK(strbuilder.Finish(&strarray)); + + std::shared_ptr schema = arrow::schema( + {arrow::field("int", arrow::int64()), arrow::field("str", arrow::utf8())}); + + return arrow::Table::Make(schema, {i64array, strarray}); +} + +// #1 Write out the data as a Parquet file +void write_parquet_file(const arrow::Table& table) { + std::shared_ptr outfile; + PARQUET_ASSIGN_OR_THROW( + outfile, arrow::io::FileOutputStream::Open("parquet-arrow-example.parquet")); + // The last argument to the function call is the size of the RowGroup in + // the parquet file. Normally you would choose this to be rather large but + // for the example, we use a small value to have multiple RowGroups. + PARQUET_THROW_NOT_OK( + parquet::arrow::WriteTable(table, arrow::default_memory_pool(), outfile, 3)); +} + +// #2: Fully read in the file +void read_whole_file() { + std::cout << "Reading parquet-arrow-example.parquet at once" << std::endl; + std::shared_ptr infile; + PARQUET_ASSIGN_OR_THROW(infile, + arrow::io::ReadableFile::Open("parquet-arrow-example.parquet", + arrow::default_memory_pool())); + + std::unique_ptr reader; + PARQUET_THROW_NOT_OK( + parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader)); + std::shared_ptr table; + PARQUET_THROW_NOT_OK(reader->ReadTable(&table)); + std::cout << "Loaded " << table->num_rows() << " rows in " << table->num_columns() + << " columns." 
<< std::endl; +} + +// #3: Read only a single RowGroup of the parquet file +void read_single_rowgroup() { + std::cout << "Reading first RowGroup of parquet-arrow-example.parquet" << std::endl; + std::shared_ptr infile; + PARQUET_ASSIGN_OR_THROW(infile, + arrow::io::ReadableFile::Open("parquet-arrow-example.parquet", + arrow::default_memory_pool())); + + std::unique_ptr reader; + PARQUET_THROW_NOT_OK( + parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader)); + std::shared_ptr table; + PARQUET_THROW_NOT_OK(reader->RowGroup(0)->ReadTable(&table)); + std::cout << "Loaded " << table->num_rows() << " rows in " << table->num_columns() + << " columns." << std::endl; +} + +// #4: Read only a single column of the whole parquet file +void read_single_column() { + std::cout << "Reading first column of parquet-arrow-example.parquet" << std::endl; + std::shared_ptr infile; + PARQUET_ASSIGN_OR_THROW(infile, + arrow::io::ReadableFile::Open("parquet-arrow-example.parquet", + arrow::default_memory_pool())); + + std::unique_ptr reader; + PARQUET_THROW_NOT_OK( + parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader)); + std::shared_ptr array; + PARQUET_THROW_NOT_OK(reader->ReadColumn(0, &array)); + PARQUET_THROW_NOT_OK(arrow::PrettyPrint(*array, 4, &std::cout)); + std::cout << std::endl; +} + +// #5: Read only a single column of a RowGroup (this is known as ColumnChunk) +// from the Parquet file. 
+void read_single_column_chunk() { + std::cout << "Reading first ColumnChunk of the first RowGroup of " + "parquet-arrow-example.parquet" + << std::endl; + std::shared_ptr infile; + PARQUET_ASSIGN_OR_THROW(infile, + arrow::io::ReadableFile::Open("parquet-arrow-example.parquet", + arrow::default_memory_pool())); + + std::unique_ptr reader; + PARQUET_THROW_NOT_OK( + parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader)); + std::shared_ptr array; + PARQUET_THROW_NOT_OK(reader->RowGroup(0)->Column(0)->Read(&array)); + PARQUET_THROW_NOT_OK(arrow::PrettyPrint(*array, 4, &std::cout)); + std::cout << std::endl; +} + +class ObParquetAllocator : public ::arrow::MemoryPool +{ +public: + + /// Allocate a new memory region of at least size bytes. + /// + /// The allocated region shall be 64-byte aligned. + virtual arrow::Status Allocate(int64_t size, uint8_t** out) override + { + arrow::Status ret = arrow::Status::OK(); + void *buf = alloc_.alloc_aligned(size, 64); + if (OB_ISNULL(buf)) { + ret = arrow::Status::Invalid("allocate memory failed"); + } else { + *out = static_cast(buf); + } + std::cout << "Allocing : " << size << std::endl; + return arrow::Status::OK(); + } + + /// Resize an already allocated memory section. + /// + /// As by default most default allocators on a platform don't support aligned + /// reallocation, this function can involve a copy of the underlying data. + virtual arrow::Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) + { + std::cout << "Reallocing : " << old_size << ',' << new_size << std::endl; + return Allocate(new_size, ptr); + } + + /// Free an allocated region. + /// + /// @param buffer Pointer to the start of the allocated memory region + /// @param size Allocated size located at buffer. An allocator implementation + /// may use this for tracking the amount of allocated bytes as well as for + /// faster deallocation if supported by its backend. 
+ virtual void Free(uint8_t* buffer, int64_t size) { + std::cout << "Freed : " << size << std::endl; + alloc_.free(buffer); + } + + /// Return unused memory to the OS + /// + /// Only applies to allocators that hold onto unused memory. This will be + /// best effort, a memory pool may not implement this feature or may be + /// unable to fulfill the request due to fragmentation. + virtual void ReleaseUnused() { + std::cout << "ReleaseUnused" << std::endl; + } + + /// The number of bytes that were allocated and not yet free'd through + /// this allocator. + virtual int64_t bytes_allocated() const override { + std::cout << "bytes_allocated()" << std::endl; + return alloc_.total(); + } + + /// Return peak memory allocation in this memory pool + /// + /// \return Maximum bytes allocated. If not known (or not implemented), + /// returns -1 + virtual int64_t max_memory() const override { return -1; } + + /// The name of the backend used by this MemoryPool (e.g. "system" or "jemalloc"). + virtual std::string backend_name() const override { return "Parquet"; } +private: + ObArenaAllocator alloc_; + arrow::internal::MemoryPoolStats stats_; +}; + + +/// Random-access file adapter that serves Arrow/Parquet reads from an ObFileReader. +class ObExternalFileReader : public arrow::io::RandomAccessFile { +public: + /// Opens file_name for reading; pool is used to allocate the buffers returned by + /// the buffer-returning Read()/ReadAt() overloads. file_name is stored by pointer, + /// not copied. + ObExternalFileReader(const char*file_name, arrow::MemoryPool *pool) { + file_reader_.open(file_name, false); + // Start at offset 0: Read() and Tell() use position_ before any Seek() call. + position_ = 0; + pool_ = pool; + file_name_ = file_name; + } + ~ObExternalFileReader() override {} + + virtual arrow::Status Close() override; + + virtual bool closed() const override; + + virtual arrow::Result Read(int64_t nbytes, void* out) override; + virtual arrow::Result> Read(int64_t nbytes) override; + virtual arrow::Result ReadAt(int64_t position, int64_t nbytes, void* out) override; + virtual arrow::Result> ReadAt(int64_t position, int64_t nbytes) override; + + + virtual arrow::Status Seek(int64_t position) override; + virtual arrow::Result Tell() const override; + virtual arrow::Result GetSize() override; +private: + ObFileReader file_reader_; + int64_t
position_; + arrow::MemoryPool *pool_; + const char* file_name_; +}; + +arrow::Status ObExternalFileReader::Seek(int64_t position) { + std::cout<< "ObExternalFileReader::Seek" << std::endl; + position_ = position; + return arrow::Status::OK(); +} + +arrow::Result ObExternalFileReader::Read(int64_t nbytes, void *out) +{ + std::cout<< "ObExternalFileReader::Read(int64_t nbytes, void *out)" << std::endl; + int64_t read_size = -1; + file_reader_.pread(out, nbytes, position_, read_size); + position_ += read_size; + return read_size; +} + +arrow::Result> ObExternalFileReader::Read(int64_t nbytes) +{ + std::cout<< "ObExternalFileReader::Read(int64_t nbytes)" << std::endl; + ARROW_ASSIGN_OR_RAISE(auto buffer, arrow::AllocateResizableBuffer(nbytes, pool_)); + ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, Read(nbytes, buffer->mutable_data())); + if (bytes_read < nbytes) { + RETURN_NOT_OK(buffer->Resize(bytes_read)); + } + return std::move(buffer); +} + + +arrow::Result ObExternalFileReader::ReadAt(int64_t position, int64_t nbytes, void* out) +{ + std::cout<< "ObExternalFileReader::ReadAt(int64_t position, int64_t nbytes, void* out)" << std::endl; + int64_t read_size = -1; + file_reader_.pread(out, nbytes, position, read_size); + position_ = position + read_size; + return read_size; +} +arrow::Result> ObExternalFileReader::ReadAt(int64_t position, int64_t nbytes) +{ + std::cout<< "ObExternalFileReader::ReadAt(int64_t position, int64_t nbytes)" << std::endl; + ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(nbytes, pool_)); + ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, + ReadAt(position, nbytes, buffer->mutable_data())); + if (bytes_read < nbytes) { + RETURN_NOT_OK(buffer->Resize(bytes_read)); + buffer->ZeroPadding(); + } + return std::move(buffer); +} + + +arrow::Result ObExternalFileReader::Tell() const +{ + std::cout<< "ObExternalFileReader::Tell()" << std::endl; + return position_; +} + +arrow::Result ObExternalFileReader::GetSize() +{ + std::cout<< 
"ObExternalFileReader::GetSize()" << std::endl; + int64_t file_size = 0; + FileDirectoryUtils::get_file_size(file_name_, file_size); + return file_size; +} + + +arrow::Status ObExternalFileReader::Close() +{ + std::cout<< "ObExternalFileReader::Close()" << std::endl; + file_reader_.close(); + return arrow::Status::OK(); +} + +bool ObExternalFileReader::closed() const +{ + std::cout<< "ObExternalFileReader::closed()" << std::endl; + return !file_reader_.is_opened(); +} + +void read_column_schema() { + std::cout << "Reading column schema " + "parquet-arrow-example.parquet" + << std::endl; + + ObParquetAllocator alloc; + parquet::ReaderProperties read_props(&alloc); + + std::cout<< "create parquet_file : " << std::endl; + std::shared_ptr reader = + std::make_shared("parquet-arrow-example.parquet", &alloc); + + std::cout<< "create file reader : " << std::endl; + std::unique_ptr parquet_reader = + parquet::ParquetFileReader::Open(reader, read_props); + + // Get the File MetaData + std::shared_ptr file_metadata = parquet_reader->metadata(); + + int num_row_groups = file_metadata->num_row_groups(); + int num_columns = file_metadata->num_columns(); + + std::cout<< "num_row_groups : " << num_row_groups << std::endl; + std::cout<< "num_columns : " << num_columns << std::endl; + + + for (int i = 0; i < num_columns; i++) { + std::cout<<"Path="<schema()->Column(i)->path()->ToDotString()<RowGroup(i)->ColumnChunk(j)->type(); + std::cout<<"ColumnType="< row_group_reader = + parquet_reader->RowGroup(r); + + std::shared_ptr column_reader; + column_reader = row_group_reader->Column(0); + parquet::Int64Reader* int64_reader = + static_cast(column_reader.get()); + + + while (int64_reader->HasNext()) { + std::cout << "before int64: " << std::endl; + rows_read = int64_reader->ReadBatch(1, nullptr, nullptr, &value, &values_read); + std::cout << "read int64: " << value << std::endl; + std::cout << "read rows: " << rows_read << std::endl; + std::cout << "read values_read: " << values_read << 
std::endl; + } + + + column_reader = row_group_reader->Column(1); + parquet::ByteArrayReader* ba_reader = + static_cast(column_reader.get()); + + while (ba_reader->HasNext()) { + parquet::ByteArray value; + std::cout << "before bytes: " << std::endl; + rows_read = + ba_reader->ReadBatch(1, nullptr, nullptr, &value, &values_read); + std::cout << "read bytes: " << std::string(pointer_cast(value.ptr), value.len) << std::endl; + std::cout << "read rows: " << rows_read << std::endl; + std::cout << "read values_read: " << values_read << std::endl; + } + } + +} + +using parquet::ConvertedType; +using parquet::Repetition; +using parquet::Type; +using parquet::schema::GroupNode; +using parquet::schema::PrimitiveNode; + +constexpr int FIXED_LENGTH = 10; +constexpr int FIXED_LENGTH_DEC = 14; + +static std::shared_ptr SetupSchema() { + parquet::schema::NodeVector fields; + fields.push_back(PrimitiveNode::Make("int", Repetition::OPTIONAL, parquet::LogicalType::Int(64, true), Type::INT64)); + fields.push_back(PrimitiveNode::Make("string", Repetition::OPTIONAL, parquet::LogicalType::String(), Type::BYTE_ARRAY)); + fields.push_back(PrimitiveNode::Make("decimal32", Repetition::OPTIONAL, parquet::LogicalType::Decimal(6, 3), Type::INT32)); + fields.push_back(PrimitiveNode::Make("decimal64", Repetition::OPTIONAL, parquet::LogicalType::Decimal(10, 0), Type::INT64)); + fields.push_back(PrimitiveNode::Make("decimalbytearr", Repetition::OPTIONAL, parquet::LogicalType::Decimal(20, 3), Type::BYTE_ARRAY)); + fields.push_back(PrimitiveNode::Make("date", Repetition::OPTIONAL, parquet::LogicalType::Date(), Type::INT32)); + fields.push_back(PrimitiveNode::Make("timestamp", Repetition::OPTIONAL, parquet::LogicalType::Timestamp(true, parquet::LogicalType::TimeUnit::MICROS), Type::INT64)); + + + return std::static_pointer_cast( + GroupNode::Make("schema", Repetition::REQUIRED, fields)); +} + +void gen_test_parquet() { + 
/********************************************************************************** + PARQUET WRITER EXAMPLE + **********************************************************************************/ + // parquet::REQUIRED fields do not need definition and repetition level values + // parquet::OPTIONAL fields require only definition level values + // parquet::REPEATED fields require both definition and repetition level values + try { + // Create a local file output stream instance. + using FileClass = ::arrow::io::FileOutputStream; + std::shared_ptr out_file; + PARQUET_ASSIGN_OR_THROW(out_file, FileClass::Open(PARQUET_FILENAME)); + + // Setup the parquet schema + std::shared_ptr schema = SetupSchema(); + + // Add writer properties + parquet::WriterProperties::Builder builder; + builder.compression(parquet::Compression::SNAPPY); + std::shared_ptr props = builder.build(); + + // Create a ParquetFileWriter instance + std::shared_ptr file_writer = + parquet::ParquetFileWriter::Open(out_file, schema, props); + + // Append a RowGroup with a specific number of rows. + parquet::RowGroupWriter* rg_writer = NULL; +/* + // Write the Bool column + parquet::BoolWriter* bool_writer = + static_cast(rg_writer->NextColumn()); + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + bool value = ((i % 2) == 0) ? true : false; + bool_writer->WriteBatch(1, nullptr, nullptr, &value); + } + + // Write the Int32 column + parquet::Int32Writer* int32_writer = + static_cast(rg_writer->NextColumn()); + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + int32_t value = i; + int32_writer->WriteBatch(1, nullptr, nullptr, &value); + } + // Write the Int64 column. Each row has repeats twice. 
+ parquet::Int64Writer* int64_writer = + static_cast(rg_writer->NextColumn()); + for (int i = 0; i < 2 * NUM_ROWS_PER_ROW_GROUP; i++) { + int64_t value = i * 1000 * 1000; + value *= 1000 * 1000; + int16_t definition_level = 1; + int16_t repetition_level = 0; + if ((i % 2) == 0) { + repetition_level = 1; // start of a new record + } + int64_writer->WriteBatch(1, &definition_level, &repetition_level, &value); + } + // Write the INT96 column. + parquet::Int96Writer* int96_writer = + static_cast(rg_writer->NextColumn()); + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + parquet::Int96 value; + value.value[0] = i; + value.value[1] = i + 1; + value.value[2] = i + 2; + int96_writer->WriteBatch(1, nullptr, nullptr, &value); + } + + // Write the Float column + parquet::FloatWriter* float_writer = + static_cast(rg_writer->NextColumn()); + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + float value = static_cast(i) * 1.1f; + float_writer->WriteBatch(1, nullptr, nullptr, &value); + } + + // Write the Double column + parquet::DoubleWriter* double_writer = + static_cast(rg_writer->NextColumn()); + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + double value = i * 1.1111111; + double_writer->WriteBatch(1, nullptr, nullptr, &value); + } + + // Write the ByteArray column. 
Make every alternate values NULL + parquet::ByteArrayWriter* ba_writer = + static_cast(rg_writer->NextColumn()); + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + parquet::ByteArray value; + char hello[FIXED_LENGTH] = "parquet"; + hello[7] = static_cast(static_cast('0') + i / 100); + hello[8] = static_cast(static_cast('0') + (i / 10) % 10); + hello[9] = static_cast(static_cast('0') + i % 10); + if (i % 2 == 0) { + int16_t definition_level = 1; + value.ptr = reinterpret_cast(&hello[0]); + value.len = FIXED_LENGTH; + ba_writer->WriteBatch(1, &definition_level, nullptr, &value); + } else { + int16_t definition_level = 0; + ba_writer->WriteBatch(1, &definition_level, nullptr, nullptr); + } + } + + // Write the FixedLengthByteArray column + parquet::FixedLenByteArrayWriter* flba_writer = + static_cast(rg_writer->NextColumn()); + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + parquet::FixedLenByteArray value; + char v = static_cast(i); + char flba[FIXED_LENGTH] = {v, v, v, v, v, v, v, v, v, v}; + value.ptr = reinterpret_cast(&flba[0]); + + flba_writer->WriteBatch(1, nullptr, nullptr, &value); + } +*/ +#define HAS_NULL 1 + + for (int j = 0; j < 10; j++) { + rg_writer = file_writer->AppendRowGroup(); + + parquet::Int64Writer* int64_writer = + static_cast(rg_writer->NextColumn()); + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + if (HAS_NULL && i % 2 == 0) { + int16_t definition_level = 0; + int64_writer->WriteBatch(1, &definition_level, nullptr, NULL); + } else { + int64_t value = i * 1000 * 1000 + j; + value *= 1000 * 1000; + int16_t definition_level = 1; + int64_writer->WriteBatch(1, &definition_level, nullptr, &value); + } + } + + parquet::ByteArrayWriter* ba_writer = + static_cast(rg_writer->NextColumn()); + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + parquet::ByteArray value; + char hello[FIXED_LENGTH] = "parquet"; + hello[7] = static_cast(static_cast('0') + i / 100); + hello[8] = static_cast(static_cast('0') + (i / 10) % 10); + hello[9] = 
static_cast(static_cast('0') + i % 10); + if (HAS_NULL && i % 2 == 1) { + int16_t definition_level = 0; + ba_writer->WriteBatch(1, &definition_level, nullptr, nullptr); + } else { + int16_t definition_level = 1; + value.ptr = reinterpret_cast(&hello[0]); + value.len = FIXED_LENGTH; + ba_writer->WriteBatch(1, &definition_level, nullptr, &value); + } + } + + parquet::Int32Writer* int32_writer2 = + static_cast(rg_writer->NextColumn()); + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + if (HAS_NULL && i % 3 == 0) { + int16_t definition_level = 0; + int32_writer2->WriteBatch(1, &definition_level, nullptr, NULL); + } else { + int32_t value = j * 10000 + i; + int16_t definition_level = 1; + int32_writer2->WriteBatch(1, &definition_level, nullptr, &value); + } + } + + parquet::Int64Writer* int64_writer2 = + static_cast(rg_writer->NextColumn()); + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + if (HAS_NULL && i % 3 == 1) { + int16_t definition_level = 0; + int64_writer2->WriteBatch(1, &definition_level, nullptr, NULL); + } else { + int64_t value = (j * 10000 + i) * 10000; + int16_t definition_level = 1; + int64_writer2->WriteBatch(1, &definition_level, nullptr, &value); + } + } + + parquet::ByteArrayWriter* ba_writer2 = + static_cast(rg_writer->NextColumn()); + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + parquet::ByteArray value; + char hello[FIXED_LENGTH_DEC] = "1234567890."; + hello[11] = static_cast(static_cast('0') + i / 100); + hello[12] = static_cast(static_cast('0') + (i / 10) % 10); + hello[13] = static_cast(static_cast('0') + i % 10); + if (HAS_NULL && i % 5 == 0) { + int16_t definition_level = 0; + ba_writer2->WriteBatch(1, &definition_level, nullptr, nullptr); + } else { + int16_t definition_level = 1; + value.ptr = reinterpret_cast(&hello[0]); + value.len = FIXED_LENGTH_DEC; + ba_writer2->WriteBatch(1, &definition_level, nullptr, &value); + } + } + + parquet::Int32Writer* int32_writer3 = + static_cast(rg_writer->NextColumn()); + for (int i = 
0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + if (HAS_NULL && i % 6 == 0) { + int16_t definition_level = 0; + int32_writer3->WriteBatch(1, &definition_level, nullptr, NULL); + } else { + int32_t value = 19857 + 365 * 10 * j + i; + int16_t definition_level = 1; + int32_writer3->WriteBatch(1, &definition_level, nullptr, &value); + } + } + + + parquet::Int64Writer* int64_writer3 = + static_cast(rg_writer->NextColumn()); + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + if (HAS_NULL && i % 3 == 1) { + int16_t definition_level = 0; + int64_writer3->WriteBatch(1, &definition_level, nullptr, NULL); + } else { + int64_t value = (1716887565LL + i * 3600) * 1000 * 1000; + int16_t definition_level = 1; + int64_writer3->WriteBatch(1, &definition_level, nullptr, &value); + } + } + + } + + // Close the ParquetFileWriter + file_writer->Close(); + + // Write the bytes to file + DCHECK(out_file->Close().ok()); + } catch (const std::exception& e) { + std::cerr << "Parquet write error: " << e.what() << std::endl; + } +} + + +TEST_F(TestParquet, example1) +{ + std::shared_ptr table = generate_table(); + write_parquet_file(*table); + read_whole_file(); + read_single_rowgroup(); + read_single_column(); + read_single_column_chunk(); + read_column_schema(); + gen_test_parquet(); +} + +int main(int argc, char **argv) +{ + OB_LOGGER.set_log_level("INFO"); + testing::InitGoogleTest(&argc,argv); + return RUN_ALL_TESTS(); +} diff --git a/src/share/external_table/ob_external_table_utils.cpp b/src/share/external_table/ob_external_table_utils.cpp index 1990462d0..b7ae271d9 100644 --- a/src/share/external_table/ob_external_table_utils.cpp +++ b/src/share/external_table/ob_external_table_utils.cpp @@ -263,6 +263,8 @@ int ObExternalTableUtils::make_external_table_scan_range(const common::ObString obj_start[FILE_ID].set_int(file_id); obj_end[FILE_ID] = ObObj(); obj_end[FILE_ID].set_int(file_id); + obj_start[ROW_GROUP_NUMBER].set_min_value(); + obj_end[ROW_GROUP_NUMBER].set_max_value(); 
obj_start[LINE_NUMBER] = ObObj(); obj_start[LINE_NUMBER].set_int(first_lineno); obj_end[LINE_NUMBER] = ObObj(); @@ -576,7 +578,7 @@ int ObExternalTableUtils::collect_local_files_on_servers( context.get_cb_list().at(i)->~ObRpcAsyncLoadExternalTableFileCallBack(); } } - LOG_TRACE("update external table file list", K(ret), K(file_urls)); + LOG_TRACE("update external table file list", K(ret), K(file_urls), K(location), K(pattern), K(all_servers)); return ret; } diff --git a/src/share/external_table/ob_external_table_utils.h b/src/share/external_table/ob_external_table_utils.h index 0416c9d5c..715973479 100644 --- a/src/share/external_table/ob_external_table_utils.h +++ b/src/share/external_table/ob_external_table_utils.h @@ -57,6 +57,7 @@ class ObExternalTableUtils { PARTITION_ID = 0, FILE_URL, FILE_ID, + ROW_GROUP_NUMBER, LINE_NUMBER, MAX_EXTERNAL_FILE_SCANKEY }; diff --git a/src/share/inner_table/ob_inner_table_schema.21401_21450.cpp b/src/share/inner_table/ob_inner_table_schema.21401_21450.cpp index ba5f9e4e5..759347d3f 100644 --- a/src/share/inner_table/ob_inner_table_schema.21401_21450.cpp +++ b/src/share/inner_table/ob_inner_table_schema.21401_21450.cpp @@ -860,7 +860,7 @@ int ObInnerTableSchema::dba_ob_external_table_files_schema(ObTableSchema &table_ table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT B.TABLE_NAME AS TABLE_NAME, C.DATABASE_NAME AS TABLE_SCHEMA, 'P0' AS PARTITION_NAME, A.FILE_URL AS FILE_URL, A.FILE_SIZE AS FILE_SIZE FROM OCEANBASE.__ALL_EXTERNAL_TABLE_FILE A INNER JOIN OCEANBASE.__ALL_TABLE B ON A.TABLE_ID = B.TABLE_ID AND B.TENANT_ID = 0 INNER JOIN OCEANBASE.__ALL_DATABASE C ON B.DATABASE_ID = C.DATABASE_ID AND C.TENANT_ID = 0 WHERE B.TABLE_TYPE = 14 AND (A.DELETE_VERSION = 9223372036854775807 OR A.DELETE_VERSION < A.CREATE_VERSION) AND B.TABLE_MODE >> 12 & 15 in (0,1) )__"))) { + if 
(OB_FAIL(table_schema.set_view_definition(R"__( SELECT B.TABLE_NAME AS TABLE_NAME, C.DATABASE_NAME AS TABLE_SCHEMA, P.PART_NAME AS PARTITION_NAME, A.FILE_URL AS FILE_URL, A.FILE_SIZE AS FILE_SIZE FROM OCEANBASE.__ALL_EXTERNAL_TABLE_FILE A INNER JOIN OCEANBASE.__ALL_TABLE B ON A.TABLE_ID = B.TABLE_ID AND B.TENANT_ID = 0 INNER JOIN OCEANBASE.__ALL_DATABASE C ON B.DATABASE_ID = C.DATABASE_ID AND C.TENANT_ID = 0 LEFT JOIN OCEANBASE.__ALL_PART P ON A.PART_ID = P.PART_ID AND P.TENANT_ID = 0 WHERE B.TABLE_TYPE = 14 AND (A.DELETE_VERSION = 9223372036854775807 OR A.DELETE_VERSION < A.CREATE_VERSION) AND B.TABLE_MODE >> 12 & 15 in (0,1) )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } @@ -910,7 +910,7 @@ int ObInnerTableSchema::all_ob_external_table_files_schema(ObTableSchema &table_ table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT B.TABLE_NAME AS TABLE_NAME, C.DATABASE_NAME AS TABLE_SCHEMA, 'P0' AS PARTITION_NAME, A.FILE_URL AS FILE_URL, A.FILE_SIZE AS FILE_SIZE FROM OCEANBASE.__ALL_EXTERNAL_TABLE_FILE A INNER JOIN OCEANBASE.__ALL_TABLE B ON A.TABLE_ID = B.TABLE_ID AND B.TENANT_ID = 0 INNER JOIN OCEANBASE.__ALL_DATABASE C ON B.DATABASE_ID = C.DATABASE_ID AND C.TENANT_ID = 0 WHERE B.TABLE_TYPE = 14 AND B.TABLE_MODE >> 12 & 15 in (0,1) AND 0 = sys_privilege_check('table_acc', EFFECTIVE_TENANT_ID(), C.DATABASE_NAME, B.TABLE_NAME) AND (A.DELETE_VERSION = 9223372036854775807 OR A.DELETE_VERSION < A.CREATE_VERSION) )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT B.TABLE_NAME AS TABLE_NAME, C.DATABASE_NAME AS TABLE_SCHEMA, P.PART_NAME AS PARTITION_NAME, A.FILE_URL AS FILE_URL, A.FILE_SIZE AS FILE_SIZE FROM OCEANBASE.__ALL_EXTERNAL_TABLE_FILE A INNER JOIN OCEANBASE.__ALL_TABLE B ON A.TABLE_ID = B.TABLE_ID AND B.TENANT_ID = 0 INNER JOIN OCEANBASE.__ALL_DATABASE C ON B.DATABASE_ID = C.DATABASE_ID AND C.TENANT_ID = 0 
LEFT JOIN OCEANBASE.__ALL_PART P ON A.PART_ID = P.PART_ID AND P.TENANT_ID = 0 WHERE B.TABLE_TYPE = 14 AND B.TABLE_MODE >> 12 & 15 in (0,1) AND 0 = sys_privilege_check('table_acc', EFFECTIVE_TENANT_ID(), C.DATABASE_NAME, B.TABLE_NAME) AND (A.DELETE_VERSION = 9223372036854775807 OR A.DELETE_VERSION < A.CREATE_VERSION) )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } @@ -1260,7 +1260,7 @@ int ObInnerTableSchema::cdb_ob_external_table_files_schema(ObTableSchema &table_ table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT A.TENANT_ID AS TENANT_ID, B.TABLE_NAME AS TABLE_NAME, C.DATABASE_NAME AS TABLE_SCHEMA, 'P0' AS PARTITION_NAME, A.FILE_URL AS FILE_URL, A.FILE_SIZE AS FILE_SIZE FROM OCEANBASE.__ALL_VIRTUAL_EXTERNAL_TABLE_FILE A INNER JOIN OCEANBASE.__ALL_VIRTUAL_TABLE B ON A.TABLE_ID = B.TABLE_ID AND A.TENANT_ID=B.TENANT_ID AND B.TABLE_MODE >> 12 & 15 in (0,1) INNER JOIN OCEANBASE.__ALL_VIRTUAL_DATABASE C ON B.DATABASE_ID = C.DATABASE_ID AND B.TENANT_ID=C.TENANT_ID WHERE B.TABLE_TYPE = 14 AND (A.DELETE_VERSION = 9223372036854775807 OR A.DELETE_VERSION < A.CREATE_VERSION) )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT A.TENANT_ID AS TENANT_ID, B.TABLE_NAME AS TABLE_NAME, C.DATABASE_NAME AS TABLE_SCHEMA, P.PART_NAME AS PARTITION_NAME, A.FILE_URL AS FILE_URL, A.FILE_SIZE AS FILE_SIZE FROM OCEANBASE.__ALL_VIRTUAL_EXTERNAL_TABLE_FILE A INNER JOIN OCEANBASE.__ALL_VIRTUAL_TABLE B ON A.TABLE_ID = B.TABLE_ID AND A.TENANT_ID=B.TENANT_ID AND B.TABLE_MODE >> 12 & 15 in (0,1) INNER JOIN OCEANBASE.__ALL_VIRTUAL_DATABASE C ON B.DATABASE_ID = C.DATABASE_ID AND B.TENANT_ID=C.TENANT_ID LEFT JOIN OCEANBASE.__ALL_VIRTUAL_PART P ON A.PART_ID = P.PART_ID AND C.TENANT_ID = P.TENANT_ID WHERE B.TABLE_TYPE = 14 AND (A.DELETE_VERSION = 9223372036854775807 OR A.DELETE_VERSION < A.CREATE_VERSION) )__"))) { LOG_ERROR("fail to set 
view_definition", K(ret)); } } diff --git a/src/share/inner_table/ob_inner_table_schema.25201_25250.cpp b/src/share/inner_table/ob_inner_table_schema.25201_25250.cpp index 47d237bcc..6e797f350 100644 --- a/src/share/inner_table/ob_inner_table_schema.25201_25250.cpp +++ b/src/share/inner_table/ob_inner_table_schema.25201_25250.cpp @@ -1660,7 +1660,7 @@ int ObInnerTableSchema::dba_ob_external_table_files_ora_schema(ObTableSchema &ta table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT B.TABLE_NAME AS TABLE_NAME, C.DATABASE_NAME AS OWNER, 'P0' AS PARTITION_NAME, A.FILE_URL AS FILE_URL, A.FILE_SIZE AS FILE_SIZE FROM SYS.ALL_VIRTUAL_EXTERNAL_TABLE_FILE_REAL_AGENT A INNER JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT B ON A.TABLE_ID = B.TABLE_ID AND bitand((B.TABLE_MODE / 4096), 15) IN (0,1) INNER JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT C ON B.DATABASE_ID = C.DATABASE_ID AND B.TENANT_ID = C.TENANT_ID WHERE B.TENANT_ID = EFFECTIVE_TENANT_ID() AND B.TABLE_TYPE = 14 AND (A.DELETE_VERSION = 9223372036854775807 OR A.DELETE_VERSION < A.CREATE_VERSION) )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT B.TABLE_NAME AS TABLE_NAME, C.DATABASE_NAME AS OWNER, P.PART_NAME AS PARTITION_NAME, A.FILE_URL AS FILE_URL, A.FILE_SIZE AS FILE_SIZE FROM SYS.ALL_VIRTUAL_EXTERNAL_TABLE_FILE_REAL_AGENT A INNER JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT B ON A.TABLE_ID = B.TABLE_ID AND bitand((B.TABLE_MODE / 4096), 15) IN (0,1) INNER JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT C ON B.DATABASE_ID = C.DATABASE_ID AND B.TENANT_ID = C.TENANT_ID LEFT JOIN SYS.ALL_VIRTUAL_PART_REAL_AGENT P ON A.PART_ID = P.PART_ID AND P.TENANT_ID = C.TENANT_ID WHERE B.TENANT_ID = EFFECTIVE_TENANT_ID() AND B.TABLE_TYPE = 14 AND (A.DELETE_VERSION = 9223372036854775807 OR A.DELETE_VERSION < A.CREATE_VERSION) )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } @@ -1710,7 +1710,7 @@ 
int ObInnerTableSchema::all_ob_external_table_files_ora_schema(ObTableSchema &ta table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT B.TABLE_NAME AS TABLE_NAME, C.DATABASE_NAME AS OWNER, 'P0' AS PARTITION_NAME, A.FILE_URL AS FILE_URL, A.FILE_SIZE AS FILE_SIZE FROM SYS.ALL_VIRTUAL_EXTERNAL_TABLE_FILE_REAL_AGENT A INNER JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT B ON A.TABLE_ID = B.TABLE_ID AND bitand((B.TABLE_MODE / 4096), 15) IN (0,1) INNER JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT C ON B.DATABASE_ID = C.DATABASE_ID AND B.TENANT_ID = C.TENANT_ID WHERE B.TENANT_ID = EFFECTIVE_TENANT_ID() AND B.TABLE_TYPE = 14 AND (C.DATABASE_ID = USERENV('SCHEMAID') OR USER_CAN_ACCESS_OBJ(1, B.TABLE_ID, C.DATABASE_ID) = 1) AND (A.DELETE_VERSION = 9223372036854775807 OR A.DELETE_VERSION < A.CREATE_VERSION) )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT B.TABLE_NAME AS TABLE_NAME, C.DATABASE_NAME AS OWNER, P.PART_NAME AS PARTITION_NAME, A.FILE_URL AS FILE_URL, A.FILE_SIZE AS FILE_SIZE FROM SYS.ALL_VIRTUAL_EXTERNAL_TABLE_FILE_REAL_AGENT A INNER JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT B ON A.TABLE_ID = B.TABLE_ID AND bitand((B.TABLE_MODE / 4096), 15) IN (0,1) INNER JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT C ON B.DATABASE_ID = C.DATABASE_ID AND B.TENANT_ID = C.TENANT_ID LEFT JOIN SYS.ALL_VIRTUAL_PART_REAL_AGENT P ON A.PART_ID = P.PART_ID AND P.TENANT_ID = C.TENANT_ID WHERE B.TENANT_ID = EFFECTIVE_TENANT_ID() AND B.TABLE_TYPE = 14 AND (C.DATABASE_ID = USERENV('SCHEMAID') OR USER_CAN_ACCESS_OBJ(1, B.TABLE_ID, C.DATABASE_ID) = 1) AND (A.DELETE_VERSION = 9223372036854775807 OR A.DELETE_VERSION < A.CREATE_VERSION) )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } diff --git a/src/share/inner_table/ob_inner_table_schema_def.py b/src/share/inner_table/ob_inner_table_schema_def.py index ac0e98a78..c3d942b9a 100644 --- 
a/src/share/inner_table/ob_inner_table_schema_def.py +++ b/src/share/inner_table/ob_inner_table_schema_def.py @@ -31540,13 +31540,14 @@ def_table_schema( SELECT B.TABLE_NAME AS TABLE_NAME, C.DATABASE_NAME AS TABLE_SCHEMA, - 'P0' AS PARTITION_NAME, + P.PART_NAME AS PARTITION_NAME, A.FILE_URL AS FILE_URL, A.FILE_SIZE AS FILE_SIZE FROM OCEANBASE.__ALL_EXTERNAL_TABLE_FILE A INNER JOIN OCEANBASE.__ALL_TABLE B ON A.TABLE_ID = B.TABLE_ID AND B.TENANT_ID = 0 INNER JOIN OCEANBASE.__ALL_DATABASE C ON B.DATABASE_ID = C.DATABASE_ID AND C.TENANT_ID = 0 + LEFT JOIN OCEANBASE.__ALL_PART P ON A.PART_ID = P.PART_ID AND P.TENANT_ID = 0 WHERE B.TABLE_TYPE = 14 AND (A.DELETE_VERSION = 9223372036854775807 OR A.DELETE_VERSION < A.CREATE_VERSION) AND B.TABLE_MODE >> 12 & 15 in (0,1) """.replace("\n", " ") @@ -31565,13 +31566,14 @@ def_table_schema( SELECT B.TABLE_NAME AS TABLE_NAME, C.DATABASE_NAME AS TABLE_SCHEMA, - 'P0' AS PARTITION_NAME, + P.PART_NAME AS PARTITION_NAME, A.FILE_URL AS FILE_URL, A.FILE_SIZE AS FILE_SIZE FROM OCEANBASE.__ALL_EXTERNAL_TABLE_FILE A INNER JOIN OCEANBASE.__ALL_TABLE B ON A.TABLE_ID = B.TABLE_ID AND B.TENANT_ID = 0 INNER JOIN OCEANBASE.__ALL_DATABASE C ON B.DATABASE_ID = C.DATABASE_ID AND C.TENANT_ID = 0 + LEFT JOIN OCEANBASE.__ALL_PART P ON A.PART_ID = P.PART_ID AND P.TENANT_ID = 0 WHERE B.TABLE_TYPE = 14 AND B.TABLE_MODE >> 12 & 15 in (0,1) AND 0 = sys_privilege_check('table_acc', EFFECTIVE_TENANT_ID(), C.DATABASE_NAME, B.TABLE_NAME) @@ -31871,13 +31873,14 @@ def_table_schema( A.TENANT_ID AS TENANT_ID, B.TABLE_NAME AS TABLE_NAME, C.DATABASE_NAME AS TABLE_SCHEMA, - 'P0' AS PARTITION_NAME, + P.PART_NAME AS PARTITION_NAME, A.FILE_URL AS FILE_URL, A.FILE_SIZE AS FILE_SIZE FROM OCEANBASE.__ALL_VIRTUAL_EXTERNAL_TABLE_FILE A INNER JOIN OCEANBASE.__ALL_VIRTUAL_TABLE B ON A.TABLE_ID = B.TABLE_ID AND A.TENANT_ID=B.TENANT_ID AND B.TABLE_MODE >> 12 & 15 in (0,1) INNER JOIN OCEANBASE.__ALL_VIRTUAL_DATABASE C ON B.DATABASE_ID = C.DATABASE_ID AND B.TENANT_ID=C.TENANT_ID + 
LEFT JOIN OCEANBASE.__ALL_VIRTUAL_PART P ON A.PART_ID = P.PART_ID AND C.TENANT_ID = P.TENANT_ID WHERE B.TABLE_TYPE = 14 AND (A.DELETE_VERSION = 9223372036854775807 OR A.DELETE_VERSION < A.CREATE_VERSION) """.replace("\n", " ") ) @@ -53216,13 +53219,14 @@ def_table_schema( SELECT B.TABLE_NAME AS TABLE_NAME, C.DATABASE_NAME AS OWNER, - 'P0' AS PARTITION_NAME, + P.PART_NAME AS PARTITION_NAME, A.FILE_URL AS FILE_URL, A.FILE_SIZE AS FILE_SIZE FROM SYS.ALL_VIRTUAL_EXTERNAL_TABLE_FILE_REAL_AGENT A INNER JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT B ON A.TABLE_ID = B.TABLE_ID AND bitand((B.TABLE_MODE / 4096), 15) IN (0,1) INNER JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT C ON B.DATABASE_ID = C.DATABASE_ID AND B.TENANT_ID = C.TENANT_ID + LEFT JOIN SYS.ALL_VIRTUAL_PART_REAL_AGENT P ON A.PART_ID = P.PART_ID AND P.TENANT_ID = C.TENANT_ID WHERE B.TENANT_ID = EFFECTIVE_TENANT_ID() AND B.TABLE_TYPE = 14 AND (A.DELETE_VERSION = 9223372036854775807 OR A.DELETE_VERSION < A.CREATE_VERSION) """.replace("\n", " ") @@ -53243,13 +53247,14 @@ def_table_schema( SELECT B.TABLE_NAME AS TABLE_NAME, C.DATABASE_NAME AS OWNER, - 'P0' AS PARTITION_NAME, + P.PART_NAME AS PARTITION_NAME, A.FILE_URL AS FILE_URL, A.FILE_SIZE AS FILE_SIZE FROM SYS.ALL_VIRTUAL_EXTERNAL_TABLE_FILE_REAL_AGENT A INNER JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT B ON A.TABLE_ID = B.TABLE_ID AND bitand((B.TABLE_MODE / 4096), 15) IN (0,1) INNER JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT C ON B.DATABASE_ID = C.DATABASE_ID AND B.TENANT_ID = C.TENANT_ID + LEFT JOIN SYS.ALL_VIRTUAL_PART_REAL_AGENT P ON A.PART_ID = P.PART_ID AND P.TENANT_ID = C.TENANT_ID WHERE B.TENANT_ID = EFFECTIVE_TENANT_ID() AND B.TABLE_TYPE = 14 AND (C.DATABASE_ID = USERENV('SCHEMAID') OR USER_CAN_ACCESS_OBJ(1, B.TABLE_ID, C.DATABASE_ID) = 1) AND (A.DELETE_VERSION = 9223372036854775807 OR A.DELETE_VERSION < A.CREATE_VERSION) diff --git a/src/share/schema/ob_schema_printer.cpp b/src/share/schema/ob_schema_printer.cpp index 9682b3c7a..e5194ad99 100644 --- 
a/src/share/schema/ob_schema_printer.cpp +++ b/src/share/schema/ob_schema_printer.cpp @@ -5350,16 +5350,19 @@ int ObSchemaPrinter::print_external_table_file_info(const ObTableSchema &table_s ObExternalFileFormat format; if (OB_FAIL(format.load_from_string(table_schema.get_external_file_format(), allocator))) { SHARE_SCHEMA_LOG(WARN, "fail to load from json string", K(ret)); - } else if (format.format_type_ != ObExternalFileFormat::CSV_FORMAT) { + } else if (!(format.format_type_ > ObExternalFileFormat::INVALID_FORMAT + && format.format_type_ < ObExternalFileFormat::MAX_FORMAT)) { + ret = OB_NOT_SUPPORTED; SHARE_SCHEMA_LOG(WARN, "unsupported to print file format", K(ret), K(format.format_type_)); - } else { + } else if (OB_FAIL(databuff_printf(buf, buf_len, pos, "\nFORMAT (\n"))) { + SHARE_SCHEMA_LOG(WARN, "fail to print FORMAT (", K(ret)); + } else if (OB_FAIL(databuff_printf(buf, buf_len, pos, " TYPE = '%s',", ObExternalFileFormat::FORMAT_TYPE_STR[format.format_type_]))) { + SHARE_SCHEMA_LOG(WARN, "fail to print TYPE", K(ret)); + } + if (OB_SUCC(ret) && ObExternalFileFormat::CSV_FORMAT == format.format_type_) { const ObCSVGeneralFormat &csv = format.csv_format_; const ObOriginFileFormat &origin_format = format.origin_file_format_str_; - if (OB_FAIL(databuff_printf(buf, buf_len, pos, "\nFORMAT (\n"))) { - SHARE_SCHEMA_LOG(WARN, "fail to print FORMAT (", K(ret)); - } else if (OB_FAIL(databuff_printf(buf, buf_len, pos, " TYPE = 'CSV',"))) { - SHARE_SCHEMA_LOG(WARN, "fail to print TYPE", K(ret)); - } else if (OB_FAIL(0 != csv.line_term_str_.case_compare(ObDataInFileStruct::DEFAULT_LINE_TERM_STR) && + if (OB_FAIL(0 != csv.line_term_str_.case_compare(ObDataInFileStruct::DEFAULT_LINE_TERM_STR) && databuff_printf(buf, buf_len, pos, "\n LINE_DELIMITER = %.*s,", origin_format.origin_line_term_str_.length(), origin_format.origin_line_term_str_.ptr()))) { SHARE_SCHEMA_LOG(WARN, "fail to print LINE_DELIMITER", K(ret)); } else if (OB_FAIL(0 != 
csv.field_term_str_.case_compare(ObDataInFileStruct::DEFAULT_FIELD_TERM_STR) && @@ -5388,11 +5391,12 @@ int ObSchemaPrinter::print_external_table_file_info(const ObTableSchema &table_s } else if (OB_FAIL(0 != csv.null_if_.count() && databuff_printf(buf, buf_len, pos, "\n NULL_IF = (%.*s),", origin_format.origin_null_if_str_.length(), origin_format.origin_null_if_str_.ptr()))) { SHARE_SCHEMA_LOG(WARN, "fail to print NULL_IF", K(ret)); - } else { - --pos; - if (OB_FAIL(databuff_printf(buf, buf_len, pos, "\n) "))) { - SHARE_SCHEMA_LOG(WARN, "fail to print )", K(ret)); - } + } + } + if (OB_SUCC(ret)) { + --pos; + if (OB_FAIL(databuff_printf(buf, buf_len, pos, "\n) "))) { + SHARE_SCHEMA_LOG(WARN, "fail to print )", K(ret)); } } } diff --git a/src/sql/CMakeLists.txt b/src/sql/CMakeLists.txt index 15b212711..4ea0cf8a3 100644 --- a/src/sql/CMakeLists.txt +++ b/src/sql/CMakeLists.txt @@ -394,6 +394,7 @@ ob_set_subtarget(ob_sql engine_expr engine/expr/ob_expr_func_round.cpp engine/expr/ob_expr_func_sleep.cpp engine/expr/ob_expr_get_package_var.cpp + engine/expr/ob_expr_get_path.cpp engine/expr/ob_expr_get_subprogram_var.cpp engine/expr/ob_expr_get_sys_var.cpp engine/expr/ob_expr_get_user_var.cpp @@ -877,6 +878,7 @@ ob_set_subtarget(ob_sql engine_table engine/table/ob_index_lookup_op_impl.cpp engine/table/ob_table_scan_with_index_back_op.cpp engine/table/ob_external_table_access_service.cpp + engine/table/ob_parquet_table_row_iter.cpp ) ob_set_subtarget(ob_sql executor diff --git a/src/sql/code_generator/ob_static_engine_expr_cg.cpp b/src/sql/code_generator/ob_static_engine_expr_cg.cpp index 72599971d..45c129e2d 100644 --- a/src/sql/code_generator/ob_static_engine_expr_cg.cpp +++ b/src/sql/code_generator/ob_static_engine_expr_cg.cpp @@ -21,6 +21,7 @@ #include "sql/engine/expr/ob_expr_lob_utils.h" #include "share/vector/ob_vector_define.h" #include "sql/engine/expr/ob_datum_cast.h" +#include "sql/engine/expr/ob_expr_get_path.h" namespace oceanbase { @@ -501,6 +502,25 @@ int 
ObStaticEngineExprCG::cg_expr_by_operator(const ObIArray &raw_e } } } + } else if (T_PSEUDO_EXTERNAL_FILE_COL == raw_expr->get_expr_type()) { /* external-file pseudo column: copy its data access path into extra info attached to the runtime expr */ + ObIExprExtraInfo *extra_info = nullptr; + ObPseudoColumnRawExpr *column_expr = static_cast(raw_expr); + if (OB_FAIL(ObExprExtraInfoFactory::alloc(*op_cg_ctx_.allocator_, rt_expr->type_, extra_info))) { + LOG_WARN("Failed to allocate memory for ObDataAccessPathExtraInfo", K(ret)); + } else if (OB_ISNULL(extra_info)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("extra_info should not be nullptr", K(ret)); + } else { + ObDataAccessPathExtraInfo *data_access_info = static_cast(extra_info); + if (OB_FAIL(ob_write_string(*op_cg_ctx_.allocator_, + column_expr->get_data_access_path(), + data_access_info->data_access_path_))) { + LOG_WARN("fail to write string", K(ret)); + } else { + rt_expr->extra_info_ = extra_info; + LOG_DEBUG("external file col expr", K(ret), "path", data_access_info->data_access_path_); + } + } } else if (!IS_EXPR_OP(rt_expr->type_) || IS_AGGR_FUN(rt_expr->type_)) { // do nothing } else if (OB_FAIL(expr_cg_impl.generate_expr_operator(*raw_expr, expr_op_fetcher))) { diff --git a/src/sql/engine/cmd/ob_load_data_parser.cpp b/src/sql/engine/cmd/ob_load_data_parser.cpp index 16d21cf9f..0b971c2ef 100644 --- a/src/sql/engine/cmd/ob_load_data_parser.cpp +++ b/src/sql/engine/cmd/ob_load_data_parser.cpp @@ -18,6 +18,7 @@ #include "lib/utility/ob_print_utils.h" #include "lib/string/ob_hex_utils_base.h" #include "deps/oblib/src/lib/list/ob_dlist.h" +#include "share/schema/ob_column_schema.h" using namespace oceanbase::sql; using namespace oceanbase::common; @@ -28,10 +29,13 @@ namespace sql { const char INVALID_TERM_CHAR = '\xff'; -const char * FORMAT_TYPE_STR[] = { +const char * ObExternalFileFormat::FORMAT_TYPE_STR[] = { "CSV", + "PARQUET", }; -static_assert(array_elements(FORMAT_TYPE_STR) == ObExternalFileFormat::MAX_FORMAT, "Not enough initializer for ObExternalFileFormat"); +
+static_assert(array_elements(ObExternalFileFormat::FORMAT_TYPE_STR) == ObExternalFileFormat::MAX_FORMAT, + "Not enough initializer for ObExternalFileFormat"); int ObCSVGeneralFormat::init_format(const ObDataInFileStruct &format, int64_t file_column_nums, @@ -347,7 +351,7 @@ int64_t ObExternalFileFormat::to_string(char *buf, const int64_t buf_len) const pos += origin_file_format_str_.to_json_kv_string(buf + pos, buf_len - pos); break; default: - pos = 0; + pos += 0; } J_OBJ_END(); @@ -389,6 +393,8 @@ int ObExternalFileFormat::load_from_string(const ObString &str, ObIAllocator &al OZ (csv_format_.load_from_json_data(format_type_node, allocator)); OZ (origin_file_format_str_.load_from_json_data(format_type_node, allocator)); break; + case PARQUET_FORMAT: + break; default: ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid format type", K(ret), K(format_type_str)); @@ -399,6 +405,45 @@ int ObExternalFileFormat::load_from_string(const ObString &str, ObIAllocator &al return ret; } +int ObExternalFileFormat::mock_gen_column_def( + const share::schema::ObColumnSchemaV2 &column, + ObIAllocator &allocator, + ObString &def) +{ + int ret = OB_SUCCESS; + ObSqlString temp_str; + switch (format_type_) { + case CSV_FORMAT: { + uint64_t file_column_idx = column.get_column_id() - OB_APP_MIN_COLUMN_ID + 1; + if (OB_FAIL(temp_str.append_fmt("%s%lu", N_EXTERNAL_FILE_COLUMN_PREFIX, file_column_idx))) { + LOG_WARN("fail to append sql str", K(ret)); + } + break; + } + case PARQUET_FORMAT: { + if (OB_FAIL(temp_str.append_fmt("get_path(%s, '%.*s')", + N_EXTERNAL_FILE_ROW, + column.get_column_name_str().length(), + column.get_column_name_str().ptr()))) { + LOG_WARN("fail to append sql str", K(ret)); + } + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected format", K(ret), K(format_type_)); + } + + } + if (OB_SUCC(ret)) { + if (OB_FAIL(ob_write_string(allocator, temp_str.string(), def))) { + LOG_WARN("fail to write string", K(ret)); + } + } + + return ret; +} + int 
ObExternalFileFormat::StringData::store_str(const ObString &str) { return ob_write_string(allocator_, str, str_); diff --git a/src/sql/engine/cmd/ob_load_data_parser.h b/src/sql/engine/cmd/ob_load_data_parser.h index 43a988fa1..fd43ec831 100644 --- a/src/sql/engine/cmd/ob_load_data_parser.h +++ b/src/sql/engine/cmd/ob_load_data_parser.h @@ -21,6 +21,11 @@ namespace oceanbase { +namespace share { +namespace schema { +class ObColumnSchemaV2; +} +} namespace sql { class ObDataInFileStruct; @@ -495,18 +500,27 @@ struct ObExternalFileFormat enum FormatType { INVALID_FORMAT = -1, CSV_FORMAT, + PARQUET_FORMAT, MAX_FORMAT }; + enum Options { + OPT_REPLACE_INVALID_CHARACTERS = 1 << 0, + OPT_BINARY_AS_TEXT = 1 << 1, + }; ObExternalFileFormat() : format_type_(INVALID_FORMAT) {} int64_t to_string(char* buf, const int64_t buf_len) const; - int load_from_string(const common::ObString &str, ObIAllocator &allocator); + int load_from_string(const common::ObString &str, common::ObIAllocator &allocator); + int mock_gen_column_def(const share::schema::ObColumnSchemaV2 &column, common::ObIAllocator &allocator, common::ObString &def); ObOriginFileFormat origin_file_format_str_; FormatType format_type_; sql::ObCSVGeneralFormat csv_format_; + uint64_t options_; + + static const char *FORMAT_TYPE_STR[]; }; diff --git a/src/sql/engine/expr/ob_expr_cast.cpp b/src/sql/engine/expr/ob_expr_cast.cpp index 56c52398b..43e7d0f84 100644 --- a/src/sql/engine/expr/ob_expr_cast.cpp +++ b/src/sql/engine/expr/ob_expr_cast.cpp @@ -614,7 +614,7 @@ int ObExprCast::calc_result_type2(ObExprResType &type, int ObExprCast::get_cast_type(const bool enable_decimal_int, const ObExprResType param_type2, const ObCastMode cast_mode, - ObExprResType &dst_type) const + ObExprResType &dst_type) { int ret = OB_SUCCESS; if (!param_type2.is_int() && !param_type2.get_param().is_int()) { diff --git a/src/sql/engine/expr/ob_expr_cast.h b/src/sql/engine/expr/ob_expr_cast.h index bacc08233..f335e4a42 100644 --- 
a/src/sql/engine/expr/ob_expr_cast.h +++ b/src/sql/engine/expr/ob_expr_cast.h @@ -140,12 +140,12 @@ public: sql::ObEvalCtx &ctx, sql::ObDatum &res_datum); virtual int is_valid_for_generated_column(const ObRawExpr*expr, const common::ObIArray &exprs, bool &is_valid) const; + static int get_cast_type(const bool enable_decimal_int, + const ObExprResType param_type2, + const ObCastMode cast_mode, + ObExprResType &dst_type); DECLARE_SET_LOCAL_SESSION_VARS; private: - int get_cast_type(const bool enable_decimal_int, - const ObExprResType param_type2, - const ObCastMode cast_mode, - ObExprResType &dst_type) const; int get_explicit_cast_cm(const ObExprResType &src_type, const ObExprResType &dst_type, const ObSQLSessionInfo &session, diff --git a/src/sql/engine/expr/ob_expr_extra_info_factory.cpp b/src/sql/engine/expr/ob_expr_extra_info_factory.cpp index bba3747bf..0651985f3 100644 --- a/src/sql/engine/expr/ob_expr_extra_info_factory.cpp +++ b/src/sql/engine/expr/ob_expr_extra_info_factory.cpp @@ -37,6 +37,7 @@ #include "sql/engine/expr/ob_expr_json_schema_valid.h" #include "sql/engine/expr/ob_expr_json_schema_validation_report.h" #include "sql/engine/expr/ob_expr_json_utils.h" +#include "sql/engine/expr/ob_expr_get_path.h" namespace oceanbase { @@ -46,18 +47,18 @@ namespace sql #define REG_EXTRA_INFO(type, ExtraInfoClass) \ do { \ - static_assert(type > T_INVALID && type < T_MAX_OP, "invalid expr type for extra info"); \ + static_assert(is_valid_item_type(type), "invalid expr type for extra info"); \ ALLOC_FUNS_[type] = ObExprExtraInfoFactory::alloc; \ } while(0) -ObExprExtraInfoFactory::AllocExtraInfoFunc ObExprExtraInfoFactory::ALLOC_FUNS_[T_MAX_OP] = { }; +ObExprExtraInfoFactory::AllocExtraInfoFunc ObExprExtraInfoFactory::ALLOC_FUNS_[ObExprExtraInfoFactory::MAX_ITEM_ID] = { }; int ObExprExtraInfoFactory::alloc(common::ObIAllocator &alloc, const ObExprOperatorType &type, ObIExprExtraInfo *&extra_info) { int ret = OB_SUCCESS; - if (OB_UNLIKELY(!(type > T_INVALID && type < 
T_MAX_OP))) { + if (OB_UNLIKELY(!is_valid_item_type(type))) { ret = OB_INVALID_ARGUMENT; OB_LOG(WARN, "invalid argument", K(ret), K(type)); } else if (OB_ISNULL(ALLOC_FUNS_[type])) { @@ -114,6 +115,7 @@ void ObExprExtraInfoFactory::register_expr_extra_infos() REG_EXTRA_INFO(T_FUN_SYS_JSON_SCHEMA_VALIDATION_REPORT, ObExprJsonSchemaValidInfo); REG_EXTRA_INFO(T_FUN_SYS_JSON_VALUE, ObExprJsonQueryParamInfo); REG_EXTRA_INFO(T_FUN_SYS_JSON_QUERY, ObExprJsonQueryParamInfo); + REG_EXTRA_INFO(T_PSEUDO_EXTERNAL_FILE_COL, ObDataAccessPathExtraInfo); } } // end namespace sql diff --git a/src/sql/engine/expr/ob_expr_extra_info_factory.h b/src/sql/engine/expr/ob_expr_extra_info_factory.h index 7da9df70e..4d8567696 100644 --- a/src/sql/engine/expr/ob_expr_extra_info_factory.h +++ b/src/sql/engine/expr/ob_expr_extra_info_factory.h @@ -26,6 +26,7 @@ struct ObIExprExtraInfo; struct ObExprExtraInfoFactory { public: + static constexpr int64_t MAX_ITEM_ID = T_DEFAULT; typedef int (*AllocExtraInfoFunc) (common::ObIAllocator &alloc, ObIExprExtraInfo *&extra_info, const ObExprOperatorType type); // allocate extra info @@ -35,9 +36,13 @@ public: static void register_expr_extra_infos(); + inline static constexpr bool is_valid_item_type(const ObExprOperatorType &type) { + return (type > T_INVALID && type < MAX_ITEM_ID); + } + inline static bool is_registered(const ObExprOperatorType &type) { - return type > T_INVALID && type < T_MAX_OP + return is_valid_item_type(type) && NULL != ALLOC_FUNS_[type]; } @@ -47,7 +52,7 @@ private: const ObExprOperatorType type); private: - static AllocExtraInfoFunc ALLOC_FUNS_[T_MAX_OP]; + static AllocExtraInfoFunc ALLOC_FUNS_[MAX_ITEM_ID]; }; template diff --git a/src/sql/engine/expr/ob_expr_get_path.cpp b/src/sql/engine/expr/ob_expr_get_path.cpp new file mode 100644 index 000000000..fca198991 --- /dev/null +++ b/src/sql/engine/expr/ob_expr_get_path.cpp @@ -0,0 +1,45 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX SQL_ENG +#include "sql/engine/expr/ob_expr_get_path.h" +#include "lib/string/ob_string.h" +namespace oceanbase +{ +using namespace common; +namespace sql +{ +/* Deep copy: allocates a new extra-info object for `type` via ObExprExtraInfoFactory and writes data_access_path_ into it using `allocator`; returns OB_SUCCESS or the first failing error code. */ +int ObDataAccessPathExtraInfo::deep_copy(common::ObIAllocator &allocator, + const ObExprOperatorType type, + ObIExprExtraInfo *&copied_info) const +{ + int ret = OB_SUCCESS; + if (OB_FAIL(ObExprExtraInfoFactory::alloc(allocator, type, copied_info))) { + LOG_WARN("Failed to allocate memory for ObDataAccessPathExtraInfo", K(ret)); + } else if (OB_ISNULL(copied_info)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("extra_info should not be nullptr", K(ret)); + } else { + ObDataAccessPathExtraInfo *other = static_cast(copied_info); + if (OB_FAIL(ob_write_string(allocator, data_access_path_, other->data_access_path_))) { + LOG_WARN("fail to write string", K(ret)); + } + } + return ret; +} + + +OB_SERIALIZE_MEMBER(ObDataAccessPathExtraInfo, data_access_path_); + +} +} diff --git a/src/sql/engine/expr/ob_expr_get_path.h b/src/sql/engine/expr/ob_expr_get_path.h new file mode 100644 index 000000000..ae8be8ba6 --- /dev/null +++ b/src/sql/engine/expr/ob_expr_get_path.h @@ -0,0 +1,67 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OB_EXPR_GET_PATH_H +#define OB_EXPR_GET_PATH_H + +#include "sql/engine/expr/ob_expr_operator.h" +namespace oceanbase +{ +namespace sql +{ + +struct ObDataAccessPathExtraInfo : public ObIExprExtraInfo +{ + OB_UNIS_VERSION(1); +public: + ObDataAccessPathExtraInfo(common::ObIAllocator &alloc, ObExprOperatorType type) + : ObIExprExtraInfo(alloc, type) + {} + virtual ~ObDataAccessPathExtraInfo() {} + virtual int deep_copy(common::ObIAllocator &allocator, + const ObExprOperatorType type, + ObIExprExtraInfo *&copied_info) const override; + TO_STRING_KV(K(type_), K(data_access_path_)); + ObString data_access_path_; +}; + + +class ObExprGetPath: public ObFuncExprOperator +{ +public: + explicit ObExprGetPath(common::ObIAllocator &alloc) + : ObFuncExprOperator(alloc, T_FUN_SYS_GET_PATH, N_GET_PATH, 2, VALID_FOR_GENERATED_COL, NOT_ROW_DIMENSION) {} + virtual ~ObExprGetPath() {} + virtual int calc_result_type2(ObExprResType &type, + ObExprResType &type1, + ObExprResType &type2, + ObExprTypeCtx &type_ctx) const + { + UNUSED(type1); + UNUSED(type2); + UNUSED(type_ctx); + type.set_varchar(); + type.set_collation_type(CS_TYPE_BINARY); + return common::OB_SUCCESS; + } + virtual int cg_expr(ObExprCGCtx &op_cg_ctx, + const ObRawExpr &raw_expr, + ObExpr &rt_expr) const override { + return common::OB_NOT_SUPPORTED; + } +private: + DISALLOW_COPY_AND_ASSIGN(ObExprGetPath); +}; + +} +} +#endif // OB_EXPR_GET_PATH_H diff --git a/src/sql/engine/expr/ob_expr_inner_trim.cpp b/src/sql/engine/expr/ob_expr_inner_trim.cpp index 6b43f4c3e..f9361324a 100644 --- a/src/sql/engine/expr/ob_expr_inner_trim.cpp +++ 
b/src/sql/engine/expr/ob_expr_inner_trim.cpp @@ -22,6 +22,7 @@ namespace oceanbase { using namespace common; +using namespace share; namespace sql { diff --git a/src/sql/engine/expr/ob_expr_operator_factory.cpp b/src/sql/engine/expr/ob_expr_operator_factory.cpp index c7c0f828a..c1316cfda 100644 --- a/src/sql/engine/expr/ob_expr_operator_factory.cpp +++ b/src/sql/engine/expr/ob_expr_operator_factory.cpp @@ -453,6 +453,7 @@ #include "sql/engine/expr/ob_expr_lock_func.h" #include "sql/engine/expr/ob_expr_topn_filter.h" +#include "sql/engine/expr/ob_expr_get_path.h" using namespace oceanbase::common; namespace oceanbase @@ -1114,6 +1115,7 @@ void ObExprOperatorFactory::register_expr_operators() REG_OP(ObExprRbAndnotNull2empty); REG_OP(ObExprRbToString); REG_OP(ObExprRbFromString); + REG_OP(ObExprGetPath); }(); // 注册oracle系统函数 REG_OP_ORCL(ObExprSysConnectByPath); @@ -1440,6 +1442,7 @@ void ObExprOperatorFactory::register_expr_operators() REG_OP_ORCL(ObExprInnerTableSequenceGetter); // REG_OP_ORCL(ObExprTopNFilter); REG_OP_ORCL(ObExprSdoRelate); + REG_OP_ORCL(ObExprGetPath); } bool ObExprOperatorFactory::is_expr_op_type_valid(ObExprOperatorType type) diff --git a/src/sql/engine/expr/ob_expr_trim.cpp b/src/sql/engine/expr/ob_expr_trim.cpp index 537f2069c..0571ac6de 100644 --- a/src/sql/engine/expr/ob_expr_trim.cpp +++ b/src/sql/engine/expr/ob_expr_trim.cpp @@ -23,6 +23,7 @@ namespace oceanbase { using namespace common; +using namespace share; namespace sql { diff --git a/src/sql/engine/expr/vector_cast/vector_cast.cpp b/src/sql/engine/expr/vector_cast/vector_cast.cpp index 7bc13c48e..e92ed39e4 100644 --- a/src/sql/engine/expr/vector_cast/vector_cast.cpp +++ b/src/sql/engine/expr/vector_cast/vector_cast.cpp @@ -206,10 +206,11 @@ ObExpr::EvalVectorFunc VectorCasterUtil::get_vector_cast(const VecValueTypeClass const ObCastMode cast_mode) { ObExpr::EvalVectorFunc ret_func = nullptr; + ObExpr::EvalFunc temp_func = nullptr; if (is_eval_arg_cast) { ret_func = 
CM_IS_EXPLICIT_CAST(cast_mode) ? VECTOR_EVAL_ARG_CAST_FUNCS[in_tc][out_tc][EXPLICIT_CAST_FLAG] : VECTOR_EVAL_ARG_CAST_FUNCS[in_tc][out_tc][IMPLICIT_CAST_FLAG]; - } else if (row_cast_fn == cast_not_expected + } else if (row_cast_fn == (temp_func = cast_not_expected) || row_cast_fn == cast_not_support || row_cast_fn == cast_inconsistent_types || row_cast_fn == cast_inconsistent_types_json diff --git a/src/sql/engine/table/ob_external_table_access_service.cpp b/src/sql/engine/table/ob_external_table_access_service.cpp index ff4e2d4af..31e826a73 100644 --- a/src/sql/engine/table/ob_external_table_access_service.cpp +++ b/src/sql/engine/table/ob_external_table_access_service.cpp @@ -21,6 +21,7 @@ #include "share/external_table/ob_external_table_utils.h" #include "share/ob_device_manager.h" #include "lib/utility/ob_macro_utils.h" +#include "sql/engine/table/ob_parquet_table_row_iter.h" namespace oceanbase { @@ -49,7 +50,7 @@ void ObExternalDataAccessDriver::close() } } -bool ObExternalDataAccessDriver::is_opened() +bool ObExternalDataAccessDriver::is_opened() const { return fd_.is_valid(); } @@ -103,13 +104,13 @@ int ObExternalDataAccessDriver::get_file_size(const ObString &url, int64_t &file return ret; } -int ObExternalDataAccessDriver::open(const ObString &url) +int ObExternalDataAccessDriver::open(const char *url) { int ret = OB_SUCCESS; if (OB_ISNULL(device_handle_)) { ret = OB_NOT_INIT; } else { - ret = device_handle_->open(url.ptr(), -1, 0, fd_, &iod_opts_); + ret = device_handle_->open(url, -1, 0, fd_, &iod_opts_); } return ret; } @@ -347,6 +348,13 @@ int ObExternalTableAccessService::table_scan( LOG_WARN("alloc memory failed", K(ret)); } break; + + case ObExternalFileFormat::PARQUET_FORMAT: + if (OB_ISNULL(row_iter = OB_NEWx(ObParquetTableRowIterator, (scan_param.allocator_)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc memory failed", K(ret)); + } + break; default: ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected format", K(ret), "format", 
param.external_file_format_.format_type_); @@ -375,6 +383,7 @@ int ObExternalTableAccessService::table_rescan(ObVTableScanParam ¶m, ObNewRo } else { switch (param.external_file_format_.format_type_) { case ObExternalFileFormat::CSV_FORMAT: + case ObExternalFileFormat::PARQUET_FORMAT: result->reset(); break; default: @@ -407,6 +416,12 @@ int ObExternalTableAccessService::revert_scan_iter(ObNewRowIterator *iter) return ret; } +int ObExternalTableRowIterator::init(const ObTableScanParam *scan_param) +{ + scan_param_ = scan_param; + return init_exprs(scan_param); +} + ObCSVTableRowIterator::~ObCSVTableRowIterator() { release_buf(); @@ -478,7 +493,17 @@ int ObCSVTableRowIterator::expand_buf() return ret; } -int ObCSVTableRowIterator::init_exprs(const storage::ObTableScanParam *scan_param) +int ObExternalTableRowIterator::gen_ip_port(ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + char buf[MAX_IP_PORT_SQL_LENGTH]; + int32_t len = 0; + OZ (GCONF.self_addr_.addr_to_buffer(buf, MAX_IP_PORT_SQL_LENGTH, len)); + OZ (ob_write_string(allocator, ObString(len, buf), ip_port_)); + return ret; +} + +int ObExternalTableRowIterator::init_exprs(const storage::ObTableScanParam *scan_param) { int ret = OB_SUCCESS; if (OB_ISNULL(scan_param)) { @@ -523,7 +548,6 @@ int ObCSVTableRowIterator::init(const storage::ObTableScanParam *scan_param) arena_alloc_.set_attr(lib::ObMemAttr(scan_param->tenant_id_, "CSVRowIter")); OZ (ObExternalTableRowIterator::init(scan_param)); OZ (parser_.init(scan_param->external_file_format_.csv_format_)); - OZ (init_exprs(scan_param)); OZ (data_access_driver_.init(scan_param_->external_file_location_, scan_param->external_file_access_info_)); OZ (expand_buf()); @@ -563,37 +587,64 @@ int ObCSVTableRowIterator::get_next_file_and_line_number(const int64_t task_idx, return ret; } -int ObCSVTableRowIterator::update_file_partition_list_value(const int64_t part_id) +int ObExternalTableRowIterator::fill_file_partition_expr(ObExpr *expr, ObNewRow &value, const 
int64_t row_count) { int ret = OB_SUCCESS; - if (part_id != state_.part_id_) { - state_.part_id_ = part_id; - share::schema::ObSchemaGetterGuard schema_guard; - const ObTableSchema *table_schema = NULL; - const ObPartition *partition = NULL; - if (OB_FAIL(GCTX.schema_service_->get_tenant_schema_guard( - scan_param_->tenant_id_, - schema_guard))) { - LOG_WARN("get_schema_guard failed", K(ret)); - } else if (OB_FAIL(schema_guard.get_table_schema(scan_param_->tenant_id_, scan_param_->index_id_, table_schema))) { - LOG_WARN("get table schema failed", K(ret)); - } else if (table_schema->is_partitioned_table() && table_schema->is_user_specified_partition_for_external_table()) { - if (OB_FAIL(table_schema->get_partition_by_part_id(part_id, CHECK_PARTITION_MODE_NORMAL, partition))) { - LOG_WARN("get partition failed", K(ret), K(part_id)); - } else if (OB_ISNULL(partition) || OB_UNLIKELY(partition->get_list_row_values().count() != 1) - || partition->get_list_row_values().at(0).get_count() != table_schema->get_partition_key_column_num()) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("partition is invalid", K(ret), K(part_id)); - } else { - int64_t pos = 0; - int64_t size = partition->get_list_row_values().at(0).get_deep_copy_size(); - char *buf = (char *)arena_alloc_.alloc(size); - if (OB_ISNULL(buf)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("allocate mem failed", K(ret)); - } - OZ (state_.part_list_val_.deep_copy(partition->get_list_row_values().at(0), buf, size, pos)); + ObEvalCtx &eval_ctx = scan_param_->op_->get_eval_ctx(); + ObDatum *datums = expr->locate_batch_datums(eval_ctx); + int64_t loc_idx = expr->extra_ - 1; + if (OB_UNLIKELY(loc_idx < 0 || loc_idx >= value.get_count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("loc idx is out of range", K(loc_idx), K(value), K(ret)); + } else { + if (value.get_cell(loc_idx).is_null()) { + for (int j = 0; OB_SUCC(ret) && j < row_count; j++) { + datums[j].set_null(); } + } else { + for (int j = 0; OB_SUCC(ret) && j < row_count; 
j++) { + CK (OB_NOT_NULL(datums[j].ptr_)); + OZ (datums[j].from_obj(value.get_cell(loc_idx))); + } + } + } + return ret; +} + +int ObExternalTableRowIterator::calc_file_partition_list_value(const int64_t part_id, ObIAllocator &allocator, ObNewRow &value) +{ + int ret = OB_SUCCESS; + share::schema::ObSchemaGetterGuard schema_guard; + const ObTableSchema *table_schema = NULL; + const ObPartition *partition = NULL; + if (OB_ISNULL(GCTX.schema_service_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error"); + } else if (OB_FAIL(GCTX.schema_service_->get_tenant_schema_guard( + scan_param_->tenant_id_, + schema_guard))) { + LOG_WARN("get_schema_guard failed", K(ret)); + } else if (OB_FAIL(schema_guard.get_table_schema(scan_param_->tenant_id_, scan_param_->index_id_, table_schema))) { + LOG_WARN("get table schema failed", K(ret)); + } else if (OB_ISNULL(table_schema)) { + ret = OB_TABLE_NOT_EXIST; + LOG_WARN("table not exist", K(scan_param_->index_id_), K(scan_param_->tenant_id_)); + } else if (table_schema->is_partitioned_table() && table_schema->is_user_specified_partition_for_external_table()) { + if (OB_FAIL(table_schema->get_partition_by_part_id(part_id, CHECK_PARTITION_MODE_NORMAL, partition))) { + LOG_WARN("get partition failed", K(ret), K(part_id)); + } else if (OB_ISNULL(partition) || OB_UNLIKELY(partition->get_list_row_values().count() != 1) + || partition->get_list_row_values().at(0).get_count() != table_schema->get_partition_key_column_num()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("partition is invalid", K(ret), K(part_id)); + } else { + int64_t pos = 0; + int64_t size = partition->get_list_row_values().at(0).get_deep_copy_size(); + char *buf = (char *)allocator.alloc(size); + if (OB_ISNULL(buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate mem failed", K(ret)); + } + OZ (value.deep_copy(partition->get_list_row_values().at(0), buf, size, pos)); } } return ret; @@ -620,7 +671,10 @@ int ObCSVTableRowIterator::open_next_file() } else if 
(part_id == 0) { //empty file do not belong to any partitions } else { - OZ (update_file_partition_list_value(part_id)); + if (part_id != state_.part_id_) { + state_.part_id_ = part_id; + OZ (calc_file_partition_list_value(part_id, arena_alloc_, state_.part_list_val_)); + } } if (OB_SUCC(ret)) { if (start_line == MIN_EXTERNAL_TABLE_LINE_NUMBER && end_line == INT64_MAX) { @@ -655,7 +709,7 @@ int ObCSVTableRowIterator::open_next_file() } LOG_DEBUG("try next file", K(ret), K(url_), K(file_url), K(state_)); } while (OB_SUCC(ret) && 0 >= state_.file_size_); //skip empty file - OZ (data_access_driver_.open(url_.string()), url_); + OZ (data_access_driver_.open(url_.ptr()), url_); LOG_DEBUG("open external file", K(ret), K(url_), K(state_.file_size_), K(location)); @@ -987,7 +1041,7 @@ int ObCSVTableRowIterator::get_next_rows(int64_t &count, int64_t capacity) OZ (column_convert_expr->eval_batch(eval_ctx, *bit_vector_cache_, returned_row_cnt)); if (OB_SUCC(ret)) { MEMCPY(column_expr->locate_batch_datums(eval_ctx), - column_convert_expr->locate_batch_datums(eval_ctx), sizeof(ObDatum) * returned_row_cnt); + column_convert_expr->locate_batch_datums(eval_ctx), sizeof(ObDatum) * returned_row_cnt); column_expr->set_evaluated_flag(eval_ctx); } } @@ -1006,6 +1060,5 @@ void ObCSVTableRowIterator::reset() - } } diff --git a/src/sql/engine/table/ob_external_table_access_service.h b/src/sql/engine/table/ob_external_table_access_service.h index 11ba2823e..bb5cd1ad4 100644 --- a/src/sql/engine/table/ob_external_table_access_service.h +++ b/src/sql/engine/table/ob_external_table_access_service.h @@ -22,7 +22,6 @@ #include "common/storage/ob_io_device.h" #include "share/backup/ob_backup_struct.h" - namespace oceanbase { namespace common @@ -38,8 +37,8 @@ public: ObExternalDataAccessDriver() : storage_type_(common::OB_STORAGE_MAX_TYPE), device_handle_(nullptr) {} ~ObExternalDataAccessDriver(); int init(const common::ObString &location, const ObString &access_info); - int open(const 
common::ObString &url); - bool is_opened(); + int open(const char *url); + bool is_opened() const; int get_file_size(const common::ObString &url, int64_t &file_size); int get_file_sizes(const ObString &location, const ObIArray &urls, ObIArray &file_sizes); @@ -66,13 +65,24 @@ private: class ObExternalTableRowIterator : public common::ObNewRowIterator { public: - ObExternalTableRowIterator() : scan_param_(nullptr) {} - virtual int init(const storage::ObTableScanParam *scan_param) { - scan_param_ = scan_param; - return common::OB_SUCCESS; - } + ObExternalTableRowIterator() : + scan_param_(nullptr), line_number_expr_(NULL), file_id_expr_(NULL), file_name_expr_(NULL) + {} + virtual int init(const storage::ObTableScanParam *scan_param); +protected: + int init_exprs(const storage::ObTableScanParam *scan_param); + int gen_ip_port(common::ObIAllocator &allocator); + int calc_file_partition_list_value(const int64_t part_id, common::ObIAllocator &allocator, common::ObNewRow &value); + int fill_file_partition_expr(ObExpr *expr, common::ObNewRow &value, const int64_t row_count); protected: const storage::ObTableScanParam *scan_param_; + //external table column exprs + common::ObSEArray column_exprs_; + //hidden columns + ObExpr *line_number_expr_; + ObExpr *file_id_expr_; + ObExpr *file_name_expr_; + common::ObString ip_port_; }; class ObExternalTableAccessService : public common::ObITabletScan @@ -144,13 +154,11 @@ public: K(cur_file_name_), K(cur_file_id_), K(cur_line_number_), K(line_count_limit_), K_(part_id), K_(ip_port_len), K_(file_with_url)); }; - ObCSVTableRowIterator() : bit_vector_cache_(NULL), line_number_expr_(NULL), file_id_expr_(NULL), - file_name_expr_(NULL) {} + ObCSVTableRowIterator() : bit_vector_cache_(NULL) {} virtual ~ObCSVTableRowIterator(); virtual int init(const storage::ObTableScanParam *scan_param) override; int get_next_row() override; int get_next_rows(int64_t &count, int64_t capacity) override; - int update_file_partition_list_value(const int64_t 
part_id); virtual int get_next_row(ObNewRow *&row) override { UNUSED(row); @@ -172,7 +180,6 @@ private: int skip_lines(); void release_buf(); void dump_error_log(common::ObIArray &error_msgs); - int init_exprs(const storage::ObTableScanParam *scan_param); private: ObBitVector *bit_vector_cache_; StateValues state_; @@ -181,15 +188,11 @@ private: ObCSVGeneralParser parser_; ObExternalDataAccessDriver data_access_driver_; ObSqlString url_; - ObSEArray column_exprs_; - ObExpr *line_number_expr_; - ObExpr *file_id_expr_; ObExpr *file_name_expr_; }; + } - - } #endif // OB_EXTERNAL_TABLE_ACCESS_SERVICE_H_ diff --git a/src/sql/engine/table/ob_parquet_table_row_iter.cpp b/src/sql/engine/table/ob_parquet_table_row_iter.cpp new file mode 100644 index 000000000..428de10d6 --- /dev/null +++ b/src/sql/engine/table/ob_parquet_table_row_iter.cpp @@ -0,0 +1,1409 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX SQL_ENG +#include "ob_parquet_table_row_iter.h" +#include "sql/engine/expr/ob_expr_get_path.h" +#include "share/external_table/ob_external_table_utils.h" +#include "sql/engine/expr/ob_datum_cast.h" +#include "sql/engine/ob_exec_context.h" +#include + +namespace oceanbase +{ +using namespace share::schema; +using namespace common; +using namespace share; +namespace sql { + +void ObArrowMemPool::init(uint64_t tenant_id) +{ + mem_attr_ = ObMemAttr(tenant_id, "ArrowMemPool"); +} + +arrow::Status ObArrowMemPool::Allocate(int64_t size, uint8_t** out) +{ + int ret = OB_SUCCESS; + arrow::Status status_ret = arrow::Status::OK(); + if (0 == size) { + *out = NULL; + } else { + void *buf = ob_malloc_align(64, size, mem_attr_); + if (OB_ISNULL(buf)) { + status_ret = arrow::Status::Invalid("allocate memory failed"); + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate memory", K(size), K(lbt())); + } else { + *out = static_cast(buf); + total_alloc_size_ += size; + } + } + LOG_DEBUG("ObArrowMemPool::Allocate", K(size), "stack", lbt()); + return status_ret; +} + +arrow::Status ObArrowMemPool::Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) +{ + int ret = OB_SUCCESS; + uint8_t* old = *ptr; + arrow::Status status_ret = Allocate(new_size, ptr); + if (arrow::Status::OK() == status_ret) { + MEMCPY(*ptr, old, std::min(old_size, new_size)); /* copy into the new buffer (*ptr), never past its end when shrinking */ + Free(old, old_size); + } + LOG_DEBUG("ObArrowMemPool::Reallocate", K(old_size), K(new_size), "stack", lbt()); + return status_ret; +} + +void ObArrowMemPool::Free(uint8_t* buffer, int64_t size) { + int ret = OB_SUCCESS; + ob_free_align(buffer); + total_alloc_size_ -= size; + LOG_DEBUG("ObArrowMemPool::Free", K(size), "stack", lbt()); +} + +void ObArrowMemPool::ReleaseUnused() { + LOG_DEBUG("ObArrowMemPool::ReleaseUnused", "stack", lbt()); +} + +int64_t ObArrowMemPool::bytes_allocated() const { + LOG_DEBUG("ObArrowMemPool::bytes_allocated", "stack", lbt()); + return total_alloc_size_; +} + + +int
ObArrowFile::open() +{ + return file_reader_.open(file_name_); +} + +arrow::Status ObArrowFile::Seek(int64_t position) { + position_ = position; + return arrow::Status::OK(); +} + +arrow::Result ObArrowFile::Read(int64_t nbytes, void *out) +{ + int ret = OB_SUCCESS; + arrow::Result ret_code; + int64_t read_size = -1; + if (OB_FAIL(file_reader_.pread(out, nbytes, position_, read_size))) { + LOG_WARN("fail to read file", K(ret), K(nbytes)); + ret_code = arrow::Result(arrow::Status(arrow::StatusCode::IOError, "read file failed")); + } else { + position_ += read_size; + ret_code = read_size; + } + LOG_DEBUG("Read(int64_t nbytes, void *out)", K(nbytes)); + return ret_code; +} + +arrow::Result> ObArrowFile::Read(int64_t nbytes) +{ + ARROW_ASSIGN_OR_RAISE(auto buffer, arrow::AllocateResizableBuffer(nbytes, pool_)); + ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, Read(nbytes, buffer->mutable_data())); + if (bytes_read < nbytes) { + RETURN_NOT_OK(buffer->Resize(bytes_read)); + } + LOG_DEBUG("ObArrowFile::Read(int64_t nbytes)", K(nbytes)); + return std::move(buffer); +} + + +arrow::Result ObArrowFile::ReadAt(int64_t position, int64_t nbytes, void* out) +{ + int ret = OB_SUCCESS; + arrow::Result ret_code; + int64_t read_size = -1; + + if (OB_FAIL(file_reader_.pread(out, nbytes, position, read_size))) { + LOG_WARN("fail to read file", K(ret), K(position), K(nbytes)); + ret_code = arrow::Result(arrow::Status(arrow::StatusCode::IOError, "read at file failed")); + } else { + position_ = position + read_size; + ret_code = read_size; + } + LOG_DEBUG("ObArrowFile::Read(int64_t nbytes)", K(nbytes)); + return ret_code; +} + +arrow::Result> ObArrowFile::ReadAt(int64_t position, int64_t nbytes) +{ + ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(nbytes, pool_)); + ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, + ReadAt(position, nbytes, buffer->mutable_data())); + if (bytes_read < nbytes) { + RETURN_NOT_OK(buffer->Resize(bytes_read)); + buffer->ZeroPadding(); + } + 
LOG_DEBUG("ObArrowFile::ReadAt(int64_t position, int64_t nbytes)", K(nbytes)); + return std::move(buffer); +} + + +arrow::Result ObArrowFile::Tell() const +{ + return position_; +} + +arrow::Result ObArrowFile::GetSize() +{ + int ret = OB_SUCCESS; + arrow::Result ret_code; + int64_t file_size = 0; + if (OB_FAIL(file_reader_.get_file_size(file_name_, file_size))) { + LOG_WARN("fail to get file size", K(ret), K(file_name_)); + ret_code = arrow::Result(arrow::Status(arrow::StatusCode::IOError, "get file size")); + } else { + ret_code = file_size; + } + return ret_code; +} + +arrow::Status ObArrowFile::Close() +{ + file_reader_.close(); + return arrow::Status::OK(); +} + +bool ObArrowFile::closed() const +{ + return !file_reader_.is_opened(); +} + +bool mem_zero_detect(void *buf, size_t n) +{ + size_t size = n; + if (size == 0) + return true; + uint8_t * ptr = (uint8_t *)buf; + if (*ptr == 0 && memcmp(ptr, ptr + 1, size - 1) == 0) + return true; + return false; +} + +int ObParquetTableRowIterator::init(const storage::ObTableScanParam *scan_param) +{ + int ret = OB_SUCCESS; + ObEvalCtx &eval_ctx = scan_param->op_->get_eval_ctx(); + mem_attr_ = ObMemAttr(MTL_ID(), "ParquetRowIter"); + allocator_.set_attr(mem_attr_); + arrow_alloc_.init(MTL_ID()); + OZ (ObExternalTableRowIterator::init(scan_param)); + OZ (data_access_driver_.init(scan_param->external_file_location_, + scan_param->external_file_access_info_)); + + if (OB_SUCC(ret)) { + ObArray file_column_exprs; + ObArray file_meta_column_exprs; + for (int i = 0; OB_SUCC(ret) && i < scan_param->ext_file_column_exprs_->count(); i++) { + ObExpr* ext_file_column_expr = scan_param->ext_file_column_exprs_->at(i); + if (OB_ISNULL(ext_file_column_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected ptr", K(ret)); + } else if (ext_file_column_expr->type_ == T_PSEUDO_EXTERNAL_FILE_URL + || ext_file_column_expr->type_ == T_PSEUDO_PARTITION_LIST_COL) { + OZ (file_meta_column_exprs.push_back(ext_file_column_expr)); + } else if 
(ext_file_column_expr->type_ == T_PSEUDO_EXTERNAL_FILE_COL) { + OZ (file_column_exprs.push_back(ext_file_column_expr)); + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr", KPC(ext_file_column_expr)); + } + } + OZ (file_column_exprs_.assign(file_column_exprs)); + OZ (file_meta_column_exprs_.assign(file_meta_column_exprs)); + + if (file_column_exprs_.count() > 0) { + OZ (column_indexs_.allocate_array(allocator_, file_column_exprs_.count())); + OZ (column_readers_.allocate_array(allocator_, file_column_exprs_.count())); + OZ (load_funcs_.allocate_array(allocator_, file_column_exprs_.count())); + } + LOG_DEBUG("check exprs", K(file_column_exprs), K(file_meta_column_exprs), KPC(scan_param->ext_file_column_exprs_)); + } + + if (OB_SUCC(ret) && OB_ISNULL(bit_vector_cache_)) { + void *mem = nullptr; + if (OB_ISNULL(mem = allocator_.alloc(ObBitVector::memory_size(eval_ctx.max_batch_size_)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc memory for skip", K(ret), K(eval_ctx.max_batch_size_)); + } else { + bit_vector_cache_ = to_bit_vector(mem); + bit_vector_cache_->reset(eval_ctx.max_batch_size_); + } + } + + if (OB_SUCC(ret)) { + OZ (def_levels_buf_.allocate_array(allocator_, eval_ctx.max_batch_size_)); + OZ (rep_levels_buf_.allocate_array(allocator_, eval_ctx.max_batch_size_)); + } + + if (OB_SUCC(ret)) { + OZ (file_url_ptrs_.allocate_array(allocator_, eval_ctx.max_batch_size_)); + OZ (file_url_lens_.allocate_array(allocator_, eval_ctx.max_batch_size_)); + } + + return ret; +} + +int ObParquetTableRowIterator::next_file() +{ + int ret = OB_SUCCESS; + ObEvalCtx &eval_ctx = scan_param_->op_->get_eval_ctx(); + ObString location = scan_param_->external_file_location_; + int64_t task_idx = 0; + int64_t file_size = 0; + + do { + if ((task_idx = state_.file_idx_++) >= scan_param_->key_ranges_.count()) { + ret = OB_ITER_END; + } else { + state_.cur_file_url_ = 
scan_param_->key_ranges_.at(task_idx).get_start_key().get_obj_ptr()[ObExternalTableUtils::FILE_URL].get_string(); + url_.reuse(); + const char *split_char = "/"; + OZ (url_.append_fmt("%.*s%s%.*s", location.length(), location.ptr(), + (location.empty() || location[location.length() - 1] == '/') ? "" : split_char, + state_.cur_file_url_.length(), state_.cur_file_url_.ptr())); + OZ (data_access_driver_.get_file_size(url_.string(), file_size)); + + if (OB_SUCC(ret)) { + ObString expr_file_url; + if (data_access_driver_.get_storage_type() == OB_STORAGE_FILE) { + ObSqlString full_name; + if (ip_port_.empty()) { + OZ(gen_ip_port(allocator_)); + } + OZ (full_name.append_fmt("%.*s%%%.*s", ip_port_.length(), ip_port_.ptr(), + state_.cur_file_url_.length(), state_.cur_file_url_.ptr())); + OZ (ob_write_string(allocator_, full_name.string(), expr_file_url)); + } else { + expr_file_url = state_.cur_file_url_; + } + for (int i = 0; OB_SUCC(ret) && i < eval_ctx.max_batch_size_; i++) { + file_url_ptrs_.at(i) = expr_file_url.ptr(); + file_url_lens_.at(i) = expr_file_url.length(); + } + } + + LOG_DEBUG("current external file", K(url_), K(file_size)); + } + } while (OB_SUCC(ret) && OB_UNLIKELY(0 >= file_size)); //skip not exist or empty file + + if (OB_SUCC(ret)) { + + int64_t part_id = scan_param_->key_ranges_.at(task_idx).get_start_key().get_obj_ptr()[ObExternalTableUtils::PARTITION_ID].get_int(); + if (part_id != 0 && state_.part_id_ != part_id) { + state_.part_id_ = part_id; + OZ (calc_file_partition_list_value(part_id, allocator_, state_.part_list_val_)); + } + + state_.cur_file_id_ = scan_param_->key_ranges_.at(task_idx).get_start_key().get_obj_ptr()[ObExternalTableUtils::FILE_ID].get_int(); + OZ (ObExternalTableUtils::resolve_line_number_range(scan_param_->key_ranges_.at(task_idx), + ObExternalTableUtils::ROW_GROUP_NUMBER, + state_.cur_row_group_idx_, + state_.end_row_group_idx_)); + + try { + file_meta_.reset(); + file_reader_.reset(); + std::shared_ptr cur_file = + 
std::make_shared(data_access_driver_, url_.ptr(), &arrow_alloc_); + OZ (cur_file.get()->open()); + if (OB_SUCC(ret)) { + file_reader_ = parquet::ParquetFileReader::Open(cur_file, read_props_); + file_meta_ = file_reader_->metadata(); + state_.end_row_group_idx_ = std::min((int64_t)(file_meta_->num_row_groups()), state_.end_row_group_idx_); + } + for (int i = 0; OB_SUCC(ret) && i < file_column_exprs_.count(); i++) { + ObDataAccessPathExtraInfo *data_access_info = + static_cast(file_column_exprs_.at(i)->extra_info_); + int column_index = + file_meta_->schema()->ColumnIndex(std::string(data_access_info->data_access_path_.ptr(), + data_access_info->data_access_path_.length())); + const parquet::ColumnDescriptor *col_desc = NULL; + if (column_index < 0) { + ret = OB_ERR_INVALID_JSON_PATH; + LOG_WARN("invalid path", K(data_access_info->data_access_path_)); + } else { + col_desc = file_meta_->schema()->Column(column_index); + load_funcs_.at(i) = DataLoader::select_load_function(file_column_exprs_.at(i)->datum_meta_, col_desc); + if (OB_ISNULL(load_funcs_.at(i)) + || col_desc->max_repetition_level() != 0) { + ret = OB_ERR_INVALID_TYPE_FOR_OP; + std::string p_type = col_desc->logical_type()->ToString(); + int64_t pos = 0; + ObArrayWrap buf; + ObDatumMeta &meta = file_column_exprs_.at(i)->datum_meta_; + const char *ob_type = ob_obj_type_str(file_column_exprs_.at(i)->datum_meta_.type_); + if (OB_SUCCESS == buf.allocate_array(allocator_, 100)) { + ob_sql_type_str(buf.get_data(), buf.count(), pos, meta.type_, + OB_MAX_VARCHAR_LENGTH, meta.precision_, meta.scale_, meta.cs_type_); + if (pos < buf.count()) { + buf.at(pos++) = '\0'; + ob_type = buf.get_data(); + } + } + LOG_WARN("not supported type", K(ret), K(file_column_exprs_.at(i)->datum_meta_), + K(ObString(p_type.length(), p_type.data())), "rep_level", col_desc->max_repetition_level()); + LOG_USER_ERROR(OB_ERR_INVALID_TYPE_FOR_OP, p_type.c_str(), ob_type); + } else { + column_indexs_.at(i) = column_index; + LOG_DEBUG("mapped 
ob type", K(column_index), "column type", + file_meta_->schema()->Column(column_index)->physical_type(), "path", + data_access_info->data_access_path_); + } + } + } + } catch(const std::exception& e) { + if (OB_SUCC(ret)) { + //invalid file + ret = OB_INVALID_EXTERNAL_FILE; + LOG_USER_ERROR(OB_INVALID_EXTERNAL_FILE, e.what()); + LOG_WARN("unexpected error", K(ret), "Info", e.what()); + } + } catch(...) { + if (OB_SUCC(ret)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error", K(ret)); + } + } + } + + return ret; +} + +int ObParquetTableRowIterator::next_row_group() +{ + int ret = OB_SUCCESS; + //init all meta + if (OB_SUCC(ret) && state_.cur_row_group_idx_ > state_.end_row_group_idx_) { + if (OB_FAIL(next_file())) { + if (OB_ITER_END != ret) { + LOG_WARN("fail to next row group", K(ret)); + } + } + } + if (OB_SUCC(ret)) { + int64_t cur_row_group = (state_.cur_row_group_idx_++) - 1; + try { + std::shared_ptr rg_reader = file_reader_->RowGroup(cur_row_group); + state_.cur_row_group_read_row_count_ = 0; + state_.cur_row_group_row_count_ = file_meta_->RowGroup(cur_row_group)->num_rows(); + for (int i = 0; OB_SUCC(ret) && i < column_indexs_.count(); i++) { + column_readers_.at(i) = rg_reader->Column(column_indexs_.at(i)); + } + } catch(const std::exception& e) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected index", K(ret), "Info", e.what(), K(cur_row_group), K(column_indexs_)); + } catch(...) 
{ + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected index", K(ret), K(cur_row_group), K(column_indexs_)); + } + } + return ret; +} + +int ObParquetTableRowIterator::DataLoader::load_data_for_col(LOAD_FUNC &func) +{ + return (this->*func)(); +} + +ObParquetTableRowIterator::DataLoader::LOAD_FUNC ObParquetTableRowIterator::DataLoader::select_load_function( + const ObDatumMeta &datum_type, const parquet::ColumnDescriptor *col_desc) +{ + LOAD_FUNC func = NULL; + const parquet::LogicalType* log_type = col_desc->logical_type().get(); + parquet::Type::type phy_type = col_desc->physical_type(); + bool no_log_type = log_type->is_none(); + if ((no_log_type || log_type->is_int()) && ob_is_integer_type(datum_type.type_)) { + //convert parquet int storing as int32/int64 to + // ObTinyIntType/ObSmallIntType/ObMediumIntType/ObInt32Type/ObIntType using int64_t memory layout + // ObUTinyIntType/ObUSmallIntType/ObUMediumIntType/ObUInt32Type/ObUInt64Type using uint64_t memory layout + if (parquet::Type::INT64 == phy_type) { + func = &DataLoader::load_int64_to_int64_vec; + } else if (parquet::Type::INT32 == phy_type) { + func = &DataLoader::load_int32_to_int64_vec; + } + //sign and width + ObObj temp_obj; + temp_obj.set_int(datum_type.type_, 0); + if ((no_log_type || static_cast(log_type)->is_signed()) != temp_obj.is_signed_integer()) { + func = NULL; + } + if (no_log_type ? (temp_obj.get_tight_data_len() != (parquet::Type::INT32 == phy_type ? 
4 : 8)) + : static_cast(log_type)->bit_width() > temp_obj.get_tight_data_len() * 8) { + func = NULL; + } + } else if ((no_log_type || log_type->is_string() || log_type->is_enum()) + && (ob_is_string_type(datum_type.type_) || ob_is_enum_or_set_type(datum_type.type_))) { + //convert parquet enum/string to enum/string vector + if (parquet::Type::BYTE_ARRAY == phy_type) { + func = &DataLoader::load_string_col; + } else if (parquet::Type::FIXED_LEN_BYTE_ARRAY == phy_type) { + func = &DataLoader::load_fixed_string_col; + } + } else if ((no_log_type || log_type->is_decimal() || log_type->is_int()) + && ob_is_number_or_decimal_int_tc(datum_type.type_)) { + //convert parquet int storing as int32/int64 to number/decimal vector + if (log_type->is_decimal() && (col_desc->type_precision() != datum_type.precision_ + || col_desc->type_scale() != datum_type.scale_)) { + func = NULL; + } else { + //there is 4 kinds of physical format in parquet(int32/int64/fixedbytearray/bytearray) + // and 2 class of types for OB vector(decimalint/number) + if (parquet::Type::INT32 == phy_type && ob_is_decimal_int_tc(datum_type.type_) + && DECIMAL_INT_32 == get_decimalint_type(datum_type.precision_)) { + func = &DataLoader::load_int32_to_int32_vec; + } else if (parquet::Type::INT64 == phy_type && ob_is_decimal_int_tc(datum_type.type_) + && DECIMAL_INT_64 == get_decimalint_type(datum_type.precision_)) { + func = &DataLoader::load_int64_to_int64_vec; + } else if (parquet::Type::INT32 == phy_type + || parquet::Type::INT64 == phy_type + || parquet::Type::BYTE_ARRAY == phy_type + || parquet::Type::FIXED_LEN_BYTE_ARRAY == phy_type) { + func = &DataLoader::load_decimal_any_col; + } + } + } else if ((no_log_type || log_type->is_date()) + && (ob_is_datetime(datum_type.type_) || ob_is_date_tc(datum_type.type_))) { + if (parquet::Type::INT32 == phy_type && ob_is_date_tc(datum_type.type_)) { + func = &DataLoader::load_int32_to_int32_vec; + } else if (parquet::Type::INT32 == phy_type && 
ob_is_datetime(datum_type.type_)) { + func = &DataLoader::load_date_col_to_datetime; + } + } else if (log_type->is_time() && ob_is_time_tc(datum_type.type_)) { + switch (static_cast(log_type)->time_unit()) { + case parquet::LogicalType::TimeUnit::unit::MILLIS: { + if (parquet::Type::INT32 == phy_type) { + func = &DataLoader::load_time_millis_col; + } + break; + } + case parquet::LogicalType::TimeUnit::unit::MICROS: { + if (parquet::Type::INT64 == phy_type) { + func = &DataLoader::load_int64_to_int64_vec; + } + break; + } + case parquet::LogicalType::TimeUnit::unit::NANOS: { + if (parquet::Type::INT64 == phy_type) { + func = &DataLoader::load_time_nanos_col; + } + break; + } + default: { + func = NULL; + } + } + } else if (log_type->is_timestamp() && parquet::Type::INT64 == phy_type + && (ob_is_otimestamp_type(datum_type.type_) || ObTimestampType == datum_type.type_)) { + switch (static_cast(log_type)->time_unit()) { + case parquet::LogicalType::TimeUnit::unit::MILLIS: { + if (ObTimestampType == datum_type.type_ + || ObTimestampLTZType == datum_type.type_ + || ObTimestampNanoType == datum_type.type_) { + func = &DataLoader::load_timestamp_millis_col; + } + break; + } + case parquet::LogicalType::TimeUnit::unit::MICROS: { + if (ObTimestampType == datum_type.type_ && is_parquet_store_utc(log_type)) { + //mysql timestamp storing utc timestamp as int64 values + func = &DataLoader::load_int64_to_int64_vec; + } else if (ObTimestampType == datum_type.type_ + || ObTimestampLTZType == datum_type.type_ + || ObTimestampNanoType == datum_type.type_) { + func = &DataLoader::load_timestamp_micros_col; + } + break; + } + case parquet::LogicalType::TimeUnit::unit::NANOS: { + if (ObTimestampType == datum_type.type_ + || ObTimestampLTZType == datum_type.type_ + || ObTimestampNanoType == datum_type.type_) { + func = &DataLoader::load_timestamp_nanos_col; + } + break; + } + default: { + func = NULL; + } + } + } else if ((no_log_type || log_type->is_timestamp()) && parquet::Type::INT96 
== phy_type + && (ob_is_otimestamp_type(datum_type.type_) || ObTimestampType == datum_type.type_)) { + func = &DataLoader::load_timestamp_hive; + } else if (no_log_type && parquet::Type::FLOAT == phy_type && ObFloatType == datum_type.type_) { + func = &DataLoader::load_float; + } else if (no_log_type && parquet::Type::DOUBLE == phy_type && ObDoubleType == datum_type.type_) { + func = &DataLoader::load_double; + } else if (log_type->is_interval() + || log_type->is_map() + || log_type->is_list() + || log_type->is_JSON()) { + func = NULL; + } + return func; +} + + +#define IS_PARQUET_COL_NOT_NULL (0 == max_def_level) +#define IS_PARQUET_COL_VALUE_IS_NULL(V) (V < max_def_level) + +int ObParquetTableRowIterator::DataLoader::load_int32_to_int32_vec() +{ + int ret = OB_SUCCESS; + int64_t values_cnt = 0; + ObEvalCtx::TempAllocGuard tmp_alloc_g(eval_ctx_); + ObFixedLengthBase *dec_vec = static_cast(file_col_expr_->get_vector(eval_ctx_)); + int16_t max_def_level = reader_->descr()->max_definition_level(); + ObArrayWrap values; + + OZ (values.allocate_array(tmp_alloc_g.get_allocator(), batch_size_)); + if (OB_SUCC(ret)) { + row_count_ = static_cast(reader_)->ReadBatch( + batch_size_, def_levels_buf_.get_data(), rep_levels_buf_.get_data(), + values.get_data(), &values_cnt); + int j = 0; + if (IS_PARQUET_COL_NOT_NULL && values_cnt == row_count_) { + MEMCPY(pointer_cast(dec_vec->get_data()), values.get_data(), sizeof(int32_t) * row_count_); + } else { + for (int i = 0; OB_SUCC(ret) && i < row_count_; i++) { + if (IS_PARQUET_COL_VALUE_IS_NULL(def_levels_buf_.at(i))) { + file_col_expr_->get_vector(eval_ctx_)->set_null(i); + } else { + dec_vec->set_int32(i, values.at(j++)); + } + } + } + } + return ret; +} + +// convert int value to decimal int or number +int ObParquetTableRowIterator::DataLoader::to_numeric(const int64_t idx, const int64_t int_value) +{ + int ret = OB_SUCCESS; + ObEvalCtx::TempAllocGuard tmp_alloc_g(eval_ctx_); + if (ObDecimalIntType == 
file_col_expr_->datum_meta_.type_) { + ObFixedLengthBase *vec = static_cast(file_col_expr_->get_vector(eval_ctx_)); + ObDecimalInt *decint = NULL; + int32_t int_bytes = 0; + if (OB_FAIL(wide::from_integer(int_value, tmp_alloc_g.get_allocator(), decint, + int_bytes, file_col_expr_->datum_meta_.precision_))) { + LOG_WARN("fail to from integer", K(ret)); + } else { + vec->set_decimal_int(idx, decint, int_bytes); + } + } else if (ObNumberType == file_col_expr_->datum_meta_.type_) { + ObDiscreteBase *vec = static_cast(file_col_expr_->get_vector(eval_ctx_)); + number::ObNumber res_nmb; + if (OB_FAIL(res_nmb.from(int_value, tmp_alloc_g.get_allocator()))) { + LOG_WARN("fail to from number", K(ret)); + } else { + vec->set_number(idx, res_nmb); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("not supported type", K(file_col_expr_->datum_meta_)); + } + return ret; +} + +// convert string value to decimal int or number +int ObParquetTableRowIterator::DataLoader::to_numeric( + const int64_t idx, + const char *str, + const int32_t length) +{ + int ret = OB_SUCCESS; + ObEvalCtx::TempAllocGuard tmp_alloc_g(eval_ctx_); + ObDecimalInt *decint = NULL; + int32_t val_len = 0; + int16_t in_precision = 0; + int16_t in_scale = 0; + int16_t out_precision = file_col_expr_->datum_meta_.precision_; + int16_t out_scale = file_col_expr_->datum_meta_.scale_; + if (OB_FAIL(wide::from_string(str, length, tmp_alloc_g.get_allocator(), in_scale, in_precision, val_len, decint))) { + LOG_WARN("fail to from number", K(ret), KPHEX(str, length)); + } else { + if (ObDecimalIntType == file_col_expr_->datum_meta_.type_) { + ObFixedLengthBase *vec = static_cast(file_col_expr_->get_vector(eval_ctx_)); + if (in_precision > out_precision) { + ret = OB_DECIMAL_PRECISION_OVERFLOW; + } else { + ObDecimalIntBuilder res_val; + if (ObDatumCast::need_scale_decimalint(in_scale, in_precision, out_scale, out_precision)) { + if (OB_FAIL(ObDatumCast::common_scale_decimalint(decint, val_len, in_scale, out_scale, + 
out_precision, 0, res_val))) { + LOG_WARN("scale decimal int failed", K(ret)); + } else { + vec->set_decimal_int(idx, res_val.get_decimal_int(), res_val.get_int_bytes()); + } + } else { + vec->set_decimal_int(idx, decint, val_len); + } + } + } else if (ObNumberType == file_col_expr_->datum_meta_.type_) { + ObDiscreteBase *vec = static_cast(file_col_expr_->get_vector(eval_ctx_)); + number::ObNumber res_nmb; + if (OB_FAIL(wide::to_number(decint, val_len, file_col_expr_->datum_meta_.scale_, + tmp_alloc_g.get_allocator(), res_nmb))) { + LOG_WARN("fail to from", K(ret)); + } else { + vec->set_number(idx, res_nmb); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("not supported type", K(file_col_expr_->datum_meta_)); + } + } + + return ret; +}
+
+// Convert one big-endian two's-complement decimal value (the byte layout read
+// from FIXED_LEN_BYTE_ARRAY / BYTE_ARRAY decimal columns) into a little-endian
+// decimal int of data_len bytes and store it at row idx of the output vector.
+//   idx      : row index inside the output vector
+//   str      : source bytes, most significant byte first
+//   length   : number of source bytes
+//   buf      : caller-provided scratch buffer of at least data_len bytes
+//   data_len : width in bytes of the target decimal int
+// Returns OB_INVALID_ARGUMENT for a NULL/empty source or NULL scratch buffer,
+// OB_DECIMAL_PRECISION_OVERFLOW when the source is wider than the target, and
+// OB_ERR_UNEXPECTED when the output column is neither decimal int nor number.
+// NOTE(review): like the ntohll-based swap it replaces, this assumes a
+// little-endian host -- confirm if big-endian targets are ever built.
+int ObParquetTableRowIterator::DataLoader::to_numeric_hive(
+    const int64_t idx,
+    const char *str,
+    const int32_t length,
+    char *buf,
+    const int64_t data_len)
+{
+  int ret = OB_SUCCESS;
+  ObDecimalInt *decint = NULL;
+  int32_t val_len = 0;
+
+  if (OB_ISNULL(str) || OB_ISNULL(buf) || OB_UNLIKELY(length <= 0)) {
+    // an empty value has no sign byte to read; reject it instead of touching *str
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid decimal bytes", K(ret), KP(str), KP(buf), K(length));
+  } else if (OB_UNLIKELY(length > data_len)) {
+    ret = OB_DECIMAL_PRECISION_OVERFLOW;
+    LOG_WARN("overflow", K(length), K(data_len));
+  } else {
+    // Sign-extend: test the top bit through unsigned char so the result does not
+    // depend on whether plain char is signed on the target platform.
+    const bool is_negative = (0 != (static_cast<unsigned char>(str[0]) & 0x80));
+    MEMSET(buf, is_negative ? 0xFF : 0x00, data_len);
+    // Reverse byte order (big endian -> little endian). A byte-wise copy never
+    // writes past buf + length, so 4-byte targets (precision <= 9) are safe, and
+    // it avoids unaligned 8-byte loads/stores.
+    for (int32_t k = 0; k < length; ++k) {
+      buf[k] = str[length - 1 - k];
+    }
+    decint = pointer_cast<ObDecimalInt *>(buf);
+    val_len = static_cast<int32_t>(data_len);
+    if (ObDecimalIntType == file_col_expr_->datum_meta_.type_) {
+      ObFixedLengthBase *vec = static_cast<ObFixedLengthBase *>(file_col_expr_->get_vector(eval_ctx_));
+      vec->set_decimal_int(idx, decint, val_len);
+    } else if (ObNumberType == file_col_expr_->datum_meta_.type_) {
+      ObEvalCtx::TempAllocGuard tmp_alloc_g(eval_ctx_);
+      ObDiscreteBase *vec = static_cast<ObDiscreteBase *>(file_col_expr_->get_vector(eval_ctx_));
+      number::ObNumber res_nmb;
+      if (OB_FAIL(wide::to_number(decint, val_len, file_col_expr_->datum_meta_.scale_,
+                                  tmp_alloc_g.get_allocator(), res_nmb))) {
+        LOG_WARN("fail to from", K(ret));
+      } else {
+        vec->set_number(idx, res_nmb);
+      }
+    } else {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("not supported type", K(file_col_expr_->datum_meta_));
+    }
+  }
+  return ret;
+}
+
+//convert int32/int64/string value(from parquet file) to decimal int or number(ob types)
+// Reads one batch (up to batch_size_ rows) from reader_ and writes row_count_
+// rows into the output vector; a definition level below the column maximum
+// marks the row NULL. ReadBatch is only issued when the value buffers were
+// allocated successfully, so a failed allocation is returned to the caller
+// instead of being passed to the reader as a NULL output buffer.
+// Physical types other than the four handled here fall through with ret unchanged.
+int ObParquetTableRowIterator::DataLoader::load_decimal_any_col()
+{
+  int ret = OB_SUCCESS;
+  int64_t values_cnt = 0;
+  ObEvalCtx::TempAllocGuard tmp_alloc_g(eval_ctx_);
+  int16_t max_def_level = reader_->descr()->max_definition_level();
+
+  if (reader_->descr()->physical_type() == parquet::Type::type::INT32) {
+    ObArrayWrap<int32_t> values;
+    OZ (values.allocate_array(tmp_alloc_g.get_allocator(), batch_size_));
+    if (OB_SUCC(ret)) {
+      row_count_ = static_cast<parquet::Int32Reader*>(reader_)->ReadBatch(
+          batch_size_, def_levels_buf_.get_data(), rep_levels_buf_.get_data(),
+          values.get_data(), &values_cnt);
+    }
+    int j = 0;
+    for (int i = 0; OB_SUCC(ret) && i < row_count_; i++) {
+      if (IS_PARQUET_COL_VALUE_IS_NULL(def_levels_buf_.at(i))) {
+        file_col_expr_->get_vector(eval_ctx_)->set_null(i);
+      } else {
+        OZ (to_numeric(i, values.at(j++)));
+      }
+    }
+  } else if (reader_->descr()->physical_type() == parquet::Type::type::INT64) {
+    ObArrayWrap<int64_t> values;
+    OZ (values.allocate_array(tmp_alloc_g.get_allocator(), batch_size_));
+    if (OB_SUCC(ret)) {
+      row_count_ = static_cast<parquet::Int64Reader*>(reader_)->ReadBatch(
+          batch_size_, def_levels_buf_.get_data(), rep_levels_buf_.get_data(),
+          values.get_data(), &values_cnt);
+    }
+    int j = 0;
+    for (int i = 0; OB_SUCC(ret) && i < row_count_; i++) {
+      if (IS_PARQUET_COL_VALUE_IS_NULL(def_levels_buf_.at(i))) {
+        file_col_expr_->get_vector(eval_ctx_)->set_null(i);
+      } else {
+        OZ (to_numeric(i, values.at(j++)));
+      }
+    }
+  } else if (reader_->descr()->physical_type() == parquet::Type::type::FIXED_LEN_BYTE_ARRAY) {
+    ObArrayWrap<parquet::FixedLenByteArray> values;
+    int32_t fixed_length = reader_->descr()->type_length();
+    int32_t int_bytes = wide::ObDecimalIntConstValue::get_int_bytes_by_precision(file_col_expr_->datum_meta_.precision_);
+    // scratch buffer reused for every row of the batch
+    ObArrayWrap<char> buffer;
+    OZ (buffer.allocate_array(tmp_alloc_g.get_allocator(), int_bytes));
+    OZ (values.allocate_array(tmp_alloc_g.get_allocator(), batch_size_));
+    if (OB_SUCC(ret)) {
+      row_count_ = static_cast<parquet::FixedLenByteArrayReader*>(reader_)->ReadBatch(
+          batch_size_, def_levels_buf_.get_data(), rep_levels_buf_.get_data(),
+          values.get_data(), &values_cnt);
+    }
+    int j = 0;
+    for (int i = 0; OB_SUCC(ret) && i < row_count_; i++) {
+      if (IS_PARQUET_COL_VALUE_IS_NULL(def_levels_buf_.at(i))) {
+        file_col_expr_->get_vector(eval_ctx_)->set_null(i);
+      } else {
+        parquet::FixedLenByteArray &cur_v = values.at(j++);
+        OZ (to_numeric_hive(i, pointer_cast<const char*>(cur_v.ptr), fixed_length, buffer.get_data(), buffer.count()));
+      }
+    }
+  } else if (reader_->descr()->physical_type() == parquet::Type::type::BYTE_ARRAY) {
+    ObArrayWrap<parquet::ByteArray> values;
+    int32_t int_bytes = wide::ObDecimalIntConstValue::get_int_bytes_by_precision(file_col_expr_->datum_meta_.precision_);
+    // scratch buffer reused for every row of the batch
+    ObArrayWrap<char> buffer;
+    OZ (buffer.allocate_array(tmp_alloc_g.get_allocator(), int_bytes));
+    OZ (values.allocate_array(tmp_alloc_g.get_allocator(), batch_size_));
+    if (OB_SUCC(ret)) {
+      row_count_ = static_cast<parquet::ByteArrayReader*>(reader_)->ReadBatch(
+          batch_size_, def_levels_buf_.get_data(), rep_levels_buf_.get_data(),
+          values.get_data(), &values_cnt);
+    }
+    int j = 0;
+    for (int i = 0; OB_SUCC(ret) && i < row_count_; i++) {
+      if (IS_PARQUET_COL_VALUE_IS_NULL(def_levels_buf_.at(i))) {
+        file_col_expr_->get_vector(eval_ctx_)->set_null(i);
+      } else {
+        parquet::ByteArray &cur_v = values.at(j++);
+        OZ (to_numeric_hive(i, pointer_cast<const char*>(cur_v.ptr), cur_v.len, buffer.get_data(), buffer.count()));
+        //OZ (to_numeric(i,
pointer_cast(cur_v.ptr), cur_v.len)); + } + } + } + + return ret; +} + +int ObParquetTableRowIterator::DataLoader::load_fixed_string_col() +{ + int ret = OB_SUCCESS; + int64_t values_cnt = 0; + ObEvalCtx::TempAllocGuard tmp_alloc_g(eval_ctx_); + int16_t max_def_level = reader_->descr()->max_definition_level(); + StrDiscVec *text_vec = static_cast(file_col_expr_->get_vector(eval_ctx_)); + ObArrayWrap values; + + CK (VEC_DISCRETE == text_vec->get_format()); + OZ (values.allocate_array(tmp_alloc_g.get_allocator(), batch_size_)); + if (OB_SUCC(ret)) { + int32_t fixed_length = reader_->descr()->type_length(); + row_count_ = static_cast(reader_)->ReadBatch( + batch_size_, def_levels_buf_.get_data(), rep_levels_buf_.get_data(), + values.get_data(), &values_cnt); + if (OB_UNLIKELY(values_cnt > row_count_)) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("repeated data not support"); + } else { + bool is_byte_length = is_oracle_byte_length( + lib::is_oracle_mode(), file_col_expr_->datum_meta_.length_semantics_); + int j = 0; + for (int i = 0; i < row_count_; i++) { + if (IS_PARQUET_COL_VALUE_IS_NULL(def_levels_buf_.at(i))) { + text_vec->set_null(i); + } else { + parquet::FixedLenByteArray &cur_v = values.at(j++); + text_vec->set_string(i, pointer_cast(cur_v.ptr), fixed_length); + if (OB_UNLIKELY(fixed_length > file_col_expr_->max_length_ + && (is_byte_length || ObCharset::strlen_char(CS_TYPE_UTF8MB4_BIN, + pointer_cast(cur_v.ptr), + fixed_length) > file_col_expr_->max_length_))) { + ret = OB_ERR_DATA_TOO_LONG; + LOG_WARN("data too long", K(ret)); + } + } + } + } + } + return ret; +} + +int ObParquetTableRowIterator::DataLoader::load_string_col() +{ + int ret = OB_SUCCESS; + int64_t values_cnt = 0; + ObEvalCtx::TempAllocGuard tmp_alloc_g(eval_ctx_); + int16_t max_def_level = reader_->descr()->max_definition_level(); + StrDiscVec *text_vec = static_cast(file_col_expr_->get_vector(eval_ctx_)); + ObArrayWrap values; + + CK (VEC_DISCRETE == text_vec->get_format()); + OZ 
(values.allocate_array(tmp_alloc_g.get_allocator(), batch_size_)); + if (OB_SUCC(ret)) { + row_count_ = static_cast(reader_)->ReadBatch( + batch_size_, def_levels_buf_.get_data(), rep_levels_buf_.get_data(), + values.get_data(), &values_cnt); + if (OB_UNLIKELY(values_cnt > row_count_)) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("repeated data not support"); + } else { + bool is_byte_length = is_oracle_byte_length( + lib::is_oracle_mode(), file_col_expr_->datum_meta_.length_semantics_); + int j = 0; + for (int i = 0; i < row_count_; i++) { + if (IS_PARQUET_COL_VALUE_IS_NULL(def_levels_buf_.at(i))) { + text_vec->set_null(i); + } else { + parquet::ByteArray &cur_v = values.at(j++); + text_vec->set_string(i, pointer_cast(cur_v.ptr), cur_v.len); + if (OB_UNLIKELY(cur_v.len > file_col_expr_->max_length_ + && (is_byte_length || ObCharset::strlen_char(CS_TYPE_UTF8MB4_BIN, + pointer_cast(cur_v.ptr), + cur_v.len) > file_col_expr_->max_length_))) { + ret = OB_ERR_DATA_TOO_LONG; + LOG_WARN("data too long", K(ret)); + } + } + } + } + } + return ret; +} + +int ObParquetTableRowIterator::DataLoader::load_int32_to_int64_vec() +{ + int ret = OB_SUCCESS; + int64_t values_cnt = 0; + ObEvalCtx::TempAllocGuard tmp_alloc_g(eval_ctx_); + int16_t max_def_level = reader_->descr()->max_definition_level(); + ObFixedLengthBase *int32_vec = static_cast(file_col_expr_->get_vector(eval_ctx_)); + ObArrayWrap values; + + CK (VEC_FIXED == int32_vec->get_format()); + OZ (values.allocate_array(tmp_alloc_g.get_allocator(), batch_size_)); + if (OB_SUCC(ret)) { + row_count_ = static_cast(reader_)->ReadBatch( + batch_size_, def_levels_buf_.get_data(), rep_levels_buf_.get_data(), + values.get_data(), &values_cnt); + if (OB_UNLIKELY(values_cnt > row_count_)) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("repeated data not support"); + } else { + int j = 0; + for (int i = 0; i < row_count_; i++) { + if (IS_PARQUET_COL_VALUE_IS_NULL(def_levels_buf_.at(i))) { + int32_vec->set_null(i); + } else { + int32_vec->set_int(i, 
values.at(j++)); + } + } + } + } + return ret; +} + +int ObParquetTableRowIterator::DataLoader::load_int64_to_int64_vec() +{ + int ret = OB_SUCCESS; + int64_t values_cnt = 0; + ObEvalCtx::TempAllocGuard tmp_alloc_g(eval_ctx_); + int16_t max_def_level = reader_->descr()->max_definition_level(); + ObFixedLengthBase *int64_vec = static_cast(file_col_expr_->get_vector(eval_ctx_)); + ObArrayWrap values; + + CK (VEC_FIXED == int64_vec->get_format()); + OZ (values.allocate_array(tmp_alloc_g.get_allocator(), batch_size_)); + if (OB_SUCC(ret)) { + row_count_ = static_cast(reader_)->ReadBatch( + batch_size_, def_levels_buf_.get_data(), rep_levels_buf_.get_data(), + values.get_data(), &values_cnt); + if (OB_UNLIKELY(values_cnt > row_count_)) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("repeated data not support"); + } else if (IS_PARQUET_COL_NOT_NULL && values_cnt == row_count_) { + MEMCPY(pointer_cast(int64_vec->get_data()), values.get_data(), sizeof(int64_t) * row_count_); + } else { + int j = 0; + for (int i = 0; i < row_count_; i++) { + if (IS_PARQUET_COL_VALUE_IS_NULL(def_levels_buf_.at(i))) { + int64_vec->set_null(i); + } else { + int64_vec->set_int(i, values.at(j++)); + } + } + } + } + return ret; +} + +int ObParquetTableRowIterator::DataLoader::load_date_col_to_datetime() +{ + int ret = OB_SUCCESS; + int64_t values_cnt = 0; + ObEvalCtx::TempAllocGuard tmp_alloc_g(eval_ctx_); + ObFixedLengthBase *dec_vec = static_cast(file_col_expr_->get_vector(eval_ctx_)); + int16_t max_def_level = reader_->descr()->max_definition_level(); + ObArrayWrap values; + + OZ (values.allocate_array(tmp_alloc_g.get_allocator(), batch_size_)); + if (OB_SUCC(ret)) { + row_count_ = static_cast(reader_)->ReadBatch( + batch_size_, def_levels_buf_.get_data(), rep_levels_buf_.get_data(), + values.get_data(), &values_cnt); + int j = 0; + for (int i = 0; OB_SUCC(ret) && i < row_count_; i++) { + if (IS_PARQUET_COL_VALUE_IS_NULL(def_levels_buf_.at(i))) { + file_col_expr_->get_vector(eval_ctx_)->set_null(i); + 
} else { + dec_vec->set_datetime(i, values.at(j++) * USECS_PER_DAY); + } + } + } + return ret; +} + +int ObParquetTableRowIterator::DataLoader::load_time_millis_col() +{ + int ret = OB_SUCCESS; + int64_t values_cnt = 0; + ObEvalCtx::TempAllocGuard tmp_alloc_g(eval_ctx_); + ObFixedLengthBase *dec_vec = static_cast(file_col_expr_->get_vector(eval_ctx_)); + int16_t max_def_level = reader_->descr()->max_definition_level(); + ObArrayWrap values; + OZ (values.allocate_array(tmp_alloc_g.get_allocator(), batch_size_)); + if (OB_SUCC(ret)) { + row_count_ = static_cast(reader_)->ReadBatch( + batch_size_, def_levels_buf_.get_data(), rep_levels_buf_.get_data(), + values.get_data(), &values_cnt); + int j = 0; + for (int i = 0; OB_SUCC(ret) && i < row_count_; i++) { + if (IS_PARQUET_COL_VALUE_IS_NULL(def_levels_buf_.at(i))) { + file_col_expr_->get_vector(eval_ctx_)->set_null(i); + } else { + dec_vec->set_time(i, values.at(j++) * USECS_PER_MSEC); + } + } + } + return ret; +} + +int ObParquetTableRowIterator::DataLoader::load_time_nanos_col() +{ + int ret = OB_SUCCESS; + int64_t values_cnt = 0; + ObEvalCtx::TempAllocGuard tmp_alloc_g(eval_ctx_); + ObFixedLengthBase *dec_vec = static_cast(file_col_expr_->get_vector(eval_ctx_)); + int16_t max_def_level = reader_->descr()->max_definition_level(); + ObArrayWrap values; + OZ (values.allocate_array(tmp_alloc_g.get_allocator(), batch_size_)); + if (OB_SUCC(ret)) { + row_count_ = static_cast(reader_)->ReadBatch( + batch_size_, def_levels_buf_.get_data(), rep_levels_buf_.get_data(), + values.get_data(), &values_cnt); + int j = 0; + for (int i = 0; OB_SUCC(ret) && i < row_count_; i++) { + if (IS_PARQUET_COL_VALUE_IS_NULL(def_levels_buf_.at(i))) { + file_col_expr_->get_vector(eval_ctx_)->set_null(i); + } else { + dec_vec->set_time(i, values.at(j++) / NSECS_PER_USEC); + } + } + } + return ret; +} + +bool ObParquetTableRowIterator::DataLoader::is_parquet_store_utc(const parquet::LogicalType *logtype) +{ + return logtype->is_timestamp() ? 
static_cast(logtype)->is_adjusted_to_utc() : true; +} + +bool ObParquetTableRowIterator::DataLoader::is_ob_type_store_utc(const ObDatumMeta &meta) +{ + return (lib::is_mysql_mode() && ObTimestampType == meta.type_) + || (lib::is_oracle_mode() && ObTimestampLTZType == meta.type_); +} + +int64_t ObParquetTableRowIterator::DataLoader::calc_tz_adjust_us() +{ + int64_t res = 0; + bool is_utc_src = is_parquet_store_utc(reader_->descr()->logical_type().get()); + bool is_utc_dst = is_ob_type_store_utc(file_col_expr_->datum_meta_); + if (is_utc_src != is_utc_dst) { + int32_t tmp_offset = 0; + if (OB_NOT_NULL(eval_ctx_.exec_ctx_.get_my_session()) + && OB_NOT_NULL(eval_ctx_.exec_ctx_.get_my_session()->get_timezone_info()) + && OB_SUCCESS == eval_ctx_.exec_ctx_.get_my_session()->get_timezone_info()->get_timezone_offset(0, tmp_offset)) { + res = SEC_TO_USEC(tmp_offset) * (is_utc_src ? 1 : -1); + } + } + return res; +} + +int ObParquetTableRowIterator::DataLoader::load_timestamp_millis_col() +{ + int ret = OB_SUCCESS; + int64_t values_cnt = 0; + ObEvalCtx::TempAllocGuard tmp_alloc_g(eval_ctx_); + ObFixedLengthBase *dec_vec = static_cast(file_col_expr_->get_vector(eval_ctx_)); + int16_t max_def_level = reader_->descr()->max_definition_level(); + ObArrayWrap values; + int64_t adjust_us = calc_tz_adjust_us(); + + OZ (values.allocate_array(tmp_alloc_g.get_allocator(), batch_size_)); + if (OB_SUCC(ret)) { + row_count_ = static_cast(reader_)->ReadBatch( + batch_size_, def_levels_buf_.get_data(), rep_levels_buf_.get_data(), + values.get_data(), &values_cnt); + int j = 0; + for (int i = 0; OB_SUCC(ret) && i < row_count_; i++) { + if (IS_PARQUET_COL_VALUE_IS_NULL(def_levels_buf_.at(i))) { + file_col_expr_->get_vector(eval_ctx_)->set_null(i); + } else { + int64_t adjusted_value = values.at(j++) * USECS_PER_MSEC + adjust_us; + if (ObTimestampType == file_col_expr_->datum_meta_.type_) { + dec_vec->set_timestamp(i, adjusted_value); + } else { + ObOTimestampData data; + data.time_us_ = 
adjusted_value; + dec_vec->set_otimestamp(i, data); + } + } + } + } + return ret; +} + +int ObParquetTableRowIterator::DataLoader::load_timestamp_micros_col() +{ + int ret = OB_SUCCESS; + int64_t values_cnt = 0; + ObEvalCtx::TempAllocGuard tmp_alloc_g(eval_ctx_); + ObFixedLengthBase *dec_vec = static_cast(file_col_expr_->get_vector(eval_ctx_)); + int16_t max_def_level = reader_->descr()->max_definition_level(); + ObArrayWrap values; + int64_t adjust_us = calc_tz_adjust_us(); + OZ (values.allocate_array(tmp_alloc_g.get_allocator(), batch_size_)); + if (OB_SUCC(ret)) { + row_count_ = static_cast(reader_)->ReadBatch( + batch_size_, def_levels_buf_.get_data(), rep_levels_buf_.get_data(), + values.get_data(), &values_cnt); + int j = 0; + for (int i = 0; OB_SUCC(ret) && i < row_count_; i++) { + if (IS_PARQUET_COL_VALUE_IS_NULL(def_levels_buf_.at(i))) { + file_col_expr_->get_vector(eval_ctx_)->set_null(i); + } else { + int64_t adjusted_value = (values.at(j++) + adjust_us); + if (ObTimestampType == file_col_expr_->datum_meta_.type_) { + dec_vec->set_timestamp(i, adjusted_value); + } else { + ObOTimestampData data; + data.time_us_ = adjusted_value; + dec_vec->set_otimestamp(i, data); + } + } + } + } + return ret; +}
+
+// Load one batch of INT64 nanosecond timestamps into the output vector.
+// Each value is split into whole microseconds plus a sub-microsecond remainder;
+// the timezone adjustment from calc_tz_adjust_us() is added to the microseconds.
+// ObTimestampType columns keep only the microseconds; the other timestamp
+// types also store the remainder as tail nanoseconds.
+// The split uses floor division so that values before the epoch (negative
+// nanoseconds) give a remainder in [0, NSECS_PER_USEC) rather than a negative
+// tail produced by C++ truncating division.
+int ObParquetTableRowIterator::DataLoader::load_timestamp_nanos_col()
+{
+  int ret = OB_SUCCESS;
+  int64_t values_cnt = 0;
+  ObEvalCtx::TempAllocGuard tmp_alloc_g(eval_ctx_);
+  ObFixedLengthBase *dec_vec = static_cast<ObFixedLengthBase *>(file_col_expr_->get_vector(eval_ctx_));
+  int16_t max_def_level = reader_->descr()->max_definition_level();
+  ObArrayWrap<int64_t> values;
+  int64_t adjust_us = calc_tz_adjust_us();
+  OZ (values.allocate_array(tmp_alloc_g.get_allocator(), batch_size_));
+  if (OB_SUCC(ret)) {
+    row_count_ = static_cast<parquet::Int64Reader*>(reader_)->ReadBatch(
+        batch_size_, def_levels_buf_.get_data(), rep_levels_buf_.get_data(),
+        values.get_data(), &values_cnt);
+    int j = 0;
+    for (int i = 0; OB_SUCC(ret) && i < row_count_; i++) {
+      if (IS_PARQUET_COL_VALUE_IS_NULL(def_levels_buf_.at(i))) {
+        file_col_expr_->get_vector(eval_ctx_)->set_null(i);
+      } else {
+        const int64_t cur_value = values.at(j++);
+        int64_t usec = cur_value / NSECS_PER_USEC;
+        int64_t tail_nsec = cur_value % NSECS_PER_USEC;
+        if (tail_nsec < 0) {
+          // borrow one microsecond so the remainder becomes non-negative
+          usec -= 1;
+          tail_nsec += NSECS_PER_USEC;
+        }
+        if (ObTimestampType == file_col_expr_->datum_meta_.type_) {
+          dec_vec->set_timestamp(i, usec + adjust_us);
+        } else {
+          ObOTimestampData data;
+          data.time_us_ = usec + adjust_us;
+          data.time_ctx_.set_tail_nsec(static_cast<int32_t>(tail_nsec));
+          dec_vec->set_otimestamp(i, data);
+        }
+      }
+    }
+  }
+  return ret;
+}
+
+int ObParquetTableRowIterator::DataLoader::load_timestamp_hive() +{ + int ret = OB_SUCCESS; + int64_t values_cnt = 0; + ObEvalCtx::TempAllocGuard tmp_alloc_g(eval_ctx_); + ObFixedLengthBase *dec_vec = static_cast(file_col_expr_->get_vector(eval_ctx_)); + int16_t max_def_level = reader_->descr()->max_definition_level(); + ObArrayWrap values; + int64_t adjust_us = calc_tz_adjust_us(); + OZ (values.allocate_array(tmp_alloc_g.get_allocator(), batch_size_)); + if (OB_SUCC(ret)) { + row_count_ = static_cast(reader_)->ReadBatch( + batch_size_, def_levels_buf_.get_data(), rep_levels_buf_.get_data(), + values.get_data(), &values_cnt); + int j = 0; + for (int i = 0; OB_SUCC(ret) && i < row_count_; i++) { + if (IS_PARQUET_COL_VALUE_IS_NULL(def_levels_buf_.at(i))) { + file_col_expr_->get_vector(eval_ctx_)->set_null(i); + } else { + parquet::Int96 &value = values.at(j++); + uint64_t nsec_time_value = ((uint64_t)value.value[1] << 32) + (uint64_t)value.value[0]; + uint32_t julian_date_value = value.value[2]; + int64_t utc_timestamp =((int64_t)julian_date_value - 2440588LL) * 86400000000LL + (int64_t)(nsec_time_value / NSECS_PER_USEC); + if (ObTimestampType == file_col_expr_->datum_meta_.type_) { + dec_vec->set_timestamp(i, utc_timestamp + adjust_us); + } else { + ObOTimestampData data; + data.time_us_ = utc_timestamp + adjust_us; + data.time_ctx_.set_tail_nsec((int32_t)(nsec_time_value % NSECS_PER_USEC)); + dec_vec->set_otimestamp(i, data); + } + } + } + } + return ret; +} + +int
ObParquetTableRowIterator::DataLoader::load_float() +{ + int ret = OB_SUCCESS; + int64_t values_cnt = 0; + ObEvalCtx::TempAllocGuard tmp_alloc_g(eval_ctx_); + int16_t max_def_level = reader_->descr()->max_definition_level(); + ObFixedLengthBase *float_vec = static_cast(file_col_expr_->get_vector(eval_ctx_)); + ObArrayWrap values; + + CK (VEC_FIXED == float_vec->get_format()); + OZ (values.allocate_array(tmp_alloc_g.get_allocator(), batch_size_)); + if (OB_SUCC(ret)) { + row_count_ = static_cast(reader_)->ReadBatch( + batch_size_, def_levels_buf_.get_data(), rep_levels_buf_.get_data(), + values.get_data(), &values_cnt); + if (OB_UNLIKELY(values_cnt > row_count_)) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("repeated data not support"); + } else if (IS_PARQUET_COL_NOT_NULL && values_cnt == row_count_) { + MEMCPY(pointer_cast(float_vec->get_data()), values.get_data(), sizeof(float) * row_count_); + } else { + int j = 0; + for (int i = 0; i < row_count_; i++) { + if (IS_PARQUET_COL_VALUE_IS_NULL(def_levels_buf_.at(i))) { + float_vec->set_null(i); + } else { + float_vec->set_float(i, values.at(j++)); + } + } + } + } + return ret; +} + +int ObParquetTableRowIterator::DataLoader::load_double() +{ + int ret = OB_SUCCESS; + int64_t values_cnt = 0; + ObEvalCtx::TempAllocGuard tmp_alloc_g(eval_ctx_); + int16_t max_def_level = reader_->descr()->max_definition_level(); + ObFixedLengthBase *double_vec = static_cast(file_col_expr_->get_vector(eval_ctx_)); + ObArrayWrap values; + + CK (VEC_FIXED == double_vec->get_format()); + OZ (values.allocate_array(tmp_alloc_g.get_allocator(), batch_size_)); + if (OB_SUCC(ret)) { + row_count_ = static_cast(reader_)->ReadBatch( + batch_size_, def_levels_buf_.get_data(), rep_levels_buf_.get_data(), + values.get_data(), &values_cnt); + if (OB_UNLIKELY(values_cnt > row_count_)) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("repeated data not support"); + } else if (IS_PARQUET_COL_NOT_NULL && values_cnt == row_count_) { + 
MEMCPY(pointer_cast(double_vec->get_data()), values.get_data(), sizeof(double) * row_count_); + } else { + int j = 0; + for (int i = 0; i < row_count_; i++) { + if (IS_PARQUET_COL_VALUE_IS_NULL(def_levels_buf_.at(i))) { + double_vec->set_null(i); + } else { + double_vec->set_double(i, values.at(j++)); + } + } + } + } + return ret; +} + +#undef IS_PARQUET_COL_NOT_NULL +#undef IS_PARQUET_COL_VALUE_IS_NULL + +int ObParquetTableRowIterator::get_next_rows(int64_t &count, int64_t capacity) +{ + int ret = OB_SUCCESS; + ObEvalCtx &eval_ctx = scan_param_->op_->get_eval_ctx(); + const ExprFixedArray &column_conv_exprs = *(scan_param_->ext_column_convert_exprs_); + int64_t read_count = 0; + ObMallocHookAttrGuard guard(mem_attr_); + + if (OB_SUCC(ret) && state_.cur_row_group_read_row_count_ >= state_.cur_row_group_row_count_) { + if (OB_FAIL(next_row_group())) { + if (OB_ITER_END != ret) { + LOG_WARN("fail to next row group", K(ret)); + } + } + } + + if (!file_column_exprs_.count()) { + read_count = std::min(capacity, state_.cur_row_group_row_count_ - state_.cur_row_group_read_row_count_); + } else { + //load vec data from parquet file to file column expr + for (int i = 0; OB_SUCC(ret) && i < file_column_exprs_.count(); ++i) { + if (OB_UNLIKELY(!column_readers_.at(i).get()->HasNext())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("page end unexpected", K(ret)); + } + if (OB_SUCC(ret)) { + DataLoader loader(eval_ctx, file_column_exprs_.at(i), column_readers_.at(i).get(), + def_levels_buf_, rep_levels_buf_, capacity, read_count); + OZ (file_column_exprs_.at(i)->init_vector_for_write( + eval_ctx, file_column_exprs_.at(i)->get_default_res_format(), eval_ctx.max_batch_size_)); + OZ (loader.load_data_for_col(load_funcs_.at(i))); + file_column_exprs_.at(i)->set_evaluated_projected(eval_ctx); + } + } + } + if (OB_SUCC(ret) && read_count > 0) { + //fill expr results from metadata + for (int i = 0; OB_SUCC(ret) && i < file_meta_column_exprs_.count(); i++) { + ObExpr *meta_expr = 
file_meta_column_exprs_.at(i); + if (meta_expr->type_ == T_PSEUDO_EXTERNAL_FILE_URL) { + StrDiscVec *text_vec = static_cast(meta_expr->get_vector(eval_ctx)); + OZ (meta_expr->init_vector_for_write(eval_ctx, VEC_DISCRETE, read_count)); + if (OB_SUCC(ret)) { + text_vec->set_ptrs(file_url_ptrs_.get_data()); + text_vec->set_lens(file_url_lens_.get_data()); + } + } else if (meta_expr->type_ == T_PSEUDO_PARTITION_LIST_COL) { + OZ (meta_expr->init_vector_for_write(eval_ctx, VEC_UNIFORM, read_count)); + OZ (fill_file_partition_expr(meta_expr, state_.part_list_val_, read_count)); + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr", KPC(meta_expr)); + } + meta_expr->set_evaluated_projected(eval_ctx); + } + + for (int i = 0; OB_SUCC(ret) && i < column_exprs_.count(); i++) { + //column_conv_exprs is 1-1 mapped to column_exprs + //calc gen column exprs + if (!column_conv_exprs.at(i)->get_eval_info(eval_ctx).evaluated_) { + OZ (column_conv_exprs.at(i)->init_vector_default(eval_ctx, read_count)); + OZ (column_conv_exprs.at(i)->eval_vector(eval_ctx, *bit_vector_cache_, read_count, true)); + column_conv_exprs.at(i)->set_evaluated_projected(eval_ctx); + } + //assign gen column exprs value to column exprs(output exprs) + if (OB_SUCC(ret)) { + ObExpr *to = column_exprs_.at(i); + ObExpr *from = column_conv_exprs.at(i); + VectorHeader &to_vec_header = to->get_vector_header(eval_ctx); + VectorHeader &from_vec_header = from->get_vector_header(eval_ctx); + if (from_vec_header.format_ == VEC_UNIFORM_CONST) { + ObDatum *from_datum = + static_cast(from->get_vector(eval_ctx))->get_datums(); + OZ(to->init_vector(eval_ctx, VEC_UNIFORM, read_count)); + ObUniformBase *to_vec = static_cast(to->get_vector(eval_ctx)); + ObDatum *to_datums = to_vec->get_datums(); + for (int64_t j = 0; j < read_count && OB_SUCC(ret); j++) { + to_datums[j] = *from_datum; + } + } else if (from_vec_header.format_ == VEC_UNIFORM) { + ObUniformBase *uni_vec = static_cast(from->get_vector(eval_ctx)); + ObDatum 
*src = uni_vec->get_datums(); + ObDatum *dst = to->locate_batch_datums(eval_ctx); + if (src != dst) { + MEMCPY(dst, src, read_count * sizeof(ObDatum)); + } + OZ(to->init_vector(eval_ctx, VEC_UNIFORM, read_count)); + } else { + to_vec_header = from_vec_header; + } + column_exprs_.at(i)->set_evaluated_projected(eval_ctx); + } + } + } + if (OB_SUCC(ret)) { + state_.cur_row_group_read_row_count_ += read_count; + count = read_count; + } + return ret; +} + +int ObParquetTableRowIterator::get_next_row() +{ + int ret = OB_NOT_SUPPORTED; + return ret; +} + +void ObParquetTableRowIterator::reset() { + // reset state_ to initial values for rescan + state_.reuse(); +} + +int ObParquetTableRowIterator::calc_exprs_for_rowid(const int64_t read_count) +{ + int ret = OB_SUCCESS; + ObEvalCtx &eval_ctx = scan_param_->op_->get_eval_ctx(); + if (OB_NOT_NULL(file_id_expr_)) { + OZ (file_id_expr_->init_vector_for_write(eval_ctx, VEC_FIXED, read_count)); + for (int i = 0; OB_SUCC(ret) && i < read_count; i++) { + ObFixedLengthBase *vec = static_cast(file_id_expr_->get_vector(eval_ctx)); + vec->set_int(i, state_.cur_file_id_); + } + file_id_expr_->set_evaluated_flag(eval_ctx); + } + if (OB_NOT_NULL(line_number_expr_)) { + OZ (line_number_expr_->init_vector_for_write(eval_ctx, VEC_FIXED, read_count)); + for (int i = 0; OB_SUCC(ret) && i < read_count; i++) { + ObFixedLengthBase *vec = static_cast(line_number_expr_->get_vector(eval_ctx)); + vec->set_int(i, state_.cur_line_number_ + i); + } + line_number_expr_->set_evaluated_flag(eval_ctx); + } + state_.cur_line_number_ += read_count; + return ret; +} + + +} +} diff --git a/src/sql/engine/table/ob_parquet_table_row_iter.h b/src/sql/engine/table/ob_parquet_table_row_iter.h new file mode 100644 index 000000000..7dd52f177 --- /dev/null +++ b/src/sql/engine/table/ob_parquet_table_row_iter.h @@ -0,0 +1,236 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OB_PARQUET_TABLE_ROW_ITER_H +#define OB_PARQUET_TABLE_ROW_ITER_H + +#include +#include +#include +#include +#include + +#include "share/ob_i_tablet_scan.h" +#include "lib/file/ob_file.h" +#include "common/row/ob_row_iterator.h" +#include "storage/access/ob_dml_param.h" +#include "common/storage/ob_io_device.h" +#include "share/backup/ob_backup_struct.h" +#include "sql/engine/table/ob_external_table_access_service.h" + +namespace oceanbase { +namespace sql { + +class ObArrowMemPool : public ::arrow::MemoryPool +{ +public: + ObArrowMemPool() : total_alloc_size_(0) {} + void init(uint64_t tenant_id); + virtual arrow::Status Allocate(int64_t size, uint8_t** out) override; + + virtual arrow::Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) override; + + virtual void Free(uint8_t* buffer, int64_t size) override; + + virtual void ReleaseUnused() override; + + virtual int64_t bytes_allocated() const override; + + virtual int64_t max_memory() const override { return -1; } + + virtual std::string backend_name() const override { return "Arrow"; } +private: + common::ObArenaAllocator alloc_; + common::ObMemAttr mem_attr_; + arrow::internal::MemoryPoolStats stats_; + int64_t total_alloc_size_; +};
+
+
+// arrow::io::RandomAccessFile implementation that holds a reference to an
+// ObExternalDataAccessDriver, a file name and an arrow::MemoryPool.
+// The destructor closes the driver.
+class ObArrowFile : public arrow::io::RandomAccessFile {
+public:
+  // position_ is set to 0 here so the member never holds an indeterminate
+  // value; the original initializer list left it uninitialized.
+  ObArrowFile(ObExternalDataAccessDriver &file_reader, const char*file_name, arrow::MemoryPool *pool)
+    : file_reader_(file_reader), file_name_(file_name), pool_(pool), position_(0)
+  {}
+  ~ObArrowFile() override {
+    file_reader_.close();
+  }
+
+  int open();
+
+ virtual arrow::Status Close() override; + + virtual bool closed() const override; + + virtual arrow::Result Read(int64_t nbytes, void* out) override; + virtual arrow::Result> Read(int64_t nbytes) override; + virtual arrow::Result ReadAt(int64_t position, int64_t nbytes, void* out) override; + virtual arrow::Result> ReadAt(int64_t position, int64_t nbytes) override; + + + virtual arrow::Status Seek(int64_t position) override; + virtual arrow::Result Tell() const override; + virtual arrow::Result GetSize() override; +private: + ObExternalDataAccessDriver &file_reader_; + const char* file_name_; + arrow::MemoryPool *pool_; + int64_t position_; +}; + +class ObParquetTableRowIterator : public ObExternalTableRowIterator { +public: + struct StateValues { + StateValues() : + file_idx_(0), + part_id_(0), + row_group_idx_(0), + cur_file_id_(0), + cur_row_group_idx_(0), + end_row_group_idx_(-1), + cur_row_group_read_row_count_(0), + cur_row_group_row_count_(0), + cur_line_number_(0) {} + + void reuse() { + file_idx_ = 0; + part_id_ = 0; + row_group_idx_ = 0; + cur_file_id_ = 0; + cur_row_group_idx_ = 0; + end_row_group_idx_ = -1; + cur_row_group_read_row_count_ = 0; + cur_row_group_row_count_ = 0; + cur_line_number_ = 0; + cur_file_url_.reset(); + part_list_val_.reset(); + } + + int64_t file_idx_; + int64_t part_id_; + int64_t row_group_idx_; + int64_t cur_file_id_; + int64_t cur_row_group_idx_; + int64_t end_row_group_idx_; + int64_t cur_row_group_read_row_count_; + int64_t cur_row_group_row_count_; + int64_t cur_line_number_; + ObString cur_file_url_; + ObNewRow part_list_val_; + }; +public: + ObParquetTableRowIterator() : + read_props_(&arrow_alloc_), + file_column_exprs_(allocator_), + file_meta_column_exprs_(allocator_), + bit_vector_cache_(NULL) {} + virtual ~ObParquetTableRowIterator() {} + + int init(const storage::ObTableScanParam *scan_param) override; + int get_next_row() override; + int get_next_rows(int64_t &count, int64_t capacity) override; + + virtual int 
get_next_row(ObNewRow *&row) override { + UNUSED(row); + return common::OB_ERR_UNEXPECTED; + } + + virtual void reset() override; +private: + // load vec data from parquet file to expr mem + struct DataLoader { + DataLoader(ObEvalCtx &eval_ctx, + ObExpr *file_col_expr, + parquet::ColumnReader *reader, + common::ObIArrayWrap &def_levels_buf, + common::ObIArrayWrap &rep_levels_buf, + const int64_t batch_size, + int64_t &row_count): + eval_ctx_(eval_ctx), + file_col_expr_(file_col_expr), + reader_(reader), + batch_size_(batch_size), + row_count_(row_count), + def_levels_buf_(def_levels_buf), + rep_levels_buf_(rep_levels_buf) + {} + typedef int (DataLoader::*LOAD_FUNC)(); + static LOAD_FUNC select_load_function(const ObDatumMeta &datum_type, + const parquet::ColumnDescriptor *col_desc); + int16_t get_max_def_level(); + int load_data_for_col(LOAD_FUNC &func); + + int load_int64_to_int64_vec(); + int load_int32_to_int64_vec(); + int load_int32_to_int32_vec(); + int load_string_col(); + int load_fixed_string_col(); + int load_decimal_any_col(); + //[TODO EXTERNAL TABLE] float16 + int load_date_col_to_datetime(); + int load_time_millis_col(); + int load_time_nanos_col(); + int load_timestamp_millis_col(); + int load_timestamp_micros_col(); + int load_timestamp_nanos_col(); + int load_timestamp_hive(); + int load_float(); + int load_double(); + + int to_numeric(const int64_t idx, const int64_t int_value); + int to_numeric(const int64_t idx, const char *str, const int32_t length); + int to_numeric_hive(const int64_t idx, const char *str, const int32_t length, char *buf, const int64_t data_len); + int64_t calc_tz_adjust_us(); + bool check_char_len(const char *ptr, int32_t len); + + static bool is_ob_type_store_utc(const ObDatumMeta &meta); + static bool is_parquet_store_utc(const parquet::LogicalType *logtype); + + ObEvalCtx &eval_ctx_; + ObExpr *file_col_expr_; + parquet::ColumnReader *reader_; + const int64_t batch_size_; + int64_t &row_count_; + common::ObIArrayWrap 
&def_levels_buf_; + common::ObIArrayWrap &rep_levels_buf_; + }; +private: + int next_file(); + int next_row_group(); + int calc_exprs_for_rowid(const int64_t read_count); + int calc_pseudo_exprs(const int64_t read_count); +private: + StateValues state_; + lib::ObMemAttr mem_attr_; + ObArenaAllocator allocator_; + ObArrowMemPool arrow_alloc_; + parquet::ReaderProperties read_props_; + ObExternalDataAccessDriver data_access_driver_; + std::unique_ptr file_reader_; + std::shared_ptr file_meta_; + ExprFixedArray file_column_exprs_; //column value from parquet file + ExprFixedArray file_meta_column_exprs_; //column value from file meta + common::ObArrayWrap column_indexs_; + common::ObArrayWrap> column_readers_; + common::ObArrayWrap load_funcs_; + ObSqlString url_; + ObBitVector *bit_vector_cache_; + common::ObArrayWrap def_levels_buf_; + common::ObArrayWrap rep_levels_buf_; + common::ObArrayWrap file_url_ptrs_; //for file url expr + common::ObArrayWrap file_url_lens_; //for file url expr +}; + +} +} + +#endif // OB_PARQUET_TABLE_ROW_ITER_H diff --git a/src/sql/printer/ob_raw_expr_printer.cpp b/src/sql/printer/ob_raw_expr_printer.cpp index c348ab025..04ef24e83 100644 --- a/src/sql/printer/ob_raw_expr_printer.cpp +++ b/src/sql/printer/ob_raw_expr_printer.cpp @@ -4033,6 +4033,7 @@ int ObRawExprPrinter::print(ObPseudoColumnRawExpr *expr) } case T_PSEUDO_PARTITION_LIST_COL: case T_PSEUDO_EXTERNAL_FILE_URL: + case T_PSEUDO_EXTERNAL_FILE_ROW: case T_PSEUDO_EXTERNAL_FILE_COL: { if (!expr->get_table_name().empty()) { PRINT_IDENT(expr->get_table_name()); diff --git a/src/sql/resolver/ddl/ob_alter_table_resolver.cpp b/src/sql/resolver/ddl/ob_alter_table_resolver.cpp index a5e59b3c6..70c65fce8 100644 --- a/src/sql/resolver/ddl/ob_alter_table_resolver.cpp +++ b/src/sql/resolver/ddl/ob_alter_table_resolver.cpp @@ -5468,10 +5468,9 @@ int ObAlterTableResolver::resolve_alter_table_column_definition(AlterColumnSchem } else if (OB_FAIL(tmp_table_schema.assign(*table_schema_))) { 
LOG_WARN("failed to assign a table schema", K(ret)); } else if (OB_FAIL(resolve_column_definition(column, node, stat, - is_modify_column_visibility, pk_name, + is_modify_column_visibility, pk_name, *table_schema_, is_oracle_temp_table, false, - false, allow_has_default))) { SQL_RESV_LOG(WARN, "resolve column definition failed", K(ret)); } else if (is_mysql_mode()){ // add column level constraint diff --git a/src/sql/resolver/ddl/ob_create_table_resolver.cpp b/src/sql/resolver/ddl/ob_create_table_resolver.cpp index 76b74bb1a..9fd039337 100644 --- a/src/sql/resolver/ddl/ob_create_table_resolver.cpp +++ b/src/sql/resolver/ddl/ob_create_table_resolver.cpp @@ -598,6 +598,13 @@ int ObCreateTableResolver::resolve(const ParseNode &parse_tree) // do nothing } + if (OB_SUCC(ret) && is_external_table_) { + //before resolve table elements + if (OB_FAIL(resolve_external_table_format_early(create_table_node->children_[4]))) { + LOG_WARN("fail to resolve external file format", K(ret)); + } + } + // 1、 resolve table_id first for check whether is inner_table if (OB_SUCC(ret) && OB_FAIL(resolve_table_id_pre(create_table_node->children_[4]))) { SQL_RESV_LOG(WARN, "resolve_table_id_pre failed", K(ret)); @@ -1445,9 +1452,9 @@ int ObCreateTableResolver::resolve_table_elements(const ParseNode *node, if (OB_FAIL(resolve_column_definition(column, element, stat, is_modify_column_visibility, pk_name, + table_schema, is_oracle_temp_table_, - is_create_table_as, - table_schema.is_external_table()))) { + is_create_table_as))) { SQL_RESV_LOG(WARN, "resolve column definition failed", K(ret)); } else if (!column.is_udt_related_column(lib::is_oracle_mode()) && // udt column will check after hidden column generated OB_FAIL(check_default_value(column.get_cur_default_value(), @@ -3103,6 +3110,43 @@ int ObCreateTableResolver::resolve_index_name( return ret; } +int ObCreateTableResolver::resolve_external_table_format_early(const ParseNode *node) +{ + int ret = OB_SUCCESS; + if (OB_NOT_NULL(node)) { + if 
(T_TABLE_OPTION_LIST != node->type_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid argument.", K(ret)); + } else { + ParseNode *option_node = NULL; + int32_t num = node->num_child_; + for (int32_t i = 0; OB_SUCC(ret) && i < num; ++i) { + option_node = node->children_[i]; + if (OB_NOT_NULL(option_node) && T_EXTERNAL_FILE_FORMAT == option_node->type_) { + ObExternalFileFormat format; + for (int32_t j = 0; OB_SUCC(ret) && j < option_node->num_child_; ++j) { + if (OB_NOT_NULL(option_node->children_[j]) + && T_EXTERNAL_FILE_FORMAT_TYPE == option_node->children_[j]->type_) { + if (OB_FAIL(resolve_file_format(option_node->children_[j], format))) { + LOG_WARN("fail to resolve file format", K(ret)); + } else { + external_table_format_type_ = format.format_type_; + } + } + } + } + } + } + } + if (OB_SUCC(ret) && external_table_format_type_ >= ObExternalFileFormat::PARQUET_FORMAT) { + uint64_t data_version = 0; + CK (OB_NOT_NULL(session_info_)); + OZ (GET_MIN_DATA_VERSION(session_info_->get_effective_tenant_id(), data_version)); + OV (DATA_VERSION_4_3_2_0 <= data_version, OB_NOT_SUPPORTED, data_version); + } + return ret; +} + int ObCreateTableResolver::resolve_table_charset_info(const ParseNode *node) { int ret = OB_SUCCESS; if (NULL != node) { diff --git a/src/sql/resolver/ddl/ob_create_table_resolver.h b/src/sql/resolver/ddl/ob_create_table_resolver.h index 221ecfe8c..c0d86ee2b 100644 --- a/src/sql/resolver/ddl/ob_create_table_resolver.h +++ b/src/sql/resolver/ddl/ob_create_table_resolver.h @@ -90,6 +90,7 @@ private: int set_temp_table_info(share::schema::ObTableSchema &table_schema, ParseNode *commit_option_node); int resolve_table_charset_info(const ParseNode *node); + int resolve_external_table_format_early(const ParseNode *node); //index int add_sort_column(const obrpc::ObColumnSortItem &sort_column); int generate_index_arg(); diff --git a/src/sql/resolver/ddl/ob_ddl_resolver.cpp b/src/sql/resolver/ddl/ob_ddl_resolver.cpp index 3f077e692..5079a0174 100644 --- 
a/src/sql/resolver/ddl/ob_ddl_resolver.cpp +++ b/src/sql/resolver/ddl/ob_ddl_resolver.cpp @@ -118,7 +118,8 @@ ObDDLResolver::ObDDLResolver(ObResolverParams ¶ms) have_generate_fts_arg_(false), is_set_lob_inrow_threshold_(false), lob_inrow_threshold_(OB_DEFAULT_LOB_INROW_THRESHOLD), - auto_increment_cache_size_(0) + auto_increment_cache_size_(0), + external_table_format_type_(ObExternalFileFormat::INVALID_FORMAT) { table_mode_.reset(); } @@ -2661,9 +2662,13 @@ int ObDDLResolver::resolve_file_format(const ParseNode *node, ObExternalFileForm switch (node->type_) { case T_EXTERNAL_FILE_FORMAT_TYPE: { ObString string_v = ObString(node->children_[0]->str_len_, node->children_[0]->str_value_).trim_space_only(); - if (0 == string_v.case_compare("CSV")) { - format.format_type_ = ObExternalFileFormat::CSV_FORMAT; - } else { + for (int i = 0; i < ObExternalFileFormat::MAX_FORMAT; i++) { + if (0 == string_v.case_compare(ObExternalFileFormat::FORMAT_TYPE_STR[i])) { + format.format_type_ = static_cast(i); + break; + } + } + if (ObExternalFileFormat::INVALID_FORMAT == format.format_type_) { ObSqlString err_msg; err_msg.append_fmt("format '%.*s'", string_v.length(), string_v.ptr()); ret = OB_NOT_SUPPORTED; @@ -3021,12 +3026,13 @@ int ObDDLResolver::resolve_column_definition(ObColumnSchemaV2 &column, ObColumnResolveStat &resolve_stat, bool &is_modify_column_visibility, common::ObString &pk_name, + const ObTableSchema &table_schema, const bool is_oracle_temp_table, const bool is_create_table_as, - const bool is_external_table, const bool allow_has_default) { int ret = OB_SUCCESS; + bool is_external_table = table_schema.is_external_table(); bool is_modify_column = stmt::T_ALTER_TABLE == stmt_->get_stmt_type() && OB_DDL_MODIFY_COLUMN == (static_cast(column)).alter_type_; ParseNode *column_definition_ref_node = NULL; @@ -3331,15 +3337,13 @@ int ObDDLResolver::resolve_column_definition(ObColumnSchemaV2 &column, } } else if (is_external_table) { //mock generated column - uint64_t 
file_column_idx = column.get_column_id() - OB_APP_MIN_COLUMN_ID + 1; - ObSqlString temp_str; + ObExternalFileFormat format; + format.format_type_ = external_table_format_type_; ObString mock_gen_column_str; - ObObj default_value; - if (OB_FAIL(temp_str.append_fmt("%s%lu", N_EXTERNAL_FILE_COLUMN_PREFIX, file_column_idx))) { - LOG_WARN("fail to append sql str", K(ret)); - } else if (OB_FAIL(ob_write_string(*allocator_, temp_str.string(), mock_gen_column_str))) { - LOG_WARN("fail to write string", K(ret)); + if (OB_FAIL(format.mock_gen_column_def(column, *allocator_, mock_gen_column_str))) { + LOG_WARN("fail to mock gen column def", K(ret)); } else { + ObObj default_value; default_value.set_varchar(mock_gen_column_str); default_value.set_collation_type(ObCharset::get_system_collation()); if (OB_FAIL(column.set_cur_default_value(default_value))) { diff --git a/src/sql/resolver/ddl/ob_ddl_resolver.h b/src/sql/resolver/ddl/ob_ddl_resolver.h index cb33f2a15..bb2d3e9e0 100644 --- a/src/sql/resolver/ddl/ob_ddl_resolver.h +++ b/src/sql/resolver/ddl/ob_ddl_resolver.h @@ -570,9 +570,9 @@ protected: ObColumnResolveStat &reslove_stat, bool &is_modify_column_visibility, common::ObString &pk_name, + const ObTableSchema &table_schema, const bool is_oracle_temp_table = false, const bool is_create_table_as = false, - const bool is_external_table = false, const bool allow_has_default = true); int resolve_file_prefix(ObString &url, ObSqlString &prefix_str, common::ObStorageType &device_type); int resolve_uk_name_from_column_attribute( @@ -1030,6 +1030,7 @@ protected: bool is_set_lob_inrow_threshold_; int64_t lob_inrow_threshold_; int64_t auto_increment_cache_size_; + ObExternalFileFormat::FormatType external_table_format_type_; private: template DISALLOW_COPY_AND_ASSIGN(ObDDLResolver); diff --git a/src/sql/resolver/dml/ob_dml_resolver.cpp b/src/sql/resolver/dml/ob_dml_resolver.cpp index 825a4bd7a..0dbadf4b8 100755 --- a/src/sql/resolver/dml/ob_dml_resolver.cpp +++ 
b/src/sql/resolver/dml/ob_dml_resolver.cpp @@ -8143,6 +8143,133 @@ int ObDMLResolver::add_additional_function_according_to_type(const ColumnItem *c return ret; } +int search_parquet_expr(ObRawExpr *root, ObRawExpr *file_row_expr, ObRawExpr *&pattern_expr) { + int ret = OB_SUCCESS; + ObRawExpr *get_path_expr = NULL; + pattern_expr = NULL; + if (OB_ISNULL(root) || OB_ISNULL(file_row_expr)) { + ret = OB_ERR_UNEXPECTED; + } else if (T_FUN_SYS_CAST == root->get_expr_type()) { + if (root->get_param_count() <= 0 || OB_ISNULL(get_path_expr = root->get_param_expr(0))) { + ret = OB_ERR_UNEXPECTED; + } else { + if (T_FUN_SYS_GET_PATH == get_path_expr->get_expr_type()) { + if (get_path_expr->get_param_count() > 0 && get_path_expr->get_param_expr(0) == file_row_expr) { + pattern_expr = root; + } + } + } + } + for (int i = 0; OB_SUCC(ret) && NULL == pattern_expr && i < root->get_param_count(); i++) { + if (OB_FAIL(SMART_CALL(search_parquet_expr(root->get_param_expr(i), file_row_expr, pattern_expr)))) { + LOG_WARN("fail to search parquet column expr", K(ret)); + } + } + return ret; +} + +int ObDMLResolver::resolve_external_table_generated_column( + ObQualifiedName &col, + const TableItem &table_item, + const ObTableSchema *table_schema, + const ObColumnSchemaV2 *column_schema, + ObRawExpr *&real_ref_expr, + ObRawExpr *&ref_expr) +{ + int ret = OB_SUCCESS; + uint64_t file_column_idx = UINT64_MAX; + if (OB_ISNULL(table_schema) || OB_ISNULL(column_schema) || OB_ISNULL(ref_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected arg", KP(table_schema), KP(column_schema), KP(ref_expr)); + } else if (0 == col.col_name_.case_compare(N_EXTERNAL_FILE_URL)) { + if (OB_FAIL(ObResolverUtils::build_file_column_expr_for_file_url( + *params_.expr_factory_, *params_.session_info_, + table_item.table_id_, table_item.table_name_, + col.col_name_, real_ref_expr))) { + LOG_WARN("fail to build external table file column expr", K(ret)); + } + } else if 
(col.col_name_.prefix_match_ci(N_PARTITION_LIST_COL)) { + if (OB_FAIL(ObResolverUtils::calc_file_column_idx(col.col_name_, file_column_idx))) { + LOG_WARN("fail to calc file column idx", K(ret)); + } else if (nullptr == (real_ref_expr = ObResolverUtils::find_file_column_expr( + pseudo_external_file_col_exprs_, table_item.table_id_, file_column_idx, col.col_name_))) { + if (OB_FAIL(ObResolverUtils::build_file_column_expr_for_partition_list_col( + *params_.expr_factory_, *params_.session_info_, + table_item.table_id_, table_item.table_name_, + col.col_name_, file_column_idx, real_ref_expr, + column_schema))) { + LOG_WARN("fail to build external table file column expr", K(ret)); + } + } + } else { + ObExternalFileFormat format; + if (OB_FAIL(format.load_from_string(table_schema->get_external_file_format(), *params_.allocator_))) { + LOG_WARN("load from string failed", K(ret)); + } else if (format.format_type_ != ObResolverUtils::resolve_external_file_column_type(col.col_name_)) { + ret = OB_WRONG_COLUMN_NAME; + LOG_USER_ERROR(OB_WRONG_COLUMN_NAME, col.col_name_.length(), col.col_name_.ptr()); + } else if (ObExternalFileFormat::CSV_FORMAT == format.format_type_) { + if (OB_FAIL(ObResolverUtils::calc_file_column_idx(col.col_name_, file_column_idx))) { + LOG_WARN("fail to calc file column idx", K(ret)); + } else if (nullptr == (real_ref_expr = ObResolverUtils::find_file_column_expr( + pseudo_external_file_col_exprs_, table_item.table_id_, file_column_idx, col.col_name_))) { + if (OB_FAIL(ObResolverUtils::build_file_column_expr_for_csv( + *params_.expr_factory_, *params_.session_info_, + table_item.table_id_, table_item.table_name_, + col.col_name_, file_column_idx, + real_ref_expr, format))) { + LOG_WARN("fail to build external table file column expr", K(ret)); + } + } + } else if (ObExternalFileFormat::PARQUET_FORMAT == format.format_type_) { + ObRawExpr *cast_expr = NULL; + ObRawExpr *get_path_expr = NULL; + ObRawExpr *cast_type_expr = NULL; + if (T_FUN_SYS_GET_PATH == 
ref_expr->get_expr_type()) { + // GET_PATH(N_EXTERNAL_FILE_ROW, 'xxx') + if (ref_expr->get_param_count() > 0 && ref_expr->get_param_expr(0) == col.ref_expr_) { + get_path_expr = ref_expr; + cast_type_expr = NULL; //using column type as result type + } + } else { + // search pattern: cast(GET_PATH(N_EXTERNAL_FILE_ROW, 'xxx') as xxx) + if (OB_FAIL(search_parquet_expr(ref_expr, col.ref_expr_, cast_expr))) { + LOG_WARN("fail to serach parquet path expr", K(ret)); + } else if (OB_NOT_NULL(cast_expr)) { + if (cast_expr->get_param_count() != 2 + || OB_ISNULL(get_path_expr = cast_expr->get_param_expr(0)) + || OB_ISNULL(cast_type_expr = cast_expr->get_param_expr(1))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected cast expr", K(ret)); + } + } + } + if (OB_SUCC(ret)) { + ObRawExpr *pattern_expr = OB_NOT_NULL(cast_expr) ? cast_expr : get_path_expr; + if (OB_ISNULL(pattern_expr)) { + ret = OB_ERR_UNSUPPORTED_ACTION_ON_GENERATED_COLUMN; + LOG_WARN("invalid generated column define for external table", K(ret)); + } else if (OB_FAIL(ObResolverUtils::build_file_column_expr_for_parquet( + *params_.expr_factory_, *params_.session_info_, + table_item.table_id_, table_item.table_name_, + col.col_name_, get_path_expr, + cast_expr, column_schema, real_ref_expr))) { + LOG_WARN("fail to build file column expr", K(ret)); + } else if (OB_FAIL(ObRawExprUtils::replace_ref_column(ref_expr, pattern_expr, real_ref_expr))) { + LOG_WARN("replace column reference expr failed", K(ret)); + } + } + } + } + if (OB_SUCC(ret)) { + if (OB_FAIL(pseudo_external_file_col_exprs_.push_back(real_ref_expr))) { + LOG_WARN("fail to push back to array", K(ret)); + } + } + LOG_TRACE("add external file column", KPC(real_ref_expr), K(col.col_name_)); + return ret; +} + int ObDMLResolver::resolve_generated_column_expr(const ObString &expr_str, const TableItem &table_item, const ObColumnSchemaV2 *column_schema, const ObColumnRefRawExpr &column, ObRawExpr *&ref_expr, @@ -8265,28 +8392,11 @@ int 
ObDMLResolver::resolve_generated_column_expr(const ObString &expr_str, } } } else if (table_schema->is_external_table() - && ObResolverUtils::is_external_file_column_name(columns.at(i).col_name_)) { - uint64_t file_column_idx = UINT64_MAX; - if (OB_ISNULL(stmt)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret)); - } else if (OB_FAIL(ObResolverUtils::calc_file_column_idx(columns.at(i).col_name_, file_column_idx))) { - LOG_WARN("fail to calc file column idx", K(ret)); - } else if (nullptr == (real_ref_expr = ObResolverUtils::find_file_column_expr( - pseudo_external_file_col_exprs_, table_item.table_id_, file_column_idx, columns.at(i).col_name_))) { - ObExternalFileFormat format; - if (OB_FAIL(format.load_from_string(table_schema->get_external_file_format(), *params_.allocator_))) { - LOG_WARN("load from string failed", K(ret)); - } else if (OB_FAIL(ObResolverUtils::build_file_column_expr(*params_.expr_factory_, *params_.session_info_, - table_item.table_id_, table_item.alias_name_, - columns.at(i).col_name_, file_column_idx, real_ref_expr, - format.csv_format_.cs_type_, column_schema))) { - LOG_WARN("fail to build external table file column expr", K(ret)); - } else if (OB_FAIL(pseudo_external_file_col_exprs_.push_back(real_ref_expr))) { - LOG_WARN("fail to push back to array", K(ret)); - } + && ObResolverUtils::is_external_pseudo_column_name(columns.at(i).col_name_)) { + if (OB_FAIL(resolve_external_table_generated_column(columns.at(i), table_item, table_schema, + column_schema, real_ref_expr, ref_expr))) { + LOG_WARN("fail to resolve external table generated column", K(ret)); } - LOG_TRACE("add external file column", KPC(real_ref_expr), K(columns.at(i).col_name_), K(table_item)); } else { if (OB_FAIL(resolve_basic_column_item(table_item, columns.at(i).col_name_, include_hidden, col_item, stmt))) { @@ -13198,10 +13308,10 @@ int ObDMLResolver::resolve_pseudo_column( } else if (NULL != (real_ref_expr = ObResolverUtils::find_file_column_expr( 
pseudo_external_file_col_exprs_, table_item->table_id_, UINT64_MAX, q_name.col_name_))) { LOG_TRACE("find file name pseudo column", K(*real_ref_expr)); - } else if (OB_FAIL(ObResolverUtils::build_file_column_expr(*params_.expr_factory_, *params_.session_info_, - table_item->table_id_, table_item->alias_name_, - q_name.col_name_, UINT64_MAX, real_ref_expr, - CHARSET_UTF8MB4))) { + } else if (OB_FAIL(ObResolverUtils::build_file_column_expr_for_file_url( + *params_.expr_factory_, *params_.session_info_, + table_item->table_id_, table_item->alias_name_, + q_name.col_name_, real_ref_expr))) { LOG_WARN("fail to build external table file column expr", K(ret)); } else if (OB_FAIL(pseudo_external_file_col_exprs_.push_back(real_ref_expr))) { LOG_WARN("fail to push back to array", K(ret)); diff --git a/src/sql/resolver/dml/ob_dml_resolver.h b/src/sql/resolver/dml/ob_dml_resolver.h index f63cf850b..e587a82bb 100644 --- a/src/sql/resolver/dml/ob_dml_resolver.h +++ b/src/sql/resolver/dml/ob_dml_resolver.h @@ -213,6 +213,13 @@ public: int fill_same_column_to_using(JoinedTable* &joined_table); int get_columns_from_table_item(const TableItem *table_item, common::ObIArray &column_names); + int resolve_external_table_generated_column( + ObQualifiedName &col, + const TableItem &table_item, + const share::schema::ObTableSchema *table_schema, + const share::schema::ObColumnSchemaV2 *column_schema, + ObRawExpr *&real_ref_expr, + ObRawExpr *&ref_expr); int resolve_using_columns(const ParseNode &using_node, common::ObIArray &column_names); int transfer_using_to_on_expr(JoinedTable *&joined_table); int resolve_table_column_expr(const ObQualifiedName &q_name, ObRawExpr *&real_ref_expr); diff --git a/src/sql/resolver/expr/ob_raw_expr.cpp b/src/sql/resolver/expr/ob_raw_expr.cpp index bd68560a8..caf8c4b8e 100644 --- a/src/sql/resolver/expr/ob_raw_expr.cpp +++ b/src/sql/resolver/expr/ob_raw_expr.cpp @@ -6179,6 +6179,7 @@ int ObPseudoColumnRawExpr::assign(const ObRawExpr &other) 
cte_cycle_default_value_ = tmp.cte_cycle_default_value_; table_id_ = tmp.table_id_; table_name_ = tmp.table_name_; + data_access_path_ = tmp.data_access_path_; } } return ret; @@ -6199,7 +6200,8 @@ bool ObPseudoColumnRawExpr::inner_same_as(const ObRawExpr &expr, { UNUSED(check_context); return type_ == expr.get_expr_type() && - table_id_ == static_cast(expr).get_table_id(); + table_id_ == static_cast(expr).get_table_id() && + 0 == data_access_path_.compare(static_cast(expr).get_data_access_path()); } int ObPseudoColumnRawExpr::do_visit(ObRawExprVisitor &visitor) @@ -6259,6 +6261,7 @@ int ObPseudoColumnRawExpr::get_name_internal(char *buf, const int64_t buf_len, i case T_PSEUDO_EXTERNAL_FILE_URL: case T_PSEUDO_PARTITION_LIST_COL: case T_PSEUDO_EXTERNAL_FILE_COL: + case T_PSEUDO_EXTERNAL_FILE_ROW: if (!table_name_.empty() && OB_FAIL(BUF_PRINTF("%.*s.", table_name_.length(), table_name_.ptr()))) { LOG_WARN("failed to print table name", K(ret)); } else if (OB_FAIL(databuff_print_obj(buf, buf_len, pos, expr_name_))) { diff --git a/src/sql/resolver/expr/ob_raw_expr.h b/src/sql/resolver/expr/ob_raw_expr.h index 7ec267c53..97d400ce2 100644 --- a/src/sql/resolver/expr/ob_raw_expr.h +++ b/src/sql/resolver/expr/ob_raw_expr.h @@ -4741,18 +4741,22 @@ public: uint64_t get_table_id() const { return table_id_; } void set_table_name(const common::ObString &table_name) { table_name_ = table_name; } const common::ObString & get_table_name() const { return table_name_; } + void set_data_access_path(const common::ObString &data_access_path) { data_access_path_ = data_access_path; } + const common::ObString & get_data_access_path() const { return data_access_path_; } VIRTUAL_TO_STRING_KV(N_ITEM_TYPE, type_, N_RESULT_TYPE, result_type_, N_EXPR_INFO, info_, N_REL_ID, rel_ids_, N_TABLE_ID, table_id_, - N_TABLE_NAME, table_name_); + N_TABLE_NAME, table_name_, + K_(data_access_path)); private: ObRawExpr *cte_cycle_value_; ObRawExpr *cte_cycle_default_value_; uint64_t table_id_; 
common::ObString table_name_; + common::ObString data_access_path_; //for external table column DISALLOW_COPY_AND_ASSIGN(ObPseudoColumnRawExpr); }; diff --git a/src/sql/resolver/expr/ob_raw_expr_util.cpp b/src/sql/resolver/expr/ob_raw_expr_util.cpp index 380613b55..f5ffd5581 100644 --- a/src/sql/resolver/expr/ob_raw_expr_util.cpp +++ b/src/sql/resolver/expr/ob_raw_expr_util.cpp @@ -2460,7 +2460,7 @@ int ObRawExprUtils::build_generated_column_expr(ObRawExprFactory &expr_factory, && true == need_check_simple_column && T_REF_COLUMN == expr->get_expr_type() && !(columns.count() == 1 - && ObResolverUtils::is_external_file_column_name(columns.at(0).col_name_))) { + && ObResolverUtils::is_external_pseudo_column_name(columns.at(0).col_name_))) { ret = OB_ERR_INVALID_COLUMN_EXPRESSION; LOG_WARN("simple column is not allowed in Oracle mode", K(ret), K(*expr)); } diff --git a/src/sql/resolver/ob_resolver_utils.cpp b/src/sql/resolver/ob_resolver_utils.cpp index df3c882aa..01b48d9cc 100644 --- a/src/sql/resolver/ob_resolver_utils.cpp +++ b/src/sql/resolver/ob_resolver_utils.cpp @@ -43,6 +43,7 @@ #include "sql/engine/expr/ob_expr_unistr.h" #include "sql/resolver/dml/ob_inlist_resolver.h" #include "lib/charset/ob_ctype.h" +#include "sql/engine/expr/ob_expr_cast.h" namespace oceanbase { @@ -4812,24 +4813,54 @@ int ObResolverUtils::resolve_external_table_column_def(ObRawExprFactory &expr_fa int ret = OB_SUCCESS; ObRawExpr *file_column_expr = nullptr; uint64_t file_column_idx = UINT64_MAX; - if (!ObResolverUtils::is_external_file_column_name(q_name.col_name_)) { + if (!ObResolverUtils::is_external_pseudo_column_name(q_name.col_name_)) { ret = OB_ERR_BAD_FIELD_ERROR; ObString scope_name = "external file column"; LOG_USER_ERROR(OB_ERR_BAD_FIELD_ERROR, q_name.col_name_.length(), q_name.col_name_.ptr(), scope_name.length(), scope_name.ptr()); - } else if (OB_FAIL(ObResolverUtils::calc_file_column_idx(q_name.col_name_, file_column_idx))) { - LOG_WARN("fail to calc file column idx", 
K(ret)); - } else if (nullptr == (file_column_expr = ObResolverUtils::find_file_column_expr( - real_exprs, OB_INVALID_ID, file_column_idx, q_name.col_name_))) { - ObString table_name; - if (OB_FAIL(ObResolverUtils::build_file_column_expr(expr_factory, session_info, OB_INVALID_ID, - table_name, q_name.col_name_, - file_column_idx, file_column_expr, CHARSET_UTF8MB4, gen_col_schema))) { - LOG_WARN("fail to build external table file column expr", K(ret)); - } else if (OB_FAIL(real_exprs.push_back(file_column_expr))) { - LOG_WARN("fail to push back expr", K(ret)); + } else { + if (0 == q_name.col_name_.case_compare(N_EXTERNAL_FILE_URL)) { + if (OB_FAIL(ObResolverUtils::build_file_column_expr_for_file_url(expr_factory, session_info, + OB_INVALID_ID, ObString(), q_name.col_name_, file_column_expr))) { + LOG_WARN("fail to build external table file column expr", K(ret)); + } + } else if (q_name.col_name_.prefix_match_ci(N_PARTITION_LIST_COL)) { + if (OB_FAIL(ObResolverUtils::calc_file_column_idx(q_name.col_name_, file_column_idx))) { + LOG_WARN("fail to calc file column idx", K(ret)); + } else if (nullptr == (file_column_expr = ObResolverUtils::find_file_column_expr( + real_exprs, OB_INVALID_ID, file_column_idx, q_name.col_name_))) { + if (OB_FAIL(ObResolverUtils::build_file_column_expr_for_partition_list_col(expr_factory, + session_info, OB_INVALID_ID, ObString(), + q_name.col_name_, file_column_idx, file_column_expr, gen_col_schema))) { + LOG_WARN("fail to build external table file column expr", K(ret)); + } + } + } else if (ObExternalFileFormat::CSV_FORMAT == ObResolverUtils::resolve_external_file_column_type(q_name.col_name_)) { + if (OB_FAIL(ObResolverUtils::calc_file_column_idx(q_name.col_name_, file_column_idx))) { + LOG_WARN("fail to calc file column idx", K(ret)); + } else if (nullptr == (file_column_expr = ObResolverUtils::find_file_column_expr( + real_exprs, OB_INVALID_ID, file_column_idx, q_name.col_name_))) { + ObExternalFileFormat temp_format; + 
temp_format.csv_format_.init_format(ObDataInFileStruct(), 0, CS_TYPE_UTF8MB4_BIN); + if (OB_FAIL(ObResolverUtils::build_file_column_expr_for_csv(expr_factory, session_info, + OB_INVALID_ID, ObString(), q_name.col_name_, file_column_idx, file_column_expr, temp_format))) { + LOG_WARN("fail to build external table file column expr", K(ret)); + } + } + } else { + if (OB_FAIL(ObResolverUtils::build_file_row_expr_for_parquet(expr_factory, session_info, + OB_INVALID_ID, ObString(), + q_name.col_name_, file_column_expr))) { + LOG_WARN("fail to build file column expr", K(ret)); + } + } + if (OB_SUCC(ret)) { + if (OB_FAIL(real_exprs.push_back(file_column_expr))) { + LOG_WARN("fail to push back expr", K(ret)); + } } } + if (OB_SUCC(ret)) { if (OB_FAIL(ObTransformUtils::replace_expr(q_name.ref_expr_, file_column_expr, expr))) { LOG_WARN("fail replace expr", K(ret)); @@ -4839,43 +4870,183 @@ int ObResolverUtils::resolve_external_table_column_def(ObRawExprFactory &expr_fa return ret; } -bool ObResolverUtils::is_external_file_column_name(const ObString &name) +bool ObResolverUtils::is_external_pseudo_column_name(const ObString &name) { - return name.prefix_match_ci(N_EXTERNAL_FILE_COLUMN_PREFIX) + return is_external_file_column_name(name) || 0 == name.case_compare(N_EXTERNAL_FILE_URL) || name.prefix_match_ci(N_PARTITION_LIST_COL); } -int ObResolverUtils::build_file_column_expr(ObRawExprFactory &expr_factory, - const ObSQLSessionInfo &session_info, - const uint64_t table_id, - const ObString &table_name, - const ObString &column_name, - int64_t column_idx, - ObRawExpr *&expr, - ObCharsetType cs_type, - const ObColumnSchemaV2 *generated_column) +bool ObResolverUtils::is_external_file_column_name(const ObString &name) +{ + ObExternalFileFormat::FormatType type = resolve_external_file_column_type(name); + return (type > ObExternalFileFormat::INVALID_FORMAT && type < ObExternalFileFormat::MAX_FORMAT); +} + +ObExternalFileFormat::FormatType 
ObResolverUtils::resolve_external_file_column_type(const ObString &name) +{ + ObExternalFileFormat::FormatType type = ObExternalFileFormat::INVALID_FORMAT; + if (name.prefix_match_ci(N_EXTERNAL_FILE_COLUMN_PREFIX)) { + type = ObExternalFileFormat::CSV_FORMAT; + } else if (0 == name.case_compare(N_EXTERNAL_FILE_ROW)) { + type = ObExternalFileFormat::PARQUET_FORMAT; + } + return type; +} + +int ObResolverUtils::build_file_column_expr_for_parquet( + ObRawExprFactory &expr_factory, + const ObSQLSessionInfo &session_info, + const uint64_t table_id, + const ObString &table_name, + const ObString &column_name, + ObRawExpr *get_path_expr, + ObRawExpr *cast_expr, + const ObColumnSchemaV2 *generated_column, + ObRawExpr *&expr) { int ret = OB_SUCCESS; ObPseudoColumnRawExpr *file_column_expr = nullptr; - ObItemType type = T_INVALID; - uint64_t extra = UINT64_MAX; + ObRawExpr *path_expr = nullptr; - if (column_name.case_compare(N_EXTERNAL_FILE_URL) == 0) { - type = T_PSEUDO_EXTERNAL_FILE_URL; - extra = UINT64_MAX; - } else if (column_name.prefix_match_ci(N_PARTITION_LIST_COL)) { - type = T_PSEUDO_PARTITION_LIST_COL; - extra = column_idx; - } else if (column_name.prefix_match_ci(N_EXTERNAL_FILE_COLUMN_PREFIX)) { - type = T_PSEUDO_EXTERNAL_FILE_COL; - extra = column_idx; - } else { + if (OB_FAIL(expr_factory.create_raw_expr(T_PSEUDO_EXTERNAL_FILE_COL, file_column_expr))) { + LOG_WARN("create nextval failed", K(ret)); + } else if (OB_ISNULL(file_column_expr)) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("not valid column type", K(column_name), K(ret)); + LOG_WARN("expr is null", K(ret)); + } else { + file_column_expr->set_expr_name(column_name); + file_column_expr->set_table_name(table_name); + file_column_expr->set_table_id(table_id); + file_column_expr->set_explicited_reference(); + + if (OB_ISNULL(get_path_expr) || OB_ISNULL(path_expr = get_path_expr->get_param_expr(1))) { + ret = OB_ERR_UNEXPECTED; + } + if (OB_SUCC(ret)) { + //get type + if (OB_NOT_NULL(cast_expr)) { + bool 
enable_decimalint = false; + ObExprResType dst_type; + ObConstRawExpr *const_cast_type_expr = static_cast(cast_expr->get_param_expr(1)); + if (!const_cast_type_expr->get_value().is_int()) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not support non-const expr", K(ret), KPC(const_cast_type_expr)); + } else if (OB_FAIL(const_cast_type_expr->formalize(&session_info))) { + LOG_WARN("fail to formalize expr", K(ret)); + } else if (OB_FAIL(ObSQLUtils::check_enable_decimalint(&session_info, enable_decimalint))) { + LOG_WARN("fail to check_enable_decimalint", K(ret)); + } else if (OB_FAIL(ObExprCast::get_cast_type(enable_decimalint, + const_cast_type_expr->get_result_type(), + cast_expr->get_extra(), dst_type))) { + LOG_WARN("get cast dest type failed", K(ret)); + } else { + if (dst_type.is_string_or_lob_locator_type()) { + // string data stored in parquet file as UTF8 format + dst_type.set_collation_type(CS_TYPE_UTF8MB4_BIN); + } + file_column_expr->set_result_type(dst_type); + } + } else if (OB_NOT_NULL(generated_column)) { + ObColumnRefRawExpr *column_expr = nullptr; + if (OB_FAIL(ObRawExprUtils::build_column_expr(expr_factory, *generated_column, column_expr))) { + LOG_WARN("failed to build column expr", K(ret)); + } else { + file_column_expr->set_accuracy(column_expr->get_accuracy()); + file_column_expr->set_data_type(column_expr->get_data_type()); + file_column_expr->set_collation_type(column_expr->get_collation_type()); + file_column_expr->set_collation_level(column_expr->get_collation_level()); + if (column_expr->get_result_type().is_string_or_lob_locator_type() + && ObCharset::charset_type_by_coll(column_expr->get_collation_type()) != CHARSET_UTF8MB4) { + // string data stored in parquet file as UTF8 format + file_column_expr->set_collation_type(CS_TYPE_UTF8MB4_BIN); + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected arg", K(ret)); + } + } + + if (OB_SUCC(ret)) { + //get path + if (OB_FAIL(path_expr->formalize(&session_info))) { + LOG_WARN("fail to 
formalize expr", K(ret)); + } else if (!path_expr->is_static_const_expr()) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not support non-const expr", K(ret), KPC(path_expr)); + } else { + ObConstRawExpr *const_path_expr = static_cast(path_expr); + if (!const_path_expr->get_value().is_string_type()) { + ret = OB_NOT_SUPPORTED; + } else { + file_column_expr->set_data_access_path(const_path_expr->get_value().get_string()); + } + } + } } - if (OB_FAIL(ret)) { - } else if (OB_FAIL(expr_factory.create_raw_expr(type, file_column_expr))) { + + if (OB_SUCC(ret)) { + if (OB_FAIL(file_column_expr->formalize(&session_info))) { + LOG_WARN("failed to extract info", K(ret)); + } else { + expr = file_column_expr; + } + } + + return ret; +} + +int ObResolverUtils::build_file_row_expr_for_parquet( + ObRawExprFactory &expr_factory, + const ObSQLSessionInfo &session_info, + const uint64_t table_id, + const ObString &table_name, + const ObString &column_name, + ObRawExpr *&expr) +{ + int ret = OB_SUCCESS; + ObPseudoColumnRawExpr *file_column_expr = nullptr; + + if (OB_FAIL(expr_factory.create_raw_expr(T_PSEUDO_EXTERNAL_FILE_ROW, file_column_expr))) { + LOG_WARN("create nextval failed", K(ret)); + } else if (OB_ISNULL(file_column_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("expr is null", K(ret)); + } else { + file_column_expr->set_expr_name(column_name); + file_column_expr->set_table_name(table_name); + file_column_expr->set_table_id(table_id); + file_column_expr->set_explicited_reference(); + file_column_expr->set_data_type(ObVarcharType); + file_column_expr->set_collation_type(CS_TYPE_BINARY); + } + if (OB_SUCC(ret)) { + if (OB_FAIL(file_column_expr->formalize(&session_info))) { + LOG_WARN("failed to extract info", K(ret)); + } else { + expr = file_column_expr; + } + } + + return ret; +} + +int ObResolverUtils::build_file_column_expr_for_csv(ObRawExprFactory &expr_factory, + const ObSQLSessionInfo &session_info, + const uint64_t table_id, + const ObString &table_name, + const ObString 
&column_name, + int64_t column_idx, + ObRawExpr *&expr, + const ObExternalFileFormat &format) +{ + int ret = OB_SUCCESS; + ObPseudoColumnRawExpr *file_column_expr = nullptr; + ObItemType type = T_PSEUDO_EXTERNAL_FILE_COL; + uint64_t extra = column_idx; + + if (OB_FAIL(expr_factory.create_raw_expr(type, file_column_expr))) { LOG_WARN("create nextval failed", K(ret)); } else if (OB_ISNULL(file_column_expr)) { ret = OB_ERR_UNEXPECTED; @@ -4886,36 +5057,106 @@ int ObResolverUtils::build_file_column_expr(ObRawExprFactory &expr_factory, file_column_expr->set_table_id(table_id); file_column_expr->set_explicited_reference(); file_column_expr->set_extra(extra); - if (type == T_PSEUDO_PARTITION_LIST_COL) { - if (OB_ISNULL(generated_column)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("generated column is null", K(ret)); - } else { - const ObAccuracy &accuracy = generated_column->get_accuracy(); - file_column_expr->set_data_type(generated_column->get_data_type()); - file_column_expr->set_result_flag(ObRawExprUtils::calc_column_result_flag(*generated_column)); - file_column_expr->set_accuracy(accuracy); - if (ob_is_string_type(generated_column->get_data_type()) - || ob_is_enumset_tc(generated_column->get_data_type()) - || ob_is_json_tc(generated_column->get_data_type()) - || ob_is_geometry_tc(generated_column->get_data_type())) { - file_column_expr->set_collation_type(generated_column->get_collation_type()); - file_column_expr->set_collation_level(CS_LEVEL_IMPLICIT); - } else { - file_column_expr->set_collation_type(CS_TYPE_BINARY); - file_column_expr->set_collation_level(CS_LEVEL_NUMERIC); - } - } + + file_column_expr->set_data_type(ObVarcharType); + file_column_expr->set_collation_type(ObCharset::get_default_collation(format.csv_format_.cs_type_)); + file_column_expr->set_collation_level(CS_LEVEL_IMPLICIT); + file_column_expr->set_length(OB_MAX_VARCHAR_LENGTH); + if (lib::is_oracle_mode()) { + file_column_expr->set_length_semantics(LS_BYTE); + } + if 
(OB_FAIL(file_column_expr->formalize(&session_info))) { + LOG_WARN("failed to extract info", K(ret)); } else { - file_column_expr->set_data_type(ObVarcharType); - file_column_expr->set_collation_type(ObCharset::get_default_collation(cs_type)); - file_column_expr->set_length(OB_MAX_VARCHAR_LENGTH); - if (lib::is_oracle_mode()) { - file_column_expr->set_length_semantics(LS_BYTE); + expr = file_column_expr; + } + } + + return ret; +} + +int ObResolverUtils::build_file_column_expr_for_partition_list_col( + ObRawExprFactory &expr_factory, + const ObSQLSessionInfo &session_info, + const uint64_t table_id, + const ObString &table_name, + const ObString &column_name, + int64_t column_idx, + ObRawExpr *&expr, + const ObColumnSchemaV2 *generated_column) +{ + int ret = OB_SUCCESS; + ObPseudoColumnRawExpr *file_column_expr = nullptr; + ObColumnRefRawExpr *column_expr = nullptr; + ObItemType type = T_PSEUDO_PARTITION_LIST_COL; + uint64_t extra = column_idx; + + if (OB_FAIL(expr_factory.create_raw_expr(type, file_column_expr))) { + LOG_WARN("create nextval failed", K(ret)); + } else if (OB_ISNULL(file_column_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("expr is null", K(ret)); + } else if (OB_ISNULL(generated_column)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("gen column schema is null", K(ret)); + } else { + file_column_expr->set_expr_name(column_name); + file_column_expr->set_table_name(table_name); + file_column_expr->set_table_id(table_id); + file_column_expr->set_explicited_reference(); + file_column_expr->set_extra(extra); + + if (OB_FAIL(ObRawExprUtils::build_column_expr(expr_factory, *generated_column, column_expr))) { + LOG_WARN("failed to build column expr", K(ret)); + } else { + file_column_expr->set_accuracy(column_expr->get_accuracy()); + file_column_expr->set_data_type(column_expr->get_data_type()); + file_column_expr->set_collation_type(column_expr->get_collation_type()); + file_column_expr->set_collation_level(column_expr->get_collation_level()); + if 
(OB_FAIL(file_column_expr->formalize(&session_info))) { + LOG_WARN("failed to extract info", K(ret)); + } else { + expr = file_column_expr; } } - if (OB_FAIL(ret)) { - } else if (OB_FAIL(file_column_expr->formalize(&session_info))) { + } + + return ret; +} + +int ObResolverUtils::build_file_column_expr_for_file_url( + ObRawExprFactory &expr_factory, + const ObSQLSessionInfo &session_info, + const uint64_t table_id, + const ObString &table_name, + const ObString &column_name, + ObRawExpr *&expr) +{ + int ret = OB_SUCCESS; + ObPseudoColumnRawExpr *file_column_expr = nullptr; + ObItemType type = T_PSEUDO_EXTERNAL_FILE_URL; + uint64_t extra = UINT64_MAX; + + if (OB_FAIL(expr_factory.create_raw_expr(type, file_column_expr))) { + LOG_WARN("create nextval failed", K(ret)); + } else if (OB_ISNULL(file_column_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("expr is null", K(ret)); + } else { + file_column_expr->set_expr_name(column_name); + file_column_expr->set_table_name(table_name); + file_column_expr->set_table_id(table_id); + file_column_expr->set_explicited_reference(); + file_column_expr->set_extra(extra); + + file_column_expr->set_data_type(ObVarcharType); + file_column_expr->set_collation_type(CS_TYPE_UTF8MB4_BIN); + file_column_expr->set_collation_level(CS_LEVEL_IMPLICIT); + file_column_expr->set_length(OB_MAX_VARCHAR_LENGTH); + if (lib::is_oracle_mode()) { + file_column_expr->set_length_semantics(LS_BYTE); + } + if (OB_FAIL(file_column_expr->formalize(&session_info))) { LOG_WARN("failed to extract info", K(ret)); } else { expr = file_column_expr; @@ -4963,7 +5204,7 @@ int ObResolverUtils::resolve_generated_column_expr(ObResolverParams ¶ms, } else if (lib::is_oracle_mode() && q_name.is_pl_udf()) { ret = OB_NOT_SUPPORTED; LOG_USER_ERROR(OB_NOT_SUPPORTED, "using udf as generated column"); - LOG_WARN("using udf as generated column is not supported", K(ret)); + LOG_WARN("using udf as generated column is not supported", K(ret), K(q_name)); // OZ 
(ObRawExprUtils::resolve_gen_column_udf_expr(expr, // const_cast(q_name), // *expr_factory, diff --git a/src/sql/resolver/ob_resolver_utils.h b/src/sql/resolver/ob_resolver_utils.h index e1d3e5e67..e8675ae23 100644 --- a/src/sql/resolver/ob_resolver_utils.h +++ b/src/sql/resolver/ob_resolver_utils.h @@ -774,15 +774,50 @@ public: int64_t column_idx, const ObString &expr_name); static int calc_file_column_idx(const ObString &column_name, uint64_t &file_column_idx); - static int build_file_column_expr(ObRawExprFactory &expr_factory, - const ObSQLSessionInfo &session_info, - const uint64_t table_id, - const common::ObString &table_name, - const common::ObString &column_name, - int64_t column_idx, - ObRawExpr *&expr, - ObCharsetType cs_type, - const ObColumnSchemaV2 *generated_column = NULL); + static int build_file_column_expr_for_csv( + ObRawExprFactory &expr_factory, + const ObSQLSessionInfo &session_info, + const uint64_t table_id, + const common::ObString &table_name, + const common::ObString &column_name, + int64_t column_idx, + ObRawExpr *&expr, + const ObExternalFileFormat &format); + static int build_file_column_expr_for_partition_list_col( + ObRawExprFactory &expr_factory, + const ObSQLSessionInfo &session_info, + const uint64_t table_id, + const common::ObString &table_name, + const common::ObString &column_name, + int64_t column_idx, + ObRawExpr *&expr, + const ObColumnSchemaV2 *generated_column); + static int build_file_column_expr_for_file_url( + ObRawExprFactory &expr_factory, + const ObSQLSessionInfo &session_info, + const uint64_t table_id, + const common::ObString &table_name, + const common::ObString &column_name, + ObRawExpr *&expr); + + static int build_file_row_expr_for_parquet( + ObRawExprFactory &expr_factory, + const ObSQLSessionInfo &session_info, + const uint64_t table_id, + const common::ObString &table_name, + const common::ObString &column_name, + ObRawExpr *&expr); + static int build_file_column_expr_for_parquet( + ObRawExprFactory 
&expr_factory, + const ObSQLSessionInfo &session_info, + const uint64_t table_id, + const common::ObString &table_name, + const common::ObString &column_name, + ObRawExpr *get_path_expr, + ObRawExpr *cast_expr, + const ObColumnSchemaV2 *generated_column, + ObRawExpr *&expr); + //only used for DDL resolver, resolve a PSEUDO column expr for validation and printer not for execution static int resolve_external_table_column_def(ObRawExprFactory &expr_factory, const ObSQLSessionInfo &session_info, const ObQualifiedName &q_name, @@ -790,6 +825,8 @@ public: ObRawExpr *&expr, const ObColumnSchemaV2 *gen_col_schema = NULL); static bool is_external_file_column_name(const common::ObString &name); + static bool is_external_pseudo_column_name(const common::ObString &name); + static ObExternalFileFormat::FormatType resolve_external_file_column_type(const common::ObString &name); static int resolve_file_format_string_value(const ParseNode *node, const ObCharsetType &format_charset, diff --git a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_sys_views_in_mysql.result b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_sys_views_in_mysql.result index 60b9c3e8d..28c5df444 100644 --- a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_sys_views_in_mysql.result +++ b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_sys_views_in_mysql.result @@ -5315,7 +5315,7 @@ desc oceanbase.DBA_OB_EXTERNAL_TABLE_FILES; Field Type Null Key Default Extra TABLE_NAME varchar(256) NO TABLE_SCHEMA varchar(128) NO -PARTITION_NAME varchar(2) NO +PARTITION_NAME varchar(64) NO FILE_URL varbinary(16384) NO NULL FILE_SIZE bigint(20) NO NULL select /*+QUERY_TIMEOUT(60000000)*/ count(*) as cnt from (select * from oceanbase.DBA_OB_EXTERNAL_TABLE_FILES limit 1); @@ -5325,7 +5325,7 @@ desc oceanbase.ALL_OB_EXTERNAL_TABLE_FILES; Field Type Null Key Default Extra TABLE_NAME varchar(256) NO TABLE_SCHEMA varchar(128) NO -PARTITION_NAME varchar(2) NO +PARTITION_NAME varchar(64) 
NO FILE_URL varbinary(16384) NO NULL FILE_SIZE bigint(20) NO NULL select /*+QUERY_TIMEOUT(60000000)*/ count(*) as cnt from (select * from oceanbase.ALL_OB_EXTERNAL_TABLE_FILES limit 1); diff --git a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_sys_views_in_sys.result b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_sys_views_in_sys.result index 703588b22..368d18958 100644 --- a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_sys_views_in_sys.result +++ b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_sys_views_in_sys.result @@ -7464,7 +7464,7 @@ desc oceanbase.DBA_OB_EXTERNAL_TABLE_FILES; Field Type Null Key Default Extra TABLE_NAME varchar(256) NO TABLE_SCHEMA varchar(128) NO -PARTITION_NAME varchar(2) NO +PARTITION_NAME varchar(64) NO FILE_URL varbinary(16384) NO NULL FILE_SIZE bigint(20) NO NULL select /*+QUERY_TIMEOUT(60000000)*/ count(*) as cnt from (select * from oceanbase.DBA_OB_EXTERNAL_TABLE_FILES limit 1); @@ -7474,7 +7474,7 @@ desc oceanbase.ALL_OB_EXTERNAL_TABLE_FILES; Field Type Null Key Default Extra TABLE_NAME varchar(256) NO TABLE_SCHEMA varchar(128) NO -PARTITION_NAME varchar(2) NO +PARTITION_NAME varchar(64) NO FILE_URL varbinary(16384) NO NULL FILE_SIZE bigint(20) NO NULL select /*+QUERY_TIMEOUT(60000000)*/ count(*) as cnt from (select * from oceanbase.ALL_OB_EXTERNAL_TABLE_FILES limit 1); @@ -7575,7 +7575,7 @@ Field Type Null Key Default Extra TENANT_ID bigint(20) NO NULL TABLE_NAME varchar(256) NO TABLE_SCHEMA varchar(128) NO -PARTITION_NAME varchar(2) NO +PARTITION_NAME varchar(64) NO FILE_URL varbinary(16384) NO NULL FILE_SIZE bigint(20) NO NULL select /*+QUERY_TIMEOUT(60000000)*/ count(*) as cnt from (select * from oceanbase.CDB_OB_EXTERNAL_TABLE_FILES limit 1);