diff --git a/cmake/Env.cmake b/cmake/Env.cmake index 1fc0c01976..8a8ebc36a2 100644 --- a/cmake/Env.cmake +++ b/cmake/Env.cmake @@ -117,6 +117,8 @@ if(OB_BUILD_CLOSE_MODULES) ob_define(OB_BUILD_ORACLE_PL ON) # dblink ob_define(OB_BUILD_DBLINK ON) + # odps + ob_define(OB_BUILD_CPP_ODPS ON) # 仲裁功能 ob_define(OB_BUILD_ARBITRATION ON) @@ -172,6 +174,10 @@ if(OB_BUILD_DBLINK) add_definitions(-DOB_BUILD_DBLINK) endif() +if(OB_BUILD_CPP_ODPS) + add_definitions(-DOB_BUILD_CPP_ODPS) +endif() + # should not use initial-exec for tls-model if building OBCDC. if(NOT OB_BUILD_CDC) add_definitions(-DENABLE_INITIAL_EXEC_TLS_MODEL) diff --git a/deps/init/oceanbase.el7.aarch64.deps b/deps/init/oceanbase.el7.aarch64.deps index 01601d0654..51f5062ede 100644 --- a/deps/init/oceanbase.el7.aarch64.deps +++ b/deps/init/oceanbase.el7.aarch64.deps @@ -33,7 +33,7 @@ devdeps-s3-cpp-sdk-1.11.156-102023122011.el7.aarch64.rpm devdeps-protobuf-c-1.4.1-100000072023102410.el7.aarch64.rpm devdeps-roaringbitmap-croaring-3.0.0-42024042816.el7.aarch64.rpm devdeps-apache-arrow-9.0.0-302024052920.el7.aarch64.rpm -devdeps-apache-orc-1.8.3-202024072510.el7.aarch64.rpm +# devdeps-apache-orc-1.8.3-202024072510.el7.aarch64.rpm [tools] obdevtools-binutils-2.30-12022100413.el7.aarch64.rpm diff --git a/deps/init/oceanbase.el7.x86_64.deps b/deps/init/oceanbase.el7.x86_64.deps index 1f3f350de9..7d3496ff8b 100644 --- a/deps/init/oceanbase.el7.x86_64.deps +++ b/deps/init/oceanbase.el7.x86_64.deps @@ -36,7 +36,7 @@ devdeps-s3-cpp-sdk-1.11.156-102023122011.el7.x86_64.rpm devdeps-protobuf-c-1.4.1-100000062023102016.el7.x86_64.rpm devdeps-roaringbitmap-croaring-3.0.0-42024042816.el7.x86_64.rpm devdeps-apache-arrow-9.0.0-222024052223.el7.x86_64.rpm -devdeps-apache-orc-1.8.3-202024072510.el7.x86_64.rpm +# devdeps-apache-orc-1.8.3-202024072510.el7.x86_64.rpm [tools] obdevtools-binutils-2.30-12022100413.el7.x86_64.rpm diff --git a/deps/init/oceanbase.el8.aarch64.deps b/deps/init/oceanbase.el8.aarch64.deps index 
7f32f9b7f6..5943a48eb1 100644 --- a/deps/init/oceanbase.el8.aarch64.deps +++ b/deps/init/oceanbase.el8.aarch64.deps @@ -33,7 +33,7 @@ devdeps-s3-cpp-sdk-1.11.156-102023122011.el8.aarch64.rpm devdeps-protobuf-c-1.4.1-100000072023102410.el8.aarch64.rpm devdeps-roaringbitmap-croaring-3.0.0-42024042816.el8.aarch64.rpm devdeps-apache-arrow-9.0.0-322024052923.el8.aarch64.rpm -devdeps-apache-orc-1.8.3-202024072510.el8.aarch64.rpm +# devdeps-apache-orc-1.8.3-202024072510.el8.aarch64.rpm [tools] obdevtools-binutils-2.30-12022100413.el8.aarch64.rpm diff --git a/deps/init/oceanbase.el8.x86_64.deps b/deps/init/oceanbase.el8.x86_64.deps index 0373426cdd..54692f8961 100644 --- a/deps/init/oceanbase.el8.x86_64.deps +++ b/deps/init/oceanbase.el8.x86_64.deps @@ -35,7 +35,8 @@ devdeps-s3-cpp-sdk-1.11.156-102023122011.el8.x86_64.rpm devdeps-protobuf-c-1.4.1-100000062023102016.el8.x86_64.rpm devdeps-roaringbitmap-croaring-3.0.0-42024042816.el8.x86_64.rpm devdeps-apache-arrow-9.0.0-172024052218.el8.x86_64.rpm -devdeps-apache-orc-1.8.3-202024072510.el8.x86_64.rpm +devdeps-odps-cpp-sdk-1.0.0-482024080517.el8.x86_64.rpm +# devdeps-apache-orc-1.8.3-202024072510.el8.x86_64.rpm [tools] obdevtools-binutils-2.30-12022100413.el8.x86_64.rpm diff --git a/deps/init/oceanbase.el9.aarch64.deps b/deps/init/oceanbase.el9.aarch64.deps index dc3f98ec10..7ab33f270a 100644 --- a/deps/init/oceanbase.el9.aarch64.deps +++ b/deps/init/oceanbase.el9.aarch64.deps @@ -37,7 +37,7 @@ devdeps-s3-cpp-sdk-1.11.156-102023122011.el8.aarch64.rpm devdeps-protobuf-c-1.4.1-100000072023102410.el8.aarch64.rpm devdeps-roaringbitmap-croaring-3.0.0-42024042816.el8.aarch64.rpm devdeps-apache-arrow-9.0.0-322024052923.el8.aarch64.rpm -devdeps-apache-orc-1.8.3-202024072510.el8.aarch64.rpm +# devdeps-apache-orc-1.8.3-202024072510.el8.aarch64.rpm [deps-el9] devdeps-apr-1.6.5-232023090616.el9.aarch64.rpm target=el9 diff --git a/deps/init/oceanbase.el9.x86_64.deps b/deps/init/oceanbase.el9.x86_64.deps index e049cb0558..b3a986afd2 
100644 --- a/deps/init/oceanbase.el9.x86_64.deps +++ b/deps/init/oceanbase.el9.x86_64.deps @@ -39,7 +39,7 @@ devdeps-s3-cpp-sdk-1.11.156-102023122011.el8.x86_64.rpm devdeps-protobuf-c-1.4.1-100000062023102016.el8.x86_64.rpm devdeps-apache-arrow-9.0.0-172024052218.el8.x86_64.rpm devdeps-roaringbitmap-croaring-3.0.0-42024042816.el8.x86_64.rpm -devdeps-apache-orc-1.8.3-202024072510.el8.x86_64.rpm +# devdeps-apache-orc-1.8.3-202024072510.el8.x86_64.rpm [deps-el9] devdeps-apr-1.6.5-232023090616.el9.x86_64.rpm target=el9 diff --git a/deps/oblib/src/CMakeLists.txt b/deps/oblib/src/CMakeLists.txt index 93e6171aad..fc85c72008 100644 --- a/deps/oblib/src/CMakeLists.txt +++ b/deps/oblib/src/CMakeLists.txt @@ -21,7 +21,7 @@ target_include_directories( ${DEP_DIR}/include/apr-1/ ${DEP_DIR}/include/icu/common ${DEP_DIR}/include/apache-arrow - ${DEP_DIR}/include/apache-orc + # ${DEP_DIR}/include/apache-orc ${USSL_INCLUDE_DIRS} ) @@ -209,12 +209,12 @@ target_link_libraries(oblib_base_base_base ${DEP_DIR}/lib64/libarrow.a ${DEP_DIR}/lib64/libparquet.a ${DEP_DIR}/lib64/libarrow_bundled_dependencies.a - ${DEP_DIR}/lib64/liborc.a - ${DEP_DIR}/lib64/libsnappy.a - ${DEP_DIR}/lib64/libprotoc.a - ${DEP_DIR}/lib64/libprotobuf.a - ${DEP_DIR}/lib64/liblz4.a - ${DEP_DIR}/lib64/libzstd.a + # ${DEP_DIR}/lib64/liborc.a + # ${DEP_DIR}/lib64/libsnappy.a + # ${DEP_DIR}/lib64/libprotoc.a + # ${DEP_DIR}/lib64/libprotobuf.a + # ${DEP_DIR}/lib64/liblz4.a + # ${DEP_DIR}/lib64/libzstd.a -L${DEP_DIR}/var/usr/lib64 -L${DEP_DIR}/var/usr/lib -L${DEP_3RD_DIR}/usr/lib @@ -244,12 +244,12 @@ target_link_libraries(oblib_base_base_base ${DEP_DIR}/lib64/libarrow.a ${DEP_DIR}/lib64/libparquet.a ${DEP_DIR}/lib64/libarrow_bundled_dependencies.a - ${DEP_DIR}/lib64/liborc.a - ${DEP_DIR}/lib64/libsnappy.a - ${DEP_DIR}/lib64/libprotoc.a - ${DEP_DIR}/lib64/libprotobuf.a - ${DEP_DIR}/lib64/liblz4.a - ${DEP_DIR}/lib64/libzstd.a + # ${DEP_DIR}/lib64/liborc.a + # ${DEP_DIR}/lib64/libsnappy.a + # ${DEP_DIR}/lib64/libprotoc.a + # 
${DEP_DIR}/lib64/libprotobuf.a + # ${DEP_DIR}/lib64/liblz4.a + # ${DEP_DIR}/lib64/libzstd.a -L${DEP_DIR}/var/usr/lib64 -L${DEP_DIR}/var/usr/lib -L${DEP_3RD_DIR}/usr/lib diff --git a/deps/oblib/src/lib/charset/ob_charset.cpp b/deps/oblib/src/lib/charset/ob_charset.cpp index fbe4b2cf85..261bb0d7e5 100644 --- a/deps/oblib/src/lib/charset/ob_charset.cpp +++ b/deps/oblib/src/lib/charset/ob_charset.cpp @@ -3337,7 +3337,7 @@ int ObCharset::charset_convert(ObIAllocator &alloc, char *res_buf = static_cast(alloc.alloc(res_buf_len)); if (OB_ISNULL(res_buf)) { ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("alloc memory failed", K(ret)); + LOG_WARN("alloc memory failed", K(ret), K(lbt())); } else { if (OB_SUCC(charset_convert(src_cs_type, in.ptr(), in.length(), dst_cs_type, res_buf, res_buf_len, res_len))) { diff --git a/deps/oblib/src/lib/ob_define.h b/deps/oblib/src/lib/ob_define.h index cb5388f0b2..6e16d74b70 100644 --- a/deps/oblib/src/lib/ob_define.h +++ b/deps/oblib/src/lib/ob_define.h @@ -372,6 +372,8 @@ const int64_t OB_MAX_PASSWORD_LENGTH = 128; const int64_t OB_MAX_PASSWORD_BUF_LENGTH = OB_MAX_PASSWORD_LENGTH + 1; // After each sha1 is 41 characters, the incremental backup is up to 64 times, and the maximum password required for recovery is 64*(41+1)=2,688 const int64_t OB_MAX_ENCRYPTED_PASSWORD_LENGTH = OB_MAX_PASSWORD_LENGTH * 4; +const int64_t OB_MAX_EXTERNAL_TABLE_PROPERTIES_ITEM_LENGTH = 128; +const int64_t OB_MAX_ENCRYPTED_EXTERNAL_TABLE_PROPERTIES_ITEM_LENGTH = OB_MAX_EXTERNAL_TABLE_PROPERTIES_ITEM_LENGTH * 4; const int64_t OB_MAX_PASSWORD_ARRAY_LENGTH = 4096; const int64_t OB_MAX_ERROR_MSG_LEN = 512; const int64_t OB_MAX_RESULT_MESSAGE_LENGTH = 1024; diff --git a/deps/oblib/src/lib/ob_name_def.h b/deps/oblib/src/lib/ob_name_def.h index 1ca13f8e7c..1b07db2a90 100644 --- a/deps/oblib/src/lib/ob_name_def.h +++ b/deps/oblib/src/lib/ob_name_def.h @@ -1067,6 +1067,7 @@ #define N_ST_WITHIN "st_within" #define N_SQL_MODE_CONVERT "sql_mode_convert" #define 
N_EXTERNAL_FILE_COLUMN_PREFIX "metadata$filecol" +#define N_EXTERNAL_TABLE_COLUMN_PREFIX "external$tablecol" #define N_PARTITION_LIST_COL "metadata$partition_list_col" #define N_EXTERNAL_FILE_URL "metadata$fileurl" #define N_EXTERNAL_FILE_ROW "external$filerow" diff --git a/deps/oblib/unittest/lib/CMakeLists.txt b/deps/oblib/unittest/lib/CMakeLists.txt index a7bb13cba2..2547f00e92 100644 --- a/deps/oblib/unittest/lib/CMakeLists.txt +++ b/deps/oblib/unittest/lib/CMakeLists.txt @@ -6,7 +6,7 @@ # oblib_addtest(time/test_ob_time_utility.cpp) # oblib_addtest(timezone/test_ob_timezone_utils.cpp) oblib_addtest(parquet/test_parquet.cpp) -oblib_addtest(orc/test_orc.cpp) +# oblib_addtest(orc/test_orc.cpp) oblib_addtest(alloc/test_alloc_struct.cpp) oblib_addtest(alloc/test_block_set.cpp) oblib_addtest(alloc/test_chunk_mgr.cpp) diff --git a/src/share/external_table/ob_external_table_file_mgr.cpp b/src/share/external_table/ob_external_table_file_mgr.cpp index be1f59f51f..b52daaedc2 100644 --- a/src/share/external_table/ob_external_table_file_mgr.cpp +++ b/src/share/external_table/ob_external_table_file_mgr.cpp @@ -246,7 +246,6 @@ int ObExternalTableFileManager::get_external_files_by_part_id( LOG_WARN("fail to fill cache from inner table", K(ret)); } } - for (int i = 0; OB_SUCC(ret) && i < ext_files->file_urls_.count(); ++i) { bool in_ranges = false; if (range_filter != NULL && OB_FAIL(ObExternalTableUtils::is_file_id_in_ranges(*range_filter, @@ -584,6 +583,83 @@ int ObExternalTableFileManager::get_all_partition_list_val(const ObTableSchema * return ret; } +int ObExternalTableFileManager::calculate_odps_part_val_by_part_spec(const ObTableSchema *table_schema, // For each entry of file_infos, splits file_url_ (the ODPS partition spec) with ObSQLUtils::extract_odps_part_spec and appends one ObNewRow of partition-key values to part_vals; only char/varchar and integer partition keys are handled, any other key type yields OB_ERR_UNEXPECTED. + const ObIArray &file_infos, + ObIArray &part_vals, + share::schema::ObSchemaGetterGuard &schema_guard, + ObExecContext &exec_ctx) +{ + int ret = OB_SUCCESS; + ObIAllocator &allocator = exec_ctx.get_allocator(); + CK (OB_NOT_NULL(table_schema) && OB_LIKELY(table_schema->is_odps_external_table())); + if (OB_SUCC(ret)) { + 
const common::ObPartitionKeyInfo &part_key_info = table_schema->get_partition_key_info(); + const int part_key_size = part_key_info.get_size(); + for (int64_t i = 0; OB_SUCC(ret) && i < file_infos.count(); i++) { + const ObString &all_part_spec = file_infos.at(i).file_url_; + ObSEArray part_spec_list; + if (OB_FAIL(ObSQLUtils::extract_odps_part_spec(all_part_spec, part_spec_list))) { + LOG_WARN("failed to extract odps part spec", K(ret), K(all_part_spec)); + } else if (part_spec_list.count() != part_key_size) { // the spec must carry exactly one value per partition key column + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected count find part spec of odps", K(ret), K(file_infos), K(file_infos.count()), K(i), K(all_part_spec), K(part_spec_list.count()), K(part_key_size)); + } else { + ObNewRow odps_part_row; + ObObj *obj_array = nullptr; + if (OB_ISNULL(obj_array = static_cast(allocator.alloc(sizeof(ObObj) * part_key_size)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate memory", K(ret)); + } else { + for (ObObj *ptr = obj_array; OB_SUCC(ret) && ptr < obj_array + part_key_size; ++ptr) { + new(ptr)ObObj(); + } + odps_part_row.assign(obj_array, part_key_size); + } + for (int64_t j = 0; OB_SUCC(ret) && j < part_spec_list.count(); ++j) { + const ObRowkeyColumn *part_col = part_key_info.get_column(j); + ObObjType part_key_type = ObUnknownType; + ObString& part_spec = part_spec_list.at(j); + if (OB_ISNULL(part_col)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null ptr", K(ret), K(j)); + } else if (FALSE_IT(part_key_type = part_col->get_meta_type().get_type())) { + } else if (part_key_type == ObVarcharType || + part_key_type == ObCharType) { + odps_part_row.get_cell(j).set_meta_type(part_col->get_meta_type()); + odps_part_row.get_cell(j).set_collation_type(ObCharset::get_system_collation()); + odps_part_row.get_cell(j).set_varchar_value(part_spec.ptr(), part_spec.length()); + } else if (part_key_type == ObTinyIntType || + part_key_type == ObSmallIntType || + part_key_type == ObMediumIntType || + part_key_type == 
ObInt32Type || + part_key_type == ObIntType) { + int64_t val = 0; + for (int64_t k = 0; OB_SUCC(ret) && k < part_spec.length(); ++k) { + if (part_spec.ptr()[k] >= '0' && part_spec.ptr()[k] <= '9' && val <= (INT64_MAX - (part_spec.ptr()[k] - '0')) / 10) { // decimal digits only; a digit that would push val past INT64_MAX is rejected below instead of overflowing + val = val * 10 + (part_spec.ptr()[k] - '0'); + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected part_spec", K(ret), K(part_spec)); + } + } + if (OB_SUCC(ret)) { + odps_part_row.get_cell(j).set_meta_type(part_col->get_meta_type()); + odps_part_row.get_cell(j).set_int(val); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected part_key_type", K(part_key_type), K(j), K(ret)); + } + } + if (OB_SUCC(ret) && OB_FAIL(part_vals.push_back(odps_part_row))) { + LOG_WARN("failed push back odps_part_row", K(ret)); + } + } + } + } + return ret; +} + int ObExternalTableFileManager::calculate_file_part_val_by_file_name(const ObTableSchema *table_schema, const ObIArray &file_infos, ObIArray &part_vals, @@ -603,8 +679,11 @@ int ObExternalTableFileManager::calculate_file_part_val_by_file_name(const ObTab ObNewRow list_val; ObObj *obj_array = nullptr; if (file_name_row.get_count() > 0) { - file_name_row.get_cell(0).set_string(ObVarcharType, is_local_storage ? - file_infos.at(i).file_url_.after(ip_delimiter) : file_infos.at(i).file_url_); + file_name_row.get_cell(0).set_string(ObVarcharType, table_schema->is_odps_external_table() ? + file_infos.at(i).file_url_.after(equals_delimiter) : + (is_local_storage ? 
+ file_infos.at(i).file_url_.after(ip_delimiter) : + file_infos.at(i).file_url_)); } else { ret = OB_ERR_UNEXPECTED; LOG_WARN("row cell count not expected", K(ret)); @@ -675,9 +754,16 @@ int ObExternalTableFileManager::calculate_all_files_partitions(share::schema::Ob ObArray existed_part_vals; ObArray existed_part_ids; ObArray file_part_vals; + bool is_odps_external_table = false; CK (OB_NOT_NULL(table_schema) && OB_LIKELY(table_schema->is_external_table())); OZ (get_all_partition_list_val(table_schema, existed_part_vals, existed_part_ids)); - OZ (calculate_file_part_val_by_file_name(table_schema, file_infos, file_part_vals, schema_guard, exec_ctx)); + OX(is_odps_external_table = table_schema->is_odps_external_table()); + + if (is_odps_external_table) { + OZ(calculate_odps_part_val_by_part_spec(table_schema, file_infos, file_part_vals, schema_guard, exec_ctx)); + } else { + OZ (calculate_file_part_val_by_file_name(table_schema, file_infos, file_part_vals, schema_guard, exec_ctx)); + } for (int64_t i = 0; OB_SUCC(ret) && i < file_part_vals.count(); i++) { int64_t idx = -1; OZ (find_partition_existed(existed_part_vals, file_part_vals.at(i), idx)); @@ -805,7 +891,7 @@ int ObExternalTableFileManager::update_inner_table_files_list_by_table( if (OB_SUCC(ret) && partitions_to_del.count() > 0) { ObAlterTableStmt *alter_table_stmt = NULL; - OZ (create_alter_table_stmt(exec_ctx, table_schema, database_schema, partitions_to_add.count(), ObAlterTableArg::DROP_PARTITION, alter_table_stmt)); + OZ (create_alter_table_stmt(exec_ctx, table_schema, database_schema, partitions_to_del.count(), ObAlterTableArg::DROP_PARTITION, alter_table_stmt)); if (OB_SUCC(ret)) { for (int64_t i = 0; OB_SUCC(ret) && i < partitions_to_del.count(); i++) { if (OB_ISNULL(partitions_to_del.at(i))) { @@ -858,11 +944,7 @@ int ObExternalTableFileManager::update_inner_table_files_list_by_table( int64_t mock_part_id = GET_EXT_MOCK_PART_ID(part_ids.at(i)); CHECK_EXT_MOCK_PART_ID_VALID(file_part_ids_added, 
mock_part_id); part_id = MAP_EXT_MOCK_PART_ID_TO_REAL_PART_ID(file_part_ids_added, mock_part_id); - } else { - part_id = part_ids.at(i); - } - if (OB_SUCC(ret)) { - OZ (add_item_to_map(exec_ctx.get_allocator(), part_id_to_file_urls, part_id, file_infos.at(i))); + part_ids.at(i) = part_id; } } #undef GET_EXT_MOCK_PART_ID @@ -870,6 +952,9 @@ int ObExternalTableFileManager::update_inner_table_files_list_by_table( } } + for (int64_t i = 0; OB_SUCC(ret) && i < part_ids.count(); i++) { + OZ (add_item_to_map(exec_ctx.get_allocator(), part_id_to_file_urls, part_ids.at(i), file_infos.at(i))); + } //OZ (get_part_id_to_file_urls_map(table_schema, database_schema, is_local_storage, file_urls, file_sizes, schema_guard, exec_ctx, trans, part_id_to_file_urls)); for (common::hash::ObHashMap *>::iterator it = part_id_to_file_urls.begin(); OB_SUCC(ret) && it != part_id_to_file_urls.end(); it++) { @@ -939,6 +1024,16 @@ int ObExternalTableFileManager::update_inner_table_files_list_by_part( int64_t insert_rows = 0; int64_t max_file_id = 0;// ObCSVTableRowIterator::MIN_EXTERNAL_TABLE_FILE_ID - 1 common::hash::ObHashMap hash_map; + share::schema::ObSchemaGetterGuard schema_guard; + const ObTableSchema *table_schema = NULL; + bool is_odps_external_table = false; + char file_url_buf[256] = { 0 }; + OZ (GCTX.schema_service_->get_tenant_schema_guard(tenant_id, schema_guard)); + OZ (schema_guard.get_table_schema(tenant_id, table_id, table_schema)); + CK (OB_NOT_NULL(table_schema)); + if (OB_SUCC(ret)) { + is_odps_external_table = table_schema->is_odps_external_table(); + } OZ(get_all_records_from_inner_table(allocator, tenant_id, table_id, partition_id, old_file_infos, old_file_ids)); OZ(hash_map.create(std::max(file_infos.count(), old_file_infos.count()) + 1, "ExternalFile")); for (int64_t i = 0; OB_SUCC(ret) && i < old_file_infos.count(); i++) { @@ -952,7 +1047,7 @@ int ObExternalTableFileManager::update_inner_table_files_list_by_part( if (ret == OB_HASH_NOT_EXIST) { ret = OB_SUCCESS; 
OZ(insert_file_infos.push_back(file_infos.at(i))); - OZ(insert_file_ids.push_back(++max_file_id)); + OZ(insert_file_ids.push_back(is_odps_external_table ? 0 : ++max_file_id)); // odps table's file_id is 0 } else if (ret == OB_SUCCESS) { OZ(update_file_infos.push_back(file_infos.at(i))); OZ(update_file_ids.push_back(file_id)); @@ -960,7 +1055,7 @@ int ObExternalTableFileManager::update_inner_table_files_list_by_part( } OZ(hash_map.reuse()); for (int64_t i = 0; OB_SUCC(ret) && i < file_infos.count(); i++) { - OZ(hash_map.set_refactored(file_infos.at(i).file_url_, 1)); + OZ(hash_map.set_refactored(file_infos.at(i).file_url_, is_odps_external_table ? 0 : 1)); // odps table's file_id is 0 } for (int64_t i = 0; OB_SUCC(ret) && i < old_file_infos.count(); i++) { int64_t existed = 0; @@ -997,10 +1092,13 @@ int ObExternalTableFileManager::update_inner_table_files_list_by_part( OZ(insert_sql.assign_fmt("INSERT INTO %s(TABLE_ID,PART_ID,FILE_ID,FILE_URL,CREATE_VERSION,DELETE_VERSION,FILE_SIZE) VALUES", OB_ALL_EXTERNAL_TABLE_FILE_TNAME)); for (int64_t i = 0; OB_SUCC(ret) && i < insert_file_infos.count(); i++) { + ObString file_url = insert_file_infos.at(i).file_url_; + int new_url_len = ObHexEscapeSqlStr(insert_file_infos.at(i).file_url_).to_string(file_url_buf, 256); + file_url.assign(file_url_buf, new_url_len); OZ(insert_sql.append_fmt("%c(%lu,%lu,%ld,'%.*s',%ld,%ld,%ld)", (0 == i) ? 
' ' : ',', table_id, partition_id, insert_file_ids.at(i), - insert_file_infos.at(i).file_url_.length(), insert_file_infos.at(i).file_url_.ptr(), + file_url.length(), file_url.ptr(), cur_time, MAX_VERSION, insert_file_infos.at(i).file_size_)); } OZ(trans.write(tenant_id, insert_sql.ptr(), insert_rows)); @@ -1020,7 +1118,7 @@ int ObExternalTableFileManager::get_all_records_from_inner_table(ObIAllocator &a SMART_VAR(ObMySQLProxy::MySQLResult, res) { sqlclient::ObMySQLResult *result = NULL; ObSqlString sql; - OZ (sql.append_fmt("SELECT file_url, file_id FROM %s" + OZ (sql.append_fmt("SELECT file_url, file_id, file_size FROM %s" " WHERE table_id = %lu AND part_id = %lu", OB_ALL_EXTERNAL_TABLE_FILE_TNAME, table_id, partition_id)); OZ (GCTX.sql_proxy_->read(res, tenant_id, sql.ptr())); @@ -1031,13 +1129,17 @@ int ObExternalTableFileManager::get_all_records_from_inner_table(ObIAllocator &a } else { while (OB_SUCC(result->next())) { ObString file_url; - int64_t file_id; + int64_t file_id = 0; + int64_t file_size = 0; EXTRACT_VARCHAR_FIELD_MYSQL(*result, "file_url", file_url); EXTRACT_INT_FIELD_MYSQL(*result, "file_id", file_id, int64_t); + EXTRACT_INT_FIELD_MYSQL(*result, "file_size", file_size, int64_t); ObString tmp_url; OZ (ob_write_string(allocator, file_url, tmp_url)); ObExternalFileInfoTmp file_info; + file_info.part_id_ = partition_id; file_info.file_url_ = tmp_url; + file_info.file_size_ = file_size; OZ (file_urls.push_back(file_info)); OZ (file_ids.push_back(file_id)); } @@ -1272,7 +1374,10 @@ int ObExternalTableFileManager::refresh_external_table(const uint64_t tenant_id, table_schema->get_table_id(), table_schema->get_external_file_location(), table_schema->get_external_file_location_access_info(), - table_schema->get_external_file_pattern(), regexp_vars, exec_ctx.get_allocator(), + table_schema->get_external_file_pattern(), + table_schema->get_external_properties(), + table_schema->is_partitioned_table(), + regexp_vars, exec_ctx.get_allocator(), full_path, 
file_urls, file_sizes)); //TODO [External Table] opt performance diff --git a/src/share/external_table/ob_external_table_file_mgr.h b/src/share/external_table/ob_external_table_file_mgr.h index 12b4923785..9c0e23f226 100644 --- a/src/share/external_table/ob_external_table_file_mgr.h +++ b/src/share/external_table/ob_external_table_file_mgr.h @@ -21,6 +21,7 @@ #include "sql/resolver/ob_resolver_utils.h" #include "sql/resolver/expr/ob_raw_expr_util.h" #include "src/sql/resolver/ob_stmt_resolver.h" +#include "sql/engine/table/ob_odps_table_row_iter.h" namespace oceanbase { namespace sql { @@ -104,6 +105,7 @@ public: const char* auto_refresh_job_name = "auto_refresh_external_table_job"; const char ip_delimiter = '%'; + const char equals_delimiter = '='; ObExternalTableFileManager() {} @@ -200,7 +202,7 @@ private: const uint64_t partition_id, const ObIArray &file_infos); - int update_inner_table_files_list_by_table( + int update_inner_table_files_list_by_table( sql::ObExecContext &exec_ctx, ObMySQLTransaction &trans, const uint64_t tenant_id, @@ -255,6 +257,11 @@ private: ObIArray &part_vals, share::schema::ObSchemaGetterGuard &schema_guard, ObExecContext &exec_ctx); + int calculate_odps_part_val_by_part_spec(const ObTableSchema *table_schema, + const ObIArray &file_infos, + ObIArray &part_vals, + share::schema::ObSchemaGetterGuard &schema_guard, + ObExecContext &exec_ctx); int find_partition_existed(ObIArray &existed_part, ObNewRow &file_part_val, int64_t &found); diff --git a/src/share/external_table/ob_external_table_utils.cpp b/src/share/external_table/ob_external_table_utils.cpp index 611cb71e72..f5a10a18e7 100644 --- a/src/share/external_table/ob_external_table_utils.cpp +++ b/src/share/external_table/ob_external_table_utils.cpp @@ -74,6 +74,9 @@ int ObExternalTableUtils::is_file_id_in_ranges(const ObIArray &ran in_ranges = true; } } + if (0 == file_id) { + in_ranges = true; + } return ret; } @@ -100,7 +103,8 @@ int 
ObExternalTableUtils::resolve_file_id_range(const ObNewRange &range, start_file = ObCSVTableRowIterator::MIN_EXTERNAL_TABLE_FILE_ID; end_file = INT64_MAX; if (column_idx >= range.get_start_key().get_obj_cnt() || - column_idx >= range.get_end_key().get_obj_cnt() ) { + column_idx >= range.get_end_key().get_obj_cnt() || + column_idx < 0) { ret = OB_ERR_UNEXPECTED; LOG_WARN("failed. input column idx invalid", K(ret), K(range), K(column_idx)); } else { @@ -139,6 +143,25 @@ int ObExternalTableUtils::resolve_line_number_range(const ObNewRange &range, return ret; } +int ObExternalTableUtils::resolve_odps_start_step(const ObNewRange &range, // Reads `start` from the range's start key and `step` from its end key, both at position column_idx; returns OB_ERR_UNEXPECTED when column_idx is outside either key. + const int64_t &column_idx, + int64_t &start, + int64_t &step) +{ + int ret = OB_SUCCESS; + if (column_idx >= range.get_start_key().get_obj_cnt() || + column_idx >= range.get_end_key().get_obj_cnt() || column_idx < 0) { // same bounds as resolve_file_id_range: a negative index would read before the key's object array + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed. input column idx invalid", K(ret), K(range), K(column_idx)); + } else { + const ObObj &start_obj = range.get_start_key().get_obj_ptr()[column_idx]; + const ObObj &end_obj = range.get_end_key().get_obj_ptr()[column_idx]; + start = start_obj.get_int(); + step = end_obj.get_int(); + } + return ret; +} + int ObExternalTableUtils::convert_external_table_new_range(const ObString &file_url, const int64_t file_id, const uint64_t part_id, @@ -237,7 +260,7 @@ int ObExternalTableUtils::make_external_table_scan_range(const common::ObString int ret = OB_SUCCESS; ObObj *obj_start = NULL; ObObj *obj_end = NULL; - if (OB_UNLIKELY(first_lineno > last_lineno)) { + if (OB_UNLIKELY(first_lineno > last_lineno && file_id != 0)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("failed. 
get invalid params", K(ret), K(first_lineno), K(last_lineno)); } else if (OB_ISNULL(obj_start = static_cast(allocator.alloc(sizeof(ObObj) * @@ -307,29 +330,54 @@ int ObExternalTableUtils::prepare_single_scan_range(const uint64_t tenant_id, } else { new_range.reset(); } - for (int64_t i = 0; OB_SUCC(ret) && i < tmp_ranges.count(); ++i) { - if (OB_ISNULL(tmp_ranges.at(i))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected NULL ptr", K(ret)); - } else { - for (int64_t j = 0; OB_SUCC(ret) && j < file_urls.count(); ++j) { - ObNewRange *range = NULL; - bool is_valid = false; - if (OB_ISNULL(range = OB_NEWx(ObNewRange, (&range_allocator)))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("failed to new a ptr", K(ret)); - } else if (OB_FAIL(ObExternalTableUtils::convert_external_table_new_range( - file_urls.at(j).file_url_, - file_urls.at(j).file_id_, - file_urls.at(j).part_id_, - *tmp_ranges.at(i), - range_allocator, - *range, - is_valid))) { - LOG_WARN("failed to convert external table new range", K(ret), K(file_urls.at(j)), - K(ranges.at(i))); - } else if (is_valid) { - OZ (new_range.push_back(range)); + if (!file_urls.empty() && file_urls.at(0).file_id_ == 0) {// if file_id_ == 0 means, it's odps table + for (int64_t i = 0; OB_SUCC(ret) && i < file_urls.count(); ++i) { + const ObExternalFileInfo& external_info = file_urls.at(i); + ObNewRange *range = NULL; + if (0 != external_info.file_id_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected file id", K(ret), K(i), K(external_info.file_id_)); + } else if (OB_ISNULL(range = OB_NEWx(ObNewRange, (&range_allocator)))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to new a ptr", K(ret)); + } else if (OB_FAIL(ObExternalTableUtils::make_external_table_scan_range(external_info.file_url_, + external_info.file_id_, + external_info.part_id_, + 0, + INT64_MAX, + range_allocator, + *range))) { + LOG_WARN("failed to make external table scan range", K(ret)); + } else { + OZ (new_range.push_back(range)); + } + } + + } else { + for 
(int64_t i = 0; OB_SUCC(ret) && i < tmp_ranges.count(); ++i) { + if (OB_ISNULL(tmp_ranges.at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected NULL ptr", K(ret)); + } else { + for (int64_t j = 0; OB_SUCC(ret) && j < file_urls.count(); ++j) { + ObNewRange *range = NULL; + bool is_valid = false; + if (OB_ISNULL(range = OB_NEWx(ObNewRange, (&range_allocator)))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to new a ptr", K(ret)); + } else if (OB_FAIL(ObExternalTableUtils::convert_external_table_new_range( + file_urls.at(j).file_url_, + file_urls.at(j).file_id_, + file_urls.at(j).part_id_, + *tmp_ranges.at(i), + range_allocator, + *range, + is_valid))) { + LOG_WARN("failed to convert external table new range", K(ret), K(file_urls.at(j)), + K(ranges.at(i))); + } else if (is_valid) { + OZ (new_range.push_back(range)); + } } } } @@ -634,6 +682,8 @@ int ObExternalTableUtils::collect_external_file_list( const ObString &location, const ObString &access_info, const ObString &pattern, + const ObString &properties, + const bool &is_partitioned_table, const sql::ObExprRegexpSessionVariables ®exp_vars, ObIAllocator &allocator, ObSqlString &full_path, @@ -642,32 +692,102 @@ int ObExternalTableUtils::collect_external_file_list( { int ret = OB_SUCCESS; - ObSEArray all_servers; - share::schema::ObSchemaGetterGuard schema_guard; - OZ (GCTX.location_service_->external_table_get(tenant_id, table_id, all_servers)); - const bool is_local_storage = ObSQLUtils::is_external_files_on_local_disk(location); - if (OB_SUCC(ret) && full_path.length() > 0 - && *(full_path.ptr() + full_path.length() - 1) != '/' ) { - OZ (full_path.append("/")); - } - if (OB_FAIL(ret)) { - } else if (is_local_storage) { - OZ (collect_local_files_on_servers(tenant_id, location, pattern, regexp_vars, all_servers, file_urls, file_sizes, full_path, allocator)); - } else { - OZ (ObExternalTableFileManager::get_instance().get_external_file_list_on_device( - location, pattern, regexp_vars, file_urls, 
file_sizes, access_info, allocator)); - for (int64_t i = 0; OB_SUCC(ret) && i < file_urls.count(); i++) { - ObSqlString tmp_file_url; - ObString &file_url = file_urls.at(i); - OZ (tmp_file_url.append(full_path.string())); - OZ (tmp_file_url.append(file_urls.at(i))); - OZ (ob_write_string(allocator, tmp_file_url.string(), file_url)); + if (!properties.empty()) { +#ifdef OB_BUILD_CPP_ODPS + // Since each partition information of an ODPS table obtained by the ODPS driver is a string, + // OceanBase treat partition string as an external table filename, one file corresponds to one odps partition, + // the number of files corresponds to the number of partitions. + sql::ObODPSTableRowIterator odps_driver; + sql::ObExternalFileFormat ex_format; + ex_format.format_type_ = sql::ObExternalFileFormat::ODPS_FORMAT; + if (OB_FAIL(ex_format.load_from_string(properties, allocator))) { + LOG_WARN("failed to load from string", K(ret)); + } else if (OB_FAIL(odps_driver.init_tunnel(ex_format.odps_format_))) { + LOG_WARN("failed to init tunnel", K(ret)); + } else if (OB_FAIL(odps_driver.pull_partition_info())) { + LOG_WARN("failed to pull partition info", K(ret)); + } else if (odps_driver.is_part_table()) { + if (!is_partitioned_table) { + ret = OB_EXTERNAL_ODPS_UNEXPECTED_ERROR; + LOG_WARN("remote odps table is partitioned table, but local odps external table is not partitioned table", K(ret)); + LOG_USER_ERROR(OB_EXTERNAL_ODPS_UNEXPECTED_ERROR, "remote odps table is partitioned table, but local odps external table is not partitioned table"); + } + ObIArray& part_list_info = odps_driver.get_partition_info(); + for (int64_t i = 0; OB_SUCC(ret) && i < part_list_info.count(); ++i) { + const char *part_spec_src = part_list_info.at(i).name_.c_str(); + int64_t part_spec_src_len = STRLEN(part_spec_src); + char *part_spec = NULL; + if (OB_ISNULL(part_spec = reinterpret_cast(allocator.alloc(part_spec_src_len)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc mem", 
K(part_spec_src_len), K(ret)); + } else { + MEMCPY(part_spec, part_spec_src, part_spec_src_len); + OZ(file_sizes.push_back(part_list_info.at(i).record_count_)); + OZ (file_urls.push_back(ObString(part_spec_src_len, part_spec))); + } + } + } else { + ObIArray& part_list_info = odps_driver.get_partition_info(); + if (is_partitioned_table) { + ret = OB_EXTERNAL_ODPS_UNEXPECTED_ERROR; + LOG_WARN("remote odps table is not partitioned table, but local odps external table is partitioned table", K(ret)); + LOG_USER_ERROR(OB_EXTERNAL_ODPS_UNEXPECTED_ERROR, "remote odps table is not partitioned table, but local odps external table is partitioned table"); + } else if (1 != part_list_info.count()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected count of partition info", K(ret), K(part_list_info.count())); + } + OZ(file_sizes.push_back(part_list_info.at(0).record_count_)); + OZ (file_urls.push_back(ObString(""))); + } +#else + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "external odps table"); + LOG_WARN("not support odps table in opensource", K(ret)); +#endif + } else if (location.empty()) { + //do nothing + } else { + ObSEArray all_servers; + share::schema::ObSchemaGetterGuard schema_guard; + OZ (GCTX.location_service_->external_table_get(tenant_id, table_id, all_servers)); + const bool is_local_storage = ObSQLUtils::is_external_files_on_local_disk(location); + if (OB_SUCC(ret) && full_path.length() > 0 + && *(full_path.ptr() + full_path.length() - 1) != '/' ) { + OZ (full_path.append("/")); + } + if (OB_FAIL(ret)) { + } else if (is_local_storage) { + OZ (collect_local_files_on_servers(tenant_id, location, pattern, regexp_vars, all_servers, file_urls, file_sizes, full_path, allocator)); + } else { + OZ (ObExternalTableFileManager::get_instance().get_external_file_list_on_device( + location, pattern, regexp_vars, file_urls, file_sizes, access_info, allocator)); + for (int64_t i = 0; OB_SUCC(ret) && i < file_urls.count(); i++) { + ObSqlString tmp_file_url; + 
ObString &file_url = file_urls.at(i); + OZ (tmp_file_url.append(full_path.string())); + OZ (tmp_file_url.append(file_urls.at(i))); + OZ (ob_write_string(allocator, tmp_file_url.string(), file_url)); + } } - } - OZ (ObExternalTableUtils::sort_external_files(file_urls, file_sizes)); + OZ (ObExternalTableUtils::sort_external_files(file_urls, file_sizes)); + } return ret; } +bool ObExternalTableUtils::is_skipped_insert_column(const schema::ObColumnSchemaV2& column) +{ + bool is_skip = false; + if (OB_HIDDEN_FILE_ID_COLUMN_ID == column.get_column_id() + || OB_HIDDEN_LINE_NUMBER_COLUMN_ID == column.get_column_id()) { + // 外表插入时不写隐藏列 + is_skip = true; + } else if (column.is_tbl_part_key_column()) { + // 外表插入时不写分区键的列 + is_skip = true; + } + return is_skip; +} + } // namespace share } // namespace oceanbase diff --git a/src/share/external_table/ob_external_table_utils.h b/src/share/external_table/ob_external_table_utils.h index 7159734799..e80f17d7c2 100644 --- a/src/share/external_table/ob_external_table_utils.h +++ b/src/share/external_table/ob_external_table_utils.h @@ -16,6 +16,7 @@ #include "lib/container/ob_iarray.h" #include "lib/string/ob_string.h" #include "lib/allocator/page_arena.h" +#include "src/share/schema/ob_column_schema.h" namespace oceanbase { @@ -76,6 +77,10 @@ class ObExternalTableUtils { const int64_t &column_idx, int64_t &start_lineno, int64_t &end_lineno); + static int resolve_odps_start_step(const common::ObNewRange &range, + const int64_t &column_idx, + int64_t &start, + int64_t &step); static int convert_external_table_new_range(const common::ObString &file_url, const int64_t file_id, const uint64_t ref_table_id, @@ -111,6 +116,8 @@ class ObExternalTableUtils { const ObString &location, const ObString &access_info, const ObString &pattern, + const ObString &properties, + const bool &is_partitioned_table, const sql::ObExprRegexpSessionVariables &regexp_vars, ObIAllocator &allocator, common::ObSqlString &full_path, @@ -127,11 +134,6 @@ class 
ObExternalTableUtils { ObIArray &file_sizes, common::ObSqlString &partition_path, ObIAllocator &allocator); - - private: - static bool is_left_edge(const common::ObObj &value); - static bool is_right_edge(const common::ObObj &value); - static int64_t get_edge_value(const common::ObObj &edge); static int make_external_table_scan_range(const common::ObString &file_url, const int64_t file_id, const uint64_t ref_table_id, @@ -139,7 +141,12 @@ class ObExternalTableUtils { const int64_t last_lineno, common::ObIAllocator &allocator, common::ObNewRange &new_range); + static bool is_skipped_insert_column(const schema::ObColumnSchemaV2& column); + private: + static bool is_left_edge(const common::ObObj &value); + static bool is_right_edge(const common::ObObj &value); + static int64_t get_edge_value(const common::ObObj &edge); static int sort_external_files(ObIArray &file_urls, ObIArray &file_sizes); diff --git a/src/share/schema/ob_schema_printer.cpp b/src/share/schema/ob_schema_printer.cpp index 6fdd8343b4..4b9c7b886f 100644 --- a/src/share/schema/ob_schema_printer.cpp +++ b/src/share/schema/ob_schema_printer.cpp @@ -5559,10 +5559,15 @@ int ObSchemaPrinter::print_external_table_file_info(const ObTableSchema &table_s // 1. 
print file location, pattern const ObString &location = table_schema.get_external_file_location(); const ObString &pattern = table_schema.get_external_file_pattern(); + const ObString &format_string = table_schema.get_external_file_format(); + const ObString &properties_string = table_schema.get_external_properties(); const bool user_specified = table_schema.is_user_specified_partition_for_external_table(); - if (OB_FAIL(databuff_printf(buf, buf_len, pos, "\nLOCATION='%.*s'", location.length(), location.ptr()))) { + bool is_odps_table = false; + if (OB_FAIL(ObSQLUtils::is_external_odps_table(properties_string, allocator, is_odps_table))) { + LOG_WARN("failed check is odps table or not", K(ret)); + } else if (!is_odps_table && OB_FAIL(databuff_printf(buf, buf_len, pos, "\nLOCATION='%.*s'", location.length(), location.ptr()))) { SHARE_SCHEMA_LOG(WARN, "fail to print LOCATION", K(ret)); - } else if (!pattern.empty() && OB_FAIL(databuff_printf(buf, buf_len, pos, "\nPATTERN='%.*s'", pattern.length(), pattern.ptr()))) { + } else if (!is_odps_table && !pattern.empty() && OB_FAIL(databuff_printf(buf, buf_len, pos, "\nPATTERN='%.*s'", pattern.length(), pattern.ptr()))) { SHARE_SCHEMA_LOG(WARN, "fail to print PATTERN", K(ret)); } else if (OB_FAIL(databuff_printf(buf, buf_len, pos, "\nAUTO_REFRESH = %s", table_schema.get_external_table_auto_refresh() == 0 ? "OFF" : table_schema.get_external_table_auto_refresh() == 1 ? "IMMEDIATE" : "INTERVAL"))) { @@ -5576,13 +5581,19 @@ int ObSchemaPrinter::print_external_table_file_info(const ObTableSchema &table_s // 2. print file format if (OB_SUCC(ret)) { ObExternalFileFormat format; - if (OB_FAIL(format.load_from_string(table_schema.get_external_file_format(), allocator))) { + const ObString &format_or_properties = is_odps_table ? 
properties_string : format_string; + if (format_or_properties.empty()) { + ret = OB_ERR_UNEXPECTED; + SHARE_SCHEMA_LOG(WARN, "format_or_properties is empty", K(ret)); + } else if (OB_FAIL(format.load_from_string(format_or_properties, allocator))) { SHARE_SCHEMA_LOG(WARN, "fail to load from json string", K(ret)); } else if (!(format.format_type_ > ObExternalFileFormat::INVALID_FORMAT && format.format_type_ < ObExternalFileFormat::MAX_FORMAT)) { ret = OB_NOT_SUPPORTED; SHARE_SCHEMA_LOG(WARN, "unsupported to print file format", K(ret), K(format.format_type_)); - } else if (OB_FAIL(databuff_printf(buf, buf_len, pos, "\nFORMAT (\n"))) { + } else if (!is_odps_table && OB_FAIL(databuff_printf(buf, buf_len, pos, "\nFORMAT (\n"))) { + SHARE_SCHEMA_LOG(WARN, "fail to print FORMAT (", K(ret)); + } else if (is_odps_table && OB_FAIL(databuff_printf(buf, buf_len, pos, "\nPROPERTIES (\n"))) { SHARE_SCHEMA_LOG(WARN, "fail to print FORMAT (", K(ret)); } else if (OB_FAIL(databuff_printf(buf, buf_len, pos, " TYPE = '%s',", ObExternalFileFormat::FORMAT_TYPE_STR[format.format_type_]))) { SHARE_SCHEMA_LOG(WARN, "fail to print TYPE", K(ret)); @@ -5620,6 +5631,30 @@ int ObSchemaPrinter::print_external_table_file_info(const ObTableSchema &table_s databuff_printf(buf, buf_len, pos, "\n NULL_IF = (%.*s),", origin_format.origin_null_if_str_.length(), origin_format.origin_null_if_str_.ptr()))) { SHARE_SCHEMA_LOG(WARN, "fail to print NULL_IF", K(ret)); } + } else if (OB_SUCC(ret) && ObExternalFileFormat::ODPS_FORMAT == format.format_type_) { + const ObODPSGeneralFormat &odps = format.odps_format_; + ObString scret_str("********"); + if (OB_FAIL(databuff_printf(buf, buf_len, pos, "\n %s = '%.*s',", ObODPSGeneralFormat::OPTION_NAMES[0], odps.access_type_.length(), odps.access_type_.ptr()))) { + SHARE_SCHEMA_LOG(WARN, "fail to print ODPS_INFO", K(ret)); + } else if (!odps.access_id_.empty() && OB_FAIL(databuff_printf(buf, buf_len, pos, "\n %s = '%.*s',", ObODPSGeneralFormat::OPTION_NAMES[1], 
scret_str.length(), scret_str.ptr()))) { + SHARE_SCHEMA_LOG(WARN, "fail to print ODPS_INFO", K(ret)); + } else if (!odps.access_key_.empty() && OB_FAIL(databuff_printf(buf, buf_len, pos, "\n %s = '%.*s',", ObODPSGeneralFormat::OPTION_NAMES[2], scret_str.length(), scret_str.ptr()))) { + SHARE_SCHEMA_LOG(WARN, "fail to print ODPS_INFO", K(ret)); + } else if (!odps.sts_token_.empty() && OB_FAIL(databuff_printf(buf, buf_len, pos, "\n %s = '%.*s',", ObODPSGeneralFormat::OPTION_NAMES[3], scret_str.length(), scret_str.ptr()))) { + SHARE_SCHEMA_LOG(WARN, "fail to print ODPS_INFO", K(ret)); + } else if (OB_FAIL(databuff_printf(buf, buf_len, pos, "\n %s = '%.*s',", ObODPSGeneralFormat::OPTION_NAMES[4], odps.endpoint_.length(), odps.endpoint_.ptr()))) { + SHARE_SCHEMA_LOG(WARN, "fail to print ODPS_INFO", K(ret)); + } else if (OB_FAIL(databuff_printf(buf, buf_len, pos, "\n %s = '%.*s',", ObODPSGeneralFormat::OPTION_NAMES[5], odps.project_.length(), odps.project_.ptr()))) { + SHARE_SCHEMA_LOG(WARN, "fail to print ODPS_INFO", K(ret)); + } else if (OB_FAIL(databuff_printf(buf, buf_len, pos, "\n %s = '%.*s',", ObODPSGeneralFormat::OPTION_NAMES[6], odps.schema_.length(), odps.schema_.ptr()))) { + SHARE_SCHEMA_LOG(WARN, "fail to print ODPS_INFO", K(ret)); + } else if (OB_FAIL(databuff_printf(buf, buf_len, pos, "\n %s = '%.*s',", ObODPSGeneralFormat::OPTION_NAMES[7], odps.table_.length(), odps.table_.ptr()))) { + SHARE_SCHEMA_LOG(WARN, "fail to print ODPS_INFO", K(ret)); + } else if (OB_FAIL(databuff_printf(buf, buf_len, pos, "\n %s = '%.*s',", ObODPSGeneralFormat::OPTION_NAMES[8], odps.quota_.length(), odps.quota_.ptr()))) { + SHARE_SCHEMA_LOG(WARN, "fail to print ODPS_INFO", K(ret)); + } else if (OB_FAIL(databuff_printf(buf, buf_len, pos, "\n %s = '%.*s',", ObODPSGeneralFormat::OPTION_NAMES[9], odps.compression_code_.length(), odps.compression_code_.ptr()))) { + SHARE_SCHEMA_LOG(WARN, "fail to print ODPS_INFO", K(ret)); + } } if (OB_SUCC(ret)) { --pos; diff --git 
a/src/share/schema/ob_schema_retrieve_utils.ipp b/src/share/schema/ob_schema_retrieve_utils.ipp index e58539ea69..2e1a60af36 100644 --- a/src/share/schema/ob_schema_retrieve_utils.ipp +++ b/src/share/schema/ob_schema_retrieve_utils.ipp @@ -1503,6 +1503,8 @@ int ObSchemaRetrieveUtils::fill_table_schema( bool, true, true/*ignore_column_error*/, false); EXTRACT_INT_FIELD_TO_CLASS_MYSQL_WITH_DEFAULT_VALUE(result, auto_increment_cache_size, table_schema, int64_t, true, true, 0); + EXTRACT_VARCHAR_FIELD_TO_CLASS_MYSQL_WITH_DEFAULT_VALUE( + result, external_properties, table_schema, true/*skip null*/, true/*ignore column error*/, empty_str); if (OB_SUCC(ret) && table_schema.is_materialized_view()) { bool skip_null_error = true; bool skip_column_error = true; diff --git a/src/share/schema/ob_schema_service.cpp b/src/share/schema/ob_schema_service.cpp index 7561595016..7ea59b53fe 100644 --- a/src/share/schema/ob_schema_service.cpp +++ b/src/share/schema/ob_schema_service.cpp @@ -424,6 +424,8 @@ int AlterTableSchema::assign(const ObTableSchema &src_schema) LOG_WARN("deep copy external_file_format failed", K(ret)); } else if (OB_FAIL(deep_copy_str(src_schema.external_file_pattern_, external_file_pattern_))) { LOG_WARN("deep copy external_file_pattern failed", K(ret)); + } else if (OB_FAIL(deep_copy_str(src_schema.external_properties_, external_properties_))) { + LOG_WARN("deep copy external_properties failed", K(ret)); } //view schema diff --git a/src/share/schema/ob_table_schema.cpp b/src/share/schema/ob_table_schema.cpp index cbb8eb68c0..6e61286a68 100644 --- a/src/share/schema/ob_table_schema.cpp +++ b/src/share/schema/ob_table_schema.cpp @@ -1529,7 +1529,6 @@ int ObTableSchema::assign(const ObTableSchema &src_schema) } else if (OB_FAIL(deep_copy_str(src_schema.external_properties_, external_properties_))) { LOG_WARN("deep copy external_properties failed", K(ret)); } - //view schema if (OB_SUCC(ret)) { view_schema_ = src_schema.view_schema_; diff --git 
a/src/share/schema/ob_table_sql_service.cpp b/src/share/schema/ob_table_sql_service.cpp index b72351697d..ea60525aa5 100644 --- a/src/share/schema/ob_table_sql_service.cpp +++ b/src/share/schema/ob_table_sql_service.cpp @@ -2972,6 +2972,7 @@ int ObTableSqlService::gen_table_dml( && (table.is_external_table() || !table.get_external_file_location().empty() || !table.get_external_file_format().empty() + || !table.get_external_properties().empty() || !table.get_external_file_location_access_info().empty() || !table.get_external_file_pattern().empty()))) { ret = OB_NOT_SUPPORTED; @@ -3152,6 +3153,8 @@ int ObTableSqlService::gen_table_dml( && OB_FAIL(dml.add_column("column_store", table.is_column_store_supported()))) || ((data_version >= DATA_VERSION_4_3_2_0 || (data_version < DATA_VERSION_4_3_0_0 && data_version >= MOCK_DATA_VERSION_4_2_3_0)) && OB_FAIL(dml.add_column("auto_increment_cache_size", table.get_auto_increment_cache_size()))) + || (data_version >= DATA_VERSION_4_3_2_1 && + OB_FAIL(dml.add_column("external_properties", ObHexEscapeSqlStr(table.get_external_properties())))) || (data_version >= DATA_VERSION_4_3_3_0 && OB_FAIL(dml.add_column("local_session_vars", ObHexEscapeSqlStr(local_session_var)))) ) { diff --git a/src/sql/CMakeLists.txt b/src/sql/CMakeLists.txt index 8371ef900f..80f9a10964 100644 --- a/src/sql/CMakeLists.txt +++ b/src/sql/CMakeLists.txt @@ -890,8 +890,9 @@ ob_set_subtarget(ob_sql engine_table engine/table/ob_index_lookup_op_impl.cpp engine/table/ob_table_scan_with_index_back_op.cpp engine/table/ob_external_table_access_service.cpp - engine/table/ob_orc_table_row_iter.cpp + # engine/table/ob_orc_table_row_iter.cpp engine/table/ob_parquet_table_row_iter.cpp + engine/table/ob_odps_table_row_iter.cpp ) ob_set_subtarget(ob_sql executor @@ -1399,8 +1400,24 @@ add_library(ob_sql_static STATIC EXCLUDE_FROM_ALL) +if(OB_BUILD_OPENSOURCE) target_link_libraries(ob_sql_static - PUBLIC ob_sql ob_sql_server_parser_static) + PUBLIC ob_sql + 
ob_sql_server_parser_static) +else() +target_link_libraries(ob_sql_static + PUBLIC ob_sql + ob_sql_server_parser_static + ${DEP_DIR}/lib64/libprotobuf.a + ${DEP_DIR}/lib64/libzstd.a + ${DEP_DIR}/lib64/libbrotlidec.a + ${DEP_DIR}/lib64/libbrotlienc.a + ${DEP_DIR}/lib64/libbrotlicommon.a + ${DEP_DIR}/lib64/libbz2.a + ${DEP_DIR}/lib64/libodps_sdk_tunnel_static.a + ${DEP_DIR}/lib64/libodps_sdk_common_static.a + ${DEP_DIR}/lib64/libodps_sdk_core_static.a) +endif() execute_process( COMMAND bash gen_parser.sh diff --git a/src/sql/code_generator/ob_static_engine_cg.cpp b/src/sql/code_generator/ob_static_engine_cg.cpp index 5fe678e2c3..95c1eaffc7 100644 --- a/src/sql/code_generator/ob_static_engine_cg.cpp +++ b/src/sql/code_generator/ob_static_engine_cg.cpp @@ -7999,6 +7999,10 @@ int ObStaticEngineCG::generate_spec(ObLogSelectInto &op, ObSelectIntoSpec &spec, LOG_WARN("fail to set closed cht", K(op.get_closed_cht()), K(ret)); } else if (OB_FAIL(deep_copy_obj(alloc, op.get_escaped_cht(), spec.escaped_cht_))) { LOG_WARN("fail to set escaped cht", K(op.get_escaped_cht()), K(ret)); + } else if (OB_FAIL(spec.external_properties_.store_str(op.get_external_properties()))) { + LOG_WARN("fail to set external properties", K(op.get_external_properties()), K(ret)); + } else if (OB_FAIL(spec.external_partition_.store_str(op.get_external_partition()))) { + LOG_WARN("fail to set external partition", K(op.get_external_partition()), K(ret)); } else if (OB_FAIL(spec.user_vars_.init(op.get_user_vars().count()))) { LOG_WARN("init fixed array failed", K(ret), K(op.get_user_vars().count())); } else if (OB_FAIL(spec.select_exprs_.init(op.get_select_exprs().count()))) { @@ -8025,13 +8029,25 @@ int ObStaticEngineCG::generate_spec(ObLogSelectInto &op, ObSelectIntoSpec &spec, LOG_WARN("failed to push back expr", K(ret)); } } + if (OB_SUCC(ret)) { + ObExpr *rt_expr = nullptr; + const ObRawExpr* file_partition_expr = op.get_file_partition_expr(); + if (file_partition_expr == NULL) { + } else if 
(OB_FAIL(generate_rt_expr(*file_partition_expr, rt_expr))) { + LOG_WARN("failed to generate rt expr", K(ret)); + } else { + spec.file_partition_expr_ = rt_expr; + } + } if (OB_SUCC(ret)) { spec.into_type_ = op.get_into_type(); spec.is_optional_ = op.get_is_optional(); spec.is_single_ = op.get_is_single(); spec.max_file_size_ = op.get_max_file_size(); + spec.buffer_size_ = op.get_buffer_size(); spec.cs_type_ = op.get_cs_type(); spec.parallel_ = op.get_parallel(); + spec.is_overwrite_ = op.get_is_overwrite(); spec.plan_->need_drive_dml_query_ = true; } } diff --git a/src/sql/code_generator/ob_tsc_cg_service.cpp b/src/sql/code_generator/ob_tsc_cg_service.cpp index 42bb8d0c99..83e36724f4 100644 --- a/src/sql/code_generator/ob_tsc_cg_service.cpp +++ b/src/sql/code_generator/ob_tsc_cg_service.cpp @@ -65,7 +65,13 @@ int ObTscCgService::generate_tsc_ctdef(ObLogTableScan &op, ObTableScanCtDef &tsc LOG_WARN("fail to check location access priv", K(ret)); } else { scan_ctdef.is_external_table_ = true; - if (OB_FAIL(scan_ctdef.external_file_format_str_.store_str(table_schema->get_external_file_format()))) { + const ObString &format_or_properties = table_schema->get_external_file_format().empty() ? 
+ table_schema->get_external_properties() : + table_schema->get_external_file_format(); + if (format_or_properties.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("format_or_properties is empty", K(ret)); + } else if (OB_FAIL(scan_ctdef.external_file_format_str_.store_str(format_or_properties))) { LOG_WARN("fail to set string", K(ret)); } else if (OB_FAIL(scan_ctdef.external_file_location_.store_str(table_schema->get_external_file_location()))) { LOG_WARN("fail to set string", K(ret)); diff --git a/src/sql/engine/basic/ob_select_into_op.cpp b/src/sql/engine/basic/ob_select_into_op.cpp index b16d1d0915..5b02c4096f 100644 --- a/src/sql/engine/basic/ob_select_into_op.cpp +++ b/src/sql/engine/basic/ob_select_into_op.cpp @@ -20,6 +20,7 @@ #include "share/ob_device_manager.h" #include "sql/resolver/ob_resolver_utils.h" #include "lib/charset/ob_charset_string_helper.h" +#include "sql/engine/px/ob_px_sqc_handler.h" namespace oceanbase { @@ -35,6 +36,65 @@ OB_SERIALIZE_MEMBER((ObSelectIntoSpec, ObOpSpec), into_type_, user_vars_, outfil int ObSelectIntoOp::inner_open() +{ + int ret = OB_SUCCESS; + bool need_check = false; + ObPhysicalPlanCtx *phy_plan_ctx = NULL; + ObSQLSessionInfo *session = NULL; + if (OB_ISNULL(session = ctx_.get_my_session())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get session failed", K(ret)); + } else { + // since we call get_next_row in inner_open, we have to set opened_ first in avoid to a infinite loop. 
+ opened_ = true; + if (!lib::is_oracle_mode()) { + if (OB_FAIL(session->get_sql_select_limit(top_limit_cnt_))) { + LOG_WARN("fail tp get sql select limit", K(ret)); + } + } + } + if (OB_SUCC(ret) && !MY_SPEC.external_properties_.str_.empty()) { + if (OB_FAIL(external_properties_.load_from_string(MY_SPEC.external_properties_.str_, + ctx_.get_allocator()))) { + LOG_WARN("failed to load external properties", K(ret)); + } else { + format_type_ = external_properties_.format_type_; + } + } + if (OB_SUCC(ret)) { + switch (format_type_) + { + case ObExternalFileFormat::FormatType::CSV_FORMAT: + { + if (OB_FAIL(init_csv_env())) { + LOG_WARN("failed to init csv env", K(ret)); + } + break; + } + case ObExternalFileFormat::FormatType::ODPS_FORMAT: + { +#ifdef OB_BUILD_CPP_ODPS + if (OB_FAIL(init_odps_tunnel())) { + LOG_WARN("failed to init odps tunnel", K(ret)); + } +#else + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "external odps table"); + LOG_WARN("not support to write odps in opensource", K(ret)); +#endif + break; + } + default: + { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not support select into type", K(format_type_)); + } + } + } + return ret; +} + +int ObSelectIntoOp::init_csv_env() { int ret = OB_SUCCESS; file_name_ = MY_SPEC.outfile_name_; @@ -44,18 +104,19 @@ int ObSelectIntoOp::inner_open() char_enclose_ = has_enclose_ ? MY_SPEC.closed_cht_.get_char().ptr()[0] : 0; has_escape_ = MY_SPEC.escaped_cht_.get_val_len() > 0; char_escape_ = has_escape_ ? MY_SPEC.escaped_cht_.get_char().ptr()[0] : 0; - ObSelectIntoOpInput *input = static_cast<ObSelectIntoOpInput *>(input_); - int64_t row_count = 0; + do_partition_ = MY_SPEC.file_partition_expr_ == NULL ? 
false : true; bool need_check = false; ObPhysicalPlanCtx *phy_plan_ctx = NULL; ObSQLSessionInfo *session = NULL; const ObItemType into_type = MY_SPEC.into_type_; - if (OB_ISNULL(phy_plan_ctx = ctx_.get_physical_plan_ctx())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get phy_plan_ctx failed", K(ret)); - } else if (OB_ISNULL(session = ctx_.get_my_session())) { + if (OB_ISNULL(session = ctx_.get_my_session())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get session failed", K(ret)); + } else if (OB_FAIL(check_has_lob_or_json())) { + LOG_WARN("failed to check has lob", K(ret)); + } else if (OB_ISNULL(phy_plan_ctx = ctx_.get_physical_plan_ctx())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get phy_plan_ctx failed", K(ret)); } else if (OB_FAIL(ObSQLUtils::get_param_value(MY_SPEC.outfile_name_, phy_plan_ctx->get_param_store(), file_name_, @@ -73,124 +134,135 @@ int ObSelectIntoOp::inner_open() LOG_WARN("get param value failed", K(ret)); } else if (OB_FAIL(prepare_escape_printer())) { LOG_WARN("failed to calc escape info", K(ret)); - } else if (OB_FAIL(check_has_lob_or_json())) { - LOG_WARN("failed to check has lob", K(ret)); } else { print_params_.tz_info_ = session->get_timezone_info(); print_params_.use_memcpy_ = true; print_params_.binary_string_print_hex_ = lib::is_oracle_mode(); print_params_.cs_type_ = MY_SPEC.cs_type_; - // since we call get_next_row in inner_open, we have to set opened_ first in avoid to a infinite loop. - opened_ = true; - if (!lib::is_oracle_mode()) { - if (OB_FAIL(session->get_sql_select_limit(top_limit_cnt_))) { - LOG_WARN("fail tp get sql select limit", K(ret)); - } - } } //create buffer - if (OB_SUCC(ret)) { - const int64_t buf_len = has_lob_ ? 
(5 * OB_MALLOC_BIG_BLOCK_SIZE) : OB_MALLOC_BIG_BLOCK_SIZE; - char *buf = NULL; - if (OB_ISNULL(buf = static_cast<char *>(ctx_.get_allocator().alloc(buf_len)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("fail to allocate buffer", K(ret), K(buf_len)); - } else { - data_writer_.init(buf, buf_len); - } - if (has_json_ && has_escape_) { - const int64_t json_buf_len = OB_MALLOC_MIDDLE_BLOCK_SIZE; - char *json_buf = NULL; - if (OB_ISNULL(json_buf = static_cast<char *>(ctx_.get_allocator().alloc(json_buf_len)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("fail to allocate buffer", K(ret), K(json_buf_len)); - } else { - data_writer_.init_json_buf(json_buf, json_buf_len); - } - } + if (OB_SUCC(ret) && T_INTO_OUTFILE == into_type && OB_FAIL(create_shared_buffer_for_data_writer())) { + LOG_WARN("failed to create buffer for data writer", K(ret)); } + //calc first data_writer.url_ and basic_url_ if (OB_SUCC(ret)) { ObString path = file_name_.get_varchar().trim(); - ObSqlString file_name_with_suffix; - ObString input_file_name; file_location_ = path.prefix_match_ci(OB_OSS_PREFIX) ? 
IntoFileLocation::REMOTE_OSS : IntoFileLocation::SERVER_DISK; - if (T_INTO_OUTFILE == into_type && !MY_SPEC.is_single_ && OB_FAIL(calc_first_file_path(path))) { + if (file_location_ == IntoFileLocation::SERVER_DISK && do_partition_) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not support partition option on server disk", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "partition option on server disk"); + } else if (T_INTO_OUTFILE == into_type && !MY_SPEC.is_single_ && OB_FAIL(calc_first_file_path(path))) { LOG_WARN("failed to calc first file path", K(ret)); } else if (file_location_ == IntoFileLocation::REMOTE_OSS) { ObString temp_url = path.split_on('?'); temp_url.trim(); ObString storage_info; - if (OB_FAIL(ob_write_string(ctx_.get_allocator(), temp_url, url_, true))) { - LOG_WARN("fail to append string", K(ret)); + if (OB_FAIL(ob_write_string(ctx_.get_allocator(), temp_url, basic_url_, true))) { + LOG_WARN("failed to append string", K(ret)); } else if (OB_FAIL(ob_write_string(ctx_.get_allocator(), path, storage_info, true))) { - LOG_WARN("fail to append string", K(ret)); - } else if (OB_FAIL(access_info_.set(url_.ptr(), storage_info.ptr()))) { - LOG_WARN("fail to set access info", K(ret), K(path)); + LOG_WARN("failed to append string", K(ret)); + } else if (OB_FAIL(access_info_.set(basic_url_.ptr(), storage_info.ptr()))) { + LOG_WARN("failed to set access info", K(ret), K(path)); } //init device handle if (OB_SUCC(ret)) { ObBackupIoAdapter util; - if (url_.empty() || !access_info_.is_valid()) { + if (basic_url_.empty() || !access_info_.is_valid()) { ret = OB_FILE_NOT_EXIST; - LOG_WARN("file path not exist", K(ret), K(url_), K(access_info_)); - } else if (OB_FAIL(util.get_and_init_device(device_handle_, &access_info_, url_))) { - LOG_WARN("fail to init device", K(ret), K(url_), K(access_info_)); + LOG_WARN("file path not exist", K(ret), K(basic_url_), K(access_info_)); + } else if (OB_FAIL(util.get_and_init_device(device_handle_, &access_info_, basic_url_))) { + 
LOG_WARN("failed to init device", K(ret), K(basic_url_), K(access_info_)); } } } else { // IntoFileLocation::SERVER_DISK - if (OB_FAIL(ob_write_string(ctx_.get_allocator(), path, url_, true))) { - LOG_WARN("fail to write string", K(ret)); + if (OB_FAIL(ob_write_string(ctx_.get_allocator(), path, basic_url_, true))) { + LOG_WARN("failed to write string", K(ret)); } } } - if (OB_SUCC(ret) - && (T_INTO_OUTFILE == into_type || T_INTO_DUMPFILE == into_type) - && IntoFileLocation::SERVER_DISK == file_location_) { - ObString file_name = url_; - ObString file_path = file_name.split_on(file_name.reverse_find('/')); - char full_path_buf[PATH_MAX+1]; - char *actual_path = nullptr; - ObSqlString sql_str; - if (OB_FAIL(sql_str.append(file_path.empty() ? "." : file_path))) { - LOG_WARN("fail to append string", K(ret)); - } else if (OB_ISNULL(actual_path = realpath(sql_str.ptr(), full_path_buf))) { - ret = OB_FILE_NOT_EXIST; - LOG_WARN("file not exist", K(ret), K(sql_str)); - } - if (OB_SUCC(ret)) { - ObString secure_file_priv; - int64_t tenant_id = MTL_ID(); - if (OB_FAIL(ObSchemaUtils::get_tenant_varchar_variable( - tenant_id, - SYS_VAR_SECURE_FILE_PRIV, - ctx_.get_allocator(), - secure_file_priv))) { - LOG_WARN("fail get tenant variable", K(tenant_id), K(secure_file_priv), K(ret)); - } else if (OB_FAIL(ObResolverUtils::check_secure_path(secure_file_priv, actual_path))) { - LOG_WARN("failed to check secure path", K(ret), K(secure_file_priv)); - if (OB_ERR_NO_PRIVILEGE == ret) { - ret = OB_ERR_NO_PRIV_DIRECT_PATH_ACCESS; - LOG_ERROR("fail to check secure path", K(ret), K(secure_file_priv), K(session->get_is_deserialized())); - } - } - } + if (OB_SUCC(ret) && (T_INTO_OUTFILE == into_type || T_INTO_DUMPFILE == into_type) + && IntoFileLocation::SERVER_DISK == file_location_ && OB_FAIL(check_secure_file_path(basic_url_))) { + LOG_WARN("failed to check secure file path", K(ret)); + } + if (OB_SUCC(ret) && do_partition_) { + partition_map_.create(128, ObLabel("SelectInto"), 
ObLabel("SelectInto"), MTL_ID()); } return ret; } +#ifdef OB_BUILD_CPP_ODPS +int ObSelectIntoOp::init_odps_tunnel() +{ + int ret = OB_SUCCESS; + bool is_in_px = (NULL != ctx_.get_sqc_handler()); + ObSelectIntoOpInput *input = static_cast<ObSelectIntoOpInput *>(input_); + if (OB_ISNULL(input)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("input is unexpected null", K(ret)); + } else if (is_in_px) { + ObOdpsPartitionDownloaderMgr &odps_mgr = ctx_.get_sqc_handler()->get_sqc_ctx().gi_pump_.get_odps_mgr(); + if (OB_FAIL(odps_mgr.get_odps_uploader(input->task_id_, upload_, record_writer_))) { + LOG_WARN("failed to get odps uploader", K(ret)); + } + } else if (OB_FAIL(external_properties_.odps_format_.decrypt())) { + LOG_WARN("failed to decrypt odps format", K(ret)); + } else { + ObMallocHookAttrGuard guard(ObMemAttr(MTL_ID(), "IntoOdps")); + try { + if (OB_FAIL(ObOdpsPartitionDownloaderMgr::create_upload_session(external_properties_.odps_format_, + MY_SPEC.external_partition_.str_, + MY_SPEC.is_overwrite_, + upload_))) { + LOG_WARN("failed to create upload session", K(ret)); + } else if (OB_UNLIKELY(!(record_writer_ = upload_->OpenWriter(block_id_, true)))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexcepted null ptr", K(ret)); + } + } catch (apsara::odps::sdk::OdpsException& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when init odps tunnel", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (const std::exception& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when init odps tunnel", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (...) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when init odps tunnel", K(ret)); + } + } + } + if (OB_FAIL(ret)) { + need_commit_ = false; + } + return ret; +} +#endif + int ObSelectIntoOp::inner_get_next_row() { int ret = 0 == top_limit_cnt_ ? 
OB_ITER_END : OB_SUCCESS; int64_t row_count = 0; const ObItemType into_type = MY_SPEC.into_type_; ObPhysicalPlanCtx *phy_plan_ctx = NULL; + ObIOBufferWriter *data_writer = NULL; if (OB_ISNULL(phy_plan_ctx = ctx_.get_physical_plan_ctx())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get phy_plan_ctx failed", K(ret)); - } else if (T_INTO_OUTFILE == into_type && MY_SPEC.is_single_ && OB_FAIL(open_file())) { - LOG_WARN("failed to open file", K(ret)); + } + //when do_partition is false, create the only data_writer here + if (OB_SUCC(ret) && ObExternalFileFormat::FormatType::CSV_FORMAT == format_type_ + && T_INTO_VARIABLES != into_type && !do_partition_ + && OB_FAIL(create_the_only_data_writer(data_writer))) { + LOG_WARN("failed to create the only data writer", K(ret)); } while (OB_SUCC(ret) && row_count < top_limit_cnt_) { clear_evaluated_flag(); @@ -201,22 +273,33 @@ int ObSelectIntoOp::inner_get_next_row() } } else { ++row_count; - if (T_INTO_VARIABLES == into_type) { + if (ObExternalFileFormat::FormatType::ODPS_FORMAT == format_type_) { +#ifdef OB_BUILD_CPP_ODPS + if (OB_FAIL(into_odps())) { + LOG_WARN("into odps failed", K(ret)); + } +#else + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "external odps table"); + LOG_WARN("not support to write odps in opensource", K(ret)); +#endif + } else if (T_INTO_VARIABLES == into_type) { if (OB_FAIL(into_varlist())) { LOG_WARN("into varlist failed", K(ret)); } } else if (T_INTO_OUTFILE == into_type) { - if (OB_FAIL(into_outfile())) { + if (OB_FAIL(into_outfile(data_writer))) { LOG_WARN("into outfile failed", K(ret)); } } else { - if (OB_FAIL(into_dumpfile())) { + if (OB_FAIL(into_dumpfile(data_writer))) { LOG_WARN("into dumpfile failed", K(ret)); } } } if (OB_SUCC(ret) || OB_ITER_END == ret) { // if into user variables or into dumpfile, must be one row - if ((T_INTO_VARIABLES == into_type || T_INTO_DUMPFILE == into_type) && row_count > 1) { + if (ObExternalFileFormat::FormatType::CSV_FORMAT == format_type_ + && 
(T_INTO_VARIABLES == into_type || T_INTO_DUMPFILE == into_type) && row_count > 1) { ret = OB_ERR_TOO_MANY_ROWS; LOG_WARN("more than one row for into variables or into dumpfile", K(ret), K(row_count)); } @@ -225,6 +308,9 @@ int ObSelectIntoOp::inner_get_next_row() if (OB_ITER_END == ret || OB_SUCC(ret)) { // set affected rows phy_plan_ctx->set_affected_rows(row_count); } + if (OB_FAIL(ret) && OB_ITER_END != ret) { + need_commit_ = false; + } return ret; } @@ -236,14 +322,20 @@ int ObSelectIntoOp::inner_get_next_batch(const int64_t max_row_cnt) int64_t row_count = 0; const ObItemType into_type = MY_SPEC.into_type_; ObPhysicalPlanCtx *phy_plan_ctx = NULL; + ObIOBufferWriter *data_writer = NULL; + bool stop_loop = false; + bool is_iter_end = false; + LOG_TRACE("debug select into get next batch begin"); if (OB_ISNULL(phy_plan_ctx = ctx_.get_physical_plan_ctx())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get phy_plan_ctx failed", K(ret)); - } else if (T_INTO_OUTFILE == into_type && MY_SPEC.is_single_ && OB_FAIL(open_file())) { - LOG_WARN("failed to open file", K(ret)); } - bool stop_loop = false; - bool is_iter_end = false; + //when do_partition is false, create the only data_writer here + if (OB_SUCC(ret) && ObExternalFileFormat::FormatType::CSV_FORMAT == format_type_ + && T_INTO_VARIABLES != into_type && !do_partition_ + && OB_FAIL(create_the_only_data_writer(data_writer))) { + LOG_WARN("failed to create the only data writer", K(ret)); + } if (0 == top_limit_cnt_) { brs_.size_ = 0; brs_.end_ = true; @@ -261,8 +353,18 @@ int ObSelectIntoOp::inner_get_next_batch(const int64_t max_row_cnt) if (brs_.size_ > 0) { brs_.skip_->deep_copy(*(child_brs->skip_), brs_.size_); row_count += brs_.size_ - brs_.skip_->accumulate_bit_cnt(brs_.size_); - if (T_INTO_OUTFILE == into_type) { - if (OB_FAIL(into_outfile_batch(brs_))) { + if (ObExternalFileFormat::FormatType::ODPS_FORMAT == format_type_) { +#ifdef OB_BUILD_CPP_ODPS + if (OB_FAIL(into_odps_batch(brs_))) { + LOG_WARN("into odps batch 
failed", K(ret)); + } +#else + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "external odps table"); + LOG_WARN("not support to write odps in opensource", K(ret)); +#endif + } else if (T_INTO_OUTFILE == into_type) { + if (OB_FAIL(into_outfile_batch(brs_, data_writer))) { LOG_WARN("into outfile batch failed", K(ret)); } } else { @@ -278,7 +380,7 @@ int ObSelectIntoOp::inner_get_next_batch(const int64_t max_row_cnt) LOG_WARN("into varlist failed", K(ret)); } } else { - if (OB_FAIL(into_dumpfile())) { + if (OB_FAIL(into_dumpfile(data_writer))) { LOG_WARN("into dumpfile failed", K(ret)); } } @@ -299,6 +401,10 @@ int ObSelectIntoOp::inner_get_next_batch(const int64_t max_row_cnt) if (OB_SUCC(ret)) { // set affected rows phy_plan_ctx->set_affected_rows(row_count); } + if (OB_FAIL(ret)) { + need_commit_ = false; + } + LOG_TRACE("debug select into get next batch end"); return ret; } @@ -311,10 +417,29 @@ int ObSelectIntoOp::inner_rescan() int ObSelectIntoOp::inner_close() { int ret = OB_SUCCESS; - if (!has_lob_ && OB_FAIL(data_writer_.flush(get_flush_function()))) { + ObIOBufferWriter *data_writer = NULL; + if (ObExternalFileFormat::FormatType::ODPS_FORMAT == format_type_) { +#ifdef OB_BUILD_CPP_ODPS + if (OB_FAIL(odps_commit_upload())) { + LOG_WARN("failed to commit upload", K(ret)); + } +#else + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "external odps table"); + LOG_WARN("not support to write odps in opensource", K(ret)); +#endif + } else if (do_partition_) { + for (ObPartitionWriterMap::iterator iter = partition_map_.begin(); + OB_SUCC(ret) && iter != partition_map_.end(); iter++) { + if (OB_ISNULL(data_writer = iter->second)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("data writer is unexpected null", K(ret)); + } else if (OB_FAIL(flush_buf(*data_writer))) { + LOG_WARN("failed to flush buffer", K(ret)); + } + } + } else if (OB_NOT_NULL(data_writer_) && OB_FAIL(flush_buf(*data_writer_))) { LOG_WARN("failed to flush buffer", K(ret)); - } else 
if (has_lob_ && OB_FAIL(data_writer_.flush_all_for_lob(get_flush_function()))) { - LOG_WARN("failed to flush buffer for lob", K(ret)); } return ret; } @@ -371,7 +496,7 @@ int ObSelectIntoOp::get_row_str(const int64_t buf_len, return ret; } -int ObSelectIntoOp::open_file() +int ObSelectIntoOp::open_file(ObIOBufferWriter &data_writer) { int ret = OB_SUCCESS; if (IntoFileLocation::REMOTE_OSS == file_location_) { @@ -381,21 +506,21 @@ int ObSelectIntoOp::open_file() iod_opts.opts_ = &opt; iod_opts.opt_cnt_ = 1; bool is_exist = false; - if (OB_FAIL(device_handle_->exist(url_.ptr(), is_exist))) { - LOG_WARN("failed to check file exist", K(ret), K(url_)); + if (OB_FAIL(device_handle_->exist(data_writer.url_.ptr(), is_exist))) { + LOG_WARN("failed to check file exist", K(ret), K(data_writer.url_)); } else if (is_exist) { ret = OB_FILE_ALREADY_EXIST; - LOG_WARN("file already exist", K(ret), K(url_)); - } else if (OB_FAIL(device_handle_->open(url_.ptr(), -1, 0, fd_, &iod_opts))) { + LOG_WARN("file already exist", K(ret), K(data_writer.url_)); + } else if (OB_FAIL(device_handle_->open(data_writer.url_.ptr(), -1, 0, data_writer.fd_, &iod_opts))) { LOG_WARN("failed to open file", K(ret)); } else { - is_file_opened_ = true; + data_writer.is_file_opened_ = true; } } else if (IntoFileLocation::SERVER_DISK == file_location_) { - if (OB_FAIL(file_appender_.create(url_, true))) { - LOG_WARN("failed to create file", K(ret), K(url_)); + if (OB_FAIL(data_writer.file_appender_.create(data_writer.url_, true))) { + LOG_WARN("failed to create file", K(ret), K(data_writer.url_)); } else { - is_file_opened_ = true; + data_writer.is_file_opened_ = true; } } else { ret = OB_ERR_UNEXPECTED; @@ -417,17 +542,17 @@ int ObSelectIntoOp::calc_first_file_path(ObString &path) LOG_WARN("get unexpected path or input is null", K(ret)); } else { if (input_file_name.ptr()[input_file_name.length() - 1] == '/'){ - file_name_with_suffix.append_fmt("%sdata", to_cstring(input_file_name)); + 
file_name_with_suffix.append_fmt("%.*sdata", input_file_name.length(), input_file_name.ptr()); } else { - file_name_with_suffix.append_fmt("%s", to_cstring(input_file_name)); + file_name_with_suffix.append_fmt("%.*s", input_file_name.length(), input_file_name.ptr()); } if (MY_SPEC.parallel_ > 1) { - file_name_with_suffix.append_fmt("_%ld_%ld_%ld", input->sqc_id_, input->task_id_, split_file_id_); + file_name_with_suffix.append_fmt("_%ld_%ld_%d", input->sqc_id_, input->task_id_, 0); } else { - file_name_with_suffix.append_fmt("_%ld", split_file_id_); + file_name_with_suffix.append_fmt("_%d", 0); } if (file_location_ == IntoFileLocation::REMOTE_OSS) { - file_name_with_suffix.append_fmt("?%s", to_cstring(path)); + file_name_with_suffix.append_fmt("?%.*s", path.length(), path.ptr()); } if (OB_FAIL(ob_write_string(ctx_.get_allocator(), file_name_with_suffix.string(), path))) { LOG_WARN("failed to write string", K(ret)); @@ -436,63 +561,111 @@ int ObSelectIntoOp::calc_first_file_path(ObString &path) return ret; } -int ObSelectIntoOp::calc_next_file_path() +int ObSelectIntoOp::calc_next_file_path(ObIOBufferWriter &data_writer) { int ret = OB_SUCCESS; ObSqlString url_with_suffix; ObString file_path; - if (split_file_id_ > 0) { + data_writer.split_file_id_++; + if (data_writer.split_file_id_ > 0) { if (MY_SPEC.is_single_ && IntoFileLocation::REMOTE_OSS == file_location_) { - file_path = (split_file_id_ > 1) ? url_.split_on(url_.reverse_find('.')) : url_; + file_path = (data_writer.split_file_id_ > 1) + ? 
data_writer.url_.split_on(data_writer.url_.reverse_find('.')) + : data_writer.url_; if (OB_FAIL(url_with_suffix.assign(file_path))) { - LOG_WARN("fail to assign string", K(ret)); - } else if (OB_FAIL(url_with_suffix.append_fmt(".extend%ld", split_file_id_))) { - LOG_WARN("fail to append string", K(ret)); + LOG_WARN("failed to assign string", K(ret)); + } else if (OB_FAIL(url_with_suffix.append_fmt(".extend%ld", data_writer.split_file_id_))) { + LOG_WARN("failed to append string", K(ret)); + } + } else if (!MY_SPEC.is_single_) { + file_path = data_writer.url_.split_on(data_writer.url_.reverse_find('_')); + if (OB_FAIL(url_with_suffix.assign(file_path))) { + LOG_WARN("failed to assign string", K(ret)); + } else if (OB_FAIL(url_with_suffix.append_fmt("_%ld", data_writer.split_file_id_))) { + LOG_WARN("failed to append string", K(ret)); } } else { - file_path = url_.split_on(url_.reverse_find('_')); - if (OB_FAIL(url_with_suffix.assign(file_path))) { - LOG_WARN("fail to assign string", K(ret)); - } else if (OB_FAIL(url_with_suffix.append_fmt("_%ld", split_file_id_))) { - LOG_WARN("fail to append string", K(ret)); - } + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected single value", K(ret)); } - if (OB_SUCC(ret) - && OB_FAIL(ob_write_string(ctx_.get_allocator(), url_with_suffix.string(), url_, true))) { - LOG_WARN("fail to write string", K(ret)); + if (OB_SUCC(ret) && OB_FAIL(ob_write_string(ctx_.get_allocator(), + url_with_suffix.string(), + data_writer.url_, true))) { + LOG_WARN("failed to write string", K(ret)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected split file id", K(ret)); + } + return ret; +} + +// 根据传入的partition和basic_url_设置当前data_writer的url_, 每个分区只需要计算一次, 后续只要改split id +int ObSelectIntoOp::calc_file_path_with_partition(ObString partition, ObIOBufferWriter &data_writer) +{ + int ret = OB_SUCCESS; + ObSqlString url_with_partition; + ObString dir_path; + if (OB_FAIL(ob_write_string(ctx_.get_allocator(), basic_url_, 
data_writer.url_))) { + LOG_WARN("failed to write string", K(ret)); + } else { + dir_path = data_writer.url_.split_on(data_writer.url_.reverse_find('/')); + if (OB_FAIL(url_with_partition.assign(dir_path))) { + LOG_WARN("failed to assign string", K(ret)); + } else if (url_with_partition.length() != 0 && OB_FAIL(url_with_partition.append("/"))) { + LOG_WARN("failed to append string", K(ret)); + } else if (partition.length() != 0 && OB_FAIL(url_with_partition.append_fmt("%.*s/", + partition.length(), + partition.ptr()))) { + LOG_WARN("failed to append string", K(ret)); + } else if (partition.length() == 0 && OB_FAIL(url_with_partition.append("__NULL__/"))) { + LOG_WARN("failed to append string", K(ret)); + } else if (OB_FAIL(url_with_partition.append_fmt("%.*s", + data_writer.url_.length(), + data_writer.url_.ptr()))) { + LOG_WARN("failed to append string", K(ret)); + } else if (OB_FAIL(ob_write_string(ctx_.get_allocator(), + url_with_partition.string(), + data_writer.url_, + true))) { + LOG_WARN("failed to write string", K(ret)); } } return ret; } -void ObSelectIntoOp::close_file() +void ObSelectIntoOp::close_file(ObIOBufferWriter &data_writer) { if (IntoFileLocation::SERVER_DISK == file_location_) { - file_appender_.close(); + data_writer.file_appender_.close(); } else { - if (fd_.is_valid()) { - device_handle_->close(fd_); - fd_.reset(); + if (data_writer.fd_.is_valid()) { + device_handle_->close(data_writer.fd_); + data_writer.fd_.reset(); } } - is_file_opened_ = false; + data_writer.is_file_opened_ = false; } -std::function ObSelectIntoOp::get_flush_function() +std::function ObSelectIntoOp::get_flush_function() { - return [this](const char *data, int64_t data_len) -> int + return [this](const char *data, int64_t data_len, ObSelectIntoOp::ObIOBufferWriter *data_writer) -> int { int ret = OB_SUCCESS; - if (!is_file_opened_ && OB_FAIL(open_file())) { - LOG_WARN("failed to open file", K(ret), K(url_)); + if (data == NULL || data_len == 0) { + } else if 
(OB_ISNULL(data_writer)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null data writer", K(ret)); + } else if (!data_writer->is_file_opened_ && OB_FAIL(open_file(*data_writer))) { + LOG_WARN("failed to open file", K(ret), K(data_writer->url_)); } else if (file_location_ == IntoFileLocation::SERVER_DISK) { - if (OB_FAIL(file_appender_.append(data, data_len, false))) { + if (OB_FAIL(data_writer->file_appender_.append(data, data_len, false))) { LOG_WARN("failed to append file", K(ret), K(data_len)); } } else if (file_location_ == IntoFileLocation::REMOTE_OSS) { int64_t write_size = 0; int64_t begin_ts = ObTimeUtility::current_time(); - if (OB_FAIL(device_handle_->write(fd_, data, data_len, write_size))) { + if (OB_FAIL(device_handle_->write(data_writer->fd_, data, data_len, write_size))) { LOG_WARN("failed to write device", K(ret)); } else if (OB_UNLIKELY(write_size != data_len)) { ret = OB_IO_ERROR; @@ -515,129 +688,192 @@ std::function ObSelectIntoOp::get_flush_function() }; } -int ObSelectIntoOp::split_file() +int ObSelectIntoOp::split_file(ObIOBufferWriter &data_writer) { int ret = OB_SUCCESS; - int64_t dummy_pos = 0; - if (OB_FAIL(flush_buf(dummy_pos))) { - LOG_WARN("fail to flush buffer", K(ret)); + if (!use_shared_buf_ && OB_FAIL(flush_buf(data_writer))) { + LOG_WARN("failed to flush buffer", K(ret)); + } else if (has_lob_ && use_shared_buf_ && OB_FAIL(flush_shared_buf(data_writer, get_flush_function()))) { + // 要保证文件中每一行的完整性, 有lob的时候shared buffer里不一定是完整的一行 + // 因此剩下的shared buffer里的内容也要刷到当前文件里, 这种情况下无法严格满足max_file_size的限制 + LOG_WARN("failed to flush shared buffer", K(ret)); } else { - close_file(); + close_file(data_writer); } - - //rename the first file name - /* rename not support for current version - if (OB_SUCC(ret) && 0 == split_file_id_) { - ObSqlString url_old; - ObSqlString url_new; - - if (OB_FAIL(url_old.assign(url_))) { - LOG_WARN("fail to assign string", K(ret)); - } else if (OB_FAIL(url_new.assign(url_))) { - LOG_WARN("fail to assign 
string", K(ret)); - } else if (OB_FAIL(url_new.append_fmt(".part%ld", split_file_id_))) { - LOG_WARN("fail to append format", K(ret)); - } else if (OB_FAIL(device_handle_->rename(url_old.ptr(), url_new.ptr()))) { - LOG_WARN("fail to rename", K(ret)); - } - } - */ - - //create new file - if (OB_SUCC(ret)) { - split_file_id_++; - if (OB_FAIL(calc_next_file_path())) { - LOG_WARN("failed to calculate new file path", K(ret)); - } + if (OB_SUCC(ret) && OB_FAIL(calc_next_file_path(data_writer))) { + LOG_WARN("failed to calculate new file path", K(ret)); } return ret; } -int ObSelectIntoOp::try_split_file() +int ObSelectIntoOp::try_split_file(ObIOBufferWriter &data_writer) { int ret = OB_SUCCESS; - const int64_t MAX_OSS_FILE_SIZE = 5LL * 1024 * 1024 * 1024; //5G int64_t curr_line_len = 0; int64_t curr_bytes = 0; bool has_split = false; - if (!has_lob_ || data_writer_.get_curr_line_len() == 0) { - curr_line_len = data_writer_.get_curr_pos() - data_writer_.get_last_line_pos(); + bool has_use_shared_buf = use_shared_buf_; + if (!has_lob_ || data_writer.get_curr_line_len() == 0) { + curr_line_len = data_writer.get_curr_pos() - data_writer.get_last_line_pos(); } else { - curr_line_len = data_writer_.get_curr_pos() + data_writer_.get_curr_line_len(); + curr_line_len = data_writer.get_curr_pos() + data_writer.get_curr_line_len(); } - curr_bytes = write_bytes_ + curr_line_len; - if (!has_lob_ && data_writer_.get_last_line_pos() == 0) { + curr_bytes = data_writer.get_write_bytes() + curr_line_len; + if (!(has_lob_ && has_use_shared_buf) && data_writer.get_write_bytes() == 0) { } else if ((file_location_ == IntoFileLocation::SERVER_DISK && !MY_SPEC.is_single_ && curr_bytes > MY_SPEC.max_file_size_) || (file_location_ == IntoFileLocation::REMOTE_OSS && ((!MY_SPEC.is_single_ && curr_bytes > min(MY_SPEC.max_file_size_, MAX_OSS_FILE_SIZE)) || (MY_SPEC.is_single_ && curr_bytes > MAX_OSS_FILE_SIZE)))) { - if (OB_FAIL(split_file())) { + if (OB_FAIL(split_file(data_writer))) { 
LOG_WARN("failed to split file", K(ret)); } else { has_split = true; } } if (OB_SUCC(ret)) { - if (!has_lob_) { - write_bytes_ = has_split ? curr_line_len : curr_bytes; + if (has_lob_ && has_use_shared_buf) { + data_writer.set_write_bytes(has_split ? 0 : curr_bytes); + data_writer.reset_curr_line_len(); } else { - write_bytes_ = has_split ? 0 : curr_bytes; - data_writer_.reset_curr_line_len(); + data_writer.set_write_bytes(has_split ? curr_line_len : curr_bytes); } - data_writer_.update_last_line_pos(); + data_writer.update_last_line_pos(); } return ret; } -void ObSelectIntoOp::get_buf(char* &buf, int64_t &buf_len, int64_t &pos, bool is_json) -{ - buf = is_json ? data_writer_.get_json_buf() : data_writer_.get_buf(); - buf_len = is_json ? data_writer_.get_json_buf_len() : data_writer_.get_buf_len(); - pos = is_json ? 0 : data_writer_.get_curr_pos(); -} - -int ObSelectIntoOp::flush_buf(int64_t &pos) +int ObSelectIntoOp::get_buf(char* &buf, int64_t &buf_len, int64_t &pos, ObIOBufferWriter &data_writer) { int ret = OB_SUCCESS; - if (!has_lob_ && OB_FAIL(data_writer_.flush(get_flush_function()))) { - LOG_WARN("failed to flush buffer", K(ret)); - } else if (has_lob_ && OB_FAIL(data_writer_.flush_all_for_lob(get_flush_function()))) { - LOG_WARN("failed to flush buffer for lob", K(ret)); - } else { - pos = data_writer_.get_curr_pos(); + buf = use_shared_buf_ ? get_shared_buf() : data_writer.get_buf(); + buf_len = use_shared_buf_ ? 
get_shared_buf_len() : data_writer.get_buf_len(); + pos = data_writer.get_curr_pos(); + if (OB_ISNULL(buf) && !use_shared_buf_ && OB_FAIL(use_shared_buf(data_writer, buf, buf_len, pos))) { + LOG_WARN("failed to use shared buffer", K(ret)); + } else if (OB_ISNULL(buf)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("buf should not be null", K(ret)); } return ret; } -int ObSelectIntoOp::resize_buf(char* &buf, int64_t &buf_len, int64_t &pos, bool is_json) +int ObSelectIntoOp::flush_buf(ObIOBufferWriter &data_writer) +{ + int ret = OB_SUCCESS; + if (use_shared_buf_) { + // do nothing + } else if (OB_FAIL(data_writer.flush(get_flush_function()))) { + LOG_WARN("failed to flush buffer", K(ret)); + } + return ret; +} + +int ObSelectIntoOp::use_shared_buf(ObIOBufferWriter &data_writer, + char* &buf, + int64_t &buf_len, + int64_t &pos) { + int ret = OB_SUCCESS; + int64_t curr_pos = data_writer.get_curr_pos(); + if (!use_shared_buf_ && data_writer.get_last_line_pos() == 0) { + if (OB_NOT_NULL(data_writer.get_buf()) && curr_pos > 0) { + MEMCPY(shared_buf_, data_writer.get_buf(), curr_pos); + } + use_shared_buf_ = true; + buf = shared_buf_; + buf_len = shared_buf_len_; + pos = curr_pos; + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("last line should be flushed before this line copied", K(ret)); + } + return ret; +} + +int ObSelectIntoOp::resize_buf(char* &buf, + int64_t &buf_len, + int64_t &pos, + int64_t curr_pos, + bool is_json) { int ret = OB_SUCCESS; int64_t new_buf_len = buf_len * 2; char* new_buf = NULL; - int curr_pos = data_writer_.get_curr_pos(); if (OB_ISNULL(new_buf = static_cast(ctx_.get_allocator().alloc(new_buf_len)))) { ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("fail to allocate buffer", K(ret), K(new_buf_len)); + LOG_WARN("failed to allocate buffer", K(ret), K(new_buf_len)); } else if (!is_json) { - data_writer_.init(new_buf, new_buf_len); if (curr_pos > 0) { - MEMCPY(new_buf, buf, curr_pos); + MEMCPY(new_buf, shared_buf_, curr_pos); } + shared_buf_ = new_buf; + 
shared_buf_len_ = new_buf_len; } else { - data_writer_.init_json_buf(new_buf, new_buf_len); + json_buf_ = new_buf; + json_buf_len_ = new_buf_len; } if (OB_SUCC(ret)) { - get_buf(buf, buf_len, pos, is_json); + buf = new_buf; + buf_len = new_buf_len; + pos = is_json ? 0 : curr_pos; } return ret; } -int ObSelectIntoOp::write_obj_to_file(const ObObj &obj, bool need_escape) +int ObSelectIntoOp::resize_or_flush_shared_buf(ObIOBufferWriter &data_writer, + char* &buf, + int64_t &buf_len, + int64_t &pos) +{ + int ret = OB_SUCCESS; + if (!use_shared_buf_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid argument", K(use_shared_buf_), K(ret)); + } else if (has_lob_ && data_writer.get_curr_pos() > 0) { + if (OB_FAIL(flush_shared_buf(data_writer, get_flush_function(), true))) { + LOG_WARN("failed to flush shared buffer", K(ret)); + } else { + pos = 0; + } + } else if (OB_FAIL(resize_buf(buf, buf_len, pos, data_writer.get_curr_pos()))) { + LOG_WARN("failed to resize shared buffer", K(ret)); + } + return ret; +} + +int ObSelectIntoOp::check_buf_sufficient(ObIOBufferWriter &data_writer, + char* &buf, + int64_t &buf_len, + int64_t &pos, + int64_t str_len) +{ + int ret = OB_SUCCESS; + if (buf_len < str_len * 1.1) { + if (OB_FAIL(flush_buf(data_writer))) { + LOG_WARN("failed to flush buffer", K(ret)); + } else if (OB_FAIL(use_shared_buf(data_writer, buf, buf_len, pos))) { + LOG_WARN("failed to use shared buffer", K(ret)); + } + } + return ret; +} + +int ObSelectIntoOp::write_obj_to_file(const ObObj &obj, ObIOBufferWriter &data_writer, bool need_escape) +{ + int ret = OB_SUCCESS; + if ((obj.is_string_type() || obj.is_json()) && need_escape) { + if (OB_FAIL(print_str_or_json_with_escape(obj, data_writer))) { + LOG_WARN("failed to print str or json with escape", K(ret)); + } + } else if (OB_FAIL(print_normal_obj_without_escape(obj, data_writer))) { + LOG_WARN("failed to print normal obj without escape", K(ret)); + } + return ret; +} + +int 
ObSelectIntoOp::print_str_or_json_with_escape(const ObObj &obj, ObIOBufferWriter &data_writer) { int ret = OB_SUCCESS; - int tmp_ret = OB_SUCCESS; - bool print_succ = false; char* buf = NULL; int64_t buf_len = 0; int64_t pos = 0; @@ -647,103 +883,154 @@ int ObSelectIntoOp::write_obj_to_file(const ObObj &obj, bool need_escape) || src_type == CHARSET_INVALID); escape_printer_.need_enclose_ = has_enclose_ && !obj.is_null() && (!MY_SPEC.is_optional_ || obj.is_string_type()); - escape_printer_.do_escape_ = need_escape; + escape_printer_.do_escape_ = true; escape_printer_.print_hex_ = obj.get_collation_type() == CS_TYPE_BINARY && print_params_.binary_string_print_hex_; ObString str_to_escape; - - if ((obj.is_string_type() || obj.is_json()) && need_escape) { - if (obj.is_json()) { - ObObj inrow_obj = obj; - if (obj.is_lob_storage()) { - ObEvalCtx::TempAllocGuard tmp_alloc_g(eval_ctx_); - common::ObArenaAllocator &temp_allocator = tmp_alloc_g.get_allocator(); - if (OB_FAIL(ObTextStringIter::convert_outrow_lob_to_inrow_templob(obj, inrow_obj, NULL, &temp_allocator))) { - LOG_WARN("failed to convert outrow lobs", K(ret), K(obj)); + ObEvalCtx::TempAllocGuard tmp_alloc_g(eval_ctx_); + common::ObArenaAllocator &temp_allocator = tmp_alloc_g.get_allocator(); + if (OB_FAIL(get_buf(escape_printer_.buf_, escape_printer_.buf_len_, escape_printer_.pos_, data_writer))) { + LOG_WARN("failed to get buffer", K(ret)); + } else if (obj.is_json()) { + ObObj inrow_obj = obj; + if (obj.is_lob_storage() + && OB_FAIL(ObTextStringIter::convert_outrow_lob_to_inrow_templob(obj, inrow_obj, NULL, &temp_allocator))) { + LOG_WARN("failed to convert outrow lobs", K(ret), K(obj)); + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(print_json_to_json_buf(inrow_obj, buf, buf_len, pos, data_writer))) { + LOG_WARN("failed to print normal obj without escape", K(ret)); + } else { + str_to_escape.assign_ptr(buf, pos); + escape_printer_.do_encode_ = false; + } + } else { + str_to_escape = obj.get_varchar(); + } + if 
(OB_SUCC(ret) && !use_shared_buf_ && OB_FAIL(check_buf_sufficient(data_writer, + escape_printer_.buf_, + escape_printer_.buf_len_, + escape_printer_.pos_, + str_to_escape.length()))) { + LOG_WARN("failed to check if buf is sufficient", K(ret)); + } + if (OB_SUCC(ret) && !use_shared_buf_) { + if (OB_FAIL(ObFastStringScanner::foreach_char(str_to_escape, + src_type, + escape_printer_, + escape_printer_.do_encode_, + escape_printer_.ignore_convert_failed_))) { + if (OB_SIZE_OVERFLOW != ret) { + LOG_WARN("failed to print plain str", K(ret), K(src_type), K(escape_printer_.do_encode_)); + } else if (OB_FAIL(flush_buf(data_writer))) { + LOG_WARN("failed to flush buffer", K(ret)); + } else if (OB_FALSE_IT(escape_printer_.pos_ = data_writer.get_curr_pos())) { + } else if (OB_FAIL(ObFastStringScanner::foreach_char(str_to_escape, + src_type, + escape_printer_, + escape_printer_.do_encode_, + escape_printer_.ignore_convert_failed_))) { + if (OB_SIZE_OVERFLOW != ret) { + LOG_WARN("failed to print plain str", K(ret), K(src_type), K(escape_printer_.do_encode_)); + } else if (OB_FAIL(use_shared_buf(data_writer, + escape_printer_.buf_, + escape_printer_.buf_len_, + escape_printer_.pos_))) { + LOG_WARN("failed to use shared buffer", K(ret)); } } - if (OB_FAIL(ret)) { - } else if (OB_FAIL(print_normal_obj_without_escape(inrow_obj, buf, buf_len, pos, true))) { - LOG_WARN("failed to print normal obj without escape", K(ret)); - } else { - str_to_escape.assign_ptr(buf, pos); - escape_printer_.do_encode_ = false; - } - } else { - str_to_escape = obj.get_varchar(); } - if (OB_SUCC(ret)) { - get_buf(escape_printer_.buf_, escape_printer_.buf_len_, escape_printer_.pos_); - } - for (int i = 0; OB_SUCC(ret) && !print_succ; ++i) { + } + if (OB_SUCC(ret) && use_shared_buf_) { + do { if (OB_FAIL(ObFastStringScanner::foreach_char(str_to_escape, src_type, escape_printer_, escape_printer_.do_encode_, escape_printer_.ignore_convert_failed_))) { - if (OB_SIZE_OVERFLOW == ret) { - if (i == 0 && 
OB_UNLIKELY(OB_SUCCESS != (tmp_ret = flush_buf(escape_printer_.pos_)))) { - LOG_WARN("failed to flush buffer", K(tmp_ret), K(ret)); - } else if (i > 0 && OB_UNLIKELY(OB_SUCCESS != (tmp_ret = resize_buf( - escape_printer_.buf_, - escape_printer_.buf_len_, - escape_printer_.pos_)))) { - LOG_WARN("failed to resize buffer", K(tmp_ret), K(ret)); - } else { - ret = OB_SUCCESS; - } - } else { - LOG_WARN("failed to print plain str", K(ret), K(src_type), K(escape_printer_.do_encode_)); - } - } else { - print_succ = true; + LOG_WARN("failed to print plain str", K(ret), K(src_type), K(escape_printer_.do_encode_)); } - } - if (OB_SUCC(ret)) { - data_writer_.set_curr_pos(escape_printer_.pos_); - } - } else { - if (OB_FAIL(print_normal_obj_without_escape(obj, buf, buf_len, pos))) { - LOG_WARN("failed to print normal obj without escape", K(ret)); - } else { - data_writer_.set_curr_pos(pos); + } while (OB_SIZE_OVERFLOW == ret && OB_SUCC(resize_or_flush_shared_buf(data_writer, + escape_printer_.buf_, + escape_printer_.buf_len_, + escape_printer_.pos_))); + if (OB_FAIL(ret)) { + LOG_WARN("failed to print plain str", K(ret)); } } + if (OB_SUCC(ret)) { + data_writer.set_curr_pos(escape_printer_.pos_); + } + return ret; } -int ObSelectIntoOp::print_normal_obj_without_escape(const ObObj &obj, - char* &buf, - int64_t &buf_len, - int64_t &pos, - bool is_json) +int ObSelectIntoOp::print_normal_obj_without_escape(const ObObj &obj, ObIOBufferWriter &data_writer) { int ret = OB_SUCCESS; - int tmp_ret = OB_SUCCESS; - bool print_succ = false; - get_buf(buf, buf_len, pos, is_json); - for (int i = 0; OB_SUCC(ret) && !print_succ; ++i) { + char* buf = NULL; + int64_t buf_len = 0; + int64_t pos = 0; + OZ(get_buf(buf, buf_len, pos, data_writer)); + if (OB_SUCC(ret) && !use_shared_buf_) { if (OB_FAIL(obj.print_plain_str_literal(buf, buf_len, pos, print_params_))) { - if (OB_SIZE_OVERFLOW == ret) { - if (i == 0 && !is_json && OB_UNLIKELY(OB_SUCCESS != (tmp_ret = flush_buf(pos)))) { - LOG_WARN("failed 
to flush buffer", K(tmp_ret), K(ret)); - } else if ((i > 0 || is_json) - && OB_UNLIKELY(OB_SUCCESS != (tmp_ret = resize_buf(buf, buf_len, pos, is_json)))) { - LOG_WARN("failed to resize buffer", K(tmp_ret), K(ret)); - } else { - ret = OB_SUCCESS; + if (OB_SIZE_OVERFLOW != ret) { + LOG_WARN("failed to print obj", K(ret)); + } else if (OB_FAIL(flush_buf(data_writer))) { + LOG_WARN("failed to flush buffer", K(ret)); + } else if (OB_FALSE_IT(pos = data_writer.get_curr_pos())) { + } else if (OB_FAIL(obj.print_plain_str_literal(buf, buf_len, pos, print_params_))) { + if (OB_SIZE_OVERFLOW != ret) { + LOG_WARN("failed to print obj", K(ret)); + } else if (OB_FAIL(use_shared_buf(data_writer, buf, buf_len, pos))) { + LOG_WARN("failed to use shared buffer", K(ret)); } - } else { - LOG_WARN("failed to print plain str", K(ret)); } - } else { - print_succ = true; } } + if (OB_SUCC(ret) && use_shared_buf_) { + do { + if (OB_FAIL(obj.print_plain_str_literal(buf, buf_len, pos, print_params_))) { + LOG_WARN("failed to print obj", K(ret)); + } + } while (OB_SIZE_OVERFLOW == ret + && OB_SUCC(resize_or_flush_shared_buf(data_writer, buf, buf_len, pos))); + if (OB_FAIL(ret)) { + LOG_WARN("failed to print obj", K(ret)); + } + } + if (OB_SUCC(ret)) { + data_writer.set_curr_pos(pos); + } return ret; } -int ObSelectIntoOp::write_lob_to_file(const ObObj &obj, const ObExpr &expr, const ObDatum &datum) +int ObSelectIntoOp::print_json_to_json_buf(const ObObj &obj, + char* &buf, + int64_t &buf_len, + int64_t &pos, + ObIOBufferWriter &data_writer) +{ + int ret = OB_SUCCESS; + buf = get_json_buf(); + buf_len = get_json_buf_len(); + pos = 0; + do { + if (OB_FAIL(obj.print_plain_str_literal(buf, buf_len, pos, print_params_))) { + LOG_WARN("failed to print obj", K(ret)); + } + } while (OB_SIZE_OVERFLOW == ret + && OB_SUCC(resize_buf(buf, buf_len, pos, data_writer.get_curr_pos(), true))); + if (OB_FAIL(ret)) { + LOG_WARN("failed to print json to json buffer", K(ret)); + } + return ret; +} + +int 
ObSelectIntoOp::write_lob_to_file(const ObObj &obj, + const ObExpr &expr, + const ObDatum &datum, + ObIOBufferWriter &data_writer) { int ret = OB_SUCCESS; ObCharsetType src_type = ObCharset::charset_type_by_coll(obj.get_collation_type()); @@ -754,8 +1041,6 @@ int ObSelectIntoOp::write_lob_to_file(const ObObj &obj, const ObExpr &expr, cons escape_printer_.do_escape_ = has_escape_; escape_printer_.print_hex_ = obj.get_collation_type() == CS_TYPE_BINARY && print_params_.binary_string_print_hex_; - get_buf(escape_printer_.buf_, escape_printer_.buf_len_, escape_printer_.pos_); - ObDatumMeta input_meta = expr.datum_meta_; ObTextStringIterState state; ObString src_block_data; @@ -765,36 +1050,93 @@ int ObSelectIntoOp::write_lob_to_file(const ObObj &obj, const ObExpr &expr, cons common::ObArenaAllocator &temp_allocator = tmp_alloc_g.get_allocator(); int64_t truncated_len = 0; bool stop_when_truncated = false; + OZ(lob_iter.init(0, NULL, &temp_allocator)); + OZ(get_buf(escape_printer_.buf_, escape_printer_.buf_len_, escape_printer_.pos_, data_writer)); - if (OB_FAIL(lob_iter.init(0, NULL, &temp_allocator))) { - LOG_WARN("init lob_iter failed ", K(ret), K(lob_iter)); - } // 当truncated_len == src_block_data.length()时 // 表明当前foreach_char处理的仅为lob末尾的无效的数据, 即上一轮的truncated data, 要避免死循环 while (OB_SUCC(ret) && (state = lob_iter.get_next_block(src_block_data)) == TEXTSTRING_ITER_NEXT) { // outrow lob最后一次才有可能为false, inrow lob只迭代一次, 为false stop_when_truncated = (truncated_len != src_block_data.length()) && lob_iter.is_outrow_lob(); - if ((escape_printer_.buf_len_ - escape_printer_.pos_) < (src_block_data.length() * 5) - && OB_FAIL(flush_buf(escape_printer_.pos_))) { - LOG_WARN("failed to flush buf", K(ret)); - } else if (OB_FAIL(ObFastStringScanner::foreach_char(src_block_data, - src_type, - escape_printer_, - escape_printer_.do_encode_, - escape_printer_.ignore_convert_failed_, - stop_when_truncated, - &truncated_len))) { - if (OB_ERR_DATA_TRUNCATED == ret && stop_when_truncated) { - 
lob_iter.set_reserved_byte_len(truncated_len); - ret = OB_SUCCESS; - } else { - LOG_WARN("failed to print lob", K(ret)); + if (!use_shared_buf_ && OB_FAIL(check_buf_sufficient(data_writer, + escape_printer_.buf_, + escape_printer_.buf_len_, + escape_printer_.pos_, + src_block_data.length()))) { + LOG_WARN("failed to check if buf is sufficient", K(ret)); + } + if (OB_SUCC(ret) && !use_shared_buf_) { + if (OB_FAIL(ObFastStringScanner::foreach_char(src_block_data, + src_type, + escape_printer_, + escape_printer_.do_encode_, + escape_printer_.ignore_convert_failed_, + stop_when_truncated, + &truncated_len))) { + if (OB_ERR_DATA_TRUNCATED == ret && stop_when_truncated) { + lob_iter.set_reserved_byte_len(truncated_len); + ret = OB_SUCCESS; + } else if (OB_SIZE_OVERFLOW != ret) { + LOG_WARN("failed to print lob", K(ret)); + } else if (OB_FAIL(flush_buf(data_writer))) { + LOG_WARN("failed to flush buffer", K(ret)); + } else if (OB_FALSE_IT(escape_printer_.pos_ = data_writer.get_curr_pos())) { + } else if (OB_FAIL(ObFastStringScanner::foreach_char(src_block_data, + src_type, + escape_printer_, + escape_printer_.do_encode_, + escape_printer_.ignore_convert_failed_, + stop_when_truncated, + &truncated_len))) { + if (OB_ERR_DATA_TRUNCATED == ret && stop_when_truncated) { + lob_iter.set_reserved_byte_len(truncated_len); + ret = OB_SUCCESS; + } else if (OB_SIZE_OVERFLOW != ret) { + LOG_WARN("failed to print lob", K(ret)); + } else if (OB_FAIL(use_shared_buf(data_writer, + escape_printer_.buf_, + escape_printer_.buf_len_, + escape_printer_.pos_))) { + LOG_WARN("failed to use shared buffer", K(ret)); + } + } } } - if (OB_SUCC(ret)) { - data_writer_.set_curr_pos(escape_printer_.pos_); + if (OB_SUCC(ret) && use_shared_buf_) { + if (OB_FAIL(ObFastStringScanner::foreach_char(src_block_data, + src_type, + escape_printer_, + escape_printer_.do_encode_, + escape_printer_.ignore_convert_failed_, + stop_when_truncated, + &truncated_len))) { + if (OB_ERR_DATA_TRUNCATED == ret && 
stop_when_truncated) { + lob_iter.set_reserved_byte_len(truncated_len); + ret = OB_SUCCESS; + } else if (OB_SIZE_OVERFLOW != ret) { + LOG_WARN("failed to print lob", K(ret)); + } else if (OB_FAIL(flush_shared_buf(data_writer, get_flush_function(), true))) { + LOG_WARN("failed to flush shared buffer", K(ret)); + } else if (OB_FALSE_IT(escape_printer_.pos_ = 0)) { + } else if (OB_FAIL(ObFastStringScanner::foreach_char(src_block_data, + src_type, + escape_printer_, + escape_printer_.do_encode_, + escape_printer_.ignore_convert_failed_, + stop_when_truncated, + &truncated_len))) { + if (OB_ERR_DATA_TRUNCATED == ret && stop_when_truncated) { + lob_iter.set_reserved_byte_len(truncated_len); + ret = OB_SUCCESS; + } else { + LOG_WARN("failed to print lob", K(ret), K(src_block_data.length()), K(shared_buf_len_), + K(data_writer.get_curr_pos()), K(escape_printer_.buf_len_), K(escape_printer_.pos_)); + } + } + } } + data_writer.set_curr_pos(escape_printer_.pos_); } if (OB_FAIL(ret)) { } else if (state != TEXTSTRING_ITER_NEXT && state != TEXTSTRING_ITER_END) { @@ -805,72 +1147,101 @@ int ObSelectIntoOp::write_lob_to_file(const ObObj &obj, const ObExpr &expr, cons return ret; } -int ObSelectIntoOp::write_single_char_to_file(const char *wchar) +int ObSelectIntoOp::write_single_char_to_file(const char *wchar, ObIOBufferWriter &data_writer) { int ret = OB_SUCCESS; char* buf = NULL; int64_t buf_len = 0; int64_t pos = 0; - get_buf(buf, buf_len, pos); - if (pos == buf_len && OB_FAIL(flush_buf(pos))) { - LOG_WARN("failed to flush buffer", K(ret)); - } else if (pos < buf_len) { - MEMCPY(buf + pos, wchar, 1); - data_writer_.set_curr_pos(pos + 1); - } else if (OB_FAIL(resize_buf(buf, buf_len, pos))) { - LOG_WARN("failed to resize buffer", K(ret)); - } else if (pos < buf_len) { - MEMCPY(buf + pos, wchar, 1); - data_writer_.set_curr_pos(pos + 1); - } else { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected error", K(ret)); + OZ(get_buf(buf, buf_len, pos, data_writer)); + if 
(OB_SUCC(ret) && !use_shared_buf_) { + if (pos < buf_len) { + MEMCPY(buf + pos, wchar, 1); + data_writer.set_curr_pos(pos + 1); + } else if (OB_FAIL(flush_buf(data_writer))) { + LOG_WARN("failed to flush buffer", K(ret)); + } else if (OB_FALSE_IT(pos = data_writer.get_curr_pos())) { + } else if (pos < buf_len) { + MEMCPY(buf + pos, wchar, 1); + data_writer.set_curr_pos(pos + 1); + } else if (OB_FAIL(use_shared_buf(data_writer, buf, buf_len, pos))) { + LOG_WARN("failed to use shared buffer", K(ret)); + } + } + if (OB_SUCC(ret) && use_shared_buf_) { + if (pos < buf_len) { + MEMCPY(buf + pos, wchar, 1); + data_writer.set_curr_pos(pos + 1); + } else if (OB_FAIL(resize_or_flush_shared_buf(data_writer, buf, buf_len, pos))) { + LOG_WARN("failed to resize or flush shared buffer", K(ret)); + } else if (pos < buf_len) { + MEMCPY(buf + pos, wchar, 1); + data_writer.set_curr_pos(pos + 1); + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(ret)); + } } return ret; } -int ObSelectIntoOp::print_lob_field(const ObObj &obj, const ObExpr &expr, const ObDatum &datum) +int ObSelectIntoOp::print_lob_field(const ObObj &obj, + const ObExpr &expr, + const ObDatum &datum, + ObIOBufferWriter &data_writer) { int ret = OB_SUCCESS; if (has_enclose_) { - OZ(write_single_char_to_file(&char_enclose_)); + OZ(write_single_char_to_file(&char_enclose_, data_writer)); } - OZ(write_lob_to_file(obj, expr, datum)); + OZ(write_lob_to_file(obj, expr, datum, data_writer)); if (has_enclose_) { - OZ(write_single_char_to_file(&char_enclose_)); + OZ(write_single_char_to_file(&char_enclose_, data_writer)); } return ret; } -int ObSelectIntoOp::print_field(const ObObj &obj) +int ObSelectIntoOp::print_field(const ObObj &obj, ObIOBufferWriter &data_writer) { int ret = OB_SUCCESS; char char_n = 'N'; const bool need_enclose = has_enclose_ && !obj.is_null() && (!MY_SPEC.is_optional_ || obj.is_string_type()); if (need_enclose) { - OZ(write_single_char_to_file(&char_enclose_)); + 
OZ(write_single_char_to_file(&char_enclose_, data_writer)); } if (!has_escape_) { - OZ(write_obj_to_file(obj, false)); + OZ(write_obj_to_file(obj, data_writer, false)); } else if (obj.is_null()) { - OZ(write_single_char_to_file(&char_escape_)); - OZ(write_single_char_to_file(&char_n)); + OZ(write_single_char_to_file(&char_escape_, data_writer)); + OZ(write_single_char_to_file(&char_n, data_writer)); } else { - OZ(write_obj_to_file(obj, true)); + OZ(write_obj_to_file(obj, data_writer, true)); } if (need_enclose) { - OZ(write_single_char_to_file(&char_enclose_)); + OZ(write_single_char_to_file(&char_enclose_, data_writer)); } return ret; } -int ObSelectIntoOp::into_outfile() +int ObSelectIntoOp::into_outfile(ObIOBufferWriter *data_writer) { int ret = OB_SUCCESS; const ObIArray &select_exprs = MY_SPEC.select_exprs_; ObDatum *datum = NULL; ObObj obj; + ObDatum *partition_datum = NULL; + if (do_partition_) { + if (OB_FAIL(MY_SPEC.file_partition_expr_->eval(eval_ctx_, partition_datum))) { + LOG_WARN("eval expr failed", K(ret)); + } else if (OB_FAIL(get_data_writer_for_partition(partition_datum, data_writer))) { + LOG_WARN("failed to set data writer for partition", K(ret)); + } + } + if (OB_SUCC(ret) && OB_ISNULL(data_writer)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null data writer", K(ret)); + } for (int64_t i = 0; OB_SUCC(ret) && i < select_exprs.count(); ++i) { if (OB_ISNULL(select_exprs.at(i))) { ret = OB_ERR_UNEXPECTED; @@ -885,29 +1256,728 @@ int ObSelectIntoOp::into_outfile() select_exprs.at(i)->obj_datum_map_))) { LOG_WARN("failed to get obj from datum", K(ret)); } else if (!ob_is_text_tc(select_exprs.at(i)->obj_meta_.get_type())) { - OZ(print_field(obj)); + OZ(print_field(obj, *data_writer)); } else { // text tc - OZ(print_lob_field(obj, *select_exprs.at(i), *datum)); + OZ(print_lob_field(obj, *select_exprs.at(i), *datum, *data_writer)); } // print field terminator if (OB_SUCC(ret) && i != select_exprs.count() - 1) { - 
OZ(write_obj_to_file(MY_SPEC.field_str_)); + OZ(write_obj_to_file(MY_SPEC.field_str_, *data_writer)); } } // print line terminator - OZ(write_obj_to_file(MY_SPEC.line_str_)); + OZ(write_obj_to_file(MY_SPEC.line_str_, *data_writer)); // check if need split file - OZ(try_split_file()); + OZ(try_split_file(*data_writer)); + // clear shared buffer + OZ(flush_shared_buf(*data_writer, get_flush_function())); return ret; } -int ObSelectIntoOp::into_outfile_batch(const ObBatchRows &brs) +#ifdef OB_BUILD_CPP_ODPS +int ObSelectIntoOp::into_odps() +{ + int ret = OB_SUCCESS; + const ObIArray &select_exprs = MY_SPEC.select_exprs_; + apsara::odps::sdk::ODPSTableRecordPtr table_record; + ObDatum *datum = NULL; + try { + if (OB_UNLIKELY(!upload_ || !record_writer_ || !(table_record = upload_->CreateBufferRecord()) + || !(table_record->GetSchema()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexcepted null ptr", K(ret)); + } else if (table_record->GetSchema()->GetColumnCount() != select_exprs.count()) { + ret = OB_NOT_SUPPORTED; + LOG_USER_WARN(OB_NOT_SUPPORTED, "insert into partial column in external table"); + LOG_WARN("column count of odps record is not equal to count of select exprs", + K(table_record->GetSchema()->GetColumnCount()), K(select_exprs.count())); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < select_exprs.count(); ++i) { + if (OB_ISNULL(select_exprs.at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("select expr is unexpected null", K(ret)); + } else if (OB_FAIL(select_exprs.at(i)->eval(eval_ctx_, datum))) { + LOG_WARN("eval expr failed", K(ret)); + } else if (OB_ISNULL(datum)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("datum is unexpected null", K(ret)); + } else if (lib::is_mysql_mode() + && OB_FAIL(set_odps_column_value_mysql(*table_record, *datum, + select_exprs.at(i)->datum_meta_, + select_exprs.at(i)->obj_meta_, + i))) { + LOG_WARN("failed to set odps column value", K(ret)); + } else if (lib::is_oracle_mode() + && 
OB_FAIL(set_odps_column_value_oracle(*table_record, *datum, + select_exprs.at(i)->datum_meta_, + select_exprs.at(i)->obj_meta_, + i))) { + LOG_WARN("failed to set odps column value", K(ret)); + } + } + record_writer_->Write(*table_record); + } + } catch (apsara::odps::sdk::OdpsException& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when write one row to odps", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (const std::exception& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when write one row to odps", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (...) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when write one row to odps", K(ret)); + } + } + return ret; +} + +int ObSelectIntoOp::into_odps_batch(const ObBatchRows &brs) +{ + int ret = OB_SUCCESS; + const ObIArray &select_exprs = MY_SPEC.select_exprs_; + ObArray datum_vectors; + ObDatum *datum = NULL; + apsara::odps::sdk::ODPSTableRecordPtr table_record; + for (int64_t i = 0; OB_SUCC(ret) && i < select_exprs.count(); ++i) { + if (OB_ISNULL(select_exprs.at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("select expr is unexpected null", K(ret)); + } else if (OB_FAIL(select_exprs.at(i)->eval_batch(eval_ctx_, *brs.skip_, brs.size_))) { + LOG_WARN("failed to eval batch", K(ret), KPC(select_exprs.at(i))); + } else if (OB_FAIL(datum_vectors.push_back(select_exprs.at(i)->locate_expr_datumvector(eval_ctx_)))) { + LOG_WARN("failed to push back datum vector", K(ret)); + } + } + try { + if (OB_FAIL(ret)) { + } else if (OB_UNLIKELY(!upload_ || !record_writer_ + || !(table_record = upload_->CreateBufferRecord()) + || !(table_record->GetSchema()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexcepted null ptr", K(ret)); + } else if (table_record->GetSchema()->GetColumnCount() != select_exprs.count()) { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, 
"insert into partial column in external table"); + LOG_WARN("column count of odps record is not equal to count of select exprs", + K(table_record->GetSchema()->GetColumnCount()), K(select_exprs.count())); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < brs.size_; ++i) { + if (brs.skip_->contain(i)) { + // do nothing + } else { + for (int64_t j = 0; OB_SUCC(ret) && j < select_exprs.count(); ++j) { + if (OB_ISNULL(datum = datum_vectors.at(j).at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("datum is unexpected null", K(ret)); + } else if (lib::is_mysql_mode() + && OB_FAIL(set_odps_column_value_mysql(*table_record, *datum, + select_exprs.at(j)->datum_meta_, + select_exprs.at(j)->obj_meta_, + j))) { + LOG_WARN("failed to set odps column value", K(ret)); + } else if (lib::is_oracle_mode() + && OB_FAIL(set_odps_column_value_oracle(*table_record, *datum, + select_exprs.at(j)->datum_meta_, + select_exprs.at(j)->obj_meta_, + j))) { + LOG_WARN("failed to set odps column value", K(ret)); + } + } + record_writer_->Write(*table_record); + } + } + } + } catch (apsara::odps::sdk::OdpsException& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when write one batch to odps", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (const std::exception& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when write one batch to odps", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (...) 
{ + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when write one batch to odps", K(ret)); + } + } + return ret; +} + +int ObSelectIntoOp::set_odps_column_value_mysql(apsara::odps::sdk::ODPSTableRecord &table_record, + const ObDatum &datum, + const ObDatumMeta &datum_meta, + const ObObjMeta &obj_meta, + uint32_t col_idx) +{ + int ret = OB_SUCCESS; + ObObjType ob_type = datum_meta.get_type(); + apsara::odps::sdk::ODPSColumnType odps_type; + uint32_t res_len = 0; + char *buf = NULL; + int64_t buf_size = 0; + ObArenaAllocator allocator("IntoOdps", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + ObMallocHookAttrGuard guard(ObMemAttr(MTL_ID(), "IntoOdps")); + try { + if (OB_UNLIKELY(!(table_record.GetSchema()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexcepted null ptr", K(ret)); + } else if (datum.is_null()) { + table_record.SetNullValue(col_idx); + } else { + odps_type = table_record.GetSchema()->GetTableColumn(col_idx).GetType(); + switch (odps_type) + { + case apsara::odps::sdk::ODPS_BOOLEAN: + { + if (ObTinyIntType == ob_type) { + table_record.SetBoolValue(col_idx, datum.get_tinyint() != 0); + } else if (ObSmallIntType == ob_type) { + table_record.SetBoolValue(col_idx, datum.get_smallint() != 0); + } else if (ObMediumIntType == ob_type || ObInt32Type == ob_type) { + table_record.SetBoolValue(col_idx, datum.get_int32() != 0); + } else if (ObIntType == ob_type) { + table_record.SetBoolValue(col_idx, datum.get_int() != 0); + } + break; + } + case apsara::odps::sdk::ODPS_TINYINT: + { + table_record.SetTinyIntValue(col_idx, datum.get_tinyint()); + break; + } + case apsara::odps::sdk::ODPS_SMALLINT: + { + table_record.SetSmallIntValue(col_idx, datum.get_smallint()); + break; + } + case apsara::odps::sdk::ODPS_INTEGER: + { + table_record.SetIntegerValue(col_idx, datum.get_int32()); + break; + } + case apsara::odps::sdk::ODPS_BIGINT: + { + table_record.SetBigIntValue(col_idx, datum.get_int()); + break; + } + case apsara::odps::sdk::ODPS_FLOAT: + { + 
table_record.SetFloatValue(col_idx, datum.get_float()); + break; + } + case apsara::odps::sdk::ODPS_DOUBLE: + { + table_record.SetDoubleValue(col_idx, datum.get_double()); + break; + } + case apsara::odps::sdk::ODPS_DECIMAL: + { + std::string dec; + if (OB_FAIL(decimal_to_string(datum, datum_meta, dec, allocator))) { + LOG_WARN("failed to get string", K(ret)); + } else { + table_record.SetDecimalValue(col_idx, dec); + } + break; + } + case apsara::odps::sdk::ODPS_CHAR: + case apsara::odps::sdk::ODPS_VARCHAR: + { + buf_size = datum.get_string().length() * ObCharset::MAX_MB_LEN; + if (CHARSET_UTF8MB4 == ObCharset::charset_type_by_coll(datum_meta.cs_type_)) { + res_len = static_cast(datum.get_string().length()); + buf = const_cast(datum.get_string().ptr()); + } else if (OB_ISNULL(buf = static_cast(allocator.alloc(buf_size)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc memory", K(ret)); + } else if (OB_FAIL(ObCharset::charset_convert(datum_meta.cs_type_, + datum.get_string().ptr(), + datum.get_string().length(), + CS_TYPE_UTF8MB4_BIN, + buf, + buf_size, + res_len, + false, + false))) { + LOG_WARN("failed to convert charset", K(ret)); + } + if (OB_FAIL(ret)) { + } else if ((apsara::odps::sdk::ODPS_CHAR == odps_type && res_len > 255) + || (apsara::odps::sdk::ODPS_VARCHAR == odps_type && res_len > 65535)) { + ret = OB_DATA_OUT_OF_RANGE; + LOG_WARN("string length out of range", K(res_len)); + } else if (buf == NULL && res_len == 0) { + table_record.SetStringValue(col_idx, "", res_len, odps_type); + } else { + table_record.SetStringValue(col_idx, buf, res_len, odps_type); + } + break; + } + case apsara::odps::sdk::ODPS_STRING: + case apsara::odps::sdk::ODPS_BINARY: + { + ObString lob_str; + if (OB_FAIL(ObTextStringHelper::read_real_string_data(allocator, + datum, + datum_meta, + obj_meta.has_lob_header(), + lob_str, + &ctx_))) { + LOG_WARN("failed to read string", K(ret)); + } else if (apsara::odps::sdk::ODPS_BINARY == odps_type + || CHARSET_UTF8MB4 == 
ObCharset::charset_type_by_coll(datum_meta.cs_type_) + || CS_TYPE_BINARY == datum_meta.cs_type_) { + res_len = static_cast(lob_str.length()); + buf = const_cast(lob_str.ptr()); + } else if (OB_FALSE_IT(buf_size = lob_str.length() * ObCharset::MAX_MB_LEN)) { + } else if (OB_ISNULL(buf = static_cast(allocator.alloc(buf_size)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc memory", K(ret)); + } else if (OB_FAIL(ObCharset::charset_convert(datum_meta.cs_type_, + lob_str.ptr(), + lob_str.length(), + CS_TYPE_UTF8MB4_BIN, + buf, + buf_size, + res_len, + false, + false))) { + LOG_WARN("failed to convert charset", K(ret)); + } + if (OB_FAIL(ret)) { + } else if (res_len > 8 * 1024 * 1024) { + ret = OB_DATA_OUT_OF_RANGE; + LOG_WARN("string length out of range", K(res_len)); + } else if (buf == NULL && res_len == 0) { + table_record.SetStringValue(col_idx, "", res_len, odps_type); + } else { + LOG_DEBUG("debug select into lob", K(datum_meta.cs_type_), K(ObString(res_len, buf))); + table_record.SetStringValue(col_idx, buf, res_len, odps_type); + } + break; + } + case apsara::odps::sdk::ODPS_JSON: + { + ObString json_str; + ObIJsonBase *j_base = NULL; + ObJsonBuffer jbuf(&allocator); + ObJsonInType in_type = ObJsonInType::JSON_BIN; + uint32_t parse_flag = lib::is_mysql_mode() ? 
0 : ObJsonParser::JSN_RELAXED_FLAG; + if (OB_FAIL(ObTextStringHelper::read_real_string_data(allocator, + datum, + datum_meta, + obj_meta.has_lob_header(), + json_str, + &ctx_))) { + LOG_WARN("failed to read string", K(ret)); + } else if (OB_FAIL(ObJsonBaseFactory::get_json_base(&allocator, json_str, in_type, + in_type, j_base, parse_flag))) { + COMMON_LOG(WARN, "fail to get json base", K(ret), K(in_type)); + } else if (OB_FAIL(j_base->print(jbuf, false))) { // json binary to string + COMMON_LOG(WARN, "fail to convert json to string", K(ret)); + } else if (jbuf.length() > UINT32_MAX) { + ret = OB_DATA_OUT_OF_RANGE; + LOG_WARN("data out of range", K(odps_type), K(jbuf.length()), K(ret)); + } else { + LOG_DEBUG("debug select into json", K(datum_meta.cs_type_), K(ObString(jbuf.length(), jbuf.ptr()))); + table_record.SetJsonValue(col_idx, jbuf.ptr(), static_cast(jbuf.length())); + } + break; + } + case apsara::odps::sdk::ODPS_TIMESTAMP: + case apsara::odps::sdk::ODPS_TIMESTAMP_NTZ: + { + int64_t us = apsara::odps::sdk::ODPS_TIMESTAMP == odps_type + ? 
datum.get_timestamp() + : datum.get_datetime(); + int64_t sec = us / 1000000; + int32_t ns = (us % 1000000) * 1000; + if (us < ORACLE_DATETIME_MIN_VAL) { + ret = OB_DATETIME_FUNCTION_OVERFLOW; + LOG_WARN("odps timestamp min value is 0001-01-01 00:00:00", K(ret), K(us)); + } else { + table_record.SetTimeValue(col_idx, sec, ns, odps_type); + } + break; + } + case apsara::odps::sdk::ODPS_DATE: + { + if (datum.get_date() < ODPS_DATE_MIN_VAL) { + ret = OB_DATETIME_FUNCTION_OVERFLOW; + LOG_WARN("odps date min value is 0001-01-01", K(ret)); + } else { + table_record.SetDateValue(col_idx, datum.get_date()); + } + break; + } + case apsara::odps::sdk::ODPS_DATETIME: + { + int32_t tmp_offset = 0; + if (OB_ISNULL(ctx_.get_my_session()) || OB_ISNULL(ctx_.get_my_session()->get_timezone_info())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (OB_FAIL(ctx_.get_my_session()->get_timezone_info()->get_timezone_offset(0, tmp_offset))) { + LOG_WARN("failed to get timezone offset", K(ret)); + } else if (datum.get_datetime() < ORACLE_DATETIME_MIN_VAL + SEC_TO_USEC(tmp_offset)) { + ret = OB_DATETIME_FUNCTION_OVERFLOW; + LOG_WARN("odps datetime min value is 0001-01-01 00:00:00", K(ret)); + } else { + table_record.SetDatetimeValue(col_idx, (datum.get_datetime() - SEC_TO_USEC(tmp_offset)) / 1000); + } + break; + } + default: + { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected type", K(ob_type), K(odps_type), K(ret)); + } + } + } + } catch (apsara::odps::sdk::OdpsException& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when set odps column value mysql", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (const std::exception& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when set odps column value mysql", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (...) 
{ + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when set odps column value mysql", K(ret)); + } + } + return ret; +} + +int ObSelectIntoOp::set_odps_column_value_oracle(apsara::odps::sdk::ODPSTableRecord &table_record, + const ObDatum &datum, + const ObDatumMeta &datum_meta, + const ObObjMeta &obj_meta, + uint32_t col_idx) +{ + int ret = OB_SUCCESS; + ObObjType ob_type = datum_meta.get_type(); + apsara::odps::sdk::ODPSColumnType odps_type; + int64_t int_value = 0; + uint32_t res_len = 0; + char *buf = NULL; + int64_t buf_size = 0; + ObArenaAllocator allocator("IntoOdps", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + ObMallocHookAttrGuard guard(ObMemAttr(MTL_ID(), "IntoOdps")); + try { + if (OB_UNLIKELY(!(table_record.GetSchema()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexcepted null ptr", K(ret)); + } else if (datum.is_null()) { + table_record.SetNullValue(col_idx); + } else { + odps_type = table_record.GetSchema()->GetTableColumn(col_idx).GetType(); + switch (odps_type) + { + case apsara::odps::sdk::ODPS_BOOLEAN: + { + if (OB_FAIL(decimal_or_number_to_int64(datum, datum_meta, int_value))) { + LOG_WARN("failed to get int64", K(ret)); + } else { + table_record.SetBoolValue(col_idx, int_value != 0); + } + break; + } + case apsara::odps::sdk::ODPS_TINYINT: + { + if (OB_FAIL(decimal_or_number_to_int64(datum, datum_meta, int_value))) { + LOG_WARN("failed to get int64", K(ret)); + } else if (int_value < INT8_MIN || int_value > INT8_MAX) { + ret = OB_DATA_OUT_OF_RANGE; + LOG_WARN("data out of range", K(odps_type), K(ret)); + } else { + table_record.SetTinyIntValue(col_idx, static_cast(int_value)); + } + break; + } + case apsara::odps::sdk::ODPS_SMALLINT: + { + if (OB_FAIL(decimal_or_number_to_int64(datum, datum_meta, int_value))) { + LOG_WARN("failed to get int64", K(ret)); + } else if (int_value < INT16_MIN || int_value > INT16_MAX) { + ret = OB_DATA_OUT_OF_RANGE; + LOG_WARN("data out of range", K(odps_type), K(ret)); + } else { + 
table_record.SetSmallIntValue(col_idx, static_cast(int_value)); + } + break; + } + case apsara::odps::sdk::ODPS_INTEGER: + { + if (OB_FAIL(decimal_or_number_to_int64(datum, datum_meta, int_value))) { + LOG_WARN("failed to get int64", K(ret)); + } else if (int_value < INT32_MIN || int_value > INT32_MAX) { + ret = OB_DATA_OUT_OF_RANGE; + LOG_WARN("data out of range", K(odps_type), K(ret)); + } else { + table_record.SetIntegerValue(col_idx, static_cast(int_value)); + } + break; + } + case apsara::odps::sdk::ODPS_BIGINT: + { + if (OB_FAIL(decimal_or_number_to_int64(datum, datum_meta, int_value))) { + LOG_WARN("failed to get int64", K(ret)); + } else { + table_record.SetBigIntValue(col_idx, int_value); + } + break; + } + case apsara::odps::sdk::ODPS_FLOAT: + { + table_record.SetFloatValue(col_idx, datum.get_float()); + break; + } + case apsara::odps::sdk::ODPS_DOUBLE: + { + table_record.SetDoubleValue(col_idx, datum.get_double()); + break; + } + case apsara::odps::sdk::ODPS_DECIMAL: + { + std::string dec; + if (OB_FAIL(decimal_to_string(datum, datum_meta, dec, allocator))) { + LOG_WARN("failed to get string", K(ret)); + } else { + table_record.SetDecimalValue(col_idx, dec); + } + break; + } + case apsara::odps::sdk::ODPS_CHAR: + case apsara::odps::sdk::ODPS_VARCHAR: + { + buf_size = datum.get_string().length() * ObCharset::MAX_MB_LEN; + if (CHARSET_UTF8MB4 == ObCharset::charset_type_by_coll(datum_meta.cs_type_)) { + res_len = static_cast(datum.get_string().length()); + buf = const_cast(datum.get_string().ptr()); + } else if (OB_ISNULL(buf = static_cast(allocator.alloc(buf_size)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc memory", K(ret)); + } else if (OB_FAIL(ObCharset::charset_convert(datum_meta.cs_type_, + datum.get_string().ptr(), + datum.get_string().length(), + CS_TYPE_UTF8MB4_BIN, + buf, + buf_size, + res_len, + false, + false))) { + LOG_WARN("failed to convert charset", K(ret)); + } + if (OB_FAIL(ret)) { + } else if 
((apsara::odps::sdk::ODPS_CHAR == odps_type && res_len > 255) + || (apsara::odps::sdk::ODPS_VARCHAR == odps_type && res_len > 65535)) { + ret = OB_DATA_OUT_OF_RANGE; + LOG_WARN("string length out of range", K(res_len)); + } else if (buf == NULL && res_len == 0) { + table_record.SetStringValue(col_idx, "", res_len, odps_type); + } else { + table_record.SetStringValue(col_idx, buf, res_len, odps_type); + } + break; + } + case apsara::odps::sdk::ODPS_STRING: + case apsara::odps::sdk::ODPS_BINARY: + { + ObString lob_str; + if (OB_FAIL(ObTextStringHelper::read_real_string_data(allocator, + datum, + datum_meta, + obj_meta.has_lob_header(), + lob_str, + &ctx_))) { + LOG_WARN("failed to read string", K(ret)); + } else if (apsara::odps::sdk::ODPS_BINARY == odps_type + || CHARSET_UTF8MB4 == ObCharset::charset_type_by_coll(datum_meta.cs_type_) + || CS_TYPE_BINARY == datum_meta.cs_type_) { + res_len = static_cast(lob_str.length()); + buf = const_cast(lob_str.ptr()); + } else if (OB_FALSE_IT(buf_size = lob_str.length() * ObCharset::MAX_MB_LEN)) { + } else if (OB_ISNULL(buf = static_cast(allocator.alloc(buf_size)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc memory", K(ret)); + } else if (OB_FAIL(ObCharset::charset_convert(datum_meta.cs_type_, + lob_str.ptr(), + lob_str.length(), + CS_TYPE_UTF8MB4_BIN, + buf, + buf_size, + res_len, + false, + false))) { + LOG_WARN("failed to convert charset", K(ret)); + } + if (OB_FAIL(ret)) { + } else if (res_len > 8 * 1024 * 1024) { + ret = OB_DATA_OUT_OF_RANGE; + LOG_WARN("string length out of range", K(res_len)); + } else if (buf == NULL && res_len == 0) { + table_record.SetStringValue(col_idx, "", res_len, odps_type); + } else { + table_record.SetStringValue(col_idx, buf, res_len, odps_type); + } + break; + } + case apsara::odps::sdk::ODPS_JSON: + { + ObString json_str; + ObIJsonBase *j_base = NULL; + ObJsonBuffer jbuf(&allocator); + ObJsonInType in_type = ObJsonInType::JSON_BIN; + uint32_t parse_flag = 
lib::is_mysql_mode() ? 0 : ObJsonParser::JSN_RELAXED_FLAG; + if (OB_FAIL(ObTextStringHelper::read_real_string_data(allocator, + datum, + datum_meta, + obj_meta.has_lob_header(), + json_str, + &ctx_))) { + LOG_WARN("failed to read string", K(ret)); + } else if (OB_FAIL(ObJsonBaseFactory::get_json_base(&allocator, json_str, in_type, + in_type, j_base, parse_flag))) { + COMMON_LOG(WARN, "fail to get json base", K(ret), K(in_type)); + } else if (OB_FAIL(j_base->print(jbuf, false))) { // json binary to string + COMMON_LOG(WARN, "fail to convert json to string", K(ret)); + } else if (jbuf.length() > UINT32_MAX) { + ret = OB_DATA_OUT_OF_RANGE; + LOG_WARN("data out of range", K(odps_type), K(jbuf.length()), K(ret)); + } else { + table_record.SetJsonValue(col_idx, jbuf.ptr(), static_cast(jbuf.length())); + } + break; + } + case apsara::odps::sdk::ODPS_TIMESTAMP: + case apsara::odps::sdk::ODPS_TIMESTAMP_NTZ: + { + ObOTimestampData timestamp = datum.get_otimestamp_tiny(); + table_record.SetTimeValue(col_idx, timestamp.time_us_ / 1000000, timestamp.time_ctx_.tail_nsec_, odps_type); + break; + } + case apsara::odps::sdk::ODPS_DATE: + { + table_record.SetDateValue(col_idx, datum.get_datetime() / 1000000 / 3600 / 24); + break; + } + case apsara::odps::sdk::ODPS_DATETIME: + { + ObOTimestampData timestamp = datum.get_otimestamp_tiny(); + int32_t tmp_offset = 0; + if (OB_ISNULL(ctx_.get_my_session()) || OB_ISNULL(ctx_.get_my_session()->get_timezone_info())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (OB_FAIL(ctx_.get_my_session()->get_timezone_info()->get_timezone_offset(0, tmp_offset))) { + LOG_WARN("failed to get timezone offset", K(ret)); + } else { + table_record.SetDatetimeValue(col_idx, (timestamp.time_us_ - SEC_TO_USEC(tmp_offset)) / 1000); + } + break; + } + default: + { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected type", K(ob_type), K(odps_type), K(ret)); + } + } + } + } catch (apsara::odps::sdk::OdpsException& ex) { + if 
(OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when set odps column value oracle", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (const std::exception& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when set odps column value oracle", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (...) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when set odps column value oracle", K(ret)); + } + } + return ret; +} +#endif + +int ObSelectIntoOp::decimal_to_string(const ObDatum &datum, + const ObDatumMeta &datum_meta, + std::string &res, + ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + char *buf = NULL; + int64_t pos = 0; + int32_t int_bytes = wide::ObDecimalIntConstValue::get_int_bytes_by_precision(datum_meta.precision_); + if (OB_ISNULL(buf = static_cast(allocator.alloc(OB_CAST_TO_VARCHAR_MAX_LENGTH)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc memory", K(ret)); + } else if (OB_FAIL(wide::to_string(datum.get_decimal_int(), datum.get_int_bytes(), datum_meta.scale_, + buf, OB_CAST_TO_VARCHAR_MAX_LENGTH, pos))) { + LOG_WARN("failed to get string", K(ret)); + } else { + res.assign(buf, pos); + } + return ret; +} + +int ObSelectIntoOp::decimal_or_number_to_int64(const ObDatum &datum, + const ObDatumMeta &datum_meta, + int64_t &res) +{ + int ret = OB_SUCCESS; + ObObjType ob_type = datum_meta.get_type(); + if (ObNumberType == ob_type) { + const number::ObNumber nmb(datum.get_number()); + if (OB_FAIL(nmb.extract_valid_int64_with_trunc(res))) { + LOG_WARN("failed to cast number to int64", K(ret)); + } + } else if (ObDecimalIntType == ob_type) { + int32_t int_bytes = wide::ObDecimalIntConstValue::get_int_bytes_by_precision(datum_meta.precision_); + bool is_valid; + if (OB_FAIL(wide::check_range_valid_int64(datum.get_decimal_int(), int_bytes, is_valid, res))) { + LOG_WARN("failed to check decimal int", 
K(int_bytes), K(ret)); + } else if (!is_valid) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("decimal int is not valid int64", K(ret)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected type", K(ob_type), K(ret)); + } + return ret; +} + +int ObSelectIntoOp::into_outfile_batch(const ObBatchRows &brs, ObIOBufferWriter *data_writer) { int ret = OB_SUCCESS; const ObIArray &select_exprs = MY_SPEC.select_exprs_; ObArray datum_vectors; ObDatum *datum = NULL; ObObj obj; + ObDatumVector partition_datum_vector; for (int64_t i = 0; OB_SUCC(ret) && i < select_exprs.count(); ++i) { if (OB_FAIL(select_exprs.at(i)->eval_batch(eval_ctx_, *brs.skip_, brs.size_))) { LOG_WARN("failed to eval batch", K(ret)); @@ -915,9 +1985,22 @@ int ObSelectIntoOp::into_outfile_batch(const ObBatchRows &brs) LOG_WARN("failed to push back datum vector", K(ret)); } } + if (OB_SUCC(ret) && do_partition_) { + if (OB_FAIL(MY_SPEC.file_partition_expr_->eval_batch(eval_ctx_, *brs.skip_, brs.size_))) { + LOG_WARN("failed to eval batch", K(ret)); + } else { + partition_datum_vector = MY_SPEC.file_partition_expr_->locate_expr_datumvector(eval_ctx_); + } + } for (int64_t i = 0; OB_SUCC(ret) && i < brs.size_; ++i) { if (brs.skip_->contain(i)) { // do nothing + } else if (do_partition_ && OB_FAIL(get_data_writer_for_partition(partition_datum_vector.at(i), + data_writer))) { + LOG_WARN("failed to set data writer for partition", K(ret)); + } else if (OB_ISNULL(data_writer)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null data writer", K(ret)); } else { for (int64_t j = 0; OB_SUCC(ret) && j < select_exprs.count(); ++j) { if (OB_ISNULL(datum = datum_vectors.at(j).at(i))) { @@ -928,41 +2011,46 @@ int ObSelectIntoOp::into_outfile_batch(const ObBatchRows &brs) select_exprs.at(j)->obj_datum_map_))) { LOG_WARN("failed to get obj from datum", K(ret)); } else if (!ob_is_text_tc(select_exprs.at(j)->obj_meta_.get_type())) { - OZ(print_field(obj)); + OZ(print_field(obj, *data_writer)); } else { // text tc 
- OZ(print_lob_field(obj, *select_exprs.at(j), *datum)); + OZ(print_lob_field(obj, *select_exprs.at(j), *datum, *data_writer)); } // print field terminator if (OB_SUCC(ret) && j != select_exprs.count() - 1) { - OZ(write_obj_to_file(MY_SPEC.field_str_)); + OZ(write_obj_to_file(MY_SPEC.field_str_, *data_writer)); } } // print line terminator - OZ(write_obj_to_file(MY_SPEC.line_str_)); + OZ(write_obj_to_file(MY_SPEC.line_str_, *data_writer)); // check if need split file - OZ(try_split_file()); + OZ(try_split_file(*data_writer)); + // clear shared buffer + OZ(flush_shared_buf(*data_writer, get_flush_function())); } } return ret; } -int ObSelectIntoOp::into_dumpfile() +int ObSelectIntoOp::into_dumpfile(ObIOBufferWriter *data_writer) { int ret = OB_SUCCESS; char buf[MAX_VALUE_LENGTH]; int64_t buf_len = MAX_VALUE_LENGTH; int64_t pos = 0; - if (OB_FAIL(get_row_str(buf_len, is_first_, buf, pos))) { + if (OB_ISNULL(data_writer)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (OB_FAIL(get_row_str(buf_len, is_first_, buf, pos))) { LOG_WARN("get str failed", K(ret)); } else if (is_first_) { // create file - if (OB_FAIL(file_appender_.create(file_name_.get_varchar(), true))) { + if (OB_FAIL(data_writer->file_appender_.create(file_name_.get_varchar(), true))) { LOG_WARN("create dumpfile failed", K(ret), K(file_name_)); } else { is_first_ = false; } } if (OB_SUCC(ret)) { - if (OB_FAIL(file_appender_.append(buf, pos, false))) { + if (OB_FAIL(data_writer->file_appender_.append(buf, pos, false))) { LOG_WARN("failed to append file"); } else { //do nothing @@ -1057,10 +2145,10 @@ int ObSelectIntoOp::prepare_escape_printer() // wc->mb if (OB_ISNULL(buf = static_cast(ctx_.get_allocator().alloc(buf_len)))) { ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("fail to allocate buffer", K(ret), K(buf_len)); + LOG_WARN("failed to allocate buffer", K(ret), K(buf_len)); } - OZ(print_wchar_to_buf(buf, buf_len, pos, wchar_enclose, escape_printer_.enclose_, 
MY_SPEC.cs_type_)); - OZ(print_wchar_to_buf(buf, buf_len, pos, wchar_escape, escape_printer_.escape_, MY_SPEC.cs_type_)); + OZ(print_wchar_to_buf(buf, buf_len, pos, wchar_enclose, escape_printer_.enclose_, MY_SPEC.cs_type_)); //todo@linyi if has_enclose_ + OZ(print_wchar_to_buf(buf, buf_len, pos, wchar_escape, escape_printer_.escape_, MY_SPEC.cs_type_)); //todo@linyi OZ(print_wchar_to_buf(buf, buf_len, pos, wchar_zero, escape_printer_.zero_, MY_SPEC.cs_type_)); OZ(print_wchar_to_buf(buf, buf_len, pos, wchar_field, escape_printer_.field_terminator_, MY_SPEC.cs_type_)); OZ(print_wchar_to_buf(buf, buf_len, pos, wchar_line, escape_printer_.line_terminator_, MY_SPEC.cs_type_)); @@ -1087,14 +2175,228 @@ int ObSelectIntoOp::check_has_lob_or_json() return ret; } +int ObSelectIntoOp::create_shared_buffer_for_data_writer() +{ + int ret = OB_SUCCESS; + shared_buf_len_ = has_lob_ ? (5 * SHARED_BUFFER_SIZE) : SHARED_BUFFER_SIZE; + if (OB_ISNULL(shared_buf_ = static_cast(ctx_.get_allocator().alloc(shared_buf_len_)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate buffer", K(ret), K(shared_buf_len_)); + } + if (OB_SUCC(ret) && has_json_ && has_escape_) { + json_buf_len_ = OB_MALLOC_MIDDLE_BLOCK_SIZE; + if (OB_ISNULL(json_buf_ = static_cast(ctx_.get_allocator().alloc(json_buf_len_)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate buffer", K(ret), K(json_buf_len_)); + } + } + return ret; +} + +int ObSelectIntoOp::check_secure_file_path(ObString file_name) +{ + int ret = OB_SUCCESS; + ObString file_path = file_name.split_on(file_name.reverse_find('/')); + char full_path_buf[PATH_MAX+1]; + char *actual_path = nullptr; + ObSqlString sql_str; + ObString secure_file_priv; + int64_t tenant_id = MTL_ID(); + if (OB_FAIL(sql_str.append(file_path.empty() ? "." 
: file_path))) { + LOG_WARN("failed to append string", K(ret)); + } else if (OB_ISNULL(actual_path = realpath(sql_str.ptr(), full_path_buf))) { + ret = OB_FILE_NOT_EXIST; + LOG_WARN("file not exist", K(ret), K(sql_str)); + } else if (OB_FAIL(ObSchemaUtils::get_tenant_varchar_variable(tenant_id, + SYS_VAR_SECURE_FILE_PRIV, + ctx_.get_allocator(), + secure_file_priv))) { + LOG_WARN("fail get tenant variable", K(tenant_id), K(secure_file_priv), K(ret)); + } else if (OB_FAIL(ObResolverUtils::check_secure_path(secure_file_priv, actual_path))) { + LOG_WARN("failed to check secure path", K(ret), K(secure_file_priv)); + if (OB_ERR_NO_PRIVILEGE == ret) { + ret = OB_ERR_NO_PRIV_DIRECT_PATH_ACCESS; + LOG_ERROR("failed to check secure path", K(ret), K(secure_file_priv)); + } + } + return ret; +} + +int ObSelectIntoOp::get_data_writer_for_partition(ObDatum *partition_datum, + ObIOBufferWriter *&data_writer) +{ + int ret = OB_SUCCESS; + ObString partition; + void *ptr = NULL; + ObIOBufferWriter *value = NULL; + const int64_t buf_len = MY_SPEC.buffer_size_; + char *buf = NULL; + if (OB_ISNULL(partition_datum)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (OB_SUCC(partition_map_.get_refactored(partition_datum->get_string(), value))) { + if (OB_ISNULL(value)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else { + data_writer = value; + } + } else if (OB_UNLIKELY(OB_HASH_NOT_EXIST != ret)){ + LOG_WARN("get unexpected error", K(ret)); + } else if (curr_partition_num_ >= OB_MAX_PARTITION_NUM_ORACLE) { + ret = OB_TOO_MANY_PARTITIONS_ERROR; + LOG_WARN("too many partitions", K(ret)); + } else { + ret = OB_SUCCESS; + //new data_writer + if (OB_ISNULL(ptr = ctx_.get_allocator().alloc(sizeof(ObIOBufferWriter)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate data writer", K(ret), K(sizeof(ObIOBufferWriter))); + } else { + data_writer = new(ptr) ObIOBufferWriter(); + } + //init buffer + if (OB_FAIL(ret) 
|| buf_len <= 0) { + } else if (OB_ISNULL(buf = static_cast(ctx_.get_allocator().alloc(buf_len)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate buffer", K(ret), K(buf_len)); + } else { + data_writer->init(buf, buf_len); + } + //add to hashmap + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ob_write_string(ctx_.get_allocator(), + partition_datum->get_string(), + partition))) { + LOG_WARN("failed to write string", K(ret)); + } else if (OB_FAIL(partition_map_.set_refactored(partition, data_writer))) { + LOG_WARN("failed to add data writer to map", K(ret)); + } else { + curr_partition_num_++; + } + if (OB_FAIL(ret) && NULL != data_writer) { + data_writer->~ObIOBufferWriter(); + } + //calc file path + if (OB_SUCC(ret) && OB_FAIL(calc_file_path_with_partition(partition, *data_writer))) { + LOG_WARN("failed to calc file path with partition", K(ret)); + } + } + return ret; +} + +int ObSelectIntoOp::create_the_only_data_writer(ObIOBufferWriter *&data_writer) +{ + int ret = OB_SUCCESS; + void *ptr = NULL; + const int64_t buf_len = MY_SPEC.buffer_size_; + char *buf = NULL; + if (OB_ISNULL(ptr = ctx_.get_allocator().alloc(sizeof(ObIOBufferWriter)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate data writer", K(ret), K(sizeof(ObIOBufferWriter))); + } else { + data_writer = new(ptr) ObIOBufferWriter(); + data_writer->url_ = basic_url_; + data_writer_ = data_writer; + } + if (OB_FAIL(ret)) { + } else if (T_INTO_OUTFILE == MY_SPEC.into_type_ && MY_SPEC.is_single_ + && OB_FAIL(open_file(*data_writer))) { + LOG_WARN("failed to open file", K(ret)); + } else if (buf_len <= 0) { + } else if (OB_ISNULL(buf = static_cast(ctx_.get_allocator().alloc(buf_len)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate buffer", K(ret), K(buf_len)); + } else { + data_writer->init(buf, buf_len); + } + if (OB_FAIL(ret) && NULL != data_writer) { + data_writer->~ObIOBufferWriter(); + } + return ret; +} + +#ifdef OB_BUILD_CPP_ODPS +int 
ObSelectIntoOp::odps_commit_upload() +{ + int ret = OB_SUCCESS; + bool is_in_px = (NULL != ctx_.get_sqc_handler()); + if (is_in_px) { + ObOdpsPartitionDownloaderMgr &odps_mgr = ctx_.get_sqc_handler()->get_sqc_ctx().gi_pump_.get_odps_mgr(); + if (!need_commit_) { + odps_mgr.set_fail(); + } + __sync_synchronize(); + int64_t ref = odps_mgr.dec_ref(); + if (0 > ref) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected ref", K(ref), K(ret)); + } else if (0 == ref && OB_FAIL(odps_mgr.commit_upload())) { + LOG_WARN("failed to commit upload", K(ret)); + } + } else { + std::vector blocks; + try { + if (OB_UNLIKELY(!record_writer_ || !upload_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexcepted null ptr", K(ret)); + } else { + record_writer_->Close(); + blocks.push_back(block_id_); + if (need_commit_) { + upload_->Commit(blocks); + } + } + } catch (apsara::odps::sdk::OdpsException& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when commit", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (const std::exception& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when commit", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (...) 
{ + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when commit", K(ret)); + } + } + } + return ret; +} +#endif + void ObSelectIntoOp::destroy() { - file_appender_.~ObFileAppender(); - close_file(); + ObIOBufferWriter *data_writer = NULL; + if (ObExternalFileFormat::FormatType::ODPS_FORMAT == format_type_) { +#ifdef OB_BUILD_CPP_ODPS + upload_.reset(); + record_writer_.reset(); +#endif + } else if (do_partition_) { + for (ObPartitionWriterMap::iterator iter = partition_map_.begin(); + iter != partition_map_.end(); iter++) { + if (OB_ISNULL(data_writer = iter->second)) { + } else { + close_file(*data_writer); + data_writer->~ObIOBufferWriter(); + } + } + } else if (OB_NOT_NULL(data_writer_)) { + close_file(*data_writer_); + data_writer_->~ObIOBufferWriter(); + } if (NULL != device_handle_) { common::ObDeviceManager::get_instance().release_device(device_handle_); device_handle_ = NULL; } + external_properties_.~ObExternalFileFormat(); + partition_map_.destroy(); ObOperator::destroy(); } diff --git a/src/sql/engine/basic/ob_select_into_op.h b/src/sql/engine/basic/ob_select_into_op.h index f6e9a4bff2..cd8961b3ba 100644 --- a/src/sql/engine/basic/ob_select_into_op.h +++ b/src/sql/engine/basic/ob_select_into_op.h @@ -17,6 +17,11 @@ #include "lib/file/ob_file.h" #include "common/storage/ob_io_device.h" #include "share/backup/ob_backup_struct.h" +#include "sql/engine/cmd/ob_load_data_parser.h" +#ifdef OB_BUILD_CPP_ODPS +#include +#include +#endif namespace oceanbase { @@ -109,22 +114,36 @@ public: ObSelectIntoOp(ObExecContext &exec_ctx, const ObOpSpec &spec, ObOpInput *input) : ObOperator(exec_ctx, spec, input), top_limit_cnt_(INT64_MAX), - file_appender_(), is_first_(true), + basic_url_(), device_handle_(NULL), file_location_(IntoFileLocation::SERVER_DISK), write_offset_(0), - write_bytes_(0), - split_file_id_(0), + data_writer_(NULL), char_enclose_(0), char_escape_(0), has_enclose_(false), has_escape_(false), has_lob_(false), has_json_(false), - 
is_file_opened_(false), print_params_(), - escape_printer_() + escape_printer_(), + do_partition_(false), + json_buf_(NULL), + json_buf_len_(0), + shared_buf_(NULL), + shared_buf_len_(0), + use_shared_buf_(false), + partition_map_(), + curr_partition_num_(0), + external_properties_(), + format_type_(ObExternalFileFormat::FormatType::CSV_FORMAT), +#ifdef OB_BUILD_CPP_ODPS + upload_(NULL), + record_writer_(NULL), +#endif + block_id_(0), + need_commit_(true) { } @@ -187,20 +206,30 @@ public: { public: ObIOBufferWriter(): - buf_(NULL), curr_pos_(0), last_line_pos_(0), buf_len_(0), curr_line_len_(0) {} + buf_(NULL), + buf_len_(0), + curr_pos_(0), + last_line_pos_(0), + curr_line_len_(0), + write_bytes_(0), + is_file_opened_(false), + file_appender_(), + fd_(), + split_file_id_(0), + url_() + {} + ~ObIOBufferWriter() { + file_appender_.~ObFileAppender(); + } void init(char *buf, int64_t buf_len) { buf_ = buf; buf_len_ = buf_len; } - void init_json_buf(char *buf, int64_t buf_len) { - json_buf_ = buf; - json_buf_len_ = buf_len; - } template int flush(flush_func flush_data) { int ret = common::OB_SUCCESS; - if (last_line_pos_ > 0) { - if (OB_FAIL(flush_data(buf_, last_line_pos_))) { + if (last_line_pos_ > 0 && OB_NOT_NULL(buf_)) { + if (OB_FAIL(flush_data(buf_, last_line_pos_, this))) { } else { MEMCPY(buf_, buf_ + last_line_pos_, curr_pos_ - last_line_pos_); curr_pos_ = curr_pos_ - last_line_pos_; @@ -209,37 +238,30 @@ public: } return ret; } - template - int flush_all_for_lob(flush_func flush_data) { - int ret = common::OB_SUCCESS; - if (curr_pos_ > 0) { - if (OB_FAIL(flush_data(buf_, curr_pos_))) { - } else { - curr_line_len_ += (curr_pos_ - last_line_pos_); - curr_pos_ = 0; - last_line_pos_ = 0; - } - } - return ret; - } char *get_buf() { return buf_; } int64_t get_buf_len() { return buf_len_; } - char *get_json_buf() { return json_buf_; } - int64_t get_json_buf_len() { return json_buf_len_; } int64_t get_curr_pos() { return curr_pos_; } int64_t get_last_line_pos() { 
return last_line_pos_; } int64_t get_curr_line_len() {return curr_line_len_; } + int64_t get_write_bytes() { return write_bytes_; } void set_curr_pos(int64_t curr_pos) { curr_pos_ = curr_pos; } void update_last_line_pos() { last_line_pos_ = curr_pos_; } - void reset_curr_line_len() {curr_line_len_ = 0; } + void reset_curr_line_len() { curr_line_len_ = 0; } + void increase_curr_line_len() { curr_line_len_ += (curr_pos_ - last_line_pos_); } + void set_write_bytes(int64_t write_bytes) { write_bytes_ = write_bytes; } private: char *buf_; + int64_t buf_len_; int64_t curr_pos_; int64_t last_line_pos_; - int64_t buf_len_; int64_t curr_line_len_; - char *json_buf_; //json需要多一个buffer用来放转义前的string - int64_t json_buf_len_; + int64_t write_bytes_; + public: + bool is_file_opened_; + ObFileAppender file_appender_; + ObIOFd fd_; + int64_t split_file_id_; + ObString url_; }; virtual int inner_open() override; @@ -248,24 +270,34 @@ public: virtual int inner_get_next_row() override; virtual int inner_get_next_batch(const int64_t max_row_cnt) override; virtual void destroy() override; - void reset() - { - is_first_ = true; - file_appender_.close(); - device_handle_ = NULL; - file_location_ = IntoFileLocation::SERVER_DISK; - write_offset_ = 0; - write_bytes_ = 0; - split_file_id_ = 0; - data_writer_.init(NULL, 0); - is_file_opened_ = false; - } private: + int init_csv_env(); +#ifdef OB_BUILD_CPP_ODPS + int init_odps_tunnel(); + int into_odps(); + int into_odps_batch(const ObBatchRows &brs); + int odps_commit_upload(); + int set_odps_column_value_mysql(apsara::odps::sdk::ODPSTableRecord &table_record, + const ObDatum &datum, + const ObDatumMeta &datum_meta, + const ObObjMeta &obj_meta, + uint32_t col_idx); + int set_odps_column_value_oracle(apsara::odps::sdk::ODPSTableRecord &table_record, + const ObDatum &datum, + const ObDatumMeta &datum_meta, + const ObObjMeta &obj_meta, + uint32_t col_idx); +#endif + int decimal_or_number_to_int64(const ObDatum &datum, const ObDatumMeta 
&datum_meta, int64_t &res); + int decimal_to_string(const ObDatum &datum, + const ObDatumMeta &datum_meta, + std::string &res, + ObIAllocator &allocator); int get_row_str(const int64_t buf_len, bool is_first_row, char *buf, int64_t &pos); - int into_dumpfile(); - int into_outfile(); - int into_outfile_batch(const ObBatchRows &brs); + int into_dumpfile(ObIOBufferWriter *data_writer); + int into_outfile(ObIOBufferWriter *data_writer); + int into_outfile_batch(const ObBatchRows &brs, ObIOBufferWriter *data_writer); int extract_fisrt_wchar_from_varhcar(const ObObj &obj, int32_t &wchar); int print_wchar_to_buf(char *buf, const int64_t buf_len, @@ -273,55 +305,117 @@ private: int32_t wchar, ObString &str, ObCollationType coll_type); - int print_field(const ObObj &obj); - int print_lob_field(const ObObj &obj, const ObExpr &expr, const ObDatum &datum); - void get_buf(char* &buf, int64_t &buf_len, int64_t &pos, bool is_json = false); - int flush_buf(int64_t &pos); - int resize_buf(char* &buf, int64_t &buf_len, int64_t &pos, bool is_json = false); - int write_obj_to_file(const ObObj &obj, bool need_escape = false); - int print_normal_obj_without_escape(const ObObj &obj, - char* &buf, - int64_t &buf_len, - int64_t &pos, - bool is_json = false); - int write_single_char_to_file(const char *wchar); - int write_lob_to_file(const ObObj &obj, const ObExpr &expr, const ObDatum &datum); - int try_split_file(); + int print_field(const ObObj &obj, ObIOBufferWriter &data_writer); + int print_lob_field(const ObObj &obj, + const ObExpr &expr, + const ObDatum &datum, + ObIOBufferWriter &data_writer); + int get_buf(char* &buf, int64_t &buf_len, int64_t &pos, ObIOBufferWriter &data_writer); + int flush_buf(ObIOBufferWriter &data_writer); + int use_shared_buf(ObIOBufferWriter &data_writer, char* &buf, int64_t &buf_len, int64_t &pos); + template + int flush_shared_buf(ObIOBufferWriter &data_writer, flush_func flush_data, bool continue_use_shared_buf = false) { + int ret = common::OB_SUCCESS; + 
if (data_writer.get_curr_pos() > 0 && use_shared_buf_) { + if (OB_FAIL(flush_data(shared_buf_, data_writer.get_curr_pos(), &data_writer))) { + } else { + if (has_lob_) { + data_writer.increase_curr_line_len(); + } + data_writer.set_curr_pos(0); + data_writer.update_last_line_pos(); + use_shared_buf_ = continue_use_shared_buf; + } + } + return ret; + } + int resize_buf(char* &buf, + int64_t &buf_len, + int64_t &pos, + int64_t curr_pos, + bool is_json = false); + int resize_or_flush_shared_buf(ObIOBufferWriter &data_writer, + char* &buf, + int64_t &buf_len, + int64_t &pos); + int check_buf_sufficient(ObIOBufferWriter &data_writer, + char* &buf, + int64_t &buf_len, + int64_t &pos, + int64_t str_len); + int write_obj_to_file(const ObObj &obj, ObIOBufferWriter &data_writer, bool need_escape = false); + int print_str_or_json_with_escape(const ObObj &obj, ObIOBufferWriter &data_writer); + int print_normal_obj_without_escape(const ObObj &obj, ObIOBufferWriter &data_writer); + int print_json_to_json_buf(const ObObj &obj, + char* &buf, + int64_t &buf_len, + int64_t &pos, + ObIOBufferWriter &data_writer); + int write_single_char_to_file(const char *wchar, ObIOBufferWriter &data_writer); + int write_lob_to_file(const ObObj &obj, + const ObExpr &expr, + const ObDatum &datum, + ObIOBufferWriter &data_writer); int into_varlist(); - int open_file(); - int calc_next_file_path(); + int open_file(ObIOBufferWriter &data_writer); + int calc_next_file_path(ObIOBufferWriter &data_writer); int calc_first_file_path(ObString &path); - int split_file(); - void close_file(); - std::function get_flush_function(); + int calc_file_path_with_partition(ObString partition, ObIOBufferWriter &data_writer); + int try_split_file(ObIOBufferWriter &data_writer); + int split_file(ObIOBufferWriter &data_writer); + void close_file(ObIOBufferWriter &data_writer); + std::function get_flush_function(); int prepare_escape_printer(); int check_has_lob_or_json(); + int create_shared_buffer_for_data_writer(); + 
int create_the_only_data_writer(ObIOBufferWriter *&data_writer); + int check_secure_file_path(ObString file_name); + int get_data_writer_for_partition(ObDatum *partition_datum, ObIOBufferWriter *&data_writer); + char *get_json_buf() { return json_buf_; } + int64_t get_json_buf_len() { return json_buf_len_; } + char *get_shared_buf() { return shared_buf_; } + int64_t get_shared_buf_len() { return shared_buf_len_; } private: int64_t top_limit_cnt_; - ObFileAppender file_appender_; bool is_first_; ObObj field_str_; ObObj line_str_; ObObj file_name_; - ObString url_; + ObString basic_url_; // url without partition expr share::ObBackupStorageInfo access_info_; ObIODevice* device_handle_; - ObIOFd fd_; IntoFileLocation file_location_; int64_t write_offset_; - int64_t write_bytes_; - int64_t split_file_id_; - ObIOBufferWriter data_writer_; + ObIOBufferWriter* data_writer_; char char_enclose_; char char_escape_; bool has_enclose_; bool has_escape_; bool has_lob_; bool has_json_; - bool is_file_opened_; common::ObObjPrintParams print_params_; ObEscapePrinter escape_printer_; + bool do_partition_; + char *json_buf_; //json需要多一个buffer用来放转义前的string + int64_t json_buf_len_; + char *shared_buf_; + int64_t shared_buf_len_; + bool use_shared_buf_; + typedef common::hash::ObHashMap ObPartitionWriterMap; + ObPartitionWriterMap partition_map_; + int curr_partition_num_; + ObExternalFileFormat external_properties_; + ObExternalFileFormat::FormatType format_type_; +#ifdef OB_BUILD_CPP_ODPS + apsara::odps::sdk::IUploadPtr upload_; + apsara::odps::sdk::IRecordWriterPtr record_writer_; +#endif + uint32_t block_id_; + bool need_commit_; + static const int64_t SHARED_BUFFER_SIZE = 2LL * 1024 * 1024; + static const int64_t MAX_OSS_FILE_SIZE = 5LL * 1024 * 1024 * 1024; + static const int32_t ODPS_DATE_MIN_VAL = -719162; // '0001-1-1' }; diff --git a/src/sql/engine/cmd/ob_load_data_parser.cpp b/src/sql/engine/cmd/ob_load_data_parser.cpp index 32038712a8..a9c642e8bc 100644 --- 
a/src/sql/engine/cmd/ob_load_data_parser.cpp +++ b/src/sql/engine/cmd/ob_load_data_parser.cpp @@ -19,6 +19,10 @@ #include "lib/string/ob_hex_utils_base.h" #include "deps/oblib/src/lib/list/ob_dlist.h" #include "share/schema/ob_column_schema.h" +#ifdef OB_BUILD_CPP_ODPS +#include "share/ob_encryption_util.h" +#endif +#include "lib/utility/ob_print_utils.h" using namespace oceanbase::sql; using namespace oceanbase::common; @@ -32,10 +36,302 @@ const char INVALID_TERM_CHAR = '\xff'; const char * ObExternalFileFormat::FORMAT_TYPE_STR[] = { "CSV", "PARQUET", + "ODPS", "ORC", }; static_assert(array_elements(ObExternalFileFormat::FORMAT_TYPE_STR) == ObExternalFileFormat::MAX_FORMAT, "Not enough initializer for ObExternalFileFormat"); +int64_t ObODPSGeneralFormat::to_json_kv_string(char *buf, const int64_t buf_len) const +{ + int64_t pos = 0; + int64_t idx = 0; + J_COMMA(); + databuff_printf(buf, buf_len, pos, "\"%s\":\"%s\"", OPTION_NAMES[idx++], to_cstring(ObHexStringWrap(access_type_))); + J_COMMA(); + databuff_printf(buf, buf_len, pos, "\"%s\":\"%s\"", OPTION_NAMES[idx++], to_cstring(ObHexStringWrap(access_id_))); + J_COMMA(); + databuff_printf(buf, buf_len, pos, "\"%s\":\"%s\"", OPTION_NAMES[idx++], to_cstring(ObHexStringWrap(access_key_))); + J_COMMA(); + databuff_printf(buf, buf_len, pos, "\"%s\":\"%s\"", OPTION_NAMES[idx++], to_cstring(ObHexStringWrap(sts_token_))); + J_COMMA(); + databuff_printf(buf, buf_len, pos, "\"%s\":\"%s\"", OPTION_NAMES[idx++], to_cstring(ObHexStringWrap(endpoint_))); + J_COMMA(); + databuff_printf(buf, buf_len, pos, "\"%s\":\"%s\"", OPTION_NAMES[idx++], to_cstring(ObHexStringWrap(project_))); + J_COMMA(); + databuff_printf(buf, buf_len, pos, "\"%s\":\"%s\"", OPTION_NAMES[idx++], to_cstring(ObHexStringWrap(schema_))); + J_COMMA(); + databuff_printf(buf, buf_len, pos, "\"%s\":\"%s\"", OPTION_NAMES[idx++], to_cstring(ObHexStringWrap(table_))); + J_COMMA(); + databuff_printf(buf, buf_len, pos, "\"%s\":\"%s\"", OPTION_NAMES[idx++], 
to_cstring(ObHexStringWrap(quota_))); + J_COMMA(); + databuff_printf(buf, buf_len, pos, "\"%s\":\"%s\"", OPTION_NAMES[idx++], to_cstring(ObHexStringWrap(compression_code_))); + return pos; +} + +int ObODPSGeneralFormat::encrypt_str(common::ObString &src, common::ObString &dst) +{ + int ret = OB_SUCCESS; +#ifdef OB_BUILD_CPP_ODPS + const uint64_t tenant_id = MTL_ID(); + if (src.empty()) { + //do nothing + dst = src; + } else { + char encrypted_string[common::OB_MAX_ENCRYPTED_EXTERNAL_TABLE_PROPERTIES_ITEM_LENGTH] = {0}; + + char hex_buff[common::OB_MAX_ENCRYPTED_EXTERNAL_TABLE_PROPERTIES_ITEM_LENGTH + 1] = {0}; // +1 to reserve space for \0 + int64_t encrypt_len = -1; + if (OB_FAIL(oceanbase::share::ObEncryptionUtil::encrypt_sys_data(tenant_id, + src.ptr(), + src.length(), + encrypted_string, + common::OB_MAX_ENCRYPTED_EXTERNAL_TABLE_PROPERTIES_ITEM_LENGTH, + encrypt_len))) { + + LOG_WARN("fail to encrypt_sys_data", KR(ret), K(src)); + } else if (0 >= encrypt_len || common::OB_MAX_ENCRYPTED_EXTERNAL_TABLE_PROPERTIES_ITEM_LENGTH < encrypt_len * 2) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("encrypt_len is invalid", K(ret), K(encrypt_len), K(common::OB_MAX_ENCRYPTED_EXTERNAL_TABLE_PROPERTIES_ITEM_LENGTH)); + } else if (OB_FAIL(to_hex_cstr(encrypted_string, encrypt_len, hex_buff, common::OB_MAX_ENCRYPTED_EXTERNAL_TABLE_PROPERTIES_ITEM_LENGTH + 1))) { + LOG_WARN("fail to print to hex str", K(ret)); + } else if (OB_FAIL(deep_copy_str(ObString(hex_buff), dst))) { + LOG_WARN("failed to deep copy encrypted_string", K(ret)); + } else { + LOG_TRACE("succ to encrypt src", K(ret)); + } + } +#endif + return ret; +} + +int ObODPSGeneralFormat::decrypt_str(common::ObString &src, common::ObString &dst) +{ + int ret = OB_SUCCESS; +#ifdef OB_BUILD_CPP_ODPS + const uint64_t tenant_id = MTL_ID(); + if (src.empty()) { + // do nothing + dst = src; + } else if (0 != src.length() % 2) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid src", K(src.length()), K(ret)); + } else { + char 
encrypted_password_not_hex[common::OB_MAX_ENCRYPTED_EXTERNAL_TABLE_PROPERTIES_ITEM_LENGTH] = {0}; + char plain_string[common::OB_MAX_EXTERNAL_TABLE_PROPERTIES_ITEM_LENGTH + 1] = { 0 }; // need +1 to reserve space for \0 + int64_t plain_string_len = -1; + if (OB_FAIL(hex_to_cstr(src.ptr(), + src.length(), + encrypted_password_not_hex, + common::OB_MAX_ENCRYPTED_EXTERNAL_TABLE_PROPERTIES_ITEM_LENGTH))) { + LOG_WARN("failed to hex to cstr", K(src.length()), K(ret)); + } else if (OB_FAIL(ObEncryptionUtil::decrypt_sys_data(tenant_id, + encrypted_password_not_hex, + + src.length() / 2, + plain_string, + common::OB_MAX_EXTERNAL_TABLE_PROPERTIES_ITEM_LENGTH + 1, + plain_string_len))) { + LOG_WARN("failed to decrypt_sys_data", K(ret), K(src.length())); + } else if (0 >= plain_string_len) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("decrypt dblink password failed", K(ret), K(plain_string_len)); + } else if (OB_FAIL(deep_copy_str(ObString(plain_string_len, plain_string), dst))) { + LOG_WARN("failed to deep copy plain_string", K(ret)); + } else { + LOG_TRACE("succ to decrypt src", K(ret)); + } + } +#endif + return ret; +} + +int ObODPSGeneralFormat::encrypt() +{ + int ret = OB_SUCCESS; + #ifdef OB_BUILD_CPP_ODPS + ObString encrypted_access_id; + ObString encrypted_access_key; + ObString encrypted_sts_token; + if (OB_FAIL(encrypt_str(access_id_, encrypted_access_id))) { + LOG_WARN("failed to encrypt", K(ret)); + } else if (OB_FAIL(encrypt_str(access_key_, encrypted_access_key))) { + LOG_WARN("failed to encrypt", K(ret)); + } else if (OB_FAIL(encrypt_str(sts_token_, encrypted_sts_token))) { + LOG_WARN("failed to encrypt", K(ret)); + } else { + access_id_ = encrypted_access_id; + access_key_ = encrypted_access_key; + sts_token_ = encrypted_sts_token; + } + #endif + return ret; +} + +int ObODPSGeneralFormat::decrypt() +{ + int ret = OB_SUCCESS; + #ifdef OB_BUILD_CPP_ODPS + ObString decrypted_access_id; + ObString decrypted_access_key; + ObString decrypted_sts_token; + if 
(OB_FAIL(decrypt_str(access_id_, decrypted_access_id))) { + LOG_WARN("failed to encrypt", K(ret)); + } else if (OB_FAIL(decrypt_str(access_key_, decrypted_access_key))) { + LOG_WARN("failed to encrypt", K(ret)); + } else if (OB_FAIL(decrypt_str(sts_token_, decrypted_sts_token))) { + LOG_WARN("failed to encrypt", K(ret)); + } else { + access_id_ = decrypted_access_id; + access_key_ = decrypted_access_key; + sts_token_ = decrypted_sts_token; + } + #endif + return ret; +} + +int ObODPSGeneralFormat::deep_copy_str(const ObString &src, ObString &dest) +{ + int ret = OB_SUCCESS; + char *buf = NULL; + if (src.length() > 0) { + int64_t len = src.length() + 1; + if (OB_ISNULL(buf = static_cast(arena_alloc_.alloc(len)))) { + LOG_ERROR("allocate memory fail", K(len)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + MEMCPY(buf, src.ptr(), len - 1); + buf[len - 1] = '\0'; + dest.assign_ptr(buf, static_cast(len - 1)); + } + } else { + dest.reset(); + } + return ret; +} + +int ObODPSGeneralFormat::deep_copy(const ObODPSGeneralFormat &src) { + int ret = OB_SUCCESS; + if (OB_FAIL(deep_copy_str(src.access_type_, access_type_))) { + LOG_WARN("failed to deep copy", K(ret)); + } else if (OB_FAIL(deep_copy_str(src.access_id_, access_id_))) { + LOG_WARN("failed to deep copy", K(ret)); + } else if (OB_FAIL(deep_copy_str(src.access_key_, access_key_))) { + LOG_WARN("failed to deep copy", K(ret)); + } else if (OB_FAIL(deep_copy_str(src.sts_token_, sts_token_))) { + LOG_WARN("failed to deep copy", K(ret)); + } else if (OB_FAIL(deep_copy_str(src.endpoint_, endpoint_))) { + LOG_WARN("failed to deep copy", K(ret)); + } else if (OB_FAIL(deep_copy_str(src.project_, project_))) { + LOG_WARN("failed to deep copy", K(ret)); + } else if (OB_FAIL(deep_copy_str(src.schema_, schema_))) { + LOG_WARN("failed to deep copy", K(ret)); + } else if (OB_FAIL(deep_copy_str(src.table_, table_))) { + LOG_WARN("failed to deep copy", K(ret)); + } else if (OB_FAIL(deep_copy_str(src.quota_, quota_))) { + 
LOG_WARN("failed to deep copy", K(ret)); + } else if (OB_FAIL(deep_copy_str(src.compression_code_, compression_code_))) { + LOG_WARN("failed to deep copy", K(ret)); + } + return ret; +} + +int ObODPSGeneralFormat::load_from_json_data(json::Pair *&node, ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + int64_t idx = 0; + if (OB_NOT_NULL(node) && 0 == node->name_.case_compare(OPTION_NAMES[idx++]) + && json::JT_STRING == node->value_->get_type()) { + ObObj obj; + OZ (ObHexUtilsBase::unhex(node->value_->get_string(), allocator, obj)); + if (OB_SUCC(ret) && !obj.is_null()) { + access_type_ = obj.get_string(); + } + node = node->get_next(); + } + if (OB_SUCC(ret) && OB_NOT_NULL(node) && 0 == node->name_.case_compare(OPTION_NAMES[idx++]) + && json::JT_STRING == node->value_->get_type()) { + ObObj obj; + OZ (ObHexUtilsBase::unhex(node->value_->get_string(), allocator, obj)); + if (OB_SUCC(ret) && !obj.is_null()) { + access_id_ = obj.get_string(); + } + node = node->get_next(); + } + if (OB_NOT_NULL(node) && 0 == node->name_.case_compare(OPTION_NAMES[idx++]) + && json::JT_STRING == node->value_->get_type()) { + ObObj obj; + OZ (ObHexUtilsBase::unhex(node->value_->get_string(), allocator, obj)); + if (OB_SUCC(ret) && !obj.is_null()) { + access_key_ = obj.get_string(); + } + node = node->get_next(); + } + if (OB_NOT_NULL(node) && 0 == node->name_.case_compare(OPTION_NAMES[idx++]) + && json::JT_STRING == node->value_->get_type()) { + ObObj obj; + OZ (ObHexUtilsBase::unhex(node->value_->get_string(), allocator, obj)); + if (OB_SUCC(ret) && !obj.is_null()) { + sts_token_ = obj.get_string(); + } + node = node->get_next(); + } + if (OB_NOT_NULL(node) && 0 == node->name_.case_compare(OPTION_NAMES[idx++]) + && json::JT_STRING == node->value_->get_type()) { + ObObj obj; + OZ (ObHexUtilsBase::unhex(node->value_->get_string(), allocator, obj)); + if (OB_SUCC(ret) && !obj.is_null()) { + endpoint_ = obj.get_string(); + } + node = node->get_next(); + } + if (OB_NOT_NULL(node) && 0 == 
node->name_.case_compare(OPTION_NAMES[idx++]) + && json::JT_STRING == node->value_->get_type()) { + ObObj obj; + OZ (ObHexUtilsBase::unhex(node->value_->get_string(), allocator, obj)); + if (OB_SUCC(ret) && !obj.is_null()) { + project_ = obj.get_string(); + } + node = node->get_next(); + } + if (OB_NOT_NULL(node) && 0 == node->name_.case_compare(OPTION_NAMES[idx++]) + && json::JT_STRING == node->value_->get_type()) { + ObObj obj; + OZ (ObHexUtilsBase::unhex(node->value_->get_string(), allocator, obj)); + if (OB_SUCC(ret) && !obj.is_null()) { + schema_ = obj.get_string(); + } + node = node->get_next(); + } + if (OB_NOT_NULL(node) && 0 == node->name_.case_compare(OPTION_NAMES[idx++]) + && json::JT_STRING == node->value_->get_type()) { + ObObj obj; + OZ (ObHexUtilsBase::unhex(node->value_->get_string(), allocator, obj)); + if (OB_SUCC(ret) && !obj.is_null()) { + table_ = obj.get_string(); + } + node = node->get_next(); + } + if (OB_NOT_NULL(node) && 0 == node->name_.case_compare(OPTION_NAMES[idx++]) + && json::JT_STRING == node->value_->get_type()) { + ObObj obj; + OZ (ObHexUtilsBase::unhex(node->value_->get_string(), allocator, obj)); + if (OB_SUCC(ret) && !obj.is_null()) { + quota_ = obj.get_string(); + } + node = node->get_next(); + } + if (OB_NOT_NULL(node) && 0 == node->name_.case_compare(OPTION_NAMES[idx++]) + && json::JT_STRING == node->value_->get_type()) { + ObObj obj; + OZ (ObHexUtilsBase::unhex(node->value_->get_string(), allocator, obj)); + if (OB_SUCC(ret) && !obj.is_null()) { + compression_code_ = obj.get_string(); + } + node = node->get_next(); + } + return ret; +} + int ObCSVGeneralFormat::init_format(const ObDataInFileStruct &format, int64_t file_column_nums, ObCollationType file_cs_type) @@ -397,6 +693,9 @@ int64_t ObExternalFileFormat::to_string(char *buf, const int64_t buf_len) const pos += csv_format_.to_json_kv_string(buf + pos, buf_len - pos); pos += origin_file_format_str_.to_json_kv_string(buf + pos, buf_len - pos); break; + case ODPS_FORMAT: 
+ pos += odps_format_.to_json_kv_string(buf + pos, buf_len - pos); + break; default: pos += 0; } @@ -445,6 +744,9 @@ int ObExternalFileFormat::load_from_string(const ObString &str, ObIAllocator &al OZ (csv_format_.load_from_json_data(format_type_node, allocator)); OZ (origin_file_format_str_.load_from_json_data(format_type_node, allocator)); break; + case ODPS_FORMAT: + OZ (odps_format_.load_from_json_data(format_type_node, allocator)); + break; case PARQUET_FORMAT: case ORC_FORMAT: break; @@ -480,6 +782,14 @@ int ObExternalFileFormat::mock_gen_column_def( } break; } + case ODPS_FORMAT: { + uint64_t odps_column_idx = column.get_column_id() - OB_APP_MIN_COLUMN_ID + 1; + if (OB_FAIL(temp_str.append_fmt("%s%lu", N_EXTERNAL_TABLE_COLUMN_PREFIX, odps_column_idx))) { + LOG_WARN("fail to append sql str", K(ret)); + } else { + } + break; + } case PARQUET_FORMAT: { if (OB_FAIL(temp_str.append_fmt("get_path(%s, '%.*s')", N_EXTERNAL_FILE_ROW, @@ -497,8 +807,8 @@ int ObExternalFileFormat::mock_gen_column_def( } if (OB_SUCC(ret)) { if (OB_FAIL(ob_write_string(allocator, temp_str.string(), def))) { - LOG_WARN("fail to write string", K(ret)); - } + LOG_WARN("fail to write string", K(ret)); + } } return ret; diff --git a/src/sql/engine/cmd/ob_load_data_parser.h b/src/sql/engine/cmd/ob_load_data_parser.h index d8ef54a213..8683f169dc 100644 --- a/src/sql/engine/cmd/ob_load_data_parser.h +++ b/src/sql/engine/cmd/ob_load_data_parser.h @@ -30,6 +30,55 @@ namespace sql { class ObDataInFileStruct; +struct ObODPSGeneralFormat { + ObODPSGeneralFormat() : + access_type_(), + access_id_(), + access_key_(), + sts_token_(), + endpoint_(), + project_(), + schema_(), + table_(), + quota_(), + compression_code_() + {} + int deep_copy_str(const ObString &src, + ObString &dest); + int deep_copy(const ObODPSGeneralFormat &src); + int encrypt_str(common::ObString &src, common::ObString &dst); + int decrypt_str(common::ObString &src, common::ObString &dst); + int encrypt(); + int decrypt(); + static 
constexpr const char *OPTION_NAMES[] = { + "ACCESSTYPE", + "ACCESSID", + "ACCESSKEY", + "STSTOKEN", + "ENDPOINT", + "PROJECT_NAME", + "SCHEMA_NAME", + "TABLE_NAME", + "QUOTA_NAME", + "COMPRESSION_CODE", + }; + common::ObString access_type_; + common::ObString access_id_; + common::ObString access_key_; + common::ObString sts_token_; + common::ObString endpoint_; + common::ObString project_; + common::ObString schema_; + common::ObString table_; + common::ObString quota_; + common::ObString compression_code_; + common::ObArenaAllocator arena_alloc_; + int64_t to_json_kv_string(char* buf, const int64_t buf_len) const; + int load_from_json_data(json::Pair *&node, common::ObIAllocator &allocator); + TO_STRING_KV(K_(access_type), K_(access_id), K_(access_key), K_(sts_token), K_(endpoint), K_(project), K_(schema), K_(table), K_(quota), K_(compression_code)); + OB_UNIS_VERSION(1); +}; + struct ObCSVGeneralFormat { ObCSVGeneralFormat () : line_start_str_(), @@ -524,6 +573,7 @@ struct ObExternalFileFormat INVALID_FORMAT = -1, CSV_FORMAT, PARQUET_FORMAT, + ODPS_FORMAT, ORC_FORMAT, MAX_FORMAT }; @@ -542,6 +592,7 @@ struct ObExternalFileFormat ObOriginFileFormat origin_file_format_str_; FormatType format_type_; sql::ObCSVGeneralFormat csv_format_; + sql::ObODPSGeneralFormat odps_format_; ObLoadCompressionFormat compression_format_; uint64_t options_; static const char *FORMAT_TYPE_STR[]; diff --git a/src/sql/engine/cmd/ob_table_executor.cpp b/src/sql/engine/cmd/ob_table_executor.cpp index 13bcd126f0..22e3f2cdc6 100644 --- a/src/sql/engine/cmd/ob_table_executor.cpp +++ b/src/sql/engine/cmd/ob_table_executor.cpp @@ -621,13 +621,14 @@ int ObCreateTableExecutor::execute(ObExecContext &ctx, ObCreateTableStmt &stmt) table_schema.get_external_file_location(), table_schema.get_external_file_location_access_info(), table_schema.get_external_file_pattern(), + table_schema.get_external_properties(), + table_schema.is_partitioned_table(), regexp_vars, ctx.get_allocator(), tmp, file_urls, 
file_sizes)); } - if (OB_FAIL(ret)) { } else { create_table_arg.is_inner_ = my_session->is_inner(); @@ -1025,7 +1026,10 @@ int ObAlterTableExecutor::execute_alter_external_table(ObExecContext &ctx, ObAlt arg.alter_table_schema_.get_table_id(), arg.alter_table_schema_.get_external_file_location(), arg.alter_table_schema_.get_external_file_location_access_info(), - arg.alter_table_schema_.get_external_file_pattern(), regexp_vars, ctx.get_allocator(), + arg.alter_table_schema_.get_external_file_pattern(), + arg.alter_table_schema_.get_external_properties(), + arg.alter_table_schema_.is_partitioned_table(), + regexp_vars, ctx.get_allocator(), full_path, file_urls, file_sizes)); @@ -1157,6 +1161,8 @@ int ObAlterTableExecutor::execute(ObExecContext &ctx, ObAlterTableStmt &stmt) alter_table_arg.alter_table_schema_.get_external_file_location(), alter_table_arg.alter_table_schema_.get_external_file_location_access_info(), alter_table_arg.alter_table_schema_.get_external_file_pattern(), + alter_table_arg.alter_table_schema_.get_external_properties(), + alter_table_arg.alter_table_schema_.is_partitioned_table(), regexp_vars, ctx.get_allocator(), full_path, diff --git a/src/sql/engine/expr/ob_datum_cast.cpp b/src/sql/engine/expr/ob_datum_cast.cpp index 0e06e812ea..b0a921276e 100644 --- a/src/sql/engine/expr/ob_datum_cast.cpp +++ b/src/sql/engine/expr/ob_datum_cast.cpp @@ -898,6 +898,14 @@ static OB_INLINE int common_int_number(const ObExpr &expr, return ret; } +int ObOdpsDataTypeCastUtil::common_int_number_wrap(const ObExpr &expr, + int64_t in_val, + ObIAllocator &alloc, + number::ObNumber &nmb) +{ + return common_int_number(expr, in_val, alloc, nmb); +} + static OB_INLINE int common_int_date(const ObExpr &expr, const int64_t in_val, ObDatum &res_datum) @@ -1265,6 +1273,14 @@ static OB_INLINE int common_string_number(const ObExpr &expr, return ret; } +int ObOdpsDataTypeCastUtil::common_string_number_wrap(const ObExpr &expr, + const ObString &in_str, + ObIAllocator &alloc, + 
number::ObNumber &nmb) +{ + return common_string_number(expr, in_str, alloc, nmb); +} + static int common_string_decimalint(const ObExpr &expr, const ObString &in_str, const ObUserLoggingCtx *user_logging_ctx, ObDecimalIntBuilder &res_val) @@ -1395,6 +1411,128 @@ static int common_string_decimalint(const ObExpr &expr, const ObString &in_str, #undef SET_ZERO } +int ObOdpsDataTypeCastUtil::common_string_decimalint_wrap(const ObExpr &expr, const ObString &in_str, + const ObUserLoggingCtx *user_logging_ctx, + ObDecimalIntBuilder &res_val) +{// TODO: add cases +#define SET_ZERO(int_type) \ + int_type v = 0; \ + res_val.from(v); \ + break + + int ret = OB_SUCCESS; + ObObjType in_type = ObVarcharType; + int16_t in_scale = 0, in_precision = 0; + ObScale out_scale = expr.datum_meta_.scale_; + ObPrecision out_prec = expr.datum_meta_.precision_; + ObDecimalIntBuilder tmp_alloc; + ObDecimalInt *decint = nullptr; + int32_t int_bytes = 0; + // set default value + switch (get_decimalint_type(out_prec)) { + case common::DECIMAL_INT_32: { + SET_ZERO(int32_t); + } + case common::DECIMAL_INT_64: { + SET_ZERO(int64_t); + } + case common::DECIMAL_INT_128: { + SET_ZERO(int128_t); + } + case common::DECIMAL_INT_256: { + SET_ZERO(int256_t); + } + case common::DECIMAL_INT_512: { + SET_ZERO(int512_t); + } + default: + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected precision", K(out_prec)); + } + if (OB_FAIL(ret)) { + } else { + if (ObHexStringType == in_type) { + uint64_t in_val = hex_to_uint64(in_str); + in_precision = ob_fast_digits10(in_val); + if (OB_FAIL(wide::from_integer(in_val, tmp_alloc, decint, int_bytes, in_precision))) { + LOG_WARN("from integer failed", K(in_val), K(ret)); + } else { + in_scale = 0; + } + } else if (0 == in_str.length()) { + ret = OB_ERR_TRUNCATED_WRONG_VALUE_FOR_FIELD; + } else if (OB_FAIL(wide::from_string(in_str.ptr(), in_str.length(), tmp_alloc, in_scale, + in_precision, int_bytes, decint))) { + LOG_WARN("failed to parse string", K(ret)); + if 
(OB_NUMERIC_OVERFLOW == ret && lib::is_mysql_mode()) { + // bug: 4263211. compatible with mysql behavior when value overflows type range. + // select cast('1e500' as decimal); -> max_val + // select cast('-1e500' as decimal); -> min_val + int64_t i = 0; + while (i < in_str.length() && isspace(in_str[i])) { ++i; } + bool is_neg = (in_str[i] == '-'); + const ObDecimalInt *limit_decint = nullptr; + if (is_neg) { + limit_decint = wide::ObDecimalIntConstValue::get_min_value(out_prec); + int_bytes = wide::ObDecimalIntConstValue::get_int_bytes_by_precision(out_prec); + } else { + limit_decint = wide::ObDecimalIntConstValue::get_max_value(out_prec); + int_bytes = wide::ObDecimalIntConstValue::get_int_bytes_by_precision(out_prec); + } + in_scale = out_scale; + in_precision = out_prec; + if (OB_ISNULL(limit_decint)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null decimal int", K(ret)); + } else if (OB_ISNULL(decint = (ObDecimalInt *)tmp_alloc.alloc(int_bytes))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate memory", K(ret)); + } else { + MEMCPY(decint, limit_decint, int_bytes); + } + } + } + int warning = ret; + ret = OB_SUCCESS; + if (decint != nullptr && int_bytes != 0) { + // Decimal int not null means a valid decimal int was parsed regardless of wether there's + // error or not.We then do scale and calculate res_datum as normal in order to be compatible + // with mysql. + // e.g. 
+ // OceanBase(root@test)>set sql_mode = ''; + // Query OK, 0 rows affected (0.00 sec) + // + // OceanBase(root@test)>insert into t2 values ('1ab'); + // Query OK, 1 row affected (0.00 sec) + // + // OceanBase(root@test)>select * from t2; + // +-------+ + // | a | + // +-------+ + // | 1.000 | + // +-------+ + // 1 row in set (0.01 sec) + if (ObDatumCast::need_scale_decimalint(in_scale, in_precision, out_scale, out_prec)) { + if (OB_FAIL(ObDatumCast::common_scale_decimalint(decint, int_bytes, in_scale, out_scale, + out_prec, expr.extra_, res_val, + user_logging_ctx))) { + LOG_WARN("scale decimal int failed", K(ret)); + } + } else { + res_val.from(decint, int_bytes); + } + } + if (OB_SUCC(ret)) { + const ObCastMode cast_mode = expr.extra_; + if (CAST_FAIL(warning)) { + LOG_WARN("string_decimalint failed", K(ret), K(in_type), K(cast_mode), K(in_str)); + } + } + } + return ret; +#undef SET_ZERO +} + static OB_INLINE int common_string_datetime(const ObExpr &expr, const ObString &in_str, ObEvalCtx &ctx, @@ -1644,6 +1782,111 @@ static int common_string_string(const ObExpr &expr, return ret; } +int ObOdpsDataTypeCastUtil::common_check_convert_string(const ObExpr &expr, + ObEvalCtx &ctx, + const ObString &in_str, + ObObjType in_type, + ObCollationType in_cs_type, + ObDatum &res_datum, + bool &has_set_res) +{ + int ret = OB_SUCCESS; + ObObjType out_type = expr.datum_meta_.type_; + ObCollationType out_cs_type = expr.datum_meta_.cs_type_; + if (lib::is_oracle_mode() && + (ob_is_blob(out_type, out_cs_type) || ob_is_blob_locator(out_type, out_cs_type)) && + !(ob_is_blob(in_type, in_cs_type) || ob_is_blob_locator(in_type, in_cs_type) + || ob_is_raw(in_type))) { + // !blob -> blob + if (ObCharType == in_type || ObVarcharType == in_type) { + if (OB_FAIL(ObDatumHexUtils::hextoraw_string(expr, in_str, ctx, res_datum, has_set_res))) { + LOG_WARN("fail to hextoraw_string for blob", K(ret), K(in_str)); + } + } else { + ret = OB_NOT_SUPPORTED; + LOG_ERROR("invalid use of blob type", 
K(ret), K(in_str), K(out_type)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "cast to blob type"); + } + } else { + // When convert blob/binary/varbinary to other charset, need to align to mbminlen of destination charset + // by add '\0' prefix in mysql mode. (see mysql String::copy) + const ObCharsetInfo *cs = NULL; + int64_t align_offset = 0; + if (CS_TYPE_BINARY == in_cs_type && lib::is_mysql_mode() + && (NULL != (cs = ObCharset::get_charset(out_cs_type)))) { + if (cs->mbminlen > 0 && in_str.length() % cs->mbminlen != 0) { + align_offset = cs->mbminlen - in_str.length() % cs->mbminlen; + } + } + if (OB_FAIL(common_copy_string_zf(expr, in_str, ctx, res_datum, align_offset))) { + LOG_WARN("common_copy_string_zf failed", K(ret), K(in_str)); + } + } + return ret; +} + +int ObOdpsDataTypeCastUtil::common_string_string_wrap(const ObExpr &expr, + const ObObjType in_type, + const ObCollationType in_cs_type, + const ObObjType out_type, + const ObCollationType out_cs_type, + const ObString &in_str, + ObEvalCtx &ctx, + ObDatum &res_datum, + bool& has_set_res) +{ + int ret = OB_SUCCESS; + if (lib::is_oracle_mode() + && ob_is_clob(in_type, in_cs_type) + && (0 == in_str.length()) + && !ob_is_clob(out_type, out_cs_type)) { + // oracle 模式下的 empty_clob 被 cast 成其他类型时结果是 NULL + res_datum.set_null(); + } else if (CS_TYPE_BINARY != in_cs_type && + CS_TYPE_BINARY != out_cs_type && + (ObCharset::charset_type_by_coll(in_cs_type) != + ObCharset::charset_type_by_coll(out_cs_type))) { + // handle !blob->!blob + char *buf = NULL; + //latin1 1bytes,utf8mb4 4bytes,the factor should be 4 + int64_t buf_len = in_str.length() * ObCharset::CharConvertFactorNum; + uint32_t result_len = 0; + buf = expr.get_str_res_mem(ctx, buf_len); + if (OB_ISNULL(buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc memory failed", K(ret)); + } else if (OB_FAIL(ObCharset::charset_convert(in_cs_type, in_str.ptr(), + in_str.length(), out_cs_type, buf, + buf_len, result_len, lib::is_mysql_mode(), + 
!CM_IS_IGNORE_CHARSET_CONVERT_ERR(expr.extra_) && CM_IS_IMPLICIT_CAST(expr.extra_), + ObCharset::is_cs_unicode(out_cs_type) ? 0xFFFD : '?'))) { + LOG_WARN("charset convert failed", K(ret)); + } else { + res_datum.set_string(buf, result_len); + } + } else { + if (CS_TYPE_BINARY == in_cs_type || CS_TYPE_BINARY == out_cs_type) { + // just copy string when in_cs_type or out_cs_type is binary + if (OB_FAIL(ObOdpsDataTypeCastUtil::common_check_convert_string(expr, ctx, in_str, in_type, in_cs_type, res_datum, has_set_res))) { + LOG_WARN("fail to common_check_convert_string", K(ret), K(in_str)); + } + } else if (lib::is_oracle_mode() + && ob_is_clob(in_type, in_cs_type)) { + res_datum.set_string(in_str.ptr(), in_str.length()); + } else if (lib::is_oracle_mode() + && ob_is_clob(out_type, out_cs_type)) { + res_datum.set_string(in_str.ptr(), in_str.length()); + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("same charset should not be here, just use cast_eval_arg", K(ret), + K(in_type), K(out_type), K(in_cs_type), K(out_cs_type)); + } + } + LOG_DEBUG("string_string cast", K(ret), K(in_str), + K(ObString(res_datum.len_, res_datum.ptr_))); + return ret; +} + static int common_string_otimestamp(const ObExpr &expr, const ObString &in_str, ObEvalCtx &ctx, @@ -1956,6 +2199,54 @@ static int common_string_text(const ObExpr &expr, return ret; } +int ObOdpsDataTypeCastUtil::common_string_text_wrap(const ObExpr &expr, + const ObString &in_str, + ObEvalCtx &ctx, + const ObLobLocatorV2 *lob_locator, + ObDatum &res_datum, + ObObjType &in_type, + ObCollationType &in_cs_type) +{ + int ret = OB_SUCCESS; + ObObjType out_type = expr.datum_meta_.type_; // ObLongTextType + ObCollationType out_cs_type = expr.datum_meta_.cs_type_; + ObString res_str = in_str; + bool is_final_res = false; + bool is_different_charset_type = (ObCharset::charset_type_by_coll(in_cs_type) + != ObCharset::charset_type_by_coll(out_cs_type)); + OB_ASSERT(ob_is_text_tc(out_type)); + if (is_different_charset_type) { + if 
(OB_FAIL(ObOdpsDataTypeCastUtil::common_string_string_wrap(expr, in_type, in_cs_type, out_type, + out_cs_type, in_str, ctx, res_datum, is_final_res))) { + LOG_WARN("Lob: fail to cast string to longtext", K(ret), K(in_str), K(expr)); + } else if (res_datum.is_null()) { + // only for blob cast to other types in oracle mode, in/out type/collation type must be different. + is_final_res = true; + } else if (is_final_res) { + // is_final_res = true; // hex to text + } else if (OB_FAIL(copy_datum_str_with_tmp_alloc(ctx, res_datum, res_str))) { + LOG_WARN("Lob: copy datum str with tmp alloc", K(ret)); + } else { /* do nothing */ } + } + + if (OB_SUCC(ret) && !is_final_res) { + ObTextStringDatumResult str_result(expr.datum_meta_.type_, &expr, &ctx, &res_datum); + if (lob_locator == NULL) { + if (OB_FAIL(str_result.init(res_str.length()))) { + LOG_WARN("Lob: init lob result failed"); + } else if (OB_FAIL(str_result.append(res_str.ptr(), res_str.length()))) { + LOG_WARN("Lob: append lob result failed"); + } else { /* do nothing */ } + } else if (OB_FAIL(str_result.copy(lob_locator))) { + LOG_WARN("Lob: copy lob result failed"); + } else { /* do nothing*/ } + str_result.set_result(); + } + + string_lob_debug(in_type, in_cs_type, expr.obj_meta_.has_lob_header(), out_type, out_cs_type, res_str, res_datum, ret); + return ret; +} + static int common_uint_bit(const ObExpr &expr, const uint64_t &in_value, ObEvalCtx &ctx, diff --git a/src/sql/engine/expr/ob_datum_cast.h b/src/sql/engine/expr/ob_datum_cast.h index 7be19a7b84..6cd3d2c5cb 100644 --- a/src/sql/engine/expr/ob_datum_cast.h +++ b/src/sql/engine/expr/ob_datum_cast.h @@ -32,6 +32,45 @@ namespace sql class ObPhysicalPlanCtx; struct ObUserLoggingCtx; +class ObOdpsDataTypeCastUtil +{ +public: + static int common_int_number_wrap(const ObExpr &expr, + int64_t in_val, + ObIAllocator &alloc, + number::ObNumber &nmb); + static int common_string_decimalint_wrap(const ObExpr &expr, const ObString &in_str, + const ObUserLoggingCtx 
*user_logging_ctx, + ObDecimalIntBuilder &res_val); + static int common_string_number_wrap(const ObExpr &expr, + const ObString &in_str, + ObIAllocator &alloc, + number::ObNumber &nmb); + static int common_string_string_wrap(const ObExpr &expr, + const ObObjType in_type, + const ObCollationType in_cs_type, + const ObObjType out_type, + const ObCollationType out_cs_type, + const ObString &in_str, + ObEvalCtx &ctx, + ObDatum &res_datum, + bool& has_set_res); + static int common_string_text_wrap(const ObExpr &expr, + const ObString &in_str, + ObEvalCtx &ctx, + const ObLobLocatorV2 *lob_locator, + ObDatum &res_datum, + ObObjType &in_type, + ObCollationType &in_cs_type); + static int common_check_convert_string(const ObExpr &expr, + ObEvalCtx &ctx, + const ObString &in_str, + ObObjType in_type, + ObCollationType in_cs_type, + ObDatum &res_datum, + bool &has_set_res); +}; + // extract accuracy info from %expr and call datum_accuracy_check() below. int datum_accuracy_check(const ObExpr &expr, const uint64_t cast_mode, diff --git a/src/sql/engine/px/ob_dfo.cpp b/src/sql/engine/px/ob_dfo.cpp index 8092b23eec..60926c6aee 100644 --- a/src/sql/engine/px/ob_dfo.cpp +++ b/src/sql/engine/px/ob_dfo.cpp @@ -194,6 +194,7 @@ int ObPxSqcMeta::assign(const ObPxSqcMeta &other) temp_file.file_id_ = other_file.file_id_; temp_file.part_id_ = other_file.part_id_; temp_file.file_addr_ = other_file.file_addr_; + temp_file.file_size_ = other_file.file_size_; if (OB_FAIL(ob_write_string(allocator_, other_file.file_url_, temp_file.file_url_))) { LOG_WARN("fail to write string", K(ret)); } else if (OB_FAIL(access_external_table_files_.push_back(temp_file))) { diff --git a/src/sql/engine/px/ob_dfo.h b/src/sql/engine/px/ob_dfo.h index ae66b3cf18..8ff38a6408 100644 --- a/src/sql/engine/px/ob_dfo.h +++ b/src/sql/engine/px/ob_dfo.h @@ -520,7 +520,8 @@ public: p2p_dh_map_info_(), coord_info_ptr_(nullptr), force_bushy_(false), - query_sql_() + query_sql_(), + has_into_odps_(false) { } @@ -561,6 +562,8 
@@ public: inline bool has_need_branch_id_op() const { return has_need_branch_id_op_; } inline void set_temp_table_scan(bool has_scan) { has_temp_scan_ = has_scan; } inline bool has_temp_table_scan() const { return has_temp_scan_; } + inline void set_into_odps(bool has_into_odps) { has_into_odps_ = has_into_odps; } + inline bool has_into_odps() const { return has_into_odps_; } inline bool is_fast_dfo() const { return is_prealloc_receive_channel() || is_prealloc_transmit_channel(); } inline void set_slave_mapping_type(SlaveMappingType v) { slave_mapping_type_ = v; } inline SlaveMappingType get_slave_mapping_type() { return slave_mapping_type_; } @@ -813,6 +816,7 @@ private: bool force_bushy_; bool partition_random_affinitize_{true}; // whether do partition random in gi task split ObString query_sql_; + bool has_into_odps_; }; diff --git a/src/sql/engine/px/ob_dfo_mgr.cpp b/src/sql/engine/px/ob_dfo_mgr.cpp index 8ead8434aa..39c12c7b4b 100644 --- a/src/sql/engine/px/ob_dfo_mgr.cpp +++ b/src/sql/engine/px/ob_dfo_mgr.cpp @@ -28,6 +28,7 @@ #include "share/detect/ob_detect_manager_utils.h" #include "sql/engine/px/ob_px_coord_op.h" #include "sql/engine/basic/ob_material_vec_op.h" +#include "sql/engine/basic/ob_select_into_op.h" using namespace oceanbase::common; using namespace oceanbase::sql; @@ -541,6 +542,18 @@ int ObDfoMgr::do_split(ObExecContext &exec_ctx, OZ(px_coord_info.p2p_temp_table_info_.temp_access_ops_.push_back(phy_op)); OZ(px_coord_info.p2p_temp_table_info_.dfos_.push_back(parent_dfo)); } + } else if (phy_op->get_type() == PHY_SELECT_INTO && NULL != parent_dfo) { + // ODPS only supports parallelism on a single machine, so there can be only one SQC + const ObSelectIntoSpec *select_into_spec = static_cast<const ObSelectIntoSpec *>(phy_op); + ObExternalFileFormat external_properties; + if (!select_into_spec->external_properties_.str_.empty()) { + if (OB_FAIL(external_properties.load_from_string(select_into_spec->external_properties_.str_, + allocator))) { + LOG_WARN("failed to load external properties", K(ret)); + } else if 
(ObExternalFileFormat::FormatType::ODPS_FORMAT == external_properties.format_type_) { + parent_dfo->set_into_odps(true); + } + } } else if (IS_PX_GI(phy_op->get_type()) && NULL != parent_dfo) { const ObGranuleIteratorSpec *gi_spec = static_cast<const ObGranuleIteratorSpec *>(phy_op); diff --git a/src/sql/engine/px/ob_dfo_scheduler.cpp b/src/sql/engine/px/ob_dfo_scheduler.cpp index b8b7f681c0..44ff81dfa8 100644 --- a/src/sql/engine/px/ob_dfo_scheduler.cpp +++ b/src/sql/engine/px/ob_dfo_scheduler.cpp @@ -1426,7 +1426,7 @@ int ObParallelDfoScheduler::schedule_pair(ObExecContext &exec_ctx, parent))) { LOG_WARN("fail alloc addr by data distribution", K(parent), K(ret)); } else { /*do nohting.*/ } - } else if (parent.is_root_dfo()) { + } else if (parent.is_root_dfo() || parent.has_into_odps()) { // QC/local dfo,直接在本机本线程执行,无需计算执行位置 if (OB_FAIL(ObPXServerAddrUtil::alloc_by_local_distribution(exec_ctx, parent))) { diff --git a/src/sql/engine/px/ob_granule_pump.cpp b/src/sql/engine/px/ob_granule_pump.cpp index 233cb76205..24c8395d0d 100644 --- a/src/sql/engine/px/ob_granule_pump.cpp +++ b/src/sql/engine/px/ob_granule_pump.cpp @@ -643,6 +643,34 @@ int ObGranulePump::add_new_gi_task(ObGranulePumpArgs &args) random_type, partition_granule))) { LOG_WARN("failed to prepare random gi task", K(ret), K(partition_granule)); + } else if (OB_FAIL(init_external_odps_table_downloader(args))) { + LOG_WARN("failed to init external odps table downloader", K(ret)); + } + } + return ret; +} + +int ObGranulePump::init_external_odps_table_downloader(ObGranulePumpArgs &args) +{ + int ret = OB_SUCCESS; + const ObTableScanSpec *tsc = NULL; + sql::ObExternalFileFormat external_odps_format; + if (!args.external_table_files_.empty() && + 0 == args.external_table_files_.at(0).file_id_) { //file_id_ == 0 means it's a external odps table + ObIArray<const ObTableScanSpec *> &scan_ops = args.op_info_.get_scan_ops(); + if (scan_ops.empty() || scan_ops.count() != gi_task_array_map_.count()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid scan ops and gi task 
array result", K(ret), K(scan_ops.count()), K(gi_task_array_map_.count())); + } else if (OB_ISNULL(tsc = scan_ops.at(0))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null ptr", K(ret)); +#ifdef OB_BUILD_CPP_ODPS + } else if (OB_FAIL(odps_partition_downloader_mgr_.init_downloader(args.external_table_files_, + tsc->tsc_ctdef_.scan_ctdef_.external_file_format_str_.str_))) { + LOG_WARN("init odps_partition_downloader_mgr_ failed", K(ret), K(args.external_table_files_.count())); +#endif + } else { + LOG_TRACE("succ to init odps table partition downloader", K(ret)); } } return ret; @@ -693,6 +721,9 @@ void ObGranulePump::destroy() { gi_task_array_map_.reset(); pump_args_.reset(); +#ifdef OB_BUILD_CPP_ODPS + odps_partition_downloader_mgr_.reset(); +#endif } void ObGranulePump::reset_task_array() @@ -805,6 +836,8 @@ int ObGranuleSplitter::split_gi_task(ObGranulePumpArgs &args, K(ss_ranges), K(taskset_idxs), K(random_type)); + } else { + } } return ret; @@ -997,8 +1030,6 @@ int ObRandomGranuleSplitter::split_granule(ObGranulePumpArgs &args, } else { gi_task_array_result.at(idx).tsc_op_id_ = op_id; } - LOG_TRACE("random granule split a task_array", - K(op_id), K(scan_key_id), K(taskset_array), K(ret), K(scan_ops.count())); } } return ret; @@ -1592,7 +1623,6 @@ int ObGranulePump::init_arg( for (int i = 0; OB_SUCC(ret) && i < partitions_info.count(); ++i) { OZ(arg.partitions_info_.push_back(partitions_info.at(i))); } - OZ(arg.external_table_files_.assign(external_table_files)); if (OB_SUCC(ret)) { diff --git a/src/sql/engine/px/ob_granule_pump.h b/src/sql/engine/px/ob_granule_pump.h index 0d75c523f1..5c5f895ac9 100644 --- a/src/sql/engine/px/ob_granule_pump.h +++ b/src/sql/engine/px/ob_granule_pump.h @@ -556,7 +556,18 @@ public: common::ObIArray *get_pruning_table_location() { return &pruning_table_locations_; } int get_first_tsc_range_cnt(int64_t &cnt); const GITaskArrayMap &get_task_array_map() const { return gi_task_array_map_; } +#ifdef OB_BUILD_CPP_ODPS + inline 
int get_odps_downloader(int64_t part_id, apsara::odps::sdk::IDownloadPtr &downloader) { + int ret = OB_SUCCESS; + downloader = NULL; + ret = odps_partition_downloader_mgr_.get_odps_downloader(part_id, downloader); + return ret; + } + inline bool is_odps_downloader_inited() { return odps_partition_downloader_mgr_.is_download_mgr_inited(); } + ObOdpsPartitionDownloaderMgr &get_odps_mgr() { return odps_partition_downloader_mgr_; } +#endif private: + int init_external_odps_table_downloader(ObGranulePumpArgs &args); int fetch_granule_by_worker_id(const ObGITaskSet *&task_set, int64_t &pos, int64_t thread_id, @@ -597,6 +608,9 @@ private: bool partition_wise_join_; volatile bool no_more_task_from_shared_pool_; // try notify worker exit earlier GITaskArrayMap gi_task_array_map_; +#ifdef OB_BUILD_CPP_ODPS + ObOdpsPartitionDownloaderMgr odps_partition_downloader_mgr_; +#endif ObGranuleSplitterType splitter_type_; common::ObArray pump_args_; bool need_partition_pruning_; diff --git a/src/sql/engine/px/ob_granule_util.cpp b/src/sql/engine/px/ob_granule_util.cpp index 5a4d020e5f..f47568df9c 100644 --- a/src/sql/engine/px/ob_granule_util.cpp +++ b/src/sql/engine/px/ob_granule_util.cpp @@ -28,6 +28,9 @@ #include "share/external_table/ob_external_table_utils.h" #include "sql/engine/table/ob_external_table_access_service.h" #include "sql/das/ob_das_simple_op.h" +#ifdef OB_BUILD_CPP_ODPS +#include "sql/engine/table/ob_odps_table_row_iter.h" +#endif using namespace oceanbase::common; using namespace oceanbase::share; @@ -85,12 +88,14 @@ int ObGranuleUtil::split_granule_for_external_table(ObIAllocator &allocator, ObIArray &granule_ranges, ObIArray &granule_idx) { - UNUSED(parallelism); UNUSED(tsc); int ret = OB_SUCCESS; + sql::ObExternalFileFormat external_file_format; if (ranges.count() < 1 || tablets.count() < 1 || OB_ISNULL(tsc)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("the invalid argument", K(ret), K(ranges.count()), K(tablets.count())); + } else if 
(OB_FAIL(external_file_format.load_from_string(tsc->tsc_ctdef_.scan_ctdef_.external_file_format_str_.str_, allocator))) { + LOG_WARN("failed to load from string", K(ret), K(tsc->tsc_ctdef_.scan_ctdef_.external_file_format_str_.str_)); } else if (external_table_files.count() == 1 && external_table_files.at(0).file_id_ == INT64_MAX) { // dealing dummy file @@ -107,6 +112,48 @@ int ObGranuleUtil::split_granule_for_external_table(ObIAllocator &allocator, OB_FAIL(granule_tablets.push_back(tablets.at(0)))) { LOG_WARN("fail to push back", K(ret)); } + } else if (!external_table_files.empty() && + ObExternalFileFormat::ODPS_FORMAT == external_file_format.format_type_) { +#ifdef OB_BUILD_CPP_ODPS + int64_t task_idx = 0; + for (int64_t i = 0; OB_SUCC(ret) && i < external_table_files.count(); ++i) { + const ObExternalFileInfo& external_info = external_table_files.at(i); + if (0 != external_info.file_id_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected file id", K(ret), K(i), K(external_info.file_id_)); + } else { + // file_size_ is the total row cnt of odps table partition + uint64_t block_cnt = (external_info.file_size_ + sql::ObODPSTableRowIterator::ODPS_BLOCK_DOWNLOAD_SIZE - 1) + / sql::ObODPSTableRowIterator::ODPS_BLOCK_DOWNLOAD_SIZE; + uint64_t start_idx = 0; + block_cnt = (0 == block_cnt ? 1 : block_cnt); // one odps table partition should have at least one task, even it's empty + for (int64_t j = 0; OB_SUCC(ret) && j < block_cnt; ++j) { + ObNewRange new_range; + int64_t start = start_idx + (sql::ObODPSTableRowIterator::ODPS_BLOCK_DOWNLOAD_SIZE * j); + int64_t end = sql::ObODPSTableRowIterator::ODPS_BLOCK_DOWNLOAD_SIZE; + if (OB_FAIL(ObExternalTableUtils::make_external_table_scan_range(external_info.file_url_, + external_info.file_id_, + external_info.part_id_, + start_idx + (sql::ObODPSTableRowIterator::ODPS_BLOCK_DOWNLOAD_SIZE * j), + j == block_cnt -1 ? 
+ INT64_MAX : + sql::ObODPSTableRowIterator::ODPS_BLOCK_DOWNLOAD_SIZE, + allocator, + new_range))) { + LOG_WARN("failed to make external table scan range", K(ret)); + } else if ((OB_FAIL(granule_ranges.push_back(new_range)) || + OB_FAIL(granule_idx.push_back(task_idx++)) || + OB_FAIL(granule_tablets.push_back(tablets.at(0))))) { + LOG_WARN("fail to push back", K(ret)); + } + } + } + } +#else + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "external odps table"); + LOG_WARN("not support odps table in opensource", K(ret)); +#endif } else { for (int64_t i = 0; OB_SUCC(ret) && i < ranges.count(); ++i) { for (int64_t j = 0; OB_SUCC(ret) && j < external_table_files.count(); ++j) { @@ -202,10 +249,6 @@ int ObGranuleUtil::split_block_ranges(ObExecContext &exec_ctx, LOG_TRACE("get the splited results through the new gi split method", K(ret), K(granule_tablets.count()), K(granule_ranges.count()), K(granule_idx)); } - LOG_TRACE("split ranges to granule", K(ret), K(total_task_count), K(parallelism), - K(total_macros_count), K(macros_count_by_partition), K(macros_count_per_task), - K(granule_tablets.count()), K(granule_tablets), K(granule_ranges.count()), K(granule_ranges), - K(granule_idx.count()), K(granule_idx), K(tablets), K(task_count_by_partition)); return ret; } @@ -363,8 +406,8 @@ int ObGranuleUtil::split_block_granule(ObExecContext &exec_ctx, granule_tablets.count() != granule_ranges.count() || granule_tablets.count() != granule_idx.count()) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("the ranges or offsets are empty", - K(ret), K(granule_tablets.count()), K(granule_ranges.count()), K(granule_idx.count())); + LOG_WARN("the ranges or offsets are empty", K(ret), K(granule_tablets.count()), K(granule_ranges.count()), + K(granule_idx.count()), K(granule_tablets), K(granule_ranges), K(granule_idx)); } } } diff --git a/src/sql/engine/px/ob_px_sub_coord.cpp b/src/sql/engine/px/ob_px_sub_coord.cpp index 8f4caf29e9..2e15868971 100644 --- 
a/src/sql/engine/px/ob_px_sub_coord.cpp +++ b/src/sql/engine/px/ob_px_sub_coord.cpp @@ -47,6 +47,7 @@ #include "sql/das/ob_das_utils.h" #include "sql/engine/px/p2p_datahub/ob_p2p_dh_mgr.h" #include "sql/engine/window_function/ob_window_function_vec_op.h" +#include "sql/engine/basic/ob_select_into_op.h" using namespace oceanbase::common; using namespace oceanbase::sql; @@ -101,7 +102,6 @@ int ObPxSubCoord::pre_process() LOG_WARN("fail to setup receive/transmit op input", K(ret)); } } - if (OB_SUCC(ret) && !sqc_arg_.sqc_.get_pruning_table_locations().empty()) { sqc_ctx_.gi_pump_.set_need_partition_pruning(true); OZ(sqc_ctx_.gi_pump_.set_pruning_table_location(sqc_arg_.sqc_.get_pruning_table_locations())); @@ -557,6 +557,18 @@ int ObPxSubCoord::setup_op_input(ObExecContext &ctx, LOG_DEBUG("debug wf input", K(wf_spec->role_type_), K(sqc.get_task_count()), K(sqc.get_total_task_count())); } + } else if (root.get_type() == PHY_SELECT_INTO) { + ObPxSqcMeta &sqc = sqc_arg_.sqc_; + ObSelectIntoSpec *select_into_spec = reinterpret_cast<ObSelectIntoSpec *>(&root); +#ifdef OB_BUILD_CPP_ODPS + ObOdpsPartitionDownloaderMgr &odps_mgr = sqc_ctx.gi_pump_.get_odps_mgr(); + if (OB_FAIL(odps_mgr.init_uploader(select_into_spec->external_properties_.str_, + select_into_spec->external_partition_.str_, + select_into_spec->is_overwrite_, + sqc.get_task_count()))) { + LOG_WARN("failed to init odps uploader", K(ret)); + } +#endif } if (OB_SUCC(ret)) { if (OB_FAIL(root.register_to_datahub(ctx))) { diff --git a/src/sql/engine/px/ob_px_util.cpp b/src/sql/engine/px/ob_px_util.cpp index 0c0e457846..a6292c82b8 100644 --- a/src/sql/engine/px/ob_px_util.cpp +++ b/src/sql/engine/px/ob_px_util.cpp @@ -257,7 +257,6 @@ int ObPXServerAddrUtil::get_external_table_loc( //For recovered cluster, the file addr may not in the cluster. Then igore it. 
LOG_WARN("filter files in location failed", K(ret)); } - if (OB_FAIL(ret)) { } else if (ext_file_urls.empty()) { const char* dummy_file_name = "#######DUMMY_FILE#######"; diff --git a/src/sql/engine/table/ob_external_table_access_service.cpp b/src/sql/engine/table/ob_external_table_access_service.cpp index 937c24eebe..6aab47fc29 100644 --- a/src/sql/engine/table/ob_external_table_access_service.cpp +++ b/src/sql/engine/table/ob_external_table_access_service.cpp @@ -22,8 +22,11 @@ #include "share/ob_device_manager.h" #include "lib/utility/ob_macro_utils.h" #include "sql/engine/table/ob_parquet_table_row_iter.h" +#ifdef OB_BUILD_CPP_ODPS +#include "sql/engine/table/ob_odps_table_row_iter.h" +#endif #include "sql/engine/cmd/ob_load_data_file_reader.h" -#include "sql/engine/table/ob_orc_table_row_iter.h" +//#include "sql/engine/table/ob_orc_table_row_iter.h" namespace oceanbase { @@ -565,11 +568,26 @@ int ObExternalTableAccessService::table_scan( LOG_WARN("alloc memory failed", K(ret)); } break; - case ObExternalFileFormat::ORC_FORMAT: - if (OB_ISNULL(row_iter = OB_NEWx(ObOrcTableRowIterator, (scan_param.allocator_)))) { + case ObExternalFileFormat::ODPS_FORMAT: +#ifdef OB_BUILD_CPP_ODPS + if (OB_ISNULL(row_iter = OB_NEWx(ObODPSTableRowIterator, (scan_param.allocator_)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("alloc memory failed", K(ret)); } +#else + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "external odps table"); + LOG_WARN("not support to read odps in opensource", K(ret)); +#endif + // the ODPS case ends here; falling through to ORC_FORMAT would overwrite ret with OB_ERR_UNEXPECTED + break; + case ObExternalFileFormat::ORC_FORMAT: + // if (OB_ISNULL(row_iter = OB_NEWx(ObOrcTableRowIterator, (scan_param.allocator_)))) { + // ret = OB_ALLOCATE_MEMORY_FAILED; + // LOG_WARN("alloc memory failed", K(ret)); + // } + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected format", K(ret), "format", param.external_file_format_.format_type_); break; default: ret = OB_ERR_UNEXPECTED; @@ -603,6 +619,15 @@ int ObExternalTableAccessService::table_rescan(ObVTableScanParam 
&param, ObNewRo case ObExternalFileFormat::ORC_FORMAT: result->reset(); break; + case ObExternalFileFormat::ODPS_FORMAT: +#ifdef OB_BUILD_CPP_ODPS + result->reset(); +#else + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "external odps table"); + LOG_WARN("not support to read odps in opensource", K(ret)); +#endif + break; default: ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected format", K(ret), "format", param.external_file_format_.format_type_); @@ -778,6 +803,13 @@ int ObCSVTableRowIterator::init(const storage::ObTableScanParam *scan_param) } } } + for (int i = 0; i < scan_param_->key_ranges_.count(); ++i) { + int64_t start = 0; + int64_t step = 0; + int64_t part_id = scan_param_->key_ranges_.at(i).get_start_key().get_obj_ptr()[ObExternalTableUtils::PARTITION_ID].get_int(); + const ObString &file_url = scan_param_->key_ranges_.at(i).get_start_key().get_obj_ptr()[ObExternalTableUtils::FILE_URL].get_string(); + int64_t file_id = scan_param_->key_ranges_.at(i).get_start_key().get_obj_ptr()[ObExternalTableUtils::FILE_ID].get_int(); + } return ret; } @@ -847,7 +879,7 @@ int ObExternalTableRowIterator::calc_file_partition_list_value(const int64_t par } else if (OB_ISNULL(table_schema)) { ret = OB_TABLE_NOT_EXIST; LOG_WARN("table not exist", K(scan_param_->index_id_), K(scan_param_->tenant_id_)); - } else if (table_schema->is_partitioned_table() && table_schema->is_user_specified_partition_for_external_table()) { + } else if (table_schema->is_partitioned_table() && (table_schema->is_user_specified_partition_for_external_table() || table_schema->is_odps_external_table())) { if (OB_FAIL(table_schema->get_partition_by_part_id(part_id, CHECK_PARTITION_MODE_NORMAL, partition))) { LOG_WARN("get partition failed", K(ret), K(part_id)); } else if (OB_ISNULL(partition) || OB_UNLIKELY(partition->get_list_row_values().count() != 1) @@ -1126,7 +1158,6 @@ int ObCSVTableRowIterator::get_next_row() column_expr->set_evaluated_flag(eval_ctx); } } - return ret; } @@ -1267,7 +1298,6 
@@ int ObCSVTableRowIterator::get_next_rows(int64_t &count, int64_t capacity) } count = returned_row_cnt; - return ret; } diff --git a/src/sql/engine/table/ob_odps_table_row_iter.cpp b/src/sql/engine/table/ob_odps_table_row_iter.cpp new file mode 100644 index 0000000000..5888515e38 --- /dev/null +++ b/src/sql/engine/table/ob_odps_table_row_iter.cpp @@ -0,0 +1,2454 @@ +#ifdef OB_BUILD_CPP_ODPS +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX SQL_ENG +#include "ob_parquet_table_row_iter.h" +#include "lib/charset/ob_charset.h" +#include "sql/engine/px/ob_px_sqc_handler.h" + +namespace oceanbase { +namespace sql { + +int ObODPSTableRowIterator::OdpsPartition::reset() +{ + int ret = OB_SUCCESS; + try { + download_handle_->Complete(); + download_handle_ = NULL; + record_count_ = -1; + name_ = ""; + download_id_ = ""; + } catch (apsara::odps::sdk::OdpsTunnelException& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("odps exception occured when calling Complete method", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (const std::exception &ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("odps exception occured when calling Complete method", K(ret), K(ex.what()), KP(this)); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (...) 
{ + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("odps exception occured when calling Complete method", K(ret)); + } + } + return ret; +} + +int ObODPSTableRowIterator::init_tunnel(const sql::ObODPSGeneralFormat &odps_format) +{ + int ret = OB_SUCCESS; + try { + if (OB_FAIL(odps_format_.deep_copy(odps_format))) { + LOG_WARN("failed to deep copy odps format", K(ret)); + } else if (OB_FAIL(odps_format_.decrypt())) { + LOG_WARN("failed to decrypt odps format", K(ret)); + } else { + LOG_TRACE("init tunnel format", K(ret)); + if (0 == odps_format_.access_type_.case_compare("aliyun") || + odps_format_.access_type_.empty()) { + account_ = apsara::odps::sdk::Account(std::string(apsara::odps::sdk::ACCOUNT_ALIYUN), + std::string(odps_format_.access_id_.ptr(), odps_format_.access_id_.length()), + std::string(odps_format_.access_key_.ptr(), odps_format_.access_key_.length())); + } else if (0 == odps_format_.access_type_.case_compare("sts")) { + account_ = apsara::odps::sdk::Account(std::string(apsara::odps::sdk::ACCOUNT_STS), + std::string(odps_format_.sts_token_.ptr(), odps_format_.sts_token_.length())); + } else if (0 == odps_format_.access_type_.case_compare("token")) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("unsupported access type", K(ret), K(odps_format_.access_type_)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "ODPS access type: token"); + } else if (0 == odps_format_.access_type_.case_compare("domain")) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("unsupported access type", K(ret), K(odps_format_.access_type_)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "ODPS access type: domain"); + } else if (0 == odps_format_.access_type_.case_compare("taobao")) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("unsupported access type", K(ret), K(odps_format_.access_type_)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "ODPS access type: taobao"); + } else if (0 == odps_format_.access_type_.case_compare("app")) { + account_ = apsara::odps::sdk::Account(std::string(apsara::odps::sdk::ACCOUNT_APPLICATION), + 
std::string(odps_format_.access_id_.ptr(), odps_format_.access_id_.length()), + std::string(odps_format_.access_key_.ptr(), odps_format_.access_key_.length())); + } else { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "ODPS access type"); + } + conf_.SetAccount(account_); + conf_.SetEndpoint(std::string(odps_format_.endpoint_.ptr(), odps_format_.endpoint_.length())); + conf_.SetUserAgent("OB_ACCESS_ODPS"); + conf_.SetTunnelQuotaName(std::string(odps_format_.quota_.ptr(), odps_format_.quota_.length())); + if (0 == odps_format_.compression_code_.case_compare("zlib")) { + conf_.SetCompressOption(apsara::odps::sdk::CompressOption::ZLIB_COMPRESS); + } else if (0 == odps_format_.compression_code_.case_compare("zstd")) { + conf_.SetCompressOption(apsara::odps::sdk::CompressOption::ZSTD_COMPRESS); + } else if (0 == odps_format_.compression_code_.case_compare("lz4")) { + conf_.SetCompressOption(apsara::odps::sdk::CompressOption::LZ4_COMPRESS); + } else if (0 == odps_format_.compression_code_.case_compare("odps_lz4")) { + conf_.SetCompressOption(apsara::odps::sdk::CompressOption::ODPS_LZ4_COMPRESS); + } else { + conf_.SetCompressOption(apsara::odps::sdk::CompressOption::NO_COMPRESS); + } + tunnel_.Init(conf_); // do not need try catch + if (OB_ISNULL((odps_ = apsara::odps::sdk::IODPS::Create(conf_, // do not need try catch + std::string(odps_format_.project_.ptr(), + odps_format_.project_.length()))).get())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexcepted null ptr", K(ret)); + } else if (OB_ISNULL((odps_->GetTables()).get())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexcepted null ptr", K(ret)); + } else if (OB_ISNULL((table_handle_ = odps_->GetTables()->Get(std::string(odps_format_.project_.ptr(), odps_format_.project_.length()), // do not need try catch + std::string(odps_format_.schema_.ptr(), odps_format_.schema_.length()), + std::string(odps_format_.table_.ptr(), odps_format_.table_.length()))).get())) { + ret = OB_ERR_UNEXPECTED; + 
LOG_WARN("unexcepted null ptr", K(ret)); + } + } + } catch (apsara::odps::sdk::OdpsTunnelException& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when call external driver api", K(ret), K(ex.what()), KP(this)); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (const std::exception &ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when call external driver api", K(ret), K(ex.what()), KP(this)); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (...) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when call external driver api", K(ret), KP(this)); + } + } + + return ret; +} + +int ObODPSTableRowIterator::create_downloader(ObString &part_spec, apsara::odps::sdk::IDownloadPtr &downloader) +{ + int ret = OB_SUCCESS; + try { + apsara::odps::sdk::IDownloadPtr download_handle = NULL; + downloader = NULL; + std::string project(odps_format_.project_.ptr(), odps_format_.project_.length()); + std::string table(odps_format_.table_.ptr(), odps_format_.table_.length()); + std::string std_part_spec(part_spec.ptr(), part_spec.length()); + std::string download_id(""); + std::string schema(odps_format_.schema_.ptr(), odps_format_.schema_.length()); + download_handle = tunnel_.CreateDownload(project, + table, + std_part_spec, + download_id, + schema); + if (OB_ISNULL(download_handle.get())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexcepted null ptr", K(ret)); + } else { + downloader = download_handle; + } + } catch (apsara::odps::sdk::OdpsTunnelException& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when call CreateDownload", K(ret), K(ex.what()), KP(this)); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (const std::exception &ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when call CreateDownload", K(ret), K(ex.what()), KP(this)); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (...) 
{ + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when call CreateDownload", K(ret), KP(this)); + } + } + return ret; +} + +int ObODPSTableRowIterator::init(const storage::ObTableScanParam *scan_param) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(scan_param)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("scan param is null", K(ret)); + } else if (OB_FAIL(ObExternalTableRowIterator::init(scan_param))) { + LOG_WARN("failed to call ObExternalTableRowIterator::init", K(ret)); + } else { + if (OB_FAIL(init_tunnel(scan_param_->external_file_format_.odps_format_))) { + LOG_WARN("failed to init odps tunnel", K(ret)); + } else if (OB_FAIL(pull_column())) { + LOG_WARN("failed to pull column info", K(ret)); + } else if (OB_FAIL(prepare_expr())) { + LOG_WARN("failed to prepare expr", K(ret)); + } + } + return ret; +} + +int ObODPSTableRowIterator::next_task() +{ + int ret = OB_SUCCESS; + ObEvalCtx &eval_ctx = scan_param_->op_->get_eval_ctx(); + int64_t task_idx = state_.task_idx_; + int64_t start = 0; + int64_t step = 0; + if (++task_idx >= scan_param_->key_ranges_.count()) { + ret = OB_ITER_END; + LOG_WARN("odps table iter end", K(total_count_), K(state_), K(task_idx), K(ret)); + } else { + ObEvalCtx &ctx = scan_param_->op_->get_eval_ctx(); + ObPxSqcHandler *sqc = ctx.exec_ctx_.get_sqc_handler();// if sqc is not NULL, odps read is in px plan + if (OB_FAIL(ObExternalTableUtils::resolve_odps_start_step(scan_param_->key_ranges_.at(task_idx), + ObExternalTableUtils::LINE_NUMBER, + start, + step))) { + LOG_WARN("failed to resolve range in external table", K(ret)); + } else { + try { + const ObString &part_spec = scan_param_->key_ranges_.at(task_idx).get_start_key().get_obj_ptr()[ObExternalTableUtils::FILE_URL].get_string(); + int64_t part_id = scan_param_->key_ranges_.at(task_idx).get_start_key().get_obj_ptr()[ObExternalTableUtils::PARTITION_ID].get_int(); + std::string project(odps_format_.project_.ptr(), odps_format_.project_.length()); + std::string 
table(odps_format_.table_.ptr(), odps_format_.table_.length()); + std::string std_part_spec(part_spec.ptr(), part_spec.length()); + std::string download_id(""); + std::string schema(odps_format_.schema_.ptr(), odps_format_.schema_.length()); + std::vector column_names; + if (OB_ISNULL(sqc) && + OB_ISNULL((state_.download_handle_ = tunnel_.CreateDownload(project, + table, + std_part_spec, + download_id, + schema)).get())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexcepted null ptr", K(ret)); + } else if (OB_NOT_NULL(sqc) && + !sqc->get_sqc_ctx().gi_pump_.is_odps_downloader_inited() && + OB_ISNULL((state_.download_handle_ = tunnel_.CreateDownload(project, + table, + std_part_spec, + download_id, + schema)).get())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexcepted null ptr", K(ret)); + } else if (OB_NOT_NULL(sqc) && + sqc->get_sqc_ctx().gi_pump_.is_odps_downloader_inited() && + OB_FAIL(sqc->get_sqc_ctx().gi_pump_.get_odps_downloader(part_id, state_.download_handle_))) { + LOG_WARN("failed to get odps downloader", K(ret), K(part_id)); + } else if (OB_ISNULL(state_.download_handle_.get())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexcepted null ptr", K(ret)); + } else if (OB_ISNULL((state_.record_reader_handle_ = state_.download_handle_->OpenReader(start, + step, + column_names, + true)).get())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexcepted null ptr", K(ret)); + } else if (OB_FAIL(calc_file_partition_list_value(part_id, arena_alloc_, state_.part_list_val_))) { + LOG_WARN("failed to calc parttion list value", K(part_id), K(ret)); + } else { + state_.task_idx_ = task_idx; + state_.part_id_ = part_id; + state_.start_ = start; + state_.step_ = step; + state_.count_ = 0; + state_.is_from_gi_pump_ = OB_NOT_NULL(sqc) && sqc->get_sqc_ctx().gi_pump_.is_odps_downloader_inited(); + state_.download_id_ = state_.download_handle_->GetDownloadId(); + state_.part_spec_ = std_part_spec; + // what if error occur after this line, how to close state_.record_reader_handle_? 
+ LOG_TRACE("get a new task", K(ret), K(batch_size_), K(state_)); + if (OB_SUCC(ret) && -1 == batch_size_) { // exec once only + batch_size_ = eval_ctx.max_batch_size_; + if (0 == batch_size_) { + // even state_.record_reader_handle_ was destroyed, record_/records_ is still valid. + // see class RecordReader : public IRecordReader to check it. + if (OB_ISNULL((record_ = state_.record_reader_handle_->CreateBufferRecord()).get())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null ptr", K(ret)); + } else { + LOG_TRACE("odps record_ inited", K(ret), K(batch_size_)); + } + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < batch_size_; ++i) { + if (OB_ISNULL((records_[i] = state_.record_reader_handle_->CreateBufferRecord()).get())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null ptr", K(ret), K(i)); + } + } + LOG_TRACE("odps records_ inited", K(ret), K(batch_size_)); + } + } + } + } catch (apsara::odps::sdk::OdpsException& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("odps exception occured when calling odps api", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (const std::exception &ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("odps exception occured when calling odps api", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (...) 
{ + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("odps exception occured when calling odps api", K(ret)); + } + } + } + } + return ret; +} + +int ObODPSTableRowIterator::print_type_map_user_info(apsara::odps::sdk::ODPSColumnTypeInfo odps_type_info, + const ObExpr *ob_type_expr) +{ + int ret = OB_SUCCESS; + try { + std::string odps_type_str = odps_type_info.ToTypeString();//need try catch + const char* odps_type_cstr = odps_type_str.c_str(); + const char* ob_type_cstr = ""; + if (OB_NOT_NULL(ob_type_expr)) { + ObArrayWrap buf; + int64_t pos = 0; + ob_type_cstr = ob_obj_type_str(ob_type_expr->datum_meta_.type_); + if (OB_SUCCESS == buf.allocate_array(arena_alloc_, 128)) { // 128 is enough to hold user info str + ob_sql_type_str(buf.get_data(), buf.count(), pos, + ob_type_expr->datum_meta_.type_, + ob_type_expr->max_length_, + ob_type_expr->datum_meta_.precision_, + ob_type_expr->datum_meta_.scale_, + ob_type_expr->datum_meta_.cs_type_); + if (pos < buf.count()) { + buf.at(pos++) = '\0'; + ob_type_cstr = buf.get_data(); + } + } + } + LOG_USER_ERROR(OB_EXTERNAL_ODPS_COLUMN_TYPE_MISMATCH, odps_type_cstr, ob_type_cstr); + } catch (apsara::odps::sdk::OdpsTunnelException& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when call ToTypeString", K(ret), K(ex.what()), KP(this)); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (const std::exception& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when call ToTypeString", K(ret), K(ex.what()), KP(this)); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (...) 
{ + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when call ToTypeString", K(ret), KP(this)); + } + } + return ret; +} + +int ObODPSTableRowIterator::check_type_static(apsara::odps::sdk::ODPSColumnTypeInfo odps_type_info, + const ObExpr *ob_type_expr) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(ob_type_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null ptr", K(ret)); + } else { + const apsara::odps::sdk::ODPSColumnType odps_type = odps_type_info.mType; + const int32_t odps_type_length = odps_type_info.mSpecifiedLength; + const int32_t odps_type_precision = odps_type_info.mPrecision; + const int32_t odps_type_scale = odps_type_info.mScale; + const ObObjType ob_type = ob_type_expr->obj_meta_.get_type(); + const int32_t ob_type_length = ob_type_expr->max_length_; + const int32_t ob_type_precision = ob_type_expr->datum_meta_.precision_; + const int32_t ob_type_scale = ob_type_expr->datum_meta_.scale_; + switch(odps_type) + { + case apsara::odps::sdk::ODPS_TINYINT: + case apsara::odps::sdk::ODPS_BOOLEAN: + { + if (ObTinyIntType == ob_type) { + // odps_type to ob_type is valid + } else { + ret = OB_EXTERNAL_ODPS_COLUMN_TYPE_MISMATCH; + LOG_WARN("invalid odps type map to ob type", K(ret), K(odps_type), K(ob_type)); + print_type_map_user_info(odps_type_info, ob_type_expr); + } + break; + } + case apsara::odps::sdk::ODPS_SMALLINT: + { + if (ObSmallIntType == ob_type) { + // odps_type to ob_type is valid + } else { + ret = OB_EXTERNAL_ODPS_COLUMN_TYPE_MISMATCH; + LOG_WARN("invalid odps type map to ob type", K(ret), K(odps_type), K(ob_type)); + print_type_map_user_info(odps_type_info, ob_type_expr); + } + break; + } + case apsara::odps::sdk::ODPS_INTEGER: + { + if (ObInt32Type == ob_type) { + // odps_type to ob_type is valid + } else { + ret = OB_EXTERNAL_ODPS_COLUMN_TYPE_MISMATCH; + LOG_WARN("invalid odps type map to ob type", K(ret), K(odps_type), K(ob_type)); + print_type_map_user_info(odps_type_info, ob_type_expr); + } + break; + } + 
case apsara::odps::sdk::ODPS_BIGINT: + { + if (ObIntType == ob_type) { + // odps_type to ob_type is valid + } else { + ret = OB_EXTERNAL_ODPS_COLUMN_TYPE_MISMATCH; + LOG_WARN("invalid odps type map to ob type", K(ret), K(odps_type), K(ob_type)); + print_type_map_user_info(odps_type_info, ob_type_expr); + } + break; + } + case apsara::odps::sdk::ODPS_FLOAT: + { + if (ObFloatType == ob_type && ob_type_length == 12 && ob_type_scale == -1) { + // odps_type to ob_type is valid + } else { + ret = OB_EXTERNAL_ODPS_COLUMN_TYPE_MISMATCH; + LOG_WARN("invalid odps type map to ob type", K(ret), K(odps_type), K(ob_type)); + print_type_map_user_info(odps_type_info, ob_type_expr); + } + break; + } + case apsara::odps::sdk::ODPS_DOUBLE: + { + if (ObDoubleType == ob_type && ob_type_length == 23 && ob_type_scale == -1) { + // odps_type to ob_type is valid + } else { + ret = OB_EXTERNAL_ODPS_COLUMN_TYPE_MISMATCH; + LOG_WARN("invalid odps type map to ob type", K(ret), K(odps_type), K(ob_type)); + print_type_map_user_info(odps_type_info, ob_type_expr); + } + break; + } + case apsara::odps::sdk::ODPS_DECIMAL: + { + if (ObDecimalIntType == ob_type || + ObNumberType == ob_type) { + // odps_type to ob_type is valid + if (ob_type_precision != odps_type_precision || // in ObExpr, max_length_ is decimal type's precision + ob_type_scale != odps_type_scale) { + ret = OB_EXTERNAL_ODPS_COLUMN_TYPE_MISMATCH; + LOG_WARN("invalid precision or scale or length", K(ret), K(odps_type), K(ob_type), + K(ob_type_length), + K(ob_type_precision), + K(ob_type_scale), + K(odps_type_length), + K(odps_type_precision), + K(odps_type_scale)); + print_type_map_user_info(odps_type_info, ob_type_expr); + } + } else { + ret = OB_EXTERNAL_ODPS_COLUMN_TYPE_MISMATCH; + print_type_map_user_info(odps_type_info, ob_type_expr); + } + break; + } + case apsara::odps::sdk::ODPS_CHAR: + { + if (ObCharType == ob_type) { + // odps_type to ob_type is valid + if (ob_type_length != odps_type_length) { + ret = 
OB_EXTERNAL_ODPS_COLUMN_TYPE_MISMATCH; + LOG_WARN("invalid precision or scale or length", K(ret), K(odps_type), K(ob_type), + K(ob_type_length), + K(ob_type_precision), + K(ob_type_scale), + K(odps_type_length), + K(odps_type_precision), + K(odps_type_scale)); + print_type_map_user_info(odps_type_info, ob_type_expr); + } + } else { + ret = OB_EXTERNAL_ODPS_COLUMN_TYPE_MISMATCH; + LOG_WARN("invalid odps type map to ob type", K(ret), K(odps_type), K(ob_type)); + print_type_map_user_info(odps_type_info, ob_type_expr); + } + break; + } + case apsara::odps::sdk::ODPS_VARCHAR: + { + if (ObVarcharType == ob_type) { + // odps_type to ob_type is valid + if (ob_type_length != odps_type_length) { + ret = OB_EXTERNAL_ODPS_COLUMN_TYPE_MISMATCH; + LOG_WARN("invalid precision or scale or length", K(ret), K(odps_type), K(ob_type), + K(ob_type_length), + K(ob_type_precision), + K(ob_type_scale), + K(odps_type_length), + K(odps_type_precision), + K(odps_type_scale)); + print_type_map_user_info(odps_type_info, ob_type_expr); + } + } else { + ret = OB_EXTERNAL_ODPS_COLUMN_TYPE_MISMATCH; + LOG_WARN("invalid odps type map to ob type", K(ret), K(odps_type), K(ob_type)); + print_type_map_user_info(odps_type_info, ob_type_expr); + } + break; + } + case apsara::odps::sdk::ODPS_STRING: + case apsara::odps::sdk::ODPS_BINARY://check length at runtime + { + if (ObVarcharType == ob_type || + ObTinyTextType == ob_type || + ObTextType == ob_type || + ObLongTextType == ob_type || + ObMediumTextType == ob_type) { + // odps_type to ob_type is valid + } else { + ret = OB_EXTERNAL_ODPS_COLUMN_TYPE_MISMATCH; + LOG_WARN("invalid odps type map to ob type", K(ret), K(odps_type), K(ob_type)); + print_type_map_user_info(odps_type_info, ob_type_expr); + } + break; + } + case apsara::odps::sdk::ODPS_TIMESTAMP: + { + if (ObTimestampType == ob_type) { + // odps_type to ob_type is valid + if (ob_type_scale < 6) { + ret = OB_EXTERNAL_ODPS_COLUMN_TYPE_MISMATCH; + LOG_WARN("invalid precision or scale or length", 
K(ret), K(odps_type), K(ob_type), + K(ob_type_length), + K(ob_type_precision), + K(ob_type_scale), + K(odps_type_length), + K(odps_type_precision), + K(odps_type_scale)); + print_type_map_user_info(odps_type_info, ob_type_expr); + } + } else { + ret = OB_EXTERNAL_ODPS_COLUMN_TYPE_MISMATCH; + LOG_WARN("invalid odps type map to ob type", K(ret), K(odps_type), K(ob_type)); + print_type_map_user_info(odps_type_info, ob_type_expr); + } + break; + } + case apsara::odps::sdk::ODPS_TIMESTAMP_NTZ: + { + if (ObDateTimeType == ob_type) { + // odps_type to ob_type is valid + if (ob_type_scale < 6) { + ret = OB_EXTERNAL_ODPS_COLUMN_TYPE_MISMATCH; + LOG_WARN("invalid precision or scale or length", K(ret), K(odps_type), K(ob_type), + K(ob_type_length), + K(ob_type_precision), + K(ob_type_scale), + K(odps_type_length), + K(odps_type_precision), + K(odps_type_scale)); + print_type_map_user_info(odps_type_info, ob_type_expr); + } + } else { + ret = OB_EXTERNAL_ODPS_COLUMN_TYPE_MISMATCH; + LOG_WARN("invalid odps type map to ob type", K(ret), K(odps_type), K(ob_type)); + print_type_map_user_info(odps_type_info, ob_type_expr); + } + break; + } + case apsara::odps::sdk::ODPS_DATE: + { + if (ObDateType == ob_type) { + // odps_type to ob_type is valid + } else { + ret = OB_EXTERNAL_ODPS_COLUMN_TYPE_MISMATCH; + LOG_WARN("invalid odps type map to ob type", K(ret), K(odps_type), K(ob_type)); + print_type_map_user_info(odps_type_info, ob_type_expr); + } + break; + } + case apsara::odps::sdk::ODPS_DATETIME: + { + if (ObDateTimeType == ob_type) { + // odps_type to ob_type is valid + if (ob_type_scale < 3) { + ret = OB_EXTERNAL_ODPS_COLUMN_TYPE_MISMATCH; + LOG_WARN("invalid precision or scale or length", K(ret), K(odps_type), K(ob_type), + K(ob_type_length), + K(ob_type_precision), + K(ob_type_scale), + K(odps_type_length), + K(odps_type_precision), + K(odps_type_scale)); + print_type_map_user_info(odps_type_info, ob_type_expr); + } + } else { + ret = OB_EXTERNAL_ODPS_COLUMN_TYPE_MISMATCH; + 
LOG_WARN("invalid odps type map to ob type", K(ret), K(odps_type), K(ob_type)); + print_type_map_user_info(odps_type_info, ob_type_expr); + } + break; + } + default: + { + ret = OB_NOT_SUPPORTED; + LOG_WARN("unsupported odps type", K(ret)); + } + } + } + return ret; +} + +int ObODPSTableRowIterator::prepare_expr() +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(scan_param_) || OB_ISNULL(scan_param_->ext_file_column_exprs_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexcepted null ptr", KP(scan_param_), K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < scan_param_->ext_file_column_exprs_->count(); ++i) { + const ObExpr *cur_expr = scan_param_->ext_file_column_exprs_->at(i); // do no check is NULL or not + int target_idx = cur_expr->extra_ - 1; + if (OB_UNLIKELY(cur_expr->type_ == T_PSEUDO_EXTERNAL_FILE_COL && + (target_idx < 0 || target_idx >= column_list_.count()))) { + ret = OB_EXTERNAL_ODPS_UNEXPECTED_ERROR; + LOG_WARN("unexcepted target_idx", K(ret), K(target_idx), K(column_list_.count())); + LOG_USER_ERROR(OB_EXTERNAL_ODPS_UNEXPECTED_ERROR, "wrong column index point to odps, please check the index of external$tablecol[index] and metadata$partition_list_col[index]"); + } else if (OB_FAIL(target_column_id_list_.push_back(target_idx))) { + LOG_WARN("failed to keep target_idx", K(ret)); + } else if (cur_expr->type_ == T_PSEUDO_EXTERNAL_FILE_COL && + OB_FAIL(check_type_static(column_list_.at(target_idx).type_info_, cur_expr))) { + LOG_WARN("odps type map ob type not support", K(ret), K(target_idx)); + } + } + ObEvalCtx &eval_ctx = scan_param_->op_->get_eval_ctx(); + void *vec_mem = NULL; + void *records_mem = NULL; + malloc_alloc_.set_attr(lib::ObMemAttr(scan_param_->tenant_id_, "ODPSRowIter")); + if (OB_FAIL(ret)) { + // do nothing + } else if (0 > eval_ctx.max_batch_size_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected max_batch_size_", K(ret), K(eval_ctx.max_batch_size_)); + } else { + if (0 == eval_ctx.max_batch_size_) { + // do nothing + } else if 
(OB_ISNULL(vec_mem = malloc_alloc_.alloc(ObBitVector::memory_size(eval_ctx.max_batch_size_)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc memory for skip", K(ret), K(eval_ctx.max_batch_size_)); + } else if (OB_ISNULL(records_mem = malloc_alloc_.alloc(eval_ctx.max_batch_size_ * sizeof(apsara::odps::sdk::ODPSTableRecordPtr)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc memory for skip", K(ret), K(eval_ctx.max_batch_size_)); + } else { + bit_vector_cache_ = to_bit_vector(vec_mem); + bit_vector_cache_->reset(eval_ctx.max_batch_size_); + records_ = static_cast(records_mem); + for (int64_t i = 0; i < eval_ctx.max_batch_size_; ++i) { + new (&records_[i]) apsara::odps::sdk::ODPSTableRecordPtr; + } + } + } + } + return ret; +} + +int ObODPSTableRowIterator::pull_partition_info() +{ + int ret = OB_SUCCESS; + partition_list_.reset(); + std::vector part_specs; + try { + if (OB_ISNULL(table_handle_.get())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexcepted null ptr", K(ret)); + } else { + table_handle_->GetPartitionNames(part_specs); + is_part_table_ = true; + } + } catch (apsara::odps::sdk::OdpsException& ex) { + std::string ex_msg = ex.what(); + if (std::string::npos != ex_msg.find("ODPS-0110031")) { // ODPS-0110031 means table is not a partitional table + is_part_table_ = false; + } else if (OB_FAIL(ret)) { + //do nothing + } else { + ret = OB_ODPS_ERROR; + LOG_WARN("failed to call GetPartitionNames method in ODPS sdk", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (const std::exception& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("failed to call GetPartitionNames method in ODPS sdk", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (...) 
{ + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("odps exception occured when calling GetPartitionNames method", K(ret)); + } + } + if (OB_SUCC(ret) && !is_part_table_) { + part_specs.push_back(""); + } + try { + std::string project(odps_format_.project_.ptr(), odps_format_.project_.length()); + std::string table(odps_format_.table_.ptr(), odps_format_.table_.length()); + std::string schema(odps_format_.schema_.ptr(), odps_format_.schema_.length()); + for (std::vector::iterator part_spec = part_specs.begin(); OB_SUCC(ret) && part_spec != part_specs.end(); part_spec++) { + std::string download_id(""); + apsara::odps::sdk::IDownloadPtr download_handle = NULL; + int64_t record_count = -1; + if (OB_ISNULL((download_handle = tunnel_.CreateDownload(project, + table, + *part_spec, + download_id, + schema)).get())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexcepted null ptr", K(ret)); + } else if (FALSE_IT(download_id = download_handle->GetDownloadId())) { + } else if (FALSE_IT(record_count = download_handle->GetRecordCount())) { + } else if (OB_FAIL(partition_list_.push_back(OdpsPartition(*part_spec, + download_handle, + download_id, + record_count)))){ + LOG_WARN("failed to push back partition_list_", K(ret)); + } + } + } catch (apsara::odps::sdk::OdpsTunnelException& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when call external driver api", K(ret), K(ex.what()), KP(this)); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (const std::exception &ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when call external driver api", K(ret), K(ex.what()), KP(this)); + } + } catch (...) 
{ + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when call external driver api", K(ret), KP(this)); + } + } + return ret; +} + +int ObODPSTableRowIterator::pull_column() { + int ret = OB_SUCCESS; + if (OB_ISNULL(table_handle_.get())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexcepted null ptr", K(ret)); + } else { + try { + apsara::odps::sdk::IODPSTableSchemaPtr schema_handle = table_handle_->GetSchema(); + if (OB_ISNULL(schema_handle.get())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexcepted null ptr", K(ret)); + } else { + for (uint32_t i = 0; OB_SUCC(ret) && i < schema_handle->GetColumnCount(); i++) { + if (OB_FAIL(column_list_.push_back(OdpsColumn(schema_handle->GetTableColumn(i).GetName(), + schema_handle->GetTableColumn(i).GetTypeInfo())))) { + LOG_WARN("failed to push back column_list_", K(ret)); + } + } + } + } catch (apsara::odps::sdk::OdpsTunnelException& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("failed to call GetSchema method in ODPS sdk", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (const std::exception &ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("failed to call GetSchema method in ODPS sdk", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (...) 
{ + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("odps exception occured when calling GetSchema method", K(ret)); + } + } + } + return ret; +} + +void ObODPSTableRowIterator::reset() +{ + state_.reuse(); // reset state_ to initial values for rescan +} + +int ObODPSTableRowIterator::StateValues::reuse() +{ + int ret = OB_SUCCESS; + try { + if (-1 == task_idx_) { + // do nothing + } else if (OB_ISNULL(record_reader_handle_.get())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null ptr", K(ret), K(lbt())); + } else if (OB_ISNULL(download_handle_.get())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null ptr", K(ret), K(lbt())); + } else { + record_reader_handle_->Close(); + record_reader_handle_.reset(); + if (!is_from_gi_pump_) { + download_handle_->Complete(); + } + download_handle_.reset(); + } + } catch (const apsara::odps::sdk::OdpsTunnelException& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("odps exception occured when calling Complete method, ignore it", K(ret), K(ex.what())); + //LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (const std::exception& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("odps exception occured when calling Complete method, ignore it", K(ret), K(ex.what())); + //LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (...) 
{ + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("odps exception occured when calling Complete method, ignore it", K(ret)); + } + } + task_idx_ = -1; + part_id_ = 0; + start_ = 0; + step_ = 0; + count_ = 0; + part_spec_.clear(); + download_id_.clear(); + part_list_val_.reset(); + is_from_gi_pump_ = false; + return ret; +} + +int ObODPSTableRowIterator::get_next_rows(int64_t &count, int64_t capacity) +{ + int ret = 0; + ObMallocHookAttrGuard guard(mem_attr_); + int64_t returned_row_cnt = 0; + ObEvalCtx &ctx = scan_param_->op_->get_eval_ctx(); + const ExprFixedArray &file_column_exprs = *(scan_param_->ext_file_column_exprs_); + if (state_.count_ >= state_.step_ && OB_FAIL(next_task())) { + if (OB_ITER_END != ret) { + LOG_WARN("get next task failed", K(ret)); + } + } else { + int64_t returned_row_cnt = 0; + try { + while(returned_row_cnt < capacity && OB_SUCC(ret)) { + if (!(state_.record_reader_handle_->Read(*records_[returned_row_cnt]))) { + break; + } else { + ++state_.count_; + ++total_count_; + ++returned_row_cnt; + } + } + } catch (apsara::odps::sdk::OdpsTunnelException& ex) { + if (OB_SUCC(ret)) { + std::string ex_msg = ex.what(); + if (std::string::npos != ex_msg.find("EOF")) { //EOF + LOG_TRACE("odps eof", K(ret), K(total_count_), K(returned_row_cnt), K(ex.what())); + if (0 == returned_row_cnt && (INT64_MAX == state_.step_ || state_.count_ == state_.step_)) { + state_.step_ = state_.count_; // goto get next task + count = 0; + } else if (0 == returned_row_cnt) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected returned_row_cnt", K(total_count_), K(returned_row_cnt), K(state_), K(ret)); + } + } else { + ret = OB_ODPS_ERROR; + LOG_WARN("odps exception occured when calling Read method", K(ret), K(total_count_), K(returned_row_cnt), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } + } catch (const std::exception& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("odps exception occured when calling Read method", K(ret), 
K(total_count_), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (...) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("odps exception occured when calling Read method", K(ret), K(total_count_)); + } + } + if (OB_FAIL(ret)) { + // do nothing + } else if (0 == returned_row_cnt && (INT64_MAX == state_.step_ || state_.count_ == state_.step_)) { + state_.step_ = state_.count_; // goto get next task + count = 0; + } else if (0 == returned_row_cnt) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected returned_row_cnt", K(total_count_), K(returned_row_cnt), K(state_), K(ret)); + } else { + for (int64_t column_idx = 0; OB_SUCC(ret) && column_idx < target_column_id_list_.count(); ++column_idx) { + uint32_t target_idx = target_column_id_list_.at(column_idx); + ObExpr &expr = *file_column_exprs.at(column_idx); + ObDatum *datums = expr.locate_batch_datums(ctx); + ObObjType type = expr.obj_meta_.get_type(); + if (expr.type_ == T_PSEUDO_PARTITION_LIST_COL) { + for (int64_t row_idx = 0; OB_SUCC(ret) && row_idx < returned_row_cnt; ++row_idx) { + int64_t loc_idx = file_column_exprs.at(column_idx)->extra_ - 1; + if (OB_UNLIKELY(loc_idx < 0 || loc_idx >= state_.part_list_val_.get_count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexcepted loc_idx", K(ret), K(loc_idx), K(state_.part_list_val_.get_count()), KP(&state_.part_list_val_)); + } else if (state_.part_list_val_.get_cell(loc_idx).is_null()) { + datums[row_idx].set_null(); + } else { + CK (OB_NOT_NULL(datums[row_idx].ptr_)); + OZ (datums[row_idx].from_obj(state_.part_list_val_.get_cell(loc_idx))); + } + } + } else { + apsara::odps::sdk::ODPSColumnType odps_type = column_list_.at(target_idx).type_info_.mType; + try { + switch(odps_type) + { + case apsara::odps::sdk::ODPS_BOOLEAN: + { + if ((ObTinyIntType == type || + ObSmallIntType == type || + ObMediumIntType == type || + ObInt32Type == type || + ObIntType == type) && !is_oracle_mode()) { + for (int64_t row_idx = 0; OB_SUCC(ret) && row_idx < 
returned_row_cnt; ++row_idx) {
+                    const bool* v = records_[row_idx]->GetBoolValue(target_idx);
+                    if (v == NULL) {
+                      datums[row_idx].set_null();
+                    } else {
+                      datums[row_idx].set_int(*v);
+                    }
+                  }
+                } else if (ObNumberType == type && is_oracle_mode()) {
+                  // ODPS BOOLEAN -> oracle-mode NUMBER: widen to int64, convert to
+                  // ObNumber, then store the converted number into the output datum.
+                  ObEvalCtx::BatchInfoScopeGuard batch_info_guard(ctx);
+                  batch_info_guard.set_batch_idx(0);
+                  for (int64_t row_idx = 0; OB_SUCC(ret) && row_idx < returned_row_cnt; ++row_idx) {
+                    batch_info_guard.set_batch_idx(row_idx);
+                    const bool* v = records_[row_idx]->GetBoolValue(target_idx);
+                    if (v == NULL) {
+                      datums[row_idx].set_null();
+                    } else {
+                      int64_t in_val = *v;
+                      ObNumStackOnceAlloc tmp_alloc;
+                      number::ObNumber nmb;
+                      OZ(ObOdpsDataTypeCastUtil::common_int_number_wrap(expr, in_val, tmp_alloc, nmb), in_val);
+                      if (OB_FAIL(ret)) {
+                        LOG_WARN("failed to cast int to number", K(ret), K(row_idx), K(column_idx));
+                      } else {
+                        // The wrap call only fills the local nmb; write it to the
+                        // datum, as the ODPS_DECIMAL -> NUMBER branch does.
+                        datums[row_idx].set_number(nmb);
+                      }
+                    }
+                  }
+                } else if (ObDecimalIntType == type && is_oracle_mode()) {
+                  ObEvalCtx::BatchInfoScopeGuard batch_info_guard(ctx);
+                  batch_info_guard.set_batch_idx(0);
+                  for (int64_t row_idx = 0; OB_SUCC(ret) && row_idx < returned_row_cnt; ++row_idx) {
+                    batch_info_guard.set_batch_idx(row_idx);
+                    const bool* v = records_[row_idx]->GetBoolValue(target_idx);
+                    if (v == NULL) {
+                      datums[row_idx].set_null();
+                    } else {
+                      int64_t in_val = *v;
+                      ObDecimalInt *decint = nullptr;
+                      int32_t int_bytes = 0;
+                      ObDecimalIntBuilder tmp_alloc;
+                      ObScale out_scale = expr.datum_meta_.scale_;
+                      ObScale in_scale = 0;
+                      ObPrecision out_prec = expr.datum_meta_.precision_;
+                      // NOTE(review): the precision is looked up with the output
+                      // expr type rather than an integer input type - confirm
+                      // this is intended.
+                      ObPrecision in_prec =
+                        ObAccuracy::MAX_ACCURACY2[lib::is_oracle_mode()][type]
+                          .get_precision();
+                      const static int64_t DECINT64_MAX = get_scale_factor(MAX_PRECISION_DECIMAL_INT_64);
+                      if (in_prec > MAX_PRECISION_DECIMAL_INT_64 && in_val < DECINT64_MAX) {
+                        in_prec = MAX_PRECISION_DECIMAL_INT_64;
+                      }
+                      if (OB_FAIL(wide::from_integer(in_val, tmp_alloc, decint, int_bytes, in_prec))) {
+                        LOG_WARN("from_integer failed", K(ret), K(in_val), K(row_idx), K(column_idx));
+                      } else if (ObDatumCast::need_scale_decimalint(in_scale, in_prec, out_scale, out_prec)) {
+                        ObDecimalIntBuilder res_val;
+                        if (OB_FAIL(ObDatumCast::common_scale_decimalint(decint, int_bytes, in_scale, out_scale,
+                                                                         out_prec, expr.extra_, res_val))) {
+                          LOG_WARN("scale decimal int failed", K(ret), K(row_idx), K(column_idx));
+                        } else {
+                          datums[row_idx].set_decimal_int(res_val.get_decimal_int(), res_val.get_int_bytes());
+                        }
+                      } else {
+                        datums[row_idx].set_decimal_int(decint, int_bytes);
+                      }
+                    }
+                  }
+                } else {
+                  ret = OB_ERR_UNEXPECTED;
+                  LOG_WARN("unexpected expr type", K(ret), K(type), K(column_idx));
+                }
+                break;
+              }
+              case apsara::odps::sdk::ODPS_TINYINT:
+              case apsara::odps::sdk::ODPS_SMALLINT:
+              case apsara::odps::sdk::ODPS_INTEGER:
+              case apsara::odps::sdk::ODPS_BIGINT:
+              {
+                if ((ObTinyIntType == type ||
+                     ObSmallIntType == type ||
+                     ObMediumIntType == type ||
+                     ObInt32Type == type ||
+                     ObIntType == type) && !is_oracle_mode()) {
+                  for (int64_t row_idx = 0; OB_SUCC(ret) && row_idx < returned_row_cnt; ++row_idx) {
+                    const int64_t* v = records_[row_idx]->GetIntValue(target_idx, odps_type);
+                    if (v == NULL) {
+                      datums[row_idx].set_null();
+                    } else {
+                      datums[row_idx].set_int(*v);
+                    }
+                  }
+                } else if (ObNumberType == type && is_oracle_mode()) {
+                  // ODPS integer -> oracle-mode NUMBER: convert to ObNumber, then
+                  // store the converted number into the output datum.
+                  ObEvalCtx::BatchInfoScopeGuard batch_info_guard(ctx);
+                  batch_info_guard.set_batch_idx(0);
+                  for (int64_t row_idx = 0; OB_SUCC(ret) && row_idx < returned_row_cnt; ++row_idx) {
+                    batch_info_guard.set_batch_idx(row_idx);
+                    const int64_t* v = records_[row_idx]->GetIntValue(target_idx, odps_type);
+                    if (v == NULL) {
+                      datums[row_idx].set_null();
+                    } else {
+                      int64_t in_val = *v;
+                      ObNumStackOnceAlloc tmp_alloc;
+                      number::ObNumber nmb;
+                      OZ(ObOdpsDataTypeCastUtil::common_int_number_wrap(expr, in_val, tmp_alloc, nmb), in_val);
+                      if (OB_FAIL(ret)) {
+                        LOG_WARN("failed to cast int to number", K(ret), K(row_idx), K(column_idx));
+                      } else {
+                        // The wrap call only fills the local nmb; write it to the
+                        // datum, as the ODPS_DECIMAL -> NUMBER branch does.
+                        datums[row_idx].set_number(nmb);
+                      }
+                    }
+                  }
+                } else if (ObDecimalIntType == type && is_oracle_mode()) {
+                  ObEvalCtx::BatchInfoScopeGuard 
batch_info_guard(ctx); + batch_info_guard.set_batch_idx(0); + for (int64_t row_idx = 0; OB_SUCC(ret) && row_idx < returned_row_cnt; ++row_idx) { + batch_info_guard.set_batch_idx(row_idx); + const int64_t* v = records_[row_idx]->GetIntValue(target_idx, odps_type); + if (v == NULL) { + datums[row_idx].set_null(); + } else { + int64_t in_val = *v; + ObDecimalInt *decint = nullptr; + int32_t int_bytes = 0; + ObDecimalIntBuilder tmp_alloc; + ObScale out_scale = expr.datum_meta_.scale_; + ObScale in_scale = 0; + ObPrecision out_prec = expr.datum_meta_.precision_; + ObPrecision in_prec = + ObAccuracy::MAX_ACCURACY2[lib::is_oracle_mode()][type] + .get_precision(); + const static int64_t DECINT64_MAX = get_scale_factor(MAX_PRECISION_DECIMAL_INT_64); + if (in_prec > MAX_PRECISION_DECIMAL_INT_64 && in_val < DECINT64_MAX) { + in_prec = MAX_PRECISION_DECIMAL_INT_64; + } + if (OB_FAIL(wide::from_integer(in_val, tmp_alloc, decint, int_bytes, in_prec))) { + LOG_WARN("from_integer failed", K(ret), K(in_val), K(row_idx), K(column_idx)); + } else if (ObDatumCast::need_scale_decimalint(in_scale, in_prec, out_scale, out_prec)) { + ObDecimalIntBuilder res_val; + if (OB_FAIL(ObDatumCast::common_scale_decimalint(decint, int_bytes, in_scale, out_scale, + out_prec, expr.extra_, res_val))) { + LOG_WARN("scale decimal int failed", K(ret), K(row_idx), K(column_idx)); + } else { + datums[row_idx].set_decimal_int(res_val.get_decimal_int(), res_val.get_int_bytes()); + } + } else { + datums[row_idx].set_decimal_int(decint, int_bytes); + } + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr type", K(ret), K(type), K(column_idx)); + } + break; + } + case apsara::odps::sdk::ODPS_FLOAT: + { + if (ObFloatType == type) { + for (int64_t row_idx = 0; OB_SUCC(ret) && row_idx < returned_row_cnt; ++row_idx) { + const float* v = records_[row_idx]->GetFloatValue(target_idx); + if (v == NULL) { + datums[row_idx].set_null(); + } else { + datums[row_idx].set_float(*v); + } + } + } else { + 
ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr type", K(ret), K(type), K(column_idx)); + } + break; + } + case apsara::odps::sdk::ODPS_DOUBLE: + { + if (ObDoubleType == type) { + for (int64_t row_idx = 0; OB_SUCC(ret) && row_idx < returned_row_cnt; ++row_idx) { + const double* v = records_[row_idx]->GetDoubleValue(target_idx); + if (v == NULL) { + datums[row_idx].set_null(); + } else { + datums[row_idx].set_double(*v); + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr type", K(ret), K(type), K(column_idx)); + } + break; + } + case apsara::odps::sdk::ODPS_DECIMAL: + { + if (ObDecimalIntType == type) { + ObEvalCtx::BatchInfoScopeGuard batch_info_guard(ctx); + batch_info_guard.set_batch_idx(0); + for (int64_t row_idx = 0; OB_SUCC(ret) && row_idx < returned_row_cnt; ++row_idx) { + batch_info_guard.set_batch_idx(row_idx); + uint32_t len; + const char* v = records_[row_idx]->GetDecimalValue(target_idx, len); + if (v == NULL || len == 0) { + datums[row_idx].set_null(); + } else { + ObString in_str(len, v); + ObDecimalIntBuilder res_val; + if (OB_FAIL(ObOdpsDataTypeCastUtil::common_string_decimalint_wrap(expr, in_str, ctx.exec_ctx_.get_user_logging_ctx(), + res_val))) { + LOG_WARN("cast string to decimal int failed", K(ret), K(row_idx), K(column_idx)); + } else { + datums[row_idx].set_decimal_int(res_val.get_decimal_int(), res_val.get_int_bytes()); + } + } + } + } else if (ObNumberType == type) { + ObEvalCtx::BatchInfoScopeGuard batch_info_guard(ctx); + batch_info_guard.set_batch_idx(0); + for (int64_t row_idx = 0; OB_SUCC(ret) && row_idx < returned_row_cnt; ++row_idx) { + batch_info_guard.set_batch_idx(row_idx); + uint32_t len; + const char* v = records_[row_idx]->GetDecimalValue(target_idx, len); + if (v == NULL || len == 0) { + datums[row_idx].set_null(); + } else { + ObString in_str(len, v); + number::ObNumber nmb; + ObNumStackOnceAlloc tmp_alloc; + if (OB_FAIL(ObOdpsDataTypeCastUtil::common_string_number_wrap(expr, in_str, tmp_alloc, 
nmb))) { + LOG_WARN("cast string to number failed", K(ret), K(row_idx), K(column_idx)); + } else { + datums[row_idx].set_number(nmb); + } + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr type", K(ret), K(type), K(column_idx)); + } + break; + } + case apsara::odps::sdk::ODPS_CHAR: + { + if (ObCharType == type) { + ObEvalCtx::BatchInfoScopeGuard batch_info_guard(ctx); + batch_info_guard.set_batch_idx(0); + for (int64_t row_idx = 0; OB_SUCC(ret) && row_idx < returned_row_cnt; ++row_idx) { + batch_info_guard.set_batch_idx(row_idx); + uint32_t len; + const char* v = records_[row_idx]->GetStringValue(target_idx, len, apsara::odps::sdk::ODPS_CHAR); + if (v == NULL || (0 == len && lib::is_oracle_mode())) { + datums[row_idx].set_null(); + } else { + ObObjType in_type = ObCharType; + ObObjType out_type = ObCharType; + ObCollationType in_cs_type = CS_TYPE_UTF8MB4_BIN; // odps's collation + ObCollationType out_cs_type = expr.datum_meta_.cs_type_; + ObString in_str(len, v); + bool has_set_res = false; + ObCharsetType out_charset = common::ObCharset::charset_type_by_coll(out_cs_type); + if (CHARSET_UTF8MB4 == out_charset || CHARSET_BINARY == out_charset) { + datums[row_idx].set_string(in_str); + } else if (OB_FAIL(oceanbase::sql::ObOdpsDataTypeCastUtil::common_string_string_wrap(expr, in_type, in_cs_type, out_type, + out_cs_type, in_str, ctx, datums[row_idx], has_set_res))) { + LOG_WARN("cast string to string failed", K(ret), K(row_idx), K(column_idx)); + } + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr type", K(ret), K(type), K(column_idx)); + } + break; + } + case apsara::odps::sdk::ODPS_VARCHAR: + { + if (ObVarcharType == type) { + ObEvalCtx::BatchInfoScopeGuard batch_info_guard(ctx); + batch_info_guard.set_batch_idx(0); + for (int64_t row_idx = 0; OB_SUCC(ret) && row_idx < returned_row_cnt; ++row_idx) { + batch_info_guard.set_batch_idx(row_idx); + uint32_t len; + const char* v = records_[row_idx]->GetStringValue(target_idx, 
len, apsara::odps::sdk::ODPS_VARCHAR); + if (v == NULL || (0 == len && lib::is_oracle_mode())) { + datums[row_idx].set_null(); + } else { + ObObjType in_type = ObVarcharType; + ObObjType out_type = ObVarcharType; + ObCollationType in_cs_type = CS_TYPE_UTF8MB4_BIN; // odps's collation + ObCollationType out_cs_type = expr.datum_meta_.cs_type_; + ObString in_str(len, v); + bool has_set_res = false; + ObCharsetType out_charset = common::ObCharset::charset_type_by_coll(out_cs_type); + if (CHARSET_UTF8MB4 == out_charset || CHARSET_BINARY == out_charset) { + datums[row_idx].set_string(in_str); + } else if (OB_FAIL(ObOdpsDataTypeCastUtil::common_string_string_wrap(expr, in_type, in_cs_type, out_type, + out_cs_type, in_str, ctx, datums[row_idx], has_set_res))) { + LOG_WARN("cast string to string failed", K(ret), K(row_idx), K(column_idx)); + } + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr type", K(ret), K(type), K(column_idx)); + } + break; + } + case apsara::odps::sdk::ODPS_STRING: + case apsara::odps::sdk::ODPS_BINARY: + { + if (ObVarcharType == type) { + ObEvalCtx::BatchInfoScopeGuard batch_info_guard(ctx); + batch_info_guard.set_batch_idx(0); + for (int64_t row_idx = 0; OB_SUCC(ret) && row_idx < returned_row_cnt; ++row_idx) { + batch_info_guard.set_batch_idx(row_idx); + uint32_t len; + const char* v = records_[row_idx]->GetStringValue(target_idx, len, odps_type); + if (v == NULL || (0 == len && lib::is_oracle_mode())) { + datums[row_idx].set_null(); + } else if (len > expr.max_length_) { + ret = OB_EXTERNAL_ODPS_COLUMN_TYPE_MISMATCH; + LOG_WARN("unexpected data length", K(ret), + K(len), + K(expr.max_length_), + K(column_list_.at(target_idx)), + K(type)); + print_type_map_user_info(column_list_.at(target_idx).type_info_, &expr); + } else { + ObObjType in_type = ObVarcharType; + ObObjType out_type = ObVarcharType; + ObCollationType in_cs_type = apsara::odps::sdk::ODPS_STRING == odps_type ? 
CS_TYPE_UTF8MB4_BIN : CS_TYPE_BINARY; + ObCollationType out_cs_type = expr.datum_meta_.cs_type_; + ObString in_str(len, v); + bool has_set_res = false; + ObCharsetType out_charset = common::ObCharset::charset_type_by_coll(out_cs_type); + if (CHARSET_UTF8MB4 == out_charset || CHARSET_BINARY == out_charset) { + datums[row_idx].set_string(in_str); + } else if (OB_FAIL(ObOdpsDataTypeCastUtil::common_string_string_wrap(expr, in_type, in_cs_type, out_type, + out_cs_type, in_str, ctx, datums[row_idx], has_set_res))) { + LOG_WARN("cast string to string failed", K(ret), K(row_idx), K(column_idx)); + } + } + } + } else if (ObTinyTextType == type || + ObTextType == type || + ObLongTextType == type || + ObMediumTextType == type) { + ObEvalCtx::BatchInfoScopeGuard batch_info_guard(ctx); + batch_info_guard.set_batch_idx(0); + for (int64_t row_idx = 0; OB_SUCC(ret) && row_idx < returned_row_cnt; ++row_idx) { + batch_info_guard.set_batch_idx(row_idx); + uint32_t len; + const char* v = records_[row_idx]->GetStringValue(target_idx, len, odps_type); + if (v == NULL || (0 == len && lib::is_oracle_mode())) { + datums[row_idx].set_null(); + } else if (!text_type_length_is_valid_at_runtime(type, len)) { + ret = OB_EXTERNAL_ODPS_COLUMN_TYPE_MISMATCH; + LOG_WARN("unexpected data length", K(ret), + K(len), + K(expr.max_length_), + K(column_list_.at(target_idx)), + K(type)); + print_type_map_user_info(column_list_.at(target_idx).type_info_, &expr); + } else { + ObString in_str(len, v); + ObObjType in_type = ObVarcharType; + ObCollationType in_cs_type = apsara::odps::sdk::ODPS_STRING == odps_type ? 
CS_TYPE_UTF8MB4_BIN : CS_TYPE_BINARY; + if (OB_FAIL(ObOdpsDataTypeCastUtil::common_string_text_wrap(expr, in_str, ctx, NULL, datums[row_idx], in_type, in_cs_type))) { + LOG_WARN("cast string to text failed", K(ret), K(row_idx), K(column_idx)); + } + } + } + } else if (ObRawType == type) { + ObEvalCtx::BatchInfoScopeGuard batch_info_guard(ctx); + batch_info_guard.set_batch_idx(0); + for (int64_t row_idx = 0; OB_SUCC(ret) && row_idx < returned_row_cnt; ++row_idx) { + batch_info_guard.set_batch_idx(row_idx); + uint32_t len; + const char* v = records_[row_idx]->GetStringValue(target_idx, len, odps_type); + if (v == NULL || (0 == len && lib::is_oracle_mode())) { + datums[row_idx].set_null(); + } else { + ObString in_str(len, v); + bool has_set_res = false; + if (OB_FAIL(ObDatumHexUtils::hextoraw_string(expr, in_str, ctx, datums[row_idx], has_set_res))) { + LOG_WARN("cast string to raw failed", K(ret), K(row_idx), K(column_idx)); + } + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr type", K(ret), K(type), K(column_idx)); + } + break; + } + case apsara::odps::sdk::ODPS_TIMESTAMP: + { + if (ObTimestampType == type && !is_oracle_mode()) { + for (int64_t row_idx = 0; OB_SUCC(ret) && row_idx < returned_row_cnt; ++row_idx) { + const apsara::odps::sdk::TimeStamp* v = records_[row_idx]->GetTimestampValue(target_idx); + if (v == NULL) { + datums[row_idx].set_null(); + } else { + int64_t datetime = v->GetSecond() * USECS_PER_SEC + (v->GetNano() + 500) / 1000; // suppose odps's timezone is same to oceanbase + datums[row_idx].set_datetime(datetime); + } + } + } else if (false && ObTimestampLTZType == type && is_oracle_mode()) { + for (int64_t row_idx = 0; OB_SUCC(ret) && row_idx < returned_row_cnt; ++row_idx) { + const apsara::odps::sdk::TimeStamp* v = records_[row_idx]->GetTimestampValue(target_idx); + if (v == NULL) { + datums[row_idx].set_null(); + } else { + + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr type", K(ret), K(type), 
K(column_idx)); + } + break; + } + case apsara::odps::sdk::ODPS_TIMESTAMP_NTZ: + { + if (ObDateTimeType == type && !is_oracle_mode()) { + for (int64_t row_idx = 0; OB_SUCC(ret) && row_idx < returned_row_cnt; ++row_idx) { + const apsara::odps::sdk::TimeStamp* v = records_[row_idx]->GetTimestampNTZValue(target_idx); + if (v == NULL) { + datums[row_idx].set_null(); + } else { + int64_t datetime = v->GetSecond() * USECS_PER_SEC + (v->GetNano() + 500) / 1000; + datums[row_idx].set_datetime(datetime); + } + } + } else if (false && ObTimestampNanoType == type && is_oracle_mode()) { + for (int64_t row_idx = 0; OB_SUCC(ret) && row_idx < returned_row_cnt; ++row_idx) { + const apsara::odps::sdk::TimeStamp* v = records_[row_idx]->GetTimestampNTZValue(target_idx); + if (v == NULL) { + datums[row_idx].set_null(); + } else { + + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr type", K(ret), K(type), K(column_idx)); + } + break; + } + case apsara::odps::sdk::ODPS_DATE: + { + if (ObDateType == type && !is_oracle_mode()) { + for (int64_t row_idx = 0; OB_SUCC(ret) && row_idx < returned_row_cnt; ++row_idx) { + const int64_t* v = records_[row_idx]->GetDateValue(target_idx); + if (v == NULL) { + datums[row_idx].set_null(); + } else { + int32_t date = *v; + datums[row_idx].set_date(date); + } + } + } else if (false && ObDateTimeType == type && is_oracle_mode()) { + for (int64_t row_idx = 0; OB_SUCC(ret) && row_idx < returned_row_cnt; ++row_idx) { + const int64_t* v = records_[row_idx]->GetDateValue(target_idx); + if (v == NULL) { + datums[row_idx].set_null(); + } else { + + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr type", K(ret), K(type), K(column_idx)); + } + break; + } + case apsara::odps::sdk::ODPS_DATETIME: + { + if (ObDateTimeType == type && !is_oracle_mode()) { + int32_t tmp_offset = 0; + int64_t res_offset = 0; + if (OB_FAIL(ctx.exec_ctx_.get_my_session()->get_timezone_info()->get_timezone_offset(0, tmp_offset))) { + 
LOG_WARN("failed to get timezone offset", K(ret)); + } else { + res_offset = SEC_TO_USEC(tmp_offset); + } + for (int64_t row_idx = 0; OB_SUCC(ret) && row_idx < returned_row_cnt; ++row_idx) { + const int64_t* v = records_[row_idx]->GetDatetimeValue(target_idx); + if (v == NULL) { + datums[row_idx].set_null(); + } else { + int64_t datetime = *v * 1000 + res_offset; + datums[row_idx].set_datetime(datetime); + } + } + } else if (false && ObTimestampNanoType == type && is_oracle_mode()) { + for (int64_t row_idx = 0; OB_SUCC(ret) && row_idx < returned_row_cnt; ++row_idx) { + const int64_t* v = records_[row_idx]->GetDatetimeValue(target_idx); + if (v == NULL) { + datums[row_idx].set_null(); + } else { + + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr type", K(ret), K(type), K(column_idx)); + } + break; + } + case apsara::odps::sdk::ODPS_JSON: + { + for (int64_t row_idx = 0; OB_SUCC(ret) && row_idx < returned_row_cnt; ++row_idx) { + uint32_t len; + const char* v = records_[row_idx]->GetJsonValue(target_idx, len); + if (v == NULL || (0 == len && lib::is_oracle_mode())) { + datums[row_idx].set_null(); + } else { + datums[row_idx].set_string(v, len); + } + } + break; + } + default: + { + ret = OB_NOT_SUPPORTED; + LOG_WARN("odps not support type", K(ret)); + } + } + } catch (apsara::odps::sdk::OdpsTunnelException& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("odps exception occured when calling OpenReader method", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (const std::exception& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("odps exception occured when calling OpenReader method", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (...) 
{
+            if (OB_SUCC(ret)) {
+              ret = OB_ODPS_ERROR;
+              LOG_WARN("odps exception occured when calling OpenReader method", K(ret));
+            }
+          }
+        }
+      }
+      ObEvalCtx::BatchInfoScopeGuard batch_info_guard(ctx);
+      batch_info_guard.set_batch_idx(0);
+      for (int i = 0; OB_SUCC(ret) && i < file_column_exprs.count(); i++) {
+        file_column_exprs.at(i)->set_evaluated_flag(ctx);
+      }
+      for (int i = 0; OB_SUCC(ret) && i < column_exprs_.count(); i++) {
+        ObExpr *column_expr = column_exprs_.at(i);
+        ObExpr *column_convert_expr = scan_param_->ext_column_convert_exprs_->at(i);
+        OZ (column_convert_expr->eval_batch(ctx, *bit_vector_cache_, returned_row_cnt));
+        if (OB_SUCC(ret)) {
+          MEMCPY(column_expr->locate_batch_datums(ctx),
+                 column_convert_expr->locate_batch_datums(ctx), sizeof(ObDatum) * returned_row_cnt);
+          column_expr->set_evaluated_flag(ctx);
+        }
+      }
+      if (OB_SUCC(ret)) {
+        count = returned_row_cnt;
+      }
+    }
+  }
+  return ret;
+}
+
+// Produces the next single row.
+//
+// Each pass first switches to the next task when the current one is used up
+// (state_.count_ >= state_.step_), then asks inner_get_next_row() for a row.
+// inner_get_next_row() raises get_next_task_ when the current task turned out
+// to hold no more data, in which case another pass is made. The loop stops on
+// the first error; OB_ITER_END from next_task() is returned without a warning.
+int ObODPSTableRowIterator::get_next_row()
+{
+  int ret = OB_SUCCESS;
+  do {
+    const bool task_exhausted = state_.count_ >= state_.step_;
+    if (task_exhausted && OB_FAIL(next_task())) {
+      if (OB_ITER_END != ret) {
+        LOG_WARN("get next task failed", K(ret));
+      }
+    } else if (OB_FAIL(inner_get_next_row())) {
+      LOG_WARN("failed to get next row inner", K(ret));
+    }
+  } while (OB_SUCC(ret) && get_next_task_); // keep going until a task yields data
+  return ret;
+}
+
+int ObODPSTableRowIterator::inner_get_next_row()
+{
+  int ret = OB_SUCCESS;
+  ObMallocHookAttrGuard guard(mem_attr_);
+  ObEvalCtx &ctx = scan_param_->op_->get_eval_ctx();
+  const ExprFixedArray &file_column_exprs = *(scan_param_->ext_file_column_exprs_);
+  get_next_task_ = false;
+  try {
+    if (!(state_.record_reader_handle_->Read(*record_))) {
+      if (INT64_MAX == state_.step_ || 
state_.count_ == state_.step_) { + get_next_task_ = true; // goto get next task + state_.step_ = state_.count_; + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected end", K(total_count_), K(state_), K(ret)); + } + } else { + ++state_.count_; + ++total_count_; + } + } catch (apsara::odps::sdk::OdpsTunnelException& ex) { + if (OB_SUCC(ret)) { + std::string ex_msg = ex.what(); + if (std::string::npos != ex_msg.find("EOF")) { // EOF + LOG_TRACE("odps eof", K(ret), K(total_count_), K(state_), K(ex.what())); + if (INT64_MAX == state_.step_ || state_.count_ == state_.step_) { + get_next_task_ = true; // goto get next task + state_.step_ = state_.count_; + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected end", K(total_count_), K(state_), K(ret)); + } + } else { + ret = OB_ODPS_ERROR; + LOG_WARN("odps exception occured when calling Read or Close method", K(ret), K(total_count_), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } + } catch (const std::exception& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("odps exception occured when calling Read or Close method", K(ret), K(total_count_), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (...) 
{ + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("odps exception occured when calling Read or Close method", K(ret)); + } + } + if (OB_FAIL(ret)) { + // do nothing + } else if (get_next_task_) { + // do nothing + } else { + for (int64_t column_idx = 0; OB_SUCC(ret) && column_idx < target_column_id_list_.count(); ++column_idx) { + uint32_t target_idx = target_column_id_list_.at(column_idx); + ObExpr &expr = *file_column_exprs.at(column_idx); // do not check null ptr + ObDatum &datum = expr.locate_datum_for_write(ctx); + ObObjType type = expr.obj_meta_.get_type(); + if (expr.type_ == T_PSEUDO_PARTITION_LIST_COL) { + int64_t loc_idx = file_column_exprs.at(column_idx)->extra_ - 1; + if (OB_UNLIKELY(loc_idx < 0 || loc_idx >= state_.part_list_val_.get_count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexcepted loc_idx", K(ret), K(loc_idx), K(state_.part_list_val_.get_count()), KP(&state_.part_list_val_)); + } else if (state_.part_list_val_.get_cell(loc_idx).is_null()) { + datum.set_null(); + } else { + CK (OB_NOT_NULL(datum.ptr_)); + OZ (datum.from_obj(state_.part_list_val_.get_cell(loc_idx))); + } + } else { + apsara::odps::sdk::ODPSColumnType odps_type = column_list_.at(target_idx).type_info_.mType; + try { + switch(odps_type) + { + case apsara::odps::sdk::ODPS_BOOLEAN: + { + if ((ObTinyIntType == type || + ObSmallIntType == type || + ObMediumIntType == type || + ObInt32Type == type || + ObIntType == type) && !is_oracle_mode()) { + const bool* v = record_->GetBoolValue(target_idx); + if (v == NULL) { + datum.set_null(); + } else { + datum.set_int(*v); + } + } else if (ObNumberType == type && is_oracle_mode()) { + const bool* v = record_->GetBoolValue(target_idx); + if (v == NULL) { + datum.set_null(); + } else { + int64_t in_val = *v; + ObNumStackOnceAlloc tmp_alloc; + number::ObNumber nmb; + OZ(ObOdpsDataTypeCastUtil::common_int_number_wrap(expr, in_val, tmp_alloc, nmb), in_val); + if (OB_FAIL(ret)) { + LOG_WARN("failed to cast int to number", K(ret), 
K(column_idx)); + } + } + } else if (ObDecimalIntType == type && is_oracle_mode()) { + const bool* v = record_->GetBoolValue(target_idx); + if (v == NULL) { + datum.set_null(); + } else { + int64_t in_val = *v; + ObDecimalInt *decint = nullptr; + int32_t int_bytes = 0; + ObDecimalIntBuilder tmp_alloc; + ObScale out_scale = expr.datum_meta_.scale_; + ObScale in_scale = 0; + ObPrecision out_prec = expr.datum_meta_.precision_; + ObPrecision in_prec = + ObAccuracy::MAX_ACCURACY2[lib::is_oracle_mode()][type] + .get_precision(); + const static int64_t DECINT64_MAX = get_scale_factor(MAX_PRECISION_DECIMAL_INT_64); + if (in_prec > MAX_PRECISION_DECIMAL_INT_64 && in_val < DECINT64_MAX) { + in_prec = MAX_PRECISION_DECIMAL_INT_64; + } + if (OB_FAIL(wide::from_integer(in_val, tmp_alloc, decint, int_bytes, in_prec))) { + LOG_WARN("from_integer failed", K(ret), K(in_val), K(column_idx)); + } else if (ObDatumCast::need_scale_decimalint(in_scale, in_prec, out_scale, out_prec)) { + ObDecimalIntBuilder res_val; + if (OB_FAIL(ObDatumCast::common_scale_decimalint(decint, int_bytes, in_scale, out_scale, + out_prec, expr.extra_, res_val))) { + LOG_WARN("scale decimal int failed", K(ret), K(column_idx)); + } else { + datum.set_decimal_int(res_val.get_decimal_int(), res_val.get_int_bytes()); + } + } else { + datum.set_decimal_int(decint, int_bytes); + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr type", K(ret), K(type), K(column_idx)); + } + break; + } + case apsara::odps::sdk::ODPS_TINYINT: + case apsara::odps::sdk::ODPS_SMALLINT: + case apsara::odps::sdk::ODPS_INTEGER: + case apsara::odps::sdk::ODPS_BIGINT: + { + if ((ObTinyIntType == type || + ObSmallIntType == type || + ObMediumIntType == type || + ObInt32Type == type || + ObIntType == type) && !is_oracle_mode()) { + const int64_t* v = record_->GetIntValue(target_idx, odps_type); + if (v == NULL) { + datum.set_null(); + } else { + datum.set_int(*v); + } + } else if (ObNumberType == type && is_oracle_mode()) { 
+ const int64_t* v = record_->GetIntValue(target_idx, odps_type); + if (v == NULL) { + datum.set_null(); + } else { + int64_t in_val = *v; + ObNumStackOnceAlloc tmp_alloc; + number::ObNumber nmb; + OZ(ObOdpsDataTypeCastUtil::common_int_number_wrap(expr, in_val, tmp_alloc, nmb), in_val); + if (OB_FAIL(ret)) { + LOG_WARN("failed to cast int to number", K(ret), K(column_idx)); + } + } + } else if (ObDecimalIntType == type && is_oracle_mode()) { + const int64_t* v = record_->GetIntValue(target_idx, odps_type); + if (v == NULL) { + datum.set_null(); + } else { + int64_t in_val = *v; + ObDecimalInt *decint = nullptr; + int32_t int_bytes = 0; + ObDecimalIntBuilder tmp_alloc; + ObScale out_scale = expr.datum_meta_.scale_; + ObScale in_scale = 0; + ObPrecision out_prec = expr.datum_meta_.precision_; + ObPrecision in_prec = + ObAccuracy::MAX_ACCURACY2[lib::is_oracle_mode()][type] + .get_precision(); + const static int64_t DECINT64_MAX = get_scale_factor(MAX_PRECISION_DECIMAL_INT_64); + if (in_prec > MAX_PRECISION_DECIMAL_INT_64 && in_val < DECINT64_MAX) { + in_prec = MAX_PRECISION_DECIMAL_INT_64; + } + if (OB_FAIL(wide::from_integer(in_val, tmp_alloc, decint, int_bytes, in_prec))) { + LOG_WARN("from_integer failed", K(ret), K(in_val), K(column_idx)); + } else if (ObDatumCast::need_scale_decimalint(in_scale, in_prec, out_scale, out_prec)) { + ObDecimalIntBuilder res_val; + if (OB_FAIL(ObDatumCast::common_scale_decimalint(decint, int_bytes, in_scale, out_scale, + out_prec, expr.extra_, res_val))) { + LOG_WARN("scale decimal int failed", K(ret), K(column_idx)); + } else { + datum.set_decimal_int(res_val.get_decimal_int(), res_val.get_int_bytes()); + } + } else { + datum.set_decimal_int(decint, int_bytes); + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr type", K(ret), K(type), K(column_idx)); + } + break; + } + case apsara::odps::sdk::ODPS_FLOAT: + { + if (ObFloatType == type) { + const float* v = record_->GetFloatValue(target_idx); + if (v == NULL) { + 
datum.set_null(); + } else { + datum.set_float(*v); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr type", K(ret), K(type), K(column_idx)); + } + break; + } + case apsara::odps::sdk::ODPS_DOUBLE: + { + if (ObDoubleType == type) { + const double* v = record_->GetDoubleValue(target_idx); + if (v == NULL) { + datum.set_null(); + } else { + datum.set_double(*v); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr type", K(ret), K(type), K(column_idx)); + } + break; + } + case apsara::odps::sdk::ODPS_DECIMAL: + { + if (ObDecimalIntType == type) { + uint32_t len; + const char* v = record_->GetDecimalValue(target_idx, len); + if (v == NULL || len == 0) { + datum.set_null(); + } else { + ObString in_str(len, v); + ObDecimalIntBuilder res_val; + if (OB_FAIL(ObOdpsDataTypeCastUtil::common_string_decimalint_wrap(expr, in_str, ctx.exec_ctx_.get_user_logging_ctx(), res_val))) { + LOG_WARN("cast string to decimal int failed", K(ret), K(column_idx)); + } else { + datum.set_decimal_int(res_val.get_decimal_int(), res_val.get_int_bytes()); + } + } + } else if (ObNumberType == type) { + uint32_t len; + const char* v = record_->GetDecimalValue(target_idx, len); + if (v == NULL || len == 0) { + datum.set_null(); + } else { + ObString in_str(len, v); + number::ObNumber nmb; + ObNumStackOnceAlloc tmp_alloc; + if (OB_FAIL(ObOdpsDataTypeCastUtil::common_string_number_wrap(expr, in_str, tmp_alloc, nmb))) { + LOG_WARN("cast string to number failed", K(ret), K(column_idx)); + } else { + datum.set_number(nmb); + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr type", K(ret), K(type), K(column_idx)); + } + break; + } + case apsara::odps::sdk::ODPS_CHAR: + { + if (ObCharType == type) { + uint32_t len; + const char* v = record_->GetStringValue(target_idx, len, apsara::odps::sdk::ODPS_CHAR); + if (v == NULL || (0 == len && lib::is_oracle_mode())) { + datum.set_null(); + } else { + ObObjType in_type = ObCharType; + ObObjType out_type = 
ObCharType; + ObCollationType in_cs_type = CS_TYPE_UTF8MB4_BIN; // odps's collation + ObCollationType out_cs_type = expr.datum_meta_.cs_type_; + ObString in_str(len, v); + bool has_set_res = false; + ObCharsetType out_charset = common::ObCharset::charset_type_by_coll(out_cs_type); + if (CHARSET_UTF8MB4 == out_charset || CHARSET_BINARY == out_charset) { + datum.set_string(in_str); + } else if (OB_FAIL(oceanbase::sql::ObOdpsDataTypeCastUtil::common_string_string_wrap(expr, in_type, in_cs_type, out_type, + out_cs_type, in_str, ctx, datum, has_set_res))) { + LOG_WARN("cast string to string failed", K(ret), K(column_idx)); + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr type", K(ret), K(type), K(column_idx)); + } + break; + } + case apsara::odps::sdk::ODPS_VARCHAR: + { + if (ObVarcharType == type) { + uint32_t len; + const char* v = record_->GetStringValue(target_idx, len, apsara::odps::sdk::ODPS_VARCHAR); + if (v == NULL || (0 == len && lib::is_oracle_mode())) { + datum.set_null(); + } else { + ObObjType in_type = ObVarcharType; + ObObjType out_type = ObVarcharType; + ObCollationType in_cs_type = CS_TYPE_UTF8MB4_BIN; // odps's collation + ObCollationType out_cs_type = expr.datum_meta_.cs_type_; + ObString in_str(len, v); + bool has_set_res = false; + ObCharsetType out_charset = common::ObCharset::charset_type_by_coll(out_cs_type); + if (CHARSET_UTF8MB4 == out_charset || CHARSET_BINARY == out_charset) { + datum.set_string(in_str); + } else if (OB_FAIL(ObOdpsDataTypeCastUtil::common_string_string_wrap(expr, in_type, in_cs_type, out_type, + out_cs_type, in_str, ctx, datum, has_set_res))) { + LOG_WARN("cast string to string failed", K(ret), K(column_idx)); + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr type", K(ret), K(type), K(column_idx)); + } + break; + } + case apsara::odps::sdk::ODPS_STRING: + case apsara::odps::sdk::ODPS_BINARY: + { + if (ObVarcharType == type) { + uint32_t len; + const char* v = 
record_->GetStringValue(target_idx, len, odps_type); + if (v == NULL || (0 == len && lib::is_oracle_mode())) { + datum.set_null(); + } else if (len > expr.max_length_) { + ret = OB_EXTERNAL_ODPS_COLUMN_TYPE_MISMATCH; + LOG_WARN("unexpected data length", K(ret), + K(len), + K(expr.max_length_), + K(column_list_.at(target_idx)), + K(type)); + print_type_map_user_info(column_list_.at(target_idx).type_info_, &expr); + } else { + ObObjType in_type = ObVarcharType; + ObObjType out_type = ObVarcharType; + ObCollationType in_cs_type = apsara::odps::sdk::ODPS_STRING == odps_type ? CS_TYPE_UTF8MB4_BIN : CS_TYPE_BINARY; + ObCollationType out_cs_type = expr.datum_meta_.cs_type_; + ObString in_str(len, v); + bool has_set_res = false; + ObCharsetType out_charset = common::ObCharset::charset_type_by_coll(out_cs_type); + if (CHARSET_UTF8MB4 == out_charset || CHARSET_BINARY == out_charset) { + datum.set_string(in_str); + } else if (OB_FAIL(ObOdpsDataTypeCastUtil::common_string_string_wrap(expr, in_type, in_cs_type, out_type, + out_cs_type, in_str, ctx, datum, has_set_res))) { + LOG_WARN("cast string to string failed", K(ret), K(column_idx)); + } + } + } else if (ObTinyTextType == type || + ObTextType == type || + ObLongTextType == type || + ObMediumTextType == type) { + uint32_t len; + const char* v = record_->GetStringValue(target_idx, len, odps_type); + if (v == NULL || (0 == len && lib::is_oracle_mode())) { + datum.set_null(); + } else if (!text_type_length_is_valid_at_runtime(type, len)) { + ret = OB_EXTERNAL_ODPS_COLUMN_TYPE_MISMATCH; + LOG_WARN("unexpected data length", K(ret), + K(len), + K(expr.max_length_), + K(column_list_.at(target_idx)), + K(type)); + print_type_map_user_info(column_list_.at(target_idx).type_info_, &expr); + } else { + ObString in_str(len, v); + ObObjType in_type = ObVarcharType; // lcqlog todo ObHexStringType ? + ObCollationType in_cs_type = apsara::odps::sdk::ODPS_STRING == odps_type ? 
CS_TYPE_UTF8MB4_BIN : CS_TYPE_BINARY; + if (OB_FAIL(ObOdpsDataTypeCastUtil::common_string_text_wrap(expr, in_str, ctx, NULL, datum, in_type, in_cs_type))) { + LOG_WARN("cast string to text failed", K(ret), K(column_idx)); + } + } + } else if (ObRawType == type) { + uint32_t len; + const char* v = record_->GetStringValue(target_idx, len, odps_type); + if (v == NULL || (0 == len && lib::is_oracle_mode())) { + datum.set_null(); + } else { + ObString in_str(len, v); + bool has_set_res = false; + if (OB_FAIL(ObDatumHexUtils::hextoraw_string(expr, in_str, ctx, datum, has_set_res))) { + LOG_WARN("cast string to raw failed", K(ret), K(column_idx)); + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr type", K(ret), K(type), K(column_idx)); + } + break; + } + case apsara::odps::sdk::ODPS_TIMESTAMP: + { + if (ObTimestampType == type && !is_oracle_mode()) { + const apsara::odps::sdk::TimeStamp* v = record_->GetTimestampValue(target_idx); + if (v == NULL) { + datum.set_null(); + } else { + int64_t datetime = v->GetSecond() * USECS_PER_SEC + (v->GetNano() + 500) / 1000; // suppose odps's timezone is same to oceanbase + datum.set_datetime(datetime); + } + } else if (false && ObTimestampLTZType == type && is_oracle_mode()) { + const apsara::odps::sdk::TimeStamp* v = record_->GetTimestampValue(target_idx); + if (v == NULL) { + datum.set_null(); + } else { + + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr type", K(ret), K(type), K(column_idx)); + } + break; + } + case apsara::odps::sdk::ODPS_TIMESTAMP_NTZ: + { + if (ObDateTimeType == type && !is_oracle_mode()) { + const apsara::odps::sdk::TimeStamp* v = record_->GetTimestampNTZValue(target_idx); + if (v == NULL) { + datum.set_null(); + } else { + int64_t datetime = v->GetSecond() * USECS_PER_SEC + (v->GetNano() + 500) / 1000; + datum.set_datetime(datetime); + } + } else if (false && ObTimestampNanoType == type && is_oracle_mode()) { + const apsara::odps::sdk::TimeStamp* v = 
record_->GetTimestampNTZValue(target_idx); + if (v == NULL) { + datum.set_null(); + } else { + + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr type", K(ret), K(type), K(column_idx)); + } + break; + } + case apsara::odps::sdk::ODPS_DATE: + { + if (ObDateType == type && !is_oracle_mode()) { + const int64_t* v = record_->GetDateValue(target_idx); + if (v == NULL) { + datum.set_null(); + } else { + int32_t date = *v; + datum.set_date(date); + } + } else if (false && ObDateTimeType == type && is_oracle_mode()) { + const int64_t* v = record_->GetDateValue(target_idx); + if (v == NULL) { + datum.set_null(); + } else { + + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr type", K(ret), K(type), K(column_idx)); + } + break; + } + case apsara::odps::sdk::ODPS_DATETIME: + { + if (ObDateTimeType == type && !is_oracle_mode()) { + const int64_t* v = record_->GetDatetimeValue(target_idx); + int32_t tmp_offset = 0; + int64_t res_offset = 0; + if (v == NULL) { + datum.set_null(); + } else if (OB_FAIL(ctx.exec_ctx_.get_my_session()->get_timezone_info()->get_timezone_offset(0, tmp_offset))) { + LOG_WARN("failed to get timezone offset", K(ret)); + } else { + res_offset = SEC_TO_USEC(tmp_offset); + int64_t datetime = *v * 1000 + res_offset; + datum.set_datetime(datetime); + } + } else if (false && ObTimestampNanoType == type && is_oracle_mode()) { + const int64_t* v = record_->GetDatetimeValue(target_idx); + if (v == NULL) { + datum.set_null(); + } else { + + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr type", K(ret), K(type), K(column_idx)); + } + break; + } + case apsara::odps::sdk::ODPS_JSON: + { + uint32_t len; + const char* v = record_->GetJsonValue(target_idx, len); + if (v == NULL || (0 == len && lib::is_oracle_mode())) { + datum.set_null(); + } else { + datum.set_string(v, len); + } + break; + } + default: + { + ret = OB_NOT_SUPPORTED; + LOG_WARN("odps not support type", K(ret)); + } + } + } catch 
(apsara::odps::sdk::OdpsTunnelException& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("odps exception occured when calling OpenReader method", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (const std::exception& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("odps exception occured when calling OpenReader method", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (...) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("odps exception occured when calling OpenReader method", K(ret)); + } + } + } + } + for (int i = 0; OB_SUCC(ret) && i < file_column_exprs.count(); i++) { + file_column_exprs.at(i)->set_evaluated_flag(ctx); + } + for (int i = 0; OB_SUCC(ret) && i < column_exprs_.count(); i++) { + ObExpr *column_expr = column_exprs_.at(i); + ObExpr *column_convert_expr = scan_param_->ext_column_convert_exprs_->at(i); + ObDatum *convert_datum = NULL; + OZ (column_convert_expr->eval(ctx, convert_datum)); + if (OB_SUCC(ret)) { + column_expr->locate_datum_for_write(ctx) = *convert_datum; + column_expr->set_evaluated_flag(ctx); + } + } + } + return ret; +} + +int ObOdpsPartitionDownloaderMgr::init_downloader(common::ObArray &external_table_files, const ObString &properties) +{ + int ret = OB_SUCCESS; + int64_t partition_cnt = external_table_files.count(); + sql::ObExternalFileFormat external_odps_format; + if (inited_) { + // do nothing + } else if (0 == partition_cnt || properties.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected args", K(partition_cnt), K(properties), K(ret)); + } else if (!odps_mgr_map_.created() && + OB_FAIL(odps_mgr_map_.create(partition_cnt, + "OdpsTable", + "OdpsTableReader"))) { + LOG_WARN("create hash table failed", K(ret), K(partition_cnt)); + } else if (OB_FAIL(external_odps_format.load_from_string(properties, arena_alloc_))) { + LOG_WARN("failed to init external_odps_format", K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) 
&& i < external_table_files.count(); ++i) { + share::ObExternalFileInfo &odps_partition = external_table_files.at(i); + OdpsPartitionDownloader *downloader = NULL; + if (0 != odps_partition.file_id_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected file id", K(ret), K(i), K(odps_partition.file_id_), K(odps_partition.part_id_)); + } else if (OB_ISNULL(downloader = static_cast( + arena_alloc_.alloc(sizeof(OdpsPartitionDownloader))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate memory", K(ret), K(sizeof(OdpsPartitionDownloader))); + } else if (FALSE_IT(new(downloader)OdpsPartitionDownloader())) { + } else if (OB_FAIL(downloader->odps_driver_.init_tunnel(external_odps_format.odps_format_))) { + LOG_WARN("failed to init tunnel", K(ret), K(odps_partition.part_id_), K(properties)); + //odps_partition.file_url_ is odps partition specification, which is a literal string value + } else if (OB_FAIL(downloader->odps_driver_.create_downloader(odps_partition.file_url_, + downloader->odps_partition_downloader_))) { + LOG_WARN("failed create odps partition downloader", K(ret), K(i), K(odps_partition.part_id_), K(odps_partition.file_url_)); + } else if (OB_FAIL(odps_mgr_map_.set_refactored(odps_partition.part_id_, + reinterpret_cast(downloader)))) { + downloader->reset(); + LOG_WARN("failed to set refactored", K(ret), K(odps_partition.part_id_)); + } + } + if (OB_SUCC(ret)) { + inited_ = true; + LOG_TRACE("succ to init odps downloader", K(ret)); + } + } + return ret; +} + +int ObOdpsPartitionDownloaderMgr::create_upload_session(const sql::ObODPSGeneralFormat &odps_format, + const ObString &external_partition, + bool is_overwrite, + apsara::odps::sdk::IUploadPtr &upload) +{ + int ret = OB_SUCCESS; + apsara::odps::sdk::Configuration conf; + apsara::odps::sdk::OdpsTunnel tunnel; + const char* account_type = ""; + try { + conf.SetEndpoint(std::string(odps_format.endpoint_.ptr(), odps_format.endpoint_.length())); + conf.SetUserAgent("OB_ACCESS_ODPS"); + 
conf.SetTunnelQuotaName(std::string(odps_format.quota_.ptr(), odps_format.quota_.length())); + if (0 == odps_format.compression_code_.case_compare("zlib")) { + conf.SetCompressOption(apsara::odps::sdk::CompressOption::ZLIB_COMPRESS); + } else if (0 == odps_format.compression_code_.case_compare("zstd")) { + conf.SetCompressOption(apsara::odps::sdk::CompressOption::ZSTD_COMPRESS); + } else if (0 == odps_format.compression_code_.case_compare("lz4")) { + conf.SetCompressOption(apsara::odps::sdk::CompressOption::LZ4_COMPRESS); + } else if (0 == odps_format.compression_code_.case_compare("odps_lz4")) { + conf.SetCompressOption(apsara::odps::sdk::CompressOption::ODPS_LZ4_COMPRESS); + } else { + conf.SetCompressOption(apsara::odps::sdk::CompressOption::NO_COMPRESS); + } + if (0 == odps_format.access_type_.case_compare("aliyun") || + odps_format.access_type_.empty()) { + account_type = apsara::odps::sdk::ACCOUNT_ALIYUN; + } else if (0 == odps_format.access_type_.case_compare("sts")) { + account_type = apsara::odps::sdk::ACCOUNT_STS; + } else if (0 == odps_format.access_type_.case_compare("app")) { + account_type = apsara::odps::sdk::ACCOUNT_APPLICATION; + } else if (0 == odps_format.access_type_.case_compare("token") + || 0 == odps_format.access_type_.case_compare("domain") + || 0 == odps_format.access_type_.case_compare("taobao")) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("unsupported access type", K(ret), K(odps_format.access_type_)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "this ODPS access type"); + } else { + ret = OB_NOT_SUPPORTED; + LOG_WARN("unsupported access type", K(ret), K(odps_format.access_type_)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "this ODPS access type"); + } + if (OB_SUCC(ret)) { + apsara::odps::sdk::Account account(std::string(account_type), + std::string(odps_format.access_id_.ptr(), odps_format.access_id_.length()), + std::string(odps_format.access_key_.ptr(), odps_format.access_key_.length())); + conf.SetAccount(account); + tunnel.Init(conf); + if 
(OB_UNLIKELY(!(upload = tunnel.CreateUpload( + std::string(odps_format.project_.ptr(), odps_format.project_.length()), + std::string(odps_format.table_.ptr(), odps_format.table_.length()), + std::string(external_partition.ptr(), external_partition.length()), + "", + is_overwrite, + std::string(odps_format.schema_.ptr(), odps_format.schema_.length()))))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexcepted null ptr", K(ret)); + } + } + } catch (apsara::odps::sdk::OdpsException& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when create odps upload session", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (const std::exception& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when create odps upload session", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (...) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when create odps upload session", K(ret)); + } + } + return ret; +} + +int ObOdpsPartitionDownloaderMgr::init_uploader(const ObString &properties, + const ObString &external_partition, + bool is_overwrite, + int64_t parallel) +{ + int ret = OB_SUCCESS; + sql::ObExternalFileFormat external_properties; + apsara::odps::sdk::IUploadPtr upload; + apsara::odps::sdk::IRecordWriterPtr record_writer; + void *ptr; + OdpsUploader *uploader; + if (inited_) { + // do nothing + } else if (properties.empty()) { + // do nothing + } else if (OB_FAIL(external_properties.load_from_string(properties, arena_alloc_))) { + LOG_WARN("failed to init external_odps_format", K(ret)); + } else if (sql::ObExternalFileFormat::ODPS_FORMAT != external_properties.format_type_) { + // do nothing + } else if (!odps_mgr_map_.created() && + OB_FAIL(odps_mgr_map_.create(parallel, "IntoOdps"))) { + LOG_WARN("create hash table failed", K(ret), K(parallel)); + } else if (OB_FAIL(external_properties.odps_format_.decrypt())) { + LOG_WARN("failed to 
decrypt odps format", K(ret)); + } else { + ObMallocHookAttrGuard guard(ObMemAttr(MTL_ID(), "IntoOdps")); + try { + if (OB_FAIL(create_upload_session(external_properties.odps_format_, + external_partition, + is_overwrite, + upload))) { + LOG_WARN("failed to create upload session", K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < parallel; i++) { + if (OB_UNLIKELY(!(record_writer = upload->OpenWriter(i, true)))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexcepted null ptr", K(ret)); + } else if (OB_ISNULL(ptr = arena_alloc_.alloc(sizeof(OdpsUploader)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate uploader", K(ret), K(sizeof(OdpsUploader))); + } else { + uploader = new(ptr) OdpsUploader(); + uploader->record_writer_ = record_writer; + uploader->upload_ = upload; + } + if (OB_SUCC(ret) + && OB_FAIL(odps_mgr_map_.set_refactored(i, reinterpret_cast(uploader)))) { + LOG_WARN("failed to set refactored", K(ret), K(i)); + } + } + } + } catch (apsara::odps::sdk::OdpsException& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when init odps tunnel", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (const std::exception& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when init odps tunnel", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (...) 
{ + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when init odps tunnel", K(ret)); + } + } + } + if (OB_SUCC(ret)) { + inited_ = true; + is_download_ = false; + ATOMIC_STORE(&ref_, parallel); + LOG_TRACE("succ to init odps uploader", K(ret), K(ref_)); + } + return ret; +} + +int ObOdpsPartitionDownloaderMgr::get_odps_downloader(int64_t part_id, apsara::odps::sdk::IDownloadPtr &downloader) +{ + int ret = OB_SUCCESS; + int64_t value = 0; + OdpsPartitionDownloader *odps_downloader = NULL; + if (OB_FAIL(odps_mgr_map_.get_refactored(part_id, value))) { + LOG_WARN("failed to get downloader", K(ret), K(part_id)); + } else if (OB_ISNULL(odps_downloader = reinterpret_cast(value))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected value", K(ret), K(part_id), K(value)); + } else if (OB_ISNULL(odps_downloader->odps_partition_downloader_.get())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected value", K(ret), K(part_id), K(value)); + } else { + downloader = odps_downloader->odps_partition_downloader_; + } + return ret; +} + +int ObOdpsPartitionDownloaderMgr::get_odps_uploader(int64_t task_id, + apsara::odps::sdk::IUploadPtr &upload, + apsara::odps::sdk::IRecordWriterPtr &record_writer) +{ + int ret = OB_SUCCESS; + int64_t value = 0; + OdpsUploader *uploader = NULL; + if (OB_FAIL(odps_mgr_map_.get_refactored(task_id, value))) { + LOG_WARN("failed to get uploader", K(ret), K(task_id)); + } else if (OB_ISNULL(uploader = reinterpret_cast(value))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected value", K(ret), K(value)); + } else if (OB_UNLIKELY(!uploader->upload_ || !uploader->record_writer_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexcepted null ptr", K(ret)); + } else { + upload = uploader->upload_; + record_writer = uploader->record_writer_; + } + return ret; +} + +int ObOdpsPartitionDownloaderMgr::commit_upload() +{ + int ret = OB_SUCCESS; + std::vector blocks; + uint32_t block_id = 0; + uint32_t task_count = 
static_cast(odps_mgr_map_.size()); + OdpsUploader *uploader = NULL; + LOG_TRACE("debug select into commit upload begin"); + try { + for (common::hash::ObHashMap::iterator iter = odps_mgr_map_.begin(); + OB_SUCC(ret) && iter != odps_mgr_map_.end(); iter++) { + if (OB_ISNULL(uploader = reinterpret_cast(iter->second)) + || OB_UNLIKELY(!uploader->record_writer_ || !uploader->upload_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexcepted null ptr", K(ret)); + } else { + uploader->record_writer_->Close(); + blocks.push_back(block_id); + block_id++; + // Commit only once the last writer has been closed and no thread reported a failure (need_commit_ is still true) + if (block_id == task_count && true == ATOMIC_LOAD(&need_commit_)) { + uploader->upload_->Commit(blocks); + } + uploader->~OdpsUploader(); + } + } + } catch (apsara::odps::sdk::OdpsException& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when commit", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (const std::exception& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when commit", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (...) 
{ + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("caught exception when commit", K(ret)); + } + } + LOG_TRACE("debug select into commit upload end"); + return ret; +} + +int ObOdpsPartitionDownloaderMgr::reset() /* Tears the manager down: in download mode runs DeleteDownloaderFunc over every map entry, then destroys the map; always leaves inited_ = false and is_download_ = true. */ +{ + int ret = OB_SUCCESS; + DeleteDownloaderFunc delete_func; + if (!inited_) { + // do nothing + } else if (is_download_ && OB_FAIL(odps_mgr_map_.foreach_refactored(delete_func))) { /* on failure the map is NOT destroyed, only the flags below are cleared */ + LOG_WARN("failed to do foreach", K(ret)); + } else { + odps_mgr_map_.destroy(); + } + inited_ = false; + is_download_ = true; + return ret; +} + +int ObOdpsPartitionDownloaderMgr::DeleteDownloaderFunc::operator()(common::hash::HashMapPair &kv) /* Per-entry callback: reinterprets the stored value as an OdpsPartitionDownloader* and calls reset() on it; always returns OB_SUCCESS. */ +{ + int ret = OB_SUCCESS; + int64_t part_id = kv.first; + int64_t value = kv.second; + OdpsPartitionDownloader *downloader = reinterpret_cast(value); + if (OB_ISNULL(downloader)) { + // ignore ret + LOG_WARN("unexpected null ptr", K(ret), K(value), K(part_id));// ret is still OB_SUCCESS + } else { + downloader->reset(); /* the return value of reset() is intentionally not propagated */ + } + return ret; +} + +int ObOdpsPartitionDownloaderMgr::OdpsPartitionDownloader::reset() /* Calls Complete() on the held download handle. Returns OB_ERR_UNEXPECTED when the handle is null and OB_ODPS_ERROR when Complete() throws. */ +{ + int ret = OB_SUCCESS; + try { + if (OB_ISNULL(odps_partition_downloader_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null ptr", K(ret)); + } else { + odps_partition_downloader_->Complete(); + } + } catch (const apsara::odps::sdk::OdpsTunnelException& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("odps exception occured when calling Complete method", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (const std::exception& ex) { + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("odps exception occured when calling Complete method", K(ret), K(ex.what())); + LOG_USER_ERROR(OB_ODPS_ERROR, ex.what()); + } + } catch (...) 
{ + if (OB_SUCC(ret)) { + ret = OB_ODPS_ERROR; + LOG_WARN("odps exception occured when calling Complete method", K(ret)); + } + } + return ret; +} + +} // sql +} // oceanbase +#endif \ No newline at end of file diff --git a/src/sql/engine/table/ob_odps_table_row_iter.h b/src/sql/engine/table/ob_odps_table_row_iter.h new file mode 100644 index 0000000000..18628fd564 --- /dev/null +++ b/src/sql/engine/table/ob_odps_table_row_iter.h @@ -0,0 +1,286 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef __SQL_OB_ODPS_TABLE_ROW_ITER_H__ +#define __SQL_OB_ODPS_TABLE_ROW_ITER_H__ +#ifdef OB_BUILD_CPP_ODPS +#include +#include +#include "sql/engine/table/ob_external_table_access_service.h" +#include "sql/engine/cmd/ob_load_data_parser.h" +#include "lib/container/ob_se_array.h" +#include "lib/ob_errno.h" +#include "lib/hash/ob_hashmap.h" + +namespace oceanbase { +namespace sql { + +class ObODPSTableRowIterator : public ObExternalTableRowIterator { +public: + static const int64_t READER_HASH_MAP_BUCKET_NUM = 1 << 7; + static const int64_t ODPS_BLOCK_DOWNLOAD_SIZE = 1 << 18; +public: + struct StateValues { + StateValues() : + task_idx_(-1), + part_id_(0), + start_(0), + step_(0), + count_(0), + is_from_gi_pump_(false), + download_handle_(NULL), + record_reader_handle_(NULL) {} + int reuse(); + TO_STRING_KV(K(task_idx_), + K(part_id_), + K(start_), + K(step_), + K(count_), + K(is_from_gi_pump_), + K(ObString(part_spec_.c_str())), + K(ObString(download_id_.c_str()))); + int64_t task_idx_; + 
int64_t part_id_; + int64_t start_; + int64_t step_; + int64_t count_; + bool is_from_gi_pump_; + apsara::odps::sdk::IDownloadPtr download_handle_; + apsara::odps::sdk::IRecordReaderPtr record_reader_handle_; + std::string part_spec_; + std::string download_id_; + ObNewRow part_list_val_; + }; + struct OdpsPartition { /* One ODPS partition: its name, download handle, download id and record count (-1 when constructed without one). */ + OdpsPartition() : + name_(""), + download_handle_(NULL), + download_id_(""), + record_count_(-1) + { + } + OdpsPartition(const std::string &name) : + name_(name), + download_handle_(NULL), + download_id_(""), + record_count_(-1) + { + } + OdpsPartition(const std::string &name, + apsara::odps::sdk::IDownloadPtr download_handle, + const std::string download_id, + int64_t &record_count) : + name_(name), + download_handle_(download_handle), + download_id_(download_id), + record_count_(record_count) + { + } + ~OdpsPartition() { + reset(); + } + int reset(); + TO_STRING_KV(K(ObString(name_.c_str())), K(record_count_)); + std::string name_; + apsara::odps::sdk::IDownloadPtr download_handle_; + std::string download_id_; + int64_t record_count_; + }; + + struct OdpsColumn { /* One ODPS column: its name and the SDK type descriptor (type, precision, scale, specified length). */ + OdpsColumn() {} + OdpsColumn(std::string name, apsara::odps::sdk::ODPSColumnTypeInfo type_info) : + name_(name), + type_info_(type_info) + { + } + std::string name_; + apsara::odps::sdk::ODPSColumnTypeInfo type_info_; + TO_STRING_KV(K(ObString(name_.c_str())), K(type_info_.mType), K(type_info_.mPrecision), K(type_info_.mScale), K(type_info_.mSpecifiedLength)); + }; +public: + ObODPSTableRowIterator() : + odps_format_(), + account_(), + conf_(), + tunnel_(), + odps_(NULL), + table_handle_(NULL), + state_(), + is_part_table_(false), + total_count_(0), + bit_vector_cache_(NULL), + record_(NULL), + records_(NULL), + batch_size_(-1), + get_next_task_(false) + { + mem_attr_ = ObMemAttr(MTL_ID(), "odpsrowiter"); + malloc_alloc_.set_attr(mem_attr_); + } + virtual ~ObODPSTableRowIterator() { /* Frees bit_vector_cache_ and records_ through malloc_alloc_, clears the batch state, then calls reset(). */ + if (NULL != bit_vector_cache_) { + malloc_alloc_.free(bit_vector_cache_); + } + if (NULL != records_) { /* check the array before indexing it: a positive batch_size_ alone does not prove records_ was allocated */ + for (int64_t i = 0; i < 
batch_size_; ++i) { + records_[i].reset(); + } + malloc_alloc_.free(records_); + } + record_.reset(); + records_ = NULL; + batch_size_ = -1; + get_next_task_ = false; + reset(); + } + virtual int init(const storage::ObTableScanParam *scan_param) override; + virtual int get_next_row() override; + virtual int get_next_rows(int64_t &count, int64_t capacity) override; + virtual int get_next_row(ObNewRow *&row) override { + UNUSED(row); + return common::OB_ERR_UNEXPECTED; + } + virtual void reset() override; + int init_tunnel(const sql::ObODPSGeneralFormat &odps_format); + int create_downloader(ObString &part_spec, apsara::odps::sdk::IDownloadPtr &downloader); + int pull_partition_info(); + inline ObIArray& get_partition_info() { return partition_list_; } + inline bool is_part_table() { return is_part_table_; } + static int check_type_static(const apsara::odps::sdk::ODPSColumnType odps_type, + const int32_t odps_type_length, + const int32_t odps_type_precision, + const int32_t odps_type_scale, + const ObObjType ob_type, + const int32_t ob_type_length, + const int32_t ob_type_precision, + const int32_t ob_type_scale); +private: + int inner_get_next_row(); + int prepare_expr(); + int pull_column(); + int next_task(); + int print_type_map_user_info(apsara::odps::sdk::ODPSColumnTypeInfo odps_type_info, + const ObExpr *ob_type_expr); + int check_type_static(apsara::odps::sdk::ODPSColumnTypeInfo odps_type_info, + const ObExpr *ob_type_expr); + inline bool text_type_length_is_valid_at_runtime(ObObjType type, int64_t odps_data_length) { + bool is_valid = false; + if (ObTinyTextType == type && odps_data_length < OB_MAX_TINYTEXT_LENGTH) { + is_valid = true; + } else if (ObTextType == type && odps_data_length < OB_MAX_TEXT_LENGTH) { + is_valid = true; + } else if (ObMediumTextType == type && odps_data_length < OB_MAX_MEDIUMTEXT_LENGTH) { + is_valid = true; + } else if (ObLongTextType == type && odps_data_length < OB_MAX_LONGTEXT_LENGTH) { + is_valid = 
true; + } + return is_valid; + } +private: + ObODPSGeneralFormat odps_format_; + apsara::odps::sdk::Account account_; + apsara::odps::sdk::Configuration conf_; + apsara::odps::sdk::OdpsTunnel tunnel_; + apsara::odps::sdk::IODPSPtr odps_; + apsara::odps::sdk::IODPSTablePtr table_handle_; + ObSEArray partition_list_; + ObSEArray column_list_; + ObSEArray target_column_id_list_; + StateValues state_; + bool is_part_table_; + int64_t total_count_; + ObBitVector *bit_vector_cache_; + apsara::odps::sdk::ODPSTableRecordPtr record_; + apsara::odps::sdk::ODPSTableRecordPtr *records_; + int64_t batch_size_; // -1 means not inited, 0 means call get_next_row(), > 0 means call get_next_rows() + bool get_next_task_; // only used for get next task and recall inner_get_next_row() when curren task was iter end. + common::ObMalloc malloc_alloc_; + common::ObArenaAllocator arena_alloc_; + common::ObMemAttr mem_attr_; +}; + +class ObOdpsPartitionDownloaderMgr +{ +public: + struct OdpsPartitionDownloader { + OdpsPartitionDownloader() : + odps_driver_(), + odps_partition_downloader_(NULL) + {} + ~OdpsPartitionDownloader() { + reset(); + } + int reset(); + ObODPSTableRowIterator odps_driver_; + apsara::odps::sdk::IDownloadPtr odps_partition_downloader_; + }; + class DeleteDownloaderFunc + { + public: + DeleteDownloaderFunc() {} + virtual ~DeleteDownloaderFunc() = default; + int operator()(common::hash::HashMapPair &kv); + }; + struct OdpsUploader { + OdpsUploader() : upload_(NULL), record_writer_(NULL) {} + ~OdpsUploader() { + upload_.reset(); + record_writer_.reset(); + } + apsara::odps::sdk::IUploadPtr upload_; + apsara::odps::sdk::IRecordWriterPtr record_writer_; + }; + ObOdpsPartitionDownloaderMgr() : inited_(false), is_download_(true), ref_(0), need_commit_(true) {} + int init_downloader(common::ObArray &external_table_files, + const ObString &properties); + int init_uploader(const ObString &properties, + const ObString &external_partition, + bool is_overwrite, + int64_t parallel); 
+ static int create_upload_session(const sql::ObODPSGeneralFormat &odps_format, + const ObString &external_partition, + bool is_overwrite, + apsara::odps::sdk::IUploadPtr &upload); + int get_odps_downloader(int64_t part_id, apsara::odps::sdk::IDownloadPtr &downloader); + int get_odps_uploader(int64_t block_id, + apsara::odps::sdk::IUploadPtr &upload, + apsara::odps::sdk::IRecordWriterPtr &record_writer); + int commit_upload(); + int reset(); + OB_INLINE bool is_download_mgr_inited() { return inited_ && is_download_; } + inline int64_t inc_ref() + { + return ATOMIC_FAA(&ref_, 1); + } + inline int64_t dec_ref() + { + return ATOMIC_SAF(&ref_, 1); + } + inline void set_fail() + { + ATOMIC_STORE(&need_commit_, false); + } +private: + bool inited_; + bool is_download_; + common::hash::ObHashMap odps_mgr_map_; + common::ObArenaAllocator arena_alloc_; + int64_t ref_; + bool need_commit_; +}; + +} // sql +} // oceanbase + +#endif +#endif // __SQL_OB_ODPS_TABLE_ROW_ITER_H__ \ No newline at end of file diff --git a/src/sql/ob_sql_utils.cpp b/src/sql/ob_sql_utils.cpp index 2a11cc596c..299fb7feaa 100644 --- a/src/sql/ob_sql_utils.cpp +++ b/src/sql/ob_sql_utils.cpp @@ -53,6 +53,7 @@ #include "lib/charset/ob_charset.h" #include "pl/ob_pl_user_type.h" #include "sql/engine/expr/ob_expr_lob_utils.h" +#include "sql/engine/cmd/ob_load_data_parser.h" #ifdef OB_BUILD_SPM #include "sql/spm/ob_spm_controller.h" #endif @@ -1404,6 +1405,55 @@ int ObSQLUtils::check_and_copy_column_alias_name(const ObCollationType cs_type, return ret; } +/* Appends every single-quoted segment of all_part_spec (quotes stripped) to part_spec_list; an empty input, an empty segment or an unterminated quote returns OB_ERR_UNEXPECTED. */ +int ObSQLUtils::extract_odps_part_spec(const ObString &all_part_spec, ObIArray &part_spec_list) +{ + int ret = OB_SUCCESS; + if (all_part_spec.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected empty odps part spec", K(ret)); + } else { + const char* start = all_part_spec.ptr(); + const char* end = start + all_part_spec.length(); + const char* ptr = NULL; + while (start < end && OB_SUCC(ret)) { + if (ptr == NULL && *start == '\'') { + ptr = start; + 
} else if (ptr != NULL && *start == '\'') { + int64_t len = start - ptr - 1; + if (0 == len) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected part spec", K(ret), K(all_part_spec)); + } else if (OB_FAIL(part_spec_list.push_back(ObString(len, ptr + 1)))) { + LOG_WARN("failed to push back part_spec", K(ret)); + } + ptr = NULL; + } + ++start; + } + if (OB_SUCC(ret) && ptr != NULL) { /* opening quote was never closed: fail instead of silently dropping the tail */ + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unterminated quote in odps part spec", K(ret), K(all_part_spec)); + } + } + return ret; +} + +int ObSQLUtils::is_external_odps_table(const ObString &properties, ObIAllocator &allocator, bool &is_odps) +{ + int ret = OB_SUCCESS; + is_odps = false; + ObExternalFileFormat format; + if (properties.empty()) { + // do nothing + } else if (OB_FAIL(format.load_from_string(properties, allocator))) { + LOG_WARN("fail to load from properties string", K(ret), K(properties)); + } else { + is_odps = ObExternalFileFormat::FormatType::ODPS_FORMAT == format.format_type_; + } + return ret; +} + int ObSQLUtils::check_ident_name(const ObCollationType cs_type, ObString &name, const bool check_for_path_char, const int64_t max_ident_len) { @@ -5417,7 +5467,7 @@ void ObSQLUtils::adjust_time_by_ntp_offset(int64_t &dst_timeout_ts) bool ObSQLUtils::is_external_files_on_local_disk(const ObString &url) { - return url.prefix_match_ci(OB_FILE_PREFIX); + return url.empty() ? 
false : url.prefix_match_ci(OB_FILE_PREFIX); } int ObSQLUtils::split_remote_object_storage_url(ObString &url, ObBackupStorageInfo &storage_info) diff --git a/src/sql/ob_sql_utils.h b/src/sql/ob_sql_utils.h index 52618c7cf2..f79a794de9 100644 --- a/src/sql/ob_sql_utils.h +++ b/src/sql/ob_sql_utils.h @@ -715,6 +715,8 @@ public: static int64_t combine_server_id(int64_t ts, uint64_t server_id) { return (ts & ((1LL << 43) - 1LL)) | ((server_id & 0xFFFF) << 48); } + static int extract_odps_part_spec(const ObString &all_part_spec, ObIArray &part_spec_list); + static int is_external_odps_table(const ObString &properties, ObIAllocator &allocator, bool &is_odps); static int check_ident_name(const common::ObCollationType cs_type, common::ObString &name, const bool check_for_path_char, const int64_t max_ident_len); diff --git a/src/sql/optimizer/ob_insert_log_plan.cpp b/src/sql/optimizer/ob_insert_log_plan.cpp index c912be7916..4ee1c71bbb 100644 --- a/src/sql/optimizer/ob_insert_log_plan.cpp +++ b/src/sql/optimizer/ob_insert_log_plan.cpp @@ -13,6 +13,7 @@ #define USING_LOG_PREFIX SQL_OPT #include "sql/resolver/dml/ob_insert_stmt.h" #include "sql/optimizer/ob_log_insert.h" +#include "sql/optimizer/ob_log_select_into.h" #include "sql/optimizer/ob_insert_log_plan.h" #include "sql/optimizer/ob_log_operator_factory.h" #include "sql/optimizer/ob_log_plan_factory.h" @@ -126,21 +127,32 @@ int ObInsertLogPlan::generate_normal_raw_plan() osg_info->online_sample_rate_ = online_sample_percent; } } - if (OB_SUCC(ret)) { - if (OB_FAIL(prepare_dml_infos())) { - LOG_WARN("failed to prepare dml infos", K(ret)); - } else if (use_pdml()) { - if (OB_FAIL(candi_allocate_pdml_insert(osg_info))) { - LOG_WARN("failed to allocate pdml insert", K(ret)); + TableItem *insert_table = NULL; + if (OB_ISNULL(insert_table = insert_stmt->get_table_item_by_id(insert_stmt->get_insert_table_info().table_id_))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("insert target table is unexpected null", K(ret)); + } else if 
(schema::EXTERNAL_TABLE == insert_table->table_type_) { + if (OB_FAIL(candi_allocate_select_into_for_insert())) { + LOG_WARN("failed to allocate select into op", K(ret)); } else { - LOG_TRACE("succeed to allocate pdml insert operator", - K(candidates_.candidate_plans_.count())); + LOG_TRACE("succeed to allocate select into clause", K(candidates_.candidate_plans_.count())); } - } else if (OB_FAIL(candi_allocate_insert(osg_info))) { - LOG_WARN("failed to allocate insert operator", K(ret)); } else { - LOG_TRACE("succeed to allocate insert operator", K(candidates_.candidate_plans_.count())); + if (OB_FAIL(prepare_dml_infos())) { + LOG_WARN("failed to prepare dml infos", K(ret)); + } else if (use_pdml()) { + if (OB_FAIL(candi_allocate_pdml_insert(osg_info))) { + LOG_WARN("failed to allocate pdml insert", K(ret)); + } else { + LOG_TRACE("succeed to allocate pdml insert operator", + K(candidates_.candidate_plans_.count())); + } + } else if (OB_FAIL(candi_allocate_insert(osg_info))) { + LOG_WARN("failed to allocate insert operator", K(ret)); + } else { + LOG_TRACE("succeed to allocate insert operator", K(candidates_.candidate_plans_.count())); + } } } if (OB_SUCC(ret) && insert_stmt->get_returning_aggr_item_size() > 0) { @@ -1774,4 +1786,96 @@ int ObInsertLogPlan::get_online_estimate_percent(double &percent) LOG_WARN("failed to get sys online estimate percent", K(ret)); } return ret; -} \ No newline at end of file +} +int ObInsertLogPlan::candi_allocate_select_into_for_insert() +{ + int ret = OB_SUCCESS; + ObExchangeInfo exch_info; + CandidatePlan candidate_plan; + ObSEArray select_into_plans; + int64_t dml_parallel = ObGlobalHint::UNSET_PARALLEL; + if (OB_FAIL(get_parallel_info_from_candidate_plans(dml_parallel))) { + LOG_WARN("failed to get parallel info from candidate plans", K(ret)); + } else if (dml_parallel > 1) { + exch_info.dist_method_ = ObPQDistributeMethod::RANDOM; + } + for (int64_t i = 0 ; OB_SUCC(ret) && i < candidates_.candidate_plans_.count(); ++i) { + 
candidate_plan = candidates_.candidate_plans_.at(i); + if (OB_ISNULL(candidate_plan.plan_tree_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (candidate_plan.plan_tree_->is_sharding() + && OB_FAIL((allocate_exchange_as_top(candidate_plan.plan_tree_, exch_info)))) { + LOG_WARN("failed to allocate exchange as top", K(ret)); + } else if (OB_FAIL(allocate_select_into_as_top_for_insert(candidate_plan.plan_tree_))) { + LOG_WARN("failed to allocate select into", K(ret)); + } else if (OB_FAIL(select_into_plans.push_back(candidate_plan))) { + LOG_WARN("failed to push back candidate plan", K(ret)); + } else { /*do nothing*/ } + } + if (OB_SUCC(ret)) { + if (OB_FAIL(prune_and_keep_best_plans(select_into_plans))) { + LOG_WARN("failed to prune and keep best plans", K(ret)); + } else { /*do nothing*/ } + } + return ret; +} + +int ObInsertLogPlan::allocate_select_into_as_top_for_insert(ObLogicalOperator *&old_top) +{ + int ret = OB_SUCCESS; + ObLogSelectInto *select_into = NULL; + ObSchemaGetterGuard *schema_guard = NULL; + const ObTableSchema *table_schema = NULL; + ObSQLSessionInfo *session_info = NULL; + const ObInsertStmt *stmt = get_stmt(); + if (OB_ISNULL(old_top) || OB_ISNULL(stmt) + || OB_ISNULL(schema_guard = get_optimizer_context().get_schema_guard()) + || OB_ISNULL(session_info = get_optimizer_context().get_session_info()) + || stmt->get_table_items().count() != 2 + || OB_ISNULL(stmt->get_table_item(0)) || OB_ISNULL(stmt->get_table_item(1))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("Get unexpected null", K(ret), K(old_top), K(schema_guard), K(session_info), K(stmt)); + } else if (OB_FAIL(schema_guard->get_table_schema(session_info->get_effective_tenant_id(), + stmt->get_insert_table_info().ref_table_id_, + table_schema))) { + LOG_WARN("get table schema from schema guard failed", K(ret)); + } else if (OB_ISNULL(table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (OB_ISNULL(select_into 
= static_cast( + get_log_op_factory().allocate(*this, LOG_SELECT_INTO)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory for ObLogSelectInto failed", K(ret)); + } else { + ObString external_properties; + const ObString &format_or_properties = table_schema->get_external_file_format().empty() + ? table_schema->get_external_properties() + : table_schema->get_external_file_format(); + const ObInsertTableInfo& table_info = stmt->get_insert_table_info(); + if (format_or_properties.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("external properties is empty", K(ret)); + } else if (table_schema->get_external_properties().empty()) { // currently only writing to ODPS external tables is supported; other types are not supported yet + ret = OB_NOT_SUPPORTED; + LOG_WARN("not support to insert into external table which is not in odps", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "insert into external table which is not in odps"); + } else if (OB_FAIL(ob_write_string(get_allocator(), format_or_properties, external_properties))) { + LOG_WARN("failed to append string", K(ret)); + } else if (OB_FAIL(select_into->get_select_exprs().assign(table_info.column_conv_exprs_))) { + LOG_WARN("failed to get select exprs", K(ret)); + } else { + select_into->set_is_overwrite(stmt->is_overwrite()); + select_into->set_external_properties(external_properties); + select_into->set_external_partition(stmt->get_table_item(0)->external_table_partition_); + select_into->set_child(ObLogicalOperator::first_child, old_top); + // compute property + if (OB_FAIL(select_into->compute_property())) { + LOG_WARN("failed to compute equal set", K(ret)); + } else { + old_top = select_into; + } + } + } + return ret; +} diff --git a/src/sql/optimizer/ob_insert_log_plan.h b/src/sql/optimizer/ob_insert_log_plan.h index 207ae31829..0d68fada89 100644 --- a/src/sql/optimizer/ob_insert_log_plan.h +++ b/src/sql/optimizer/ob_insert_log_plan.h @@ -74,6 +74,10 @@ protected: int candi_allocate_pdml_insert(OSGShareInfo *osg_info); int candi_allocate_optimizer_stats_merge(OSGShareInfo 
*osg_info); + /** @brief Allocate SELECTINTO on top of plan candidates when insert into external table*/ + int candi_allocate_select_into_for_insert(); + int allocate_select_into_as_top_for_insert(ObLogicalOperator *&old_top); + int get_osg_type(bool is_multi_part_dml, ObShardingInfo *insert_table_sharding, int64_t distributed_method, diff --git a/src/sql/optimizer/ob_log_plan.cpp b/src/sql/optimizer/ob_log_plan.cpp index 11723a8f3e..19e49531f7 100644 --- a/src/sql/optimizer/ob_log_plan.cpp +++ b/src/sql/optimizer/ob_log_plan.cpp @@ -142,7 +142,8 @@ ObLogPlan::ObLogPlan(ObOptimizerContext &ctx, const ObDMLStmt *stmt) selectivity_ctx_(ctx, this, stmt), alloc_sfu_list_(), onetime_copier_(NULL), - nonrecursive_plan_for_fake_cte_(NULL) + nonrecursive_plan_for_fake_cte_(NULL), + has_allocated_range_shuffle_(false) { } @@ -6957,19 +6958,22 @@ int ObLogPlan::allocate_sort_and_exchange_as_top(ObLogicalOperator *&top, bool has_select_into = false; bool is_single = true; bool has_order_by = false; + ObRawExpr* partition_expr = NULL; if (OB_ISNULL(top) || OB_ISNULL(get_stmt())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(ret)); - } else if (OB_FAIL(check_select_into(has_select_into, is_single, has_order_by))) { + } else if (OB_FAIL(check_select_into(has_select_into, is_single, has_order_by, partition_expr))) { LOG_WARN("failed to check select into", K(ret)); } else if (exch_info.is_pq_local() && NULL == topn_expr && has_select_into && !is_single - && has_order_by) { + && has_order_by && (NULL == partition_expr || partition_expr->is_const_expr())) { if (OB_FAIL(allocate_dist_range_sort_for_select_into(top, sort_keys, need_sort, is_local_order))) { LOG_WARN("failed to allocate dist range sort as top", K(ret)); - } else { /*do nothing*/ } + } else { + has_allocated_range_shuffle_ = true; + } } else if (exch_info.is_pq_local() && NULL == topn_expr && GCONF._enable_px_ordered_coord) { if (OB_FAIL(allocate_dist_range_sort_as_top(top, sort_keys, need_sort, 
is_local_order))) { LOG_WARN("failed to allocate dist range sort as top", K(ret)); @@ -7576,11 +7580,16 @@ int ObLogPlan::allocate_limit_as_top(ObLogicalOperator *&old_top, return ret; } -int ObLogPlan::check_select_into(bool &has_select_into, bool &is_single, bool &has_order_by){ +int ObLogPlan::check_select_into(bool &has_select_into, + bool &is_single, + bool &has_order_by, + ObRawExpr *&file_partition_expr) +{ int ret = OB_SUCCESS; has_select_into = false; is_single = true; has_order_by = false; + file_partition_expr = NULL; ObSelectIntoItem *into_item = NULL; if (OB_ISNULL(get_stmt())) { ret = OB_ERR_UNEXPECTED; @@ -7590,11 +7599,11 @@ int ObLogPlan::check_select_into(bool &has_select_into, bool &is_single, bool &h } else { const ObSelectStmt *stmt = static_cast(get_stmt()); has_select_into = stmt->has_select_into(); - into_item = stmt->get_select_into(); - if (NULL != into_item && !into_item->is_single_) { - is_single = false; - } has_order_by = stmt->has_order_by(); + if (NULL != (into_item = stmt->get_select_into())) { + is_single = into_item->is_single_; + file_partition_expr = into_item->file_partition_expr_; + } } return ret; } @@ -7606,12 +7615,16 @@ int ObLogPlan::candi_allocate_select_into() bool has_select_into = false; bool is_single = true; bool has_order_by = false; + ObRawExpr* partition_expr = NULL; + ObSEArray partition_exprs; CandidatePlan candidate_plan; ObSEArray select_into_plans; - if (OB_FAIL(check_select_into(has_select_into, is_single, has_order_by))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret)); - } else if (!is_single && !has_order_by) { + if (OB_FAIL(check_select_into(has_select_into, is_single, has_order_by, partition_expr))) { + LOG_WARN("failed to check select into", K(ret)); + } else if (partition_expr != NULL && !partition_expr->is_const_expr() + && OB_FAIL(partition_exprs.push_back(partition_expr))) { + LOG_WARN("failed to push back partition expr", K(ret)); + } else if (!is_single && !has_order_by && 
partition_exprs.count() == 0) { exch_info.dist_method_ = ObPQDistributeMethod::RANDOM; } for (int64_t i = 0 ; OB_SUCC(ret) && i < candidates_.candidate_plans_.count(); ++i) { @@ -7619,7 +7632,12 @@ int ObLogPlan::candi_allocate_select_into() if (OB_ISNULL(candidate_plan.plan_tree_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(ret)); - } else if (!has_order_by && candidate_plan.plan_tree_->is_sharding() + } else if (!is_single && !has_order_by && partition_exprs.count() != 0 + && OB_FAIL(get_grouping_style_exchange_info(partition_exprs, + candidate_plan.plan_tree_->get_output_equal_sets(), + exch_info))) { + LOG_WARN("failed to get grouping style exchange info", K(ret)); + } else if (!has_allocated_range_shuffle_ && candidate_plan.plan_tree_->is_sharding() && OB_FAIL((allocate_exchange_as_top(candidate_plan.plan_tree_, exch_info)))) { LOG_WARN("failed to allocate exchange as top", K(ret)); } else if (OB_FAIL(allocate_select_into_as_top(candidate_plan.plan_tree_))) { @@ -7647,11 +7665,10 @@ int ObLogPlan::allocate_select_into_as_top(ObLogicalOperator *&old_top) } else if (OB_ISNULL(select_into = static_cast( get_log_op_factory().allocate(*this, LOG_SELECT_INTO)))) { ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_ERROR("allocate memory for ObLogSelectInto failed", K(ret)); + LOG_WARN("allocate memory for ObLogSelectInto failed", K(ret)); } else { ObSelectIntoItem *into_item = stmt->get_select_into(); ObSEArray select_exprs; - ObRawExpr *to_outfile_expr = NULL; if (OB_ISNULL(into_item)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("into item is null", K(ret)); @@ -7669,9 +7686,11 @@ int ObLogPlan::allocate_select_into_as_top(ObLogicalOperator *&old_top) select_into->set_closed_cht(into_item->closed_cht_); select_into->set_is_single(into_item->is_single_); select_into->set_max_file_size(into_item->max_file_size_); + select_into->set_buffer_size(into_item->buffer_size_); select_into->set_escaped_cht(into_item->escaped_cht_); select_into->set_cs_type(into_item->cs_type_); 
select_into->set_child(ObLogicalOperator::first_child, old_top); + select_into->set_file_partition_expr(into_item->file_partition_expr_); // compute property if (OB_FAIL(select_into->compute_property())) { LOG_WARN("failed to compute equal set", K(ret)); diff --git a/src/sql/optimizer/ob_log_plan.h b/src/sql/optimizer/ob_log_plan.h index 8a8aabdaa9..bcb7f820dd 100644 --- a/src/sql/optimizer/ob_log_plan.h +++ b/src/sql/optimizer/ob_log_plan.h @@ -989,7 +989,10 @@ public: int allocate_select_into_as_top(ObLogicalOperator *&old_top); - int check_select_into(bool &has_select_into, bool &is_single, bool &has_order_by); + int check_select_into(bool &has_select_into, + bool &is_single, + bool &has_order_by, + ObRawExpr *&file_partition_expr); int allocate_expr_values_as_top(ObLogicalOperator *&top, const ObIArray *filter_exprs = NULL); @@ -1889,6 +1892,46 @@ private: common::ObSEArray new_or_quals_; ObSelectLogPlan *nonrecursive_plan_for_fake_cte_; + + // has_allocated_range_shuffle_ is a flag for select into + // when flag = true, logical plan is like + // select into + // | + // sort + // | + // exchange in distr + // | + // exchange out distr(range) + // condition: partition expr is null or const expr, single is false, has order by without limit + // + // when flag = false, logical plan is like + // select into + // | + // exchange in distr + // | + // exchange out distr(random) + // condition: single is false, no order by, partition expr is null or const expr + // + // or + // + // select into + // | + // exchange in distr + // | + // exchange out distr(hash) + // condition: single is false, no order by, partition expr is not const expr + // + // or + // + // select into + // | + // px coordinator + // | + // exchange out distr + // condition: single is true / parallel = 1 / has limit / has order by and partition by + // + // once a range shuffle has been allocated for select into, no further exchange operator should be allocated when the select into operator itself is allocated + bool has_allocated_range_shuffle_; DISALLOW_COPY_AND_ASSIGN(ObLogPlan); }; 
diff --git a/src/sql/optimizer/ob_log_select_into.cpp b/src/sql/optimizer/ob_log_select_into.cpp index 5e6065b762..31224bc766 100644 --- a/src/sql/optimizer/ob_log_select_into.cpp +++ b/src/sql/optimizer/ob_log_select_into.cpp @@ -62,6 +62,8 @@ int ObLogSelectInto::get_op_exprs(ObIArray &all_exprs) int ret = OB_SUCCESS; if (OB_FAIL(append(all_exprs, select_exprs_))) { LOG_WARN("failed to push back select exprs", K(ret)); + } else if (file_partition_expr_ != NULL && OB_FAIL(all_exprs.push_back(file_partition_expr_))) { + LOG_WARN("failed to push back file partition expr", K(ret)); } else if (OB_FAIL(ObLogicalOperator::get_op_exprs(all_exprs))) { LOG_WARN("failed to get op exprs", K(ret)); } else { /*do nothing*/ } diff --git a/src/sql/optimizer/ob_log_select_into.h b/src/sql/optimizer/ob_log_select_into.h index 5fb32f45a8..22db6b80b2 100644 --- a/src/sql/optimizer/ob_log_select_into.h +++ b/src/sql/optimizer/ob_log_select_into.h @@ -16,6 +16,7 @@ #include "sql/optimizer/ob_logical_operator.h" #include "sql/optimizer/ob_log_operator_factory.h" #include "objit/common/ob_item_type.h" +#include "sql/engine/cmd/ob_load_data_parser.h" namespace oceanbase { @@ -36,7 +37,12 @@ public: is_optional_(true), is_single_(true), max_file_size_(DEFAULT_MAX_FILE_SIZE), - escaped_cht_() + buffer_size_(DEFAULT_BUFFER_SIZE), + escaped_cht_(), + file_partition_expr_(NULL), + is_overwrite_(false), + external_properties_(), + external_partition_() { cs_type_ = ObCharset::get_system_collation(); } @@ -78,18 +84,38 @@ public: { max_file_size_ = max_file_size; } - inline void set_closed_cht(common::ObObj closed_cht) + inline void set_buffer_size(int64_t buffer_size) + { + buffer_size_ = buffer_size; + } + inline void set_closed_cht(const common::ObObj &closed_cht) /* const ref: also binds const objects and temporaries */ { closed_cht_ = closed_cht; } - inline void set_escaped_cht(common::ObObj escaped_cht) + inline void set_escaped_cht(const common::ObObj &escaped_cht) /* const ref: also binds const objects and temporaries */ { escaped_cht_ = escaped_cht; } - inline void set_cs_type(common::ObCollationType cs_type) + 
inline void set_cs_type(const common::ObCollationType &cs_type) /* const ref: also binds enumerator constants */ { cs_type_ = cs_type; } + inline void set_file_partition_expr(sql::ObRawExpr* file_partition_expr) + { + file_partition_expr_ = file_partition_expr; + } + inline void set_is_overwrite(bool is_overwrite) + { + is_overwrite_ = is_overwrite; + } + inline void set_external_properties(const common::ObString &external_properties) + { + external_properties_.assign_ptr(external_properties.ptr(), external_properties.length()); + } + inline void set_external_partition(const common::ObString &external_partition) + { + external_partition_.assign_ptr(external_partition.ptr(), external_partition.length()); + } inline ObItemType get_into_type() const { return into_type_; @@ -122,6 +148,10 @@ public: { return max_file_size_; } + inline int64_t get_buffer_size() const + { + return buffer_size_; + } inline common::ObObj get_closed_cht() const { return closed_cht_; @@ -134,6 +164,22 @@ public: { return cs_type_; } + inline sql::ObRawExpr* get_file_partition_expr() const + { + return file_partition_expr_; + } + inline bool get_is_overwrite() const + { + return is_overwrite_; + } + inline common::ObString get_external_properties() const + { + return external_properties_; + } + inline common::ObString get_external_partition() const + { + return external_partition_; + } const common::ObIArray &get_select_exprs() const { return select_exprs_; } common::ObIArray &get_select_exprs() { return select_exprs_; } virtual int est_cost() override; @@ -141,6 +187,7 @@ public: virtual int get_op_exprs(ObIArray &all_exprs) override; virtual int inner_replace_op_exprs(ObRawExprReplacer &replacer); static const int64_t DEFAULT_MAX_FILE_SIZE = 256*1024*1024; + static const int64_t DEFAULT_BUFFER_SIZE = 1*1024*1024; private: ObItemType into_type_; common::ObObj outfile_name_; @@ -152,8 +199,13 @@ private: bool is_optional_; bool is_single_; int64_t max_file_size_; + int64_t buffer_size_; common::ObObj escaped_cht_; common::ObCollationType 
cs_type_; + sql::ObRawExpr* file_partition_expr_; + bool is_overwrite_; + common::ObString external_properties_; + common::ObString external_partition_; }; } } diff --git a/src/sql/parser/non_reserved_keywords_mysql_mode.c b/src/sql/parser/non_reserved_keywords_mysql_mode.c index 78eec77c6c..3bc066704b 100644 --- a/src/sql/parser/non_reserved_keywords_mysql_mode.c +++ b/src/sql/parser/non_reserved_keywords_mysql_mode.c @@ -101,6 +101,7 @@ static const NonReservedKeyword Mysql_none_reserved_keywords[] = {"buckets", BUCKETS}, {"backup_copies", BACKUP_COPIES}, {"badfile", BADFILE}, + {"buffer_size", BUFFER_SIZE}, {"cache", CACHE}, {"calibration", CALIBRATION}, {"calibration_info", CALIBRATION_INFO}, @@ -594,6 +595,13 @@ static const NonReservedKeyword Mysql_none_reserved_keywords[] = {"obconfig_url", OBCONFIG_URL}, {"object", OBJECT}, {"object_id", OBJECT_ID}, + {"accessid", ACCESSID}, + {"accesskey", ACCESSKEY}, + {"accesstype", ACCESSTYPE}, + {"endpoint", ENDPOINT}, + {"project_name", PROJECT_NAME}, + {"quota_name", QUOTA_NAME}, + {"compression_code", COMPRESSION_CODE}, {"of", OF}, {"off", OFF}, {"offset", OFFSET}, @@ -668,6 +676,7 @@ static const NonReservedKeyword Mysql_none_reserved_keywords[] = {"profiles", PROFILES}, {"progressive_merge_num", PROGRESSIVE_MERGE_NUM}, {"protection", PROTECTION}, + {"properties", PROPERTIES}, {"proxy", PROXY}, {"public", PUBLIC}, {"purge", PURGE}, @@ -852,6 +861,7 @@ static const NonReservedKeyword Mysql_none_reserved_keywords[] = {"ssl", SSL}, {"stacked", STACKED}, {"standby", STANDBY}, + {"ststoken", STSTOKEN}, {"start", START}, {"starts", STARTS}, {"starting", STARTING}, diff --git a/src/sql/parser/sql_parser_mysql_mode.y b/src/sql/parser/sql_parser_mysql_mode.y index 4f2035dd43..41146b3405 100644 --- a/src/sql/parser/sql_parser_mysql_mode.y +++ b/src/sql/parser/sql_parser_mysql_mode.y @@ -261,7 +261,7 @@ END_P SET_VAR DELIMITER //-----------------------------reserved keyword end------------------------------------------------- 
%token //-----------------------------non_reserved keyword begin------------------------------------------- - ACCESS ACCOUNT ACTION ACTIVE ADDDATE AFTER AGAINST AGGREGATE ALGORITHM ALL_META ALL_USER ALWAYS ALLOW ANALYSE ANY + ACCESS ACCESSID ACCESSKEY ACCESSTYPE ACCOUNT ACTION ACTIVE ADDDATE AFTER AGAINST AGGREGATE ALGORITHM ALL_META ALL_USER ALWAYS ALLOW ANALYSE ANY APPROX_COUNT_DISTINCT APPROX_COUNT_DISTINCT_SYNOPSIS APPROX_COUNT_DISTINCT_SYNOPSIS_MERGE ARBITRATION ARRAY ASCII ASIS AT AUTHORS AUTO AUTOEXTEND_SIZE AUTO_INCREMENT AUTO_INCREMENT_MODE AUTO_INCREMENT_CACHE_SIZE AVG AVG_ROW_LENGTH ACTIVATE AVAILABILITY ARCHIVELOG ASYNCHRONOUS AUDIT ADMIN AUTO_REFRESH @@ -269,12 +269,12 @@ END_P SET_VAR DELIMITER BACKUP BACKUP_COPIES BALANCE BANDWIDTH BASE BASELINE BASELINE_ID BASIC BEGI BINDING SHARDING BINLOG BIT BIT_AND BIT_OR BIT_XOR BLOCK BLOCK_INDEX BLOCK_SIZE BLOOM_FILTER BOOL BOOLEAN BOOTSTRAP BTREE BYTE BREADTH BUCKETS BISON_LIST BACKUPSET BACKED BACKUPPIECE BACKUP_BACKUP_DEST BACKUPROUND - BADFILE + BADFILE BUFFER_SIZE CACHE CALIBRATION CALIBRATION_INFO CANCEL CASCADED CAST CATALOG_NAME CHAIN CHANGED CHARSET CHECKSUM CHECKPOINT CHUNK CIPHER CLASS_ORIGIN CLEAN CLEAR CLIENT CLONE CLOG CLOSE CLUSTER CLUSTER_ID CLUSTER_NAME COALESCE COLUMN_STAT CODE COLLATION COLUMN_FORMAT COLUMN_NAME COLUMNS COMMENT COMMIT COMMITTED COMPACT COMPLETION COMPLETE - COMPRESSED COMPRESSION COMPUTATION COMPUTE CONCURRENT CONDENSED CONDITIONAL CONNECTION CONSISTENT CONSISTENT_MODE CONSTRAINT_CATALOG + COMPRESSED COMPRESSION COMPRESSION_CODE COMPUTATION COMPUTE CONCURRENT CONDENSED CONDITIONAL CONNECTION CONSISTENT CONSISTENT_MODE CONSTRAINT_CATALOG CONSTRAINT_NAME CONSTRAINT_SCHEMA CONTAINS CONTEXT CONTRIBUTORS COPY COUNT CPU CREATE_TIMESTAMP CTXCAT CTX_ID CUBE CURDATE CURRENT STACKED CURTIME CURSOR_NAME CUME_DIST CYCLE CALC_PARTITION_ID CONNECT @@ -283,7 +283,7 @@ END_P SET_VAR DELIMITER DIRECTORY DISABLE DISALLOW DISCARD DISK DISKGROUP DO DOT DUMP DUMPFILE DUPLICATE DUPLICATE_SCOPE 
DYNAMIC DATABASE_ID DEFAULT_TABLEGROUP DISCONNECT DEMAND - EFFECTIVE EMPTY ENABLE ENABLE_ARBITRATION_SERVICE ENABLE_EXTENDED_ROWID ENCRYPTED ENCRYPTION END ENDS ENFORCED ENGINE_ ENGINES ENUM ENTITY ERROR_CODE ERROR_P ERRORS ESTIMATE + EFFECTIVE EMPTY ENABLE ENABLE_ARBITRATION_SERVICE ENABLE_EXTENDED_ROWID ENCRYPTED ENCRYPTION END ENDPOINT ENDS ENFORCED ENGINE_ ENGINES ENUM ENTITY ERROR_CODE ERROR_P ERRORS ESTIMATE ESCAPE EVENT EVENTS EVERY EXCHANGE EXCLUDING EXECUTE EXPANSION EXPIRE EXPIRE_INFO EXPORT OUTLINE EXTENDED EXTENDED_NOADDR EXTENT_SIZE EXTRACT EXCEPT EXPIRED ENCODING EMPTY_FIELD_AS_NULL EXTERNAL @@ -330,11 +330,11 @@ END_P SET_VAR DELIMITER PACK_KEYS PAGE PARALLEL PARAMETERS PARSER PARTIAL PARTITION_ID PARTITIONING PARTITIONS PASSWORD PATH PAUSE PAXOS_REPLICA_NUM PERCENTAGE PERCENT_RANK PHASE PLAN PHYSICAL PLANREGRESS PLUGIN PLUGIN_DIR PLUGINS POINT POLYGON PERFORMANCE - PROTECTION PRIORITY PL POLICY POOL PORT POSITION PREPARE PRESERVE PRETTY PRETTY_COLOR PREV PRIMARY_ZONE PRIVILEGES PROCESS - PROCESSLIST PROFILE PROFILES PROXY PRECEDING PCTFREE P_ENTITY P_CHUNK + PROTECTION PROJECT_NAME PRIORITY PL POLICY POOL PORT POSITION PREPARE PRESERVE PRETTY PRETTY_COLOR PREV PRIMARY_ZONE PRIVILEGES PROCESS + PROCESSLIST PROFILE PROFILES PROPERTIES PROXY PRECEDING PCTFREE P_ENTITY P_CHUNK PUBLIC PROGRESSIVE_MERGE_NUM PREVIEW PS PLUS PATTERN PARTITION_TYPE - QUARTER QUERY QUERY_RESPONSE_TIME QUEUE_TIME QUICK + QUARTER QUERY QUERY_RESPONSE_TIME QUEUE_TIME QUICK QUOTA_NAME RB_AND_AGG RB_BUILD_AGG RB_OR_AGG REBUILD RECOVER RECOVERY_WINDOW RECYCLE REDO_BUFFER_SIZE REDOFILE REDUNDANCY REDUNDANT REFRESH REGION RELAY RELAYLOG RELAY_LOG_FILE RELAY_LOG_POS RELAY_THREAD RELOAD REMAP REMOVE REORGANIZE REPAIR REPEATABLE REPLICA @@ -350,7 +350,7 @@ END_P SET_VAR DELIMITER SOURCE SPFILE SPLIT SQL_AFTER_GTIDS SQL_AFTER_MTS_GAPS SQL_BEFORE_GTIDS SQL_BUFFER_RESULT SQL_CACHE SQL_NO_CACHE SQL_ID SCHEMA_ID SQL_THREAD SQL_TSI_DAY SQL_TSI_HOUR SQL_TSI_MINUTE SQL_TSI_MONTH SQL_TSI_QUARTER 
SQL_TSI_SECOND SQL_TSI_WEEK SQL_TSI_YEAR SRID STANDBY _ST_ASMVT STAT START STARTS STATS_AUTO_RECALC - STATS_PERSISTENT STATS_SAMPLE_PAGES STATUS STATEMENTS STATISTICS STD STDDEV STDDEV_POP STDDEV_SAMP STRONG + STATS_PERSISTENT STATS_SAMPLE_PAGES STATUS STATEMENTS STATISTICS STD STDDEV STDDEV_POP STDDEV_SAMP STRONG STSTOKEN SYNCHRONIZATION SYNCHRONOUS STOP STORAGE STORAGE_FORMAT_VERSION STORE STORING STRING SUBCLASS_ORIGIN SUBDATE SUBJECT SUBPARTITION SUBPARTITIONS SUBSTR SUBSTRING SUCCESSFUL SUM SUPER SUSPEND SWAPS SWITCH SWITCHES SWITCHOVER SYSTEM SYSTEM_USER SYSDATE SESSION_ALIAS @@ -501,7 +501,7 @@ END_P SET_VAR DELIMITER %type lock_tables_stmt unlock_tables_stmt lock_type lock_table_list lock_table opt_local %type flashback_stmt purge_stmt opt_flashback_rename_table opt_flashback_rename_database opt_flashback_rename_tenant %type tenant_name_list opt_tenant_list tenant_list_tuple cache_type flush_scope opt_zone_list -%type into_opt into_clause field_opt field_term field_term_list line_opt line_term line_term_list into_var_list into_var file_opt file_option_list file_option file_size_const +%type into_opt into_clause field_opt field_term field_term_list line_opt line_term line_term_list into_var_list into_var file_partition_opt file_opt file_option_list file_option file_size_const %type string_list text_string string_val_list %type balance_task_type opt_balance_task_type %type list_expr list_partition_element list_partition_expr list_partition_list list_partition_option opt_list_partition_list opt_list_subpartition_list list_subpartition_list list_subpartition_element drop_partition_name_list @@ -537,8 +537,9 @@ END_P SET_VAR DELIMITER %type opt_storage_name opt_calibration_list calibration_info_list %type switchover_tenant_stmt switchover_clause opt_verify %type recover_tenant_stmt recover_point_clause -%type external_file_format_list external_file_format external_table_partition_option +%type external_file_format_list external_file_format 
external_properties_list external_properties external_table_partition_option %type dynamic_sampling_hint add_external_table_partition_actions add_external_table_partition_action +%type external_table_partitions external_table_partition %type skip_index_type opt_skip_index_type_list %type opt_rebuild_column_store %type json_table_expr mock_jt_on_error_on_empty jt_column_list json_table_column_def @@ -7248,6 +7249,11 @@ TABLE_MODE opt_equal_mark STRING_VALUE (void)($2) ; /* make bison mute */ merge_nodes($$, result, T_EXTERNAL_FILE_FORMAT, $4); } +| PROPERTIES opt_equal_mark '(' external_properties_list ')' +{ + (void)($2) ; /* make bison mute */ + merge_nodes($$, result, T_EXTERNAL_PROPERTIES, $4); +} | PATTERN opt_equal_mark STRING_VALUE { (void)($2) ; /* make bison mute */ @@ -8320,6 +8326,73 @@ REDUNDANT } ; +external_properties_list: +external_properties +{ + $$ = $1; +} +| external_properties_list opt_comma external_properties +{ + (void) ($2); + malloc_non_terminal_node($$, result->malloc_pool_, T_LINK_NODE, 2, $1, $3); +} +; + +external_properties: +TYPE COMP_EQ STRING_VALUE +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_EXTERNAL_FILE_FORMAT_TYPE, 1, $3); +} +| ACCESSTYPE COMP_EQ STRING_VALUE +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_ACCESSTYPE, 1, $3); +} +| ACCESSID COMP_EQ STRING_VALUE +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_ACCESSID, 1, $3); + $3->stmt_loc_.first_column_ = @3.first_column - 1; + $3->stmt_loc_.last_column_ = @3.last_column - 1; + result->contain_sensitive_data_ = true; +} +| ACCESSKEY COMP_EQ STRING_VALUE +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_ACCESSKEY, 1, $3); + $3->stmt_loc_.first_column_ = @3.first_column - 1; + $3->stmt_loc_.last_column_ = @3.last_column - 1; + result->contain_sensitive_data_ = true; +} +| STSTOKEN COMP_EQ STRING_VALUE +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_STSTOKEN, 1, $3); + $3->stmt_loc_.first_column_ = @3.first_column - 1; + 
$3->stmt_loc_.last_column_ = @3.last_column - 1; + result->contain_sensitive_data_ = true; +} +| ENDPOINT COMP_EQ STRING_VALUE +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_ENDPOINT, 1, $3); +} +| PROJECT_NAME COMP_EQ STRING_VALUE +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_PROJECT, 1, $3); +} +| SCHEMA_NAME COMP_EQ STRING_VALUE +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_SCHEMA, 1, $3); +} +| QUOTA_NAME COMP_EQ STRING_VALUE +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_QUOTA, 1, $3); +} +| COMPRESSION_CODE COMP_EQ STRING_VALUE +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_COMPRESSION_CODE, 1, $3); +} +| TABLE_NAME COMP_EQ STRING_VALUE +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_TABLE, 1, $3); +}; + external_file_format_list: external_file_format { @@ -10429,9 +10502,9 @@ LIMIT limit_expr OFFSET limit_expr ; into_clause: -INTO OUTFILE STRING_VALUE opt_charset field_opt line_opt file_opt +INTO OUTFILE STRING_VALUE file_partition_opt opt_charset field_opt line_opt file_opt { - malloc_non_terminal_node($$, result->malloc_pool_, T_INTO_OUTFILE, 5, $3, $4, $5, $6, $7); + malloc_non_terminal_node($$, result->malloc_pool_, T_INTO_OUTFILE, 6, $3, $4, $5, $6, $7, $8); } | INTO DUMPFILE STRING_VALUE { @@ -10560,8 +10633,24 @@ SINGLE opt_equal_mark BOOL_VALUE (void)($2); malloc_non_terminal_node($$, result->malloc_pool_, T_MAX_FILE_SIZE, 1, $3); } +| BUFFER_SIZE opt_equal_mark file_size_const +{ + (void)($2); + malloc_non_terminal_node($$, result->malloc_pool_, T_BUFFER_SIZE, 1, $3); +} ; +file_partition_opt: +/*empty*/ +{ + $$ = NULL; +} +| PARTITION BY bit_expr +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_PARTITION_EXPR, 1, $3); + dup_expr_string($$, result, @3.first_column, @3.last_column); +} + file_size_const: INTNUM { @@ -12844,6 +12933,31 @@ PARTITION '(' name_list ')' merge_nodes(name_list, result, T_NAME_LIST, $3); malloc_non_terminal_node($$, result->malloc_pool_, 
T_USE_PARTITION, 1, name_list); } +| PARTITION '(' external_table_partitions ')' +{ + ParseNode *partition_value = NULL; + merge_nodes(partition_value, result, T_EXTERNAL_TABLE_PARTITION, $3); + dup_expr_string(partition_value, result, @3.first_column, @3.last_column); + malloc_non_terminal_node($$, result->malloc_pool_, T_USE_PARTITION, 1, partition_value); +} + +external_table_partitions: +external_table_partition +{ + $$ = $1; +} +| external_table_partitions ',' external_table_partition +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_LINK_NODE, 2, $1, $3); +} +; + +external_table_partition: +relation_name COMP_EQ expr_const +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_PARTITION_LIST_ELEMENT, 2, $1, $3); +} +; use_flashback: AS OF SNAPSHOT bit_expr %prec LOWER_PARENS @@ -22963,7 +23077,10 @@ unreserved_keyword_normal { $$=$1;} ; unreserved_keyword_normal: -ACCOUNT +ACCESSID +| ACCESSKEY +| ACCESSTYPE +| ACCOUNT | ACTION | ACTIVE | ADDDATE @@ -23027,6 +23144,7 @@ ACCOUNT | BYTE | BREADTH | BUCKETS +| BUFFER_SIZE | CACHE | CALIBRATION | CALIBRATION_INFO @@ -23069,6 +23187,7 @@ ACCOUNT | COMPLETION | COMPRESSED | COMPRESSION +| COMPRESSION_CODE | COMPUTATION | COMPUTE | CONCURRENT @@ -23146,6 +23265,7 @@ ACCOUNT | ENCRYPTED | ENCRYPTION | END +| ENDPOINT | ENDS | ENFORCED | ENGINE_ @@ -23381,6 +23501,7 @@ ACCOUNT | NVARCHAR | OBJECT | OCCUR +| QUOTA_NAME | OF | OFF | OFFSET @@ -23443,6 +23564,8 @@ ACCOUNT | PROFILE | PROFILES | PROGRESSIVE_MERGE_NUM +| PROJECT_NAME +| PROPERTIES | PS | PUBLIC | PCTFREE @@ -23599,6 +23722,7 @@ ACCOUNT | STORING | STRONG | STRING +| STSTOKEN | SUBCLASS_ORIGIN | SUBDATE | SUBJECT diff --git a/src/sql/plan_cache/ob_sql_parameterization.cpp b/src/sql/plan_cache/ob_sql_parameterization.cpp index dbb232b1f6..4472fc9a7e 100644 --- a/src/sql/plan_cache/ob_sql_parameterization.cpp +++ b/src/sql/plan_cache/ob_sql_parameterization.cpp @@ -360,6 +360,8 @@ bool ObSqlParameterization::is_tree_not_param(const ParseNode *tree) 
ret_bool = true; } else if (T_INTO_FILE_LIST == tree->type_) { ret_bool = true; + } else if (T_EXTERNAL_TABLE_PARTITION == tree->type_) { + ret_bool = true; } else if (T_PIVOT_IN_LIST == tree->type_) { ret_bool = true; } else if (T_CHAR_CHARSET == tree->type_) { diff --git a/src/sql/resolver/ddl/ob_alter_table_resolver.cpp b/src/sql/resolver/ddl/ob_alter_table_resolver.cpp index fe6744afda..d83a7646dd 100644 --- a/src/sql/resolver/ddl/ob_alter_table_resolver.cpp +++ b/src/sql/resolver/ddl/ob_alter_table_resolver.cpp @@ -219,6 +219,7 @@ int ObAlterTableResolver::resolve(const ParseNode &parse_tree) ObTableSchema &alter_schema = alter_table_stmt->get_alter_table_schema(); alter_schema.set_table_type(table_schema_->get_table_type()); OZ (alter_schema.set_external_file_format(table_schema_->get_external_file_format())); + OZ (alter_schema.set_external_properties(table_schema_->get_external_properties())); OZ (alter_schema.set_external_file_location(table_schema_->get_external_file_location())); OZ (alter_schema.set_external_file_location_access_info(table_schema_->get_external_file_location_access_info())); OZ (alter_schema.set_external_file_pattern(table_schema_->get_external_file_pattern())); diff --git a/src/sql/resolver/ddl/ob_create_table_resolver.cpp b/src/sql/resolver/ddl/ob_create_table_resolver.cpp index c0cf737016..f64136e881 100644 --- a/src/sql/resolver/ddl/ob_create_table_resolver.cpp +++ b/src/sql/resolver/ddl/ob_create_table_resolver.cpp @@ -715,6 +715,13 @@ int ObCreateTableResolver::resolve(const ParseNode &parse_tree) if (create_table_stmt->get_create_table_arg().schema_.get_part_level() == ObPartitionLevel::PARTITION_LEVEL_ONE) { OZ (create_default_partition_for_table(create_table_stmt->get_create_table_arg().schema_)); } + /* + if (OB_FAIL(ret)) { + } else if (ObExternalFileFormat::FormatType::ODPS_FORMAT != external_table_format_type_ && + OB_FAIL(add_hidden_external_table_pk_col())) { + LOG_WARN("fail to add hidden pk col for external table", 
K(ret)); + } + */ if (OB_FAIL(ret)) { } else if (OB_FAIL(add_hidden_external_table_pk_col())) { LOG_WARN("fail to add hidden pk col for external table", K(ret)); @@ -1045,6 +1052,8 @@ int ObCreateTableResolver::check_external_table_generated_partition_column_sanit LOG_WARN("user specified partition col expr contains no external partition pseudo column is not supported", K(ret)); } } + } else if (table_schema.is_odps_external_table()) { + // lcqlog to do check } else { bool found = false; for (int64_t i = 0; OB_SUCC(ret) && i < col_exprs.count(); i++) { @@ -3137,7 +3146,7 @@ int ObCreateTableResolver::resolve_external_table_format_early(const ParseNode * int32_t num = node->num_child_; for (int32_t i = 0; OB_SUCC(ret) && i < num; ++i) { option_node = node->children_[i]; - if (OB_NOT_NULL(option_node) && T_EXTERNAL_FILE_FORMAT == option_node->type_) { + if (OB_NOT_NULL(option_node) && (T_EXTERNAL_FILE_FORMAT == option_node->type_ || T_EXTERNAL_PROPERTIES == option_node->type_)) { ObExternalFileFormat format; for (int32_t j = 0; OB_SUCC(ret) && j < option_node->num_child_; ++j) { if (OB_NOT_NULL(option_node->children_[j]) @@ -3153,12 +3162,6 @@ int ObCreateTableResolver::resolve_external_table_format_early(const ParseNode * } } } - if (OB_SUCC(ret) && external_table_format_type_ >= ObExternalFileFormat::PARQUET_FORMAT) { - uint64_t data_version = 0; - CK (OB_NOT_NULL(session_info_)); - OZ (GET_MIN_DATA_VERSION(session_info_->get_effective_tenant_id(), data_version)); - OV (DATA_VERSION_4_3_2_0 <= data_version, OB_NOT_SUPPORTED, data_version); - } return ret; } diff --git a/src/sql/resolver/ddl/ob_create_table_resolver_base.cpp b/src/sql/resolver/ddl/ob_create_table_resolver_base.cpp index 002debb91b..fcbcf9475e 100644 --- a/src/sql/resolver/ddl/ob_create_table_resolver_base.cpp +++ b/src/sql/resolver/ddl/ob_create_table_resolver_base.cpp @@ -397,12 +397,12 @@ int ObCreateTableResolverBase::set_table_option_to_schema(ObTableSchema &table_s 
table_schema.set_lob_inrow_threshold(lob_inrow_threshold_); } } - if (OB_SUCC(ret) && table_schema.is_external_table()) { - if (table_schema.get_external_file_format().empty() - || table_schema.get_external_file_location().empty()) { + if ((table_schema.get_external_file_format().empty() + || table_schema.get_external_file_location().empty()) && + table_schema.get_external_properties().empty()) { ret = OB_NOT_SUPPORTED; - LOG_USER_ERROR(OB_NOT_SUPPORTED, "Default format or location option for external table"); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "Default properties or format or location option for external table"); } } if (OB_SUCC(ret) && auto_increment_cache_size_ != 0) { diff --git a/src/sql/resolver/ddl/ob_ddl_resolver.cpp b/src/sql/resolver/ddl/ob_ddl_resolver.cpp index a6e495ab2a..30a55ebeb8 100644 --- a/src/sql/resolver/ddl/ob_ddl_resolver.cpp +++ b/src/sql/resolver/ddl/ob_ddl_resolver.cpp @@ -2592,8 +2592,22 @@ int ObDDLResolver::resolve_table_option(const ParseNode *option_node, const bool } break; } + case T_EXTERNAL_PROPERTIES: case T_EXTERNAL_FILE_FORMAT: { - if (stmt::T_CREATE_TABLE != stmt_->get_stmt_type()) { + uint64_t data_version = 0; + uint64_t tenant_id = OB_INVALID_ID; + if (OB_ISNULL(session_info_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexcepted null ptr", K(ret)); + } else if (FALSE_IT(tenant_id = session_info_->get_effective_tenant_id())) { + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, data_version))) { + LOG_WARN("failed to get data version", K(ret)); + } else if (T_EXTERNAL_PROPERTIES == option_node->type_ && + data_version < DATA_VERSION_4_3_2_1) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not support odps external table under CLUSTER_VERSION_4_3_2_1", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "odps external table"); + } else if (stmt::T_CREATE_TABLE != stmt_->get_stmt_type()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid file format option", K(ret)); } else { @@ -2623,7 +2637,9 @@ int ObDDLResolver::resolve_table_option(const 
ParseNode *option_node, const bool LOG_USER_ERROR(OB_NOT_SUPPORTED, "format"); } // 2. resolve other format value + ObString masked_sql = params_.session_info_->get_current_query_string(); // that's create table operation stmt which has properties for (int i = 0; OB_SUCC(ret) && i < option_node->num_child_; ++i) { + ObString temp_masked_sql; if (OB_ISNULL(option_node->children_[i])) { ret = OB_ERR_UNEXPECTED; LOG_WARN("failed. get unexpected NULL ptr", K(ret), K(option_node->num_child_)); @@ -2631,11 +2647,17 @@ int ObDDLResolver::resolve_table_option(const ParseNode *option_node, const bool T_CHARSET == option_node->children_[i]->type_) { } else if (OB_FAIL(resolve_file_format(option_node->children_[i], format))) { LOG_WARN("fail to resolve file format", K(ret)); + } else if (OB_FAIL(mask_properties_sensitive_info(option_node->children_[i], masked_sql, temp_masked_sql))) { + LOG_WARN("failed to mask properties sensitive info", K(ret), K(i), K(option_node->num_child_)); + } else if (!temp_masked_sql.empty()) { + masked_sql = temp_masked_sql; } } if (OB_SUCC(ret)) { bool is_valid = true; - if (OB_FAIL(check_format_valid(format, is_valid))) { + if (ObExternalFileFormat::ODPS_FORMAT == format.format_type_ && OB_FAIL(format.odps_format_.encrypt())) { + LOG_WARN("failed to encrypt odps format", K(ret)); + } else if (OB_FAIL(check_format_valid(format, is_valid))) { LOG_WARN("check format valid failed", K(ret)); } else if (!is_valid) { ret = OB_NOT_SUPPORTED; @@ -2654,9 +2676,21 @@ int ObDDLResolver::resolve_table_option(const ParseNode *option_node, const bool } } while (OB_SUCC(ret) && pos >= buf_len); if (OB_SUCC(ret)) { - arg.schema_.set_external_file_format(ObString(pos, buf)); - LOG_DEBUG("debug external file format", - K(arg.schema_.get_external_file_format())); + if (ObExternalFileFormat::ODPS_FORMAT == format.format_type_) { + ObCreateTableStmt *create_table_stmt = static_cast(stmt_); + if (OB_ISNULL(create_table_stmt)) { + ret = OB_ERR_UNEXPECTED; + 
LOG_WARN("unexcepted null ptr", K(ret)); + } else { + create_table_stmt->set_masked_sql(masked_sql); + arg.schema_.set_external_properties(ObString(pos, buf)); + } + + } else { + arg.schema_.set_external_file_format(ObString(pos, buf)); + LOG_DEBUG("debug external file format", + K(arg.schema_.get_external_file_format())); + } } } } @@ -2875,6 +2909,46 @@ int ObDDLResolver::resolve_file_format(const ParseNode *node, ObExternalFileForm LOG_WARN("invalid parse node", K(ret)); } else { switch (node->type_) { + case ObItemType::T_ACCESSTYPE: { + format.odps_format_.access_type_ = ObString(node->children_[0]->str_len_, node->children_[0]->str_value_).trim_space_only(); + break; + } + case ObItemType::T_ACCESSID: { + format.odps_format_.access_id_ = ObString(node->children_[0]->str_len_, node->children_[0]->str_value_).trim_space_only(); + break; + } + case ObItemType::T_ACCESSKEY: { + format.odps_format_.access_key_ = ObString(node->children_[0]->str_len_, node->children_[0]->str_value_).trim_space_only(); + break; + } + case ObItemType::T_STSTOKEN: { + format.odps_format_.sts_token_ = ObString(node->children_[0]->str_len_, node->children_[0]->str_value_).trim_space_only(); + break; + } + case ObItemType::T_ENDPOINT: { + format.odps_format_.endpoint_ = ObString(node->children_[0]->str_len_, node->children_[0]->str_value_).trim_space_only(); + break; + } + case ObItemType::T_PROJECT: { + format.odps_format_.project_ = ObString(node->children_[0]->str_len_, node->children_[0]->str_value_).trim_space_only(); + break; + } + case ObItemType::T_SCHEMA: { + format.odps_format_.schema_ = ObString(node->children_[0]->str_len_, node->children_[0]->str_value_).trim_space_only(); + break; + } + case ObItemType::T_TABLE: { + format.odps_format_.table_ = ObString(node->children_[0]->str_len_, node->children_[0]->str_value_).trim_space_only(); + break; + } + case ObItemType::T_QUOTA: { + format.odps_format_.quota_ = ObString(node->children_[0]->str_len_, 
node->children_[0]->str_value_).trim_space_only(); + break; + } + case ObItemType::T_COMPRESSION_CODE: { + format.odps_format_.compression_code_ = ObString(node->children_[0]->str_len_, node->children_[0]->str_value_).trim_space_only(); + break; + } case T_EXTERNAL_FILE_FORMAT_TYPE: { ObString string_v = ObString(node->children_[0]->str_len_, node->children_[0]->str_value_).trim_space_only(); for (int i = 0; i < ObExternalFileFormat::MAX_FORMAT; i++) { @@ -3030,6 +3104,31 @@ int ObDDLResolver::resolve_file_format(const ParseNode *node, ObExternalFileForm return ret; } +int ObDDLResolver::mask_properties_sensitive_info(const ParseNode *node, ObString &ddl_sql, ObString &masked_sql) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(node) || node->num_child_ != 1 || OB_ISNULL(node->children_[0]) || + OB_ISNULL(params_.session_info_) || OB_ISNULL(params_.expr_factory_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid parse node", K(ret)); + } else { + switch (node->type_) { + case ObItemType::T_ENDPOINT: + case ObItemType::T_STSTOKEN: + case ObItemType::T_ACCESSKEY: + case ObItemType::T_ACCESSID: { + if (OB_FAIL(ObDCLResolver::mask_password_for_passwd_node(params_.allocator_, ddl_sql, node->children_[0], masked_sql, true))) { + LOG_WARN("fail to gen masked sql", K(ret)); + } + break; + } + default: { + // do nothing + } + } + } + return ret; +} int ObDDLResolver::resolve_column_definition_ref(ObColumnSchemaV2 &column, ParseNode *node /* column_definition_def */, @@ -3088,30 +3187,34 @@ int ObDDLResolver::resolve_column_definition_ref(ObColumnSchemaV2 &column, int ObDDLResolver::check_format_valid(const ObExternalFileFormat &format, bool &is_valid) { int ret = OB_SUCCESS; - if (!format.csv_format_.line_term_str_.empty() && !format.csv_format_.field_term_str_.empty()) { - if (0 == MEMCMP(format.csv_format_.field_term_str_.ptr(), - format.csv_format_.line_term_str_.ptr(), - std::min(format.csv_format_.field_term_str_.length(), - format.csv_format_.line_term_str_.length()))) { - 
is_valid = false; - LOG_USER_WARN(OB_NOT_SUPPORTED, - "LINE_DELIMITER or FIELD_DELIMITER cannot be a substring of the delimiter for the other"); - LOG_WARN("LINE_DELIMITER or FIELD_DELIMITER cann't be a substring of the other's", K(ret), - K(format.csv_format_.line_term_str_), K(format.csv_format_.field_term_str_)); + if (ObExternalFileFormat::ODPS_FORMAT == format.format_type_) { + is_valid = true; + } else { + if (!format.csv_format_.line_term_str_.empty() && !format.csv_format_.field_term_str_.empty()) { + if (0 == MEMCMP(format.csv_format_.field_term_str_.ptr(), + format.csv_format_.line_term_str_.ptr(), + std::min(format.csv_format_.field_term_str_.length(), + format.csv_format_.line_term_str_.length()))) { + is_valid = false; + LOG_USER_WARN(OB_NOT_SUPPORTED, + "LINE_DELIMITER or FIELD_DELIMITER cannot be a substring of the delimiter for the other"); + LOG_WARN("LINE_DELIMITER or FIELD_DELIMITER cann't be a substring of the other's", K(ret), + K(format.csv_format_.line_term_str_), K(format.csv_format_.field_term_str_)); + } + } + if (OB_SUCC(ret)) { + if (!format.csv_format_.line_term_str_.empty() + && (format.csv_format_.line_term_str_[0] == format.csv_format_.field_escaped_char_ + || format.csv_format_.line_term_str_[0] == format.csv_format_.field_enclosed_char_)) { + ret = OB_WRONG_FIELD_TERMINATORS; + LOG_WARN("invalid line terminator", K(ret)); + } else if (!format.csv_format_.field_term_str_.empty() + && (format.csv_format_.field_term_str_[0] == format.csv_format_.field_escaped_char_ + || format.csv_format_.field_term_str_[0] == format.csv_format_.field_enclosed_char_)) { + ret = OB_WRONG_FIELD_TERMINATORS; + LOG_WARN("invalid field terminator", K(ret)); + } } - } - if (OB_SUCC(ret)) { - if (!format.csv_format_.line_term_str_.empty() - && (format.csv_format_.line_term_str_[0] == format.csv_format_.field_escaped_char_ - || format.csv_format_.line_term_str_[0] == format.csv_format_.field_enclosed_char_)) { - ret = OB_WRONG_FIELD_TERMINATORS; - 
LOG_WARN("invalid line terminator", K(ret)); - } else if (!format.csv_format_.field_term_str_.empty() - && (format.csv_format_.field_term_str_[0] == format.csv_format_.field_escaped_char_ - || format.csv_format_.field_term_str_[0] == format.csv_format_.field_enclosed_char_)) { - ret = OB_WRONG_FIELD_TERMINATORS; - LOG_WARN("invalid field terminator", K(ret)); - } } return ret; } diff --git a/src/sql/resolver/ddl/ob_ddl_resolver.h b/src/sql/resolver/ddl/ob_ddl_resolver.h index 09abb31ebb..9ee2861e6f 100644 --- a/src/sql/resolver/ddl/ob_ddl_resolver.h +++ b/src/sql/resolver/ddl/ob_ddl_resolver.h @@ -959,6 +959,7 @@ protected: share::schema::ObTableSchema &table_schema); int resolve_file_format(const ParseNode *node, ObExternalFileFormat &format); + int mask_properties_sensitive_info(const ParseNode *node, ObString &ddl_sql, ObString &masked_sql); int check_format_valid(const ObExternalFileFormat &format, bool &is_valid); diff --git a/src/sql/resolver/dml/ob_default_value_utils.cpp b/src/sql/resolver/dml/ob_default_value_utils.cpp index eb8cc3d0f8..d1dc046afd 100644 --- a/src/sql/resolver/dml/ob_default_value_utils.cpp +++ b/src/sql/resolver/dml/ob_default_value_utils.cpp @@ -1104,12 +1104,19 @@ int ObDefaultValueUtils::build_nullable_expr(const ColumnItem *column, ObRawExpr int ObDefaultValueUtils::build_default_expr_for_generated_column(const ColumnItem &column, ObRawExpr *&expr) { int ret = OB_SUCCESS; - if (OB_ISNULL(column.expr_) || OB_ISNULL(stmt_) || OB_ISNULL(params_) || OB_ISNULL(params_->expr_factory_)) { + bool contain = false; + if (OB_ISNULL(column.expr_) || OB_ISNULL(stmt_) || OB_ISNULL(params_) + || OB_ISNULL(params_->expr_factory_) || OB_ISNULL(column.expr_->get_dependant_expr())) { ret = OB_NOT_INIT; LOG_WARN("column expr is null", K_(column.expr), K_(stmt)); - } else if (OB_FAIL(ObDMLResolver::copy_schema_expr(*params_->expr_factory_, - column.expr_->get_dependant_expr(), - expr))) { + } else if 
(OB_FAIL(ObResolverUtils::cnt_external_pseudo_column(*column.expr_->get_dependant_expr(), contain))) { + LOG_WARN("failed to check if contain external pseudo column", K(ret)); + // 外表生成列包含伪列 默认值为null + } else if (contain && OB_FAIL(ObRawExprUtils::build_null_expr(*params_->expr_factory_, expr))) { + LOG_WARN("fail to build null expr", K(ret)); + } else if (!contain && OB_FAIL(ObDMLResolver::copy_schema_expr(*params_->expr_factory_, + column.expr_->get_dependant_expr(), + expr))) { LOG_WARN("failed to copy dependant expr", K(ret)); } return ret; diff --git a/src/sql/resolver/dml/ob_del_upd_resolver.cpp b/src/sql/resolver/dml/ob_del_upd_resolver.cpp index f7c423de42..746e69c517 100644 --- a/src/sql/resolver/dml/ob_del_upd_resolver.cpp +++ b/src/sql/resolver/dml/ob_del_upd_resolver.cpp @@ -25,6 +25,7 @@ #include "pl/ob_pl_resolver.h" #include "sql/parser/parse_malloc.h" #include "sql/resolver/dml/ob_merge_resolver.h" +#include "share/external_table/ob_external_table_utils.h" namespace oceanbase { @@ -1858,6 +1859,8 @@ int ObDelUpdResolver::add_all_columns_to_stmt(const TableItem &table_item, if (OB_ISNULL(column)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid column schema", K(column)); + } else if (schema::EXTERNAL_TABLE == base_table_item.table_type_ + && ObExternalTableUtils::is_skipped_insert_column(*column)) { } else if (OB_FAIL(add_column_to_stmt(table_item, *column, column_exprs))) { LOG_WARN("add column item to stmt failed", K(ret)); } @@ -1955,6 +1958,11 @@ int ObDelUpdResolver::add_all_rowkey_columns_to_stmt(const TableItem &table_item LOG_WARN("get rowkey info failed", K(ret), K(i), K(rowkey_info)); } else if (OB_FAIL(get_column_schema(base_table_item.ref_id_, rowkey_column_id, column_schema, true, base_table_item.is_link_table()))) { LOG_WARN("get column schema failed", K(rowkey_column_id)); + } else if (OB_ISNULL(column_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid column schema", K(column_schema)); + } else if (schema::EXTERNAL_TABLE == 
base_table_item.table_type_ + && ObExternalTableUtils::is_skipped_insert_column(*column_schema)) { } else if (OB_FAIL(add_column_to_stmt(table_item, *column_schema, column_exprs))) { LOG_WARN("add column to stmt failed", K(ret), K(table_item)); } @@ -3215,6 +3223,7 @@ int ObDelUpdResolver::resolve_insert_columns(const ParseNode *node, int ret = OB_SUCCESS; TableItem *table_item = NULL; ObDelUpdStmt *del_upd_stmt = get_del_upd_stmt(); + bool is_ext_part_column = false; if (OB_ISNULL(del_upd_stmt) || OB_ISNULL(session_info_) || OB_ISNULL(schema_checker_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid insert stmt", K(del_upd_stmt), K_(session_info), K_(schema_checker)); @@ -3269,6 +3278,13 @@ int ObDelUpdResolver::resolve_insert_columns(const ParseNode *node, } else if (!session_info_->get_ddl_info().is_ddl() && OB_HIDDEN_SESS_CREATE_TIME_COLUMN_ID == column_expr->get_column_id()) { ret = OB_NOT_SUPPORTED; LOG_USER_ERROR(OB_NOT_SUPPORTED, "specify __sess_create_time value"); + } else if (schema::EXTERNAL_TABLE == table_item->table_type_ + && OB_FAIL(is_external_table_partition_column(*table_item, column_expr->get_column_id(), is_ext_part_column))) { + LOG_WARN("failed to check external table column", K(ret)); + } else if (is_ext_part_column) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("column list should not contain external table partition column", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "contain external table partition column in insert column list"); } else if (OB_FAIL(mock_values_column_ref(column_expr))) { LOG_WARN("mock values column reference failed", K(ret)); } @@ -3310,7 +3326,10 @@ int ObDelUpdResolver::resolve_insert_columns(const ParseNode *node, } else if (is_duplicate) { ret = OB_ERR_FIELD_SPECIFIED_TWICE; LOG_USER_ERROR(OB_ERR_FIELD_SPECIFIED_TWICE, to_cstring(column_items.at(i).column_name_)); - } else if (OB_FAIL(mock_values_column_ref(column_items.at(i).expr_))) { + } else if (schema::EXTERNAL_TABLE == table_item->table_type_ + && 
OB_FAIL(is_external_table_partition_column(*table_item, column_id, is_ext_part_column))) { + LOG_WARN("failed to check external table column", K(ret)); + } else if (!is_ext_part_column && OB_FAIL(mock_values_column_ref(column_items.at(i).expr_))) { LOG_WARN("mock values column reference failed", K(ret)); } } @@ -3344,7 +3363,31 @@ int ObDelUpdResolver::resolve_insert_columns(const ParseNode *node, } return ret; } - +int ObDelUpdResolver::is_external_table_partition_column(const TableItem &table_item, + uint64_t column_id, + bool &is_part_column) +{ + int ret = OB_SUCCESS; + const ObTableSchema* table_schema = NULL; + const ObColumnSchemaV2 *column_schema = NULL; + is_part_column = false; + if (OB_ISNULL(session_info_) || OB_ISNULL(schema_checker_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (OB_FAIL(schema_checker_->get_table_schema(session_info_->get_effective_tenant_id(), + table_item.ref_id_, + table_schema, + table_item.is_link_table()))) { + LOG_WARN("fail to get table schema", K(ret), K(table_item.ref_id_)); + } else if (OB_ISNULL(table_schema) + || OB_ISNULL(column_schema = table_schema->get_column_schema(column_id))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column schema is null", K(ret)); + } else { + is_part_column = column_schema->is_tbl_part_key_column(); + } + return ret; +} int ObDelUpdResolver::resolve_insert_values(const ParseNode *node, ObInsertTableInfo& table_info, ObIArray &label_se_columns) @@ -3356,6 +3399,7 @@ int ObDelUpdResolver::resolve_insert_values(const ParseNode *node, uint64_t value_count = OB_INVALID_ID; bool is_all_default = false; bool is_update_view = false; + TableItem* table_item = NULL; if (OB_ISNULL(del_upd_stmt) || OB_ISNULL(node) || OB_ISNULL(session_info_) || T_VALUE_LIST != node->type_ || OB_ISNULL(node->children_) || OB_ISNULL(del_upd_stmt->get_query_ctx())) { ret = OB_INVALID_ARGUMENT; @@ -3376,7 +3420,6 @@ int ObDelUpdResolver::resolve_insert_values(const ParseNode *node, 
LOG_WARN("reserve memory fail", K(ret)); } if (OB_SUCC(ret)) { - TableItem* table_item = NULL; if (OB_FAIL(check_need_match_all_params(table_info.values_desc_, del_upd_stmt->get_query_ctx()->need_match_all_params_))) { LOG_WARN("check need match all params failed", K(ret)); @@ -3491,7 +3534,7 @@ int ObDelUpdResolver::resolve_insert_values(const ParseNode *node, } else if (OB_FAIL(check_basic_column_generated(column_expr, del_upd_stmt, is_generated_column))) { LOG_WARN("check column generated failed", K(ret)); - } else if (is_generated_column) { + } else if (is_generated_column && schema::EXTERNAL_TABLE != table_item->table_type_) { ret = OB_NON_DEFAULT_VALUE_FOR_GENERATED_COLUMN; if (!is_oracle_mode()) { ColumnItem *orig_col_item = NULL; diff --git a/src/sql/resolver/dml/ob_del_upd_resolver.h b/src/sql/resolver/dml/ob_del_upd_resolver.h index aada09ed19..32c86305ac 100644 --- a/src/sql/resolver/dml/ob_del_upd_resolver.h +++ b/src/sql/resolver/dml/ob_del_upd_resolver.h @@ -284,6 +284,9 @@ protected: int mark_json_partial_update_flag(const ObColumnRefRawExpr *ref_expr, ObRawExpr *expr, int depth, bool &allow_json_partial_update); int add_select_item_func(ObSelectStmt &select_stmt, ColumnItem &col); int select_items_is_pk(const ObSelectStmt& select_stmt, bool &has_pk); + int is_external_table_partition_column(const TableItem &table_item, + uint64_t column_id, + bool &is_part_column); private: common::hash::ObPlacementHashSet insert_column_ids_; diff --git a/src/sql/resolver/dml/ob_dml_resolver.cpp b/src/sql/resolver/dml/ob_dml_resolver.cpp index 618b9852f9..af4b544394 100755 --- a/src/sql/resolver/dml/ob_dml_resolver.cpp +++ b/src/sql/resolver/dml/ob_dml_resolver.cpp @@ -7622,46 +7622,52 @@ int ObDMLResolver::resolve_partitions(const ParseNode *part_node, int ret = OB_SUCCESS; if (NULL != part_node) { OB_ASSERT(1 == part_node->num_child_ && part_node->children_[0]->num_child_ > 0); - const ParseNode *name_list = part_node->children_[0]; - ObString partition_name; - 
ObSEArray part_ids; - ObSEArray part_names; - for (int i = 0; OB_SUCC(ret) && i < name_list->num_child_; i++) { - ObSEArray partition_ids; - partition_name.assign_ptr(name_list->children_[i]->str_value_, - static_cast(name_list->children_[i]->str_len_)); - //here just conver partition_name to its lowercase - ObCharset::casedn(CS_TYPE_UTF8MB4_GENERAL_CI, partition_name); - ObPartGetter part_getter(table_schema); - if (T_USE_PARTITION == part_node->type_) { - if (OB_FAIL(part_getter.get_part_ids(partition_name, partition_ids))) { - LOG_WARN("failed to get part ids", K(ret), K(partition_name)); - if (OB_UNKNOWN_PARTITION == ret && lib::is_mysql_mode()) { - LOG_USER_ERROR(OB_UNKNOWN_PARTITION, partition_name.length(), partition_name.ptr(), - table_schema.get_table_name_str().length(), - table_schema.get_table_name_str().ptr()); + if (T_NAME_LIST == part_node->children_[0]->type_) { + const ParseNode *name_list = part_node->children_[0]; + ObString partition_name; + ObSEArray part_ids; + ObSEArray part_names; + for (int i = 0; OB_SUCC(ret) && i < name_list->num_child_; i++) { + ObSEArray partition_ids; + partition_name.assign_ptr(name_list->children_[i]->str_value_, + static_cast(name_list->children_[i]->str_len_)); + //here just conver partition_name to its lowercase + ObCharset::casedn(CS_TYPE_UTF8MB4_GENERAL_CI, partition_name); + ObPartGetter part_getter(table_schema); + if (T_USE_PARTITION == part_node->type_) { + if (OB_FAIL(part_getter.get_part_ids(partition_name, partition_ids))) { + LOG_WARN("failed to get part ids", K(ret), K(partition_name)); + if (OB_UNKNOWN_PARTITION == ret && lib::is_mysql_mode()) { + LOG_USER_ERROR(OB_UNKNOWN_PARTITION, partition_name.length(), partition_name.ptr(), + table_schema.get_table_name_str().length(), + table_schema.get_table_name_str().ptr()); + } + } + } else if (OB_FAIL(part_getter.get_subpart_ids(partition_name, partition_ids))) { + LOG_WARN("failed to get subpart ids", K(ret), K(partition_name)); + } + + if (OB_SUCC(ret)) { 
+ if (OB_FAIL(append_array_no_dup(part_ids, partition_ids))) { + LOG_WARN("Push partition id error", K(ret)); + } else if (OB_FAIL(part_names.push_back(partition_name))) { + LOG_WARN("failed to push back partition name", K(ret)); + } else { + LOG_INFO("part ids", K(partition_name), K(partition_ids)); } } - } else if (OB_FAIL(part_getter.get_subpart_ids(partition_name, partition_ids))) { - LOG_WARN("failed to get subpart ids", K(ret), K(partition_name)); } - if (OB_SUCC(ret)) { - if (OB_FAIL(append_array_no_dup(part_ids, partition_ids))) { - LOG_WARN("Push partition id error", K(ret)); - } else if (OB_FAIL(part_names.push_back(partition_name))) { - LOG_WARN("failed to push back partition name", K(ret)); - } else { - LOG_INFO("part ids", K(partition_name), K(partition_ids)); + if (OB_FAIL(table_item.part_ids_.assign(part_ids))) { + LOG_WARN("failed to assign part ids", K(ret)); + } else if (OB_FAIL(table_item.part_names_.assign(part_names))) { + LOG_WARN("failed to assign part names", K(ret)); } } - } - if (OB_SUCC(ret)) { - if (OB_FAIL(table_item.part_ids_.assign(part_ids))) { - LOG_WARN("failed to assign part ids", K(ret)); - } else if (OB_FAIL(table_item.part_names_.assign(part_names))) { - LOG_WARN("failed to assign part names", K(ret)); - } + } else if (schema::EXTERNAL_TABLE == table_item.table_type_ + && T_EXTERNAL_TABLE_PARTITION == part_node->children_[0]->type_) { + table_item.external_table_partition_.assign_ptr(part_node->children_[0]->str_value_, + part_node->children_[0]->str_len_); } } return ret; @@ -8245,7 +8251,10 @@ int ObDMLResolver::resolve_external_table_generated_column( } } else { ObExternalFileFormat format; - if (OB_FAIL(format.load_from_string(table_schema->get_external_file_format(), *params_.allocator_))) { + const ObString &format_or_properties = table_schema->get_external_file_format().empty() ? 
+ table_schema->get_external_properties() : + table_schema->get_external_file_format(); + if (OB_FAIL(format.load_from_string(format_or_properties, *params_.allocator_))) { LOG_WARN("load from string failed", K(ret)); } else if (format.format_type_ == ObExternalFileFormat::ORC_FORMAT && lib::is_oracle_mode()) { ret = OB_NOT_SUPPORTED; @@ -8274,6 +8283,19 @@ int ObDMLResolver::resolve_external_table_generated_column( LOG_WARN("fail to build external table file column expr", K(ret)); } } + } else if (ObExternalFileFormat::ODPS_FORMAT == format.format_type_) { + if (OB_FAIL(ObResolverUtils::calc_file_column_idx(col.col_name_, file_column_idx))) { + LOG_WARN("fail to calc file column idx", K(ret)); + } else if (nullptr == (real_ref_expr = ObResolverUtils::find_file_column_expr( + pseudo_external_file_col_exprs_, table_item.table_id_, file_column_idx, col.col_name_))) { + if (OB_FAIL(ObResolverUtils::build_file_column_expr_for_odps( + *params_.expr_factory_, *params_.session_info_, + table_item.table_id_, table_item.table_name_, + col.col_name_, file_column_idx, column_schema, + real_ref_expr))) { + LOG_WARN("fail to build external table file column expr", K(ret)); + } + } } else if (ObExternalFileFormat::PARQUET_FORMAT == format.format_type_ || ObExternalFileFormat::ORC_FORMAT == format.format_type_ ) { ObRawExpr *cast_expr = NULL; @@ -8528,7 +8550,8 @@ int ObDMLResolver::resolve_generated_column_expr(const ObString &expr_str, LOG_WARN("build padding expr for self failed", K(ret)); } else if (OB_FAIL(ref_expr->formalize_with_local_vars(session_info, &local_vars, var_array_idx))) { LOG_WARN("formailize column reference expr failed", K(ret)); - } else if (ObRawExprUtils::need_column_conv(column.get_result_type(), *ref_expr, true)) { + } else if (table_schema->is_external_table() + || ObRawExprUtils::need_column_conv(column.get_result_type(), *ref_expr, true)) { if (OB_FAIL(ObRawExprUtils::build_column_conv_expr(*expr_factory, *allocator_, column, ref_expr, session_info, 
used_for_generated_column, diff --git a/src/sql/resolver/dml/ob_dml_stmt.cpp b/src/sql/resolver/dml/ob_dml_stmt.cpp index 27ddf82558..9eef0e01b9 100644 --- a/src/sql/resolver/dml/ob_dml_stmt.cpp +++ b/src/sql/resolver/dml/ob_dml_stmt.cpp @@ -270,6 +270,8 @@ int TableItem::deep_copy(ObIRawExprCopier &expr_copier, ddl_schema_version_ = other.ddl_schema_version_; ddl_table_id_ = other.ddl_table_id_; ref_query_ = other.ref_query_; + //external table + external_table_partition_ = other.external_table_partition_; SampleInfo *buf = NULL; if (is_json_table() && OB_FAIL(deep_copy_json_table_def(*other.json_table_def_, expr_copier, allocator))) { diff --git a/src/sql/resolver/dml/ob_dml_stmt.h b/src/sql/resolver/dml/ob_dml_stmt.h index bbe640f418..2757506d04 100644 --- a/src/sql/resolver/dml/ob_dml_stmt.h +++ b/src/sql/resolver/dml/ob_dml_stmt.h @@ -290,10 +290,8 @@ struct TableItem K_(is_view_table), K_(part_ids), K_(part_names), K_(cte_type), KPC_(function_table_expr), K_(flashback_query_type), KPC_(flashback_query_expr), K_(table_type), - K_(exec_params), - KPC_(sample_info), - K_(mview_id), - K_(need_expand_rt_mv)); + K_(exec_params), KPC_(sample_info), K_(mview_id), K_(need_expand_rt_mv), + K_(external_table_partition)); enum TableType { @@ -307,7 +305,6 @@ struct TableItem TEMP_TABLE, LINK_TABLE, JSON_TABLE, - EXTERNAL_TABLE, VALUES_TABLE, LATERAL_TABLE, }; @@ -442,6 +439,8 @@ struct TableItem ObJsonTableDef *json_table_def_; // values table ObValuesTableDef *values_table_def_; + // external table + common::ObString external_table_partition_; // sample scan infos SampleInfo *sample_info_; }; diff --git a/src/sql/resolver/dml/ob_insert_resolver.cpp b/src/sql/resolver/dml/ob_insert_resolver.cpp index a6ad48e3cd..573d8a8d1b 100644 --- a/src/sql/resolver/dml/ob_insert_resolver.cpp +++ b/src/sql/resolver/dml/ob_insert_resolver.cpp @@ -181,6 +181,9 @@ int ObInsertResolver::resolve(const ParseNode &parse_tree) LOG_WARN("view not insertable", K(ret)); } } + if (OB_SUCC(ret) && 
OB_FAIL(check_insert_into_external_table())) { + LOG_WARN("check insert into external table failed", K(ret)); + } return ret; } @@ -980,7 +983,11 @@ int ObInsertResolver::check_insert_select_field(ObInsertStmt &insert_stmt, bool is_generated_column = false; const ObIArray &values_desc = insert_stmt.get_values_desc(); ObSelectStmt *ref_stmt = NULL; - if (OB_ISNULL(session_info_)) { + TableItem *insert_table = NULL; + if (OB_ISNULL(insert_table = insert_stmt.get_table_item_by_id(insert_stmt.get_insert_table_info().table_id_))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("insert target table is unexpected null", K(ret)); + } else if (OB_ISNULL(session_info_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid session_info_", K(ret)); } else if (values_desc.count() != select_stmt.get_select_item_size()) { @@ -1006,7 +1013,7 @@ int ObInsertResolver::check_insert_select_field(ObInsertStmt &insert_stmt, &insert_stmt, is_generated_column))) { LOG_WARN("check basic column generated failed", K(ret)); - } else if (is_generated_column) { + } else if (is_generated_column && schema::EXTERNAL_TABLE != insert_table->table_type_) { if (select_stmt.get_table_size() == 1 && select_stmt.get_table_item(0) != NULL && select_stmt.get_table_item(0)->is_values_table()) { @@ -1167,6 +1174,31 @@ int ObInsertResolver::check_returning_validity() return ret; } +int ObInsertResolver::check_insert_into_external_table() +{ + int ret = OB_SUCCESS; + ObInsertStmt *insert_stmt = get_insert_stmt(); + TableItem *table = NULL; + if (OB_ISNULL(insert_stmt) || insert_stmt->get_table_items().empty() + || OB_ISNULL(table = insert_stmt->get_table_item(0))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid stmt", K(ret), K(insert_stmt)); + } else if (schema::EXTERNAL_TABLE != table->table_type_) { + // do nothing + } else if (GET_MIN_CLUSTER_VERSION() < CLUSTER_VERSION_4_3_2_1) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not support to insert into external table during updating", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, 
"insert into external table during updating"); + } else if (!insert_stmt->value_from_select() || insert_stmt->is_replace() + || insert_stmt->is_ignore() || insert_stmt->is_returning() + || insert_stmt->is_insert_up()) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not support insert into external table with values, replace, ignore, returning, update", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "insert into external table with values, replace, ignore, returning, update"); + } + return ret; +} + int ObInsertResolver::resolve_column_ref_expr(const ObQualifiedName &q_name, ObRawExpr *&real_ref_expr) { int ret = OB_SUCCESS; diff --git a/src/sql/resolver/dml/ob_insert_resolver.h b/src/sql/resolver/dml/ob_insert_resolver.h index 801fc9b9bd..a74f39ef4d 100644 --- a/src/sql/resolver/dml/ob_insert_resolver.h +++ b/src/sql/resolver/dml/ob_insert_resolver.h @@ -113,6 +113,7 @@ private: return sub_select_resolver_; } int try_expand_returning_exprs(); + int check_insert_into_external_table(); DISALLOW_COPY_AND_ASSIGN(ObInsertResolver); private: int64_t row_count_; diff --git a/src/sql/resolver/dml/ob_select_resolver.cpp b/src/sql/resolver/dml/ob_select_resolver.cpp index 838df5e767..604b93d1f7 100644 --- a/src/sql/resolver/dml/ob_select_resolver.cpp +++ b/src/sql/resolver/dml/ob_select_resolver.cpp @@ -5122,9 +5122,17 @@ int ObSelectResolver::resolve_into_file_node(const ParseNode *list_node, ObSelec into_item.is_single_ = node->children_[0]->value_; } } else if (T_MAX_FILE_SIZE == node->type_) { - if (OB_FAIL(resolve_max_file_size_node(node, into_item))) { + if (OB_FAIL(resolve_size_node(node, into_item))) { LOG_WARN("failed to resolve max file size", K(ret)); } + } else if (T_BUFFER_SIZE == node->type_) { + if (GET_MIN_CLUSTER_VERSION() < CLUSTER_VERSION_4_3_2_1) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not support to set buffer size during updating", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "set buffer size during updating"); + } else if (OB_FAIL(resolve_size_node(node, 
into_item))) { + LOG_WARN("failed to resolve buffer size", K(ret)); + } } else { ret = OB_ERR_PARSE_SQL; LOG_WARN("child of into file node has wrong type", K(ret)); @@ -5134,13 +5142,48 @@ int ObSelectResolver::resolve_into_file_node(const ParseNode *list_node, ObSelec return ret; } -int ObSelectResolver::resolve_max_file_size_node(const ParseNode *file_size_node, ObSelectIntoItem &into_item) +int ObSelectResolver::resolve_file_partition_node(const ParseNode *node, ObSelectIntoItem &into_item) +{ + int ret = OB_SUCCESS; + ObRawExpr *expr = NULL; + ObSQLSessionInfo *session_info = params_.session_info_; + ObArray columns; + if (OB_ISNULL(node) || OB_ISNULL(session_info) || T_PARTITION_EXPR != node->type_ + || node->num_child_ != 1 || OB_ISNULL(node->children_[0])) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("partition node is null", K(ret)); + } else if (OB_FAIL(resolve_sql_expr(*(node->children_[0]), expr, &columns))) { + LOG_WARN("fail to resolve const expr", K(ret)); + } else if (OB_ISNULL(expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("expr is null", K(ret), K(node->children_[0]->type_)); + } else if (OB_FAIL(expr->formalize(session_info))) { + LOG_WARN("failed to formalize expr", K(ret), K(*expr)); + } else if (expr->has_flag(CNT_SUB_QUERY) || expr->has_flag(CNT_AGG) + || expr->has_flag(CNT_WINDOW_FUNC)|| expr->has_flag(CNT_PL_UDF) + || expr->has_flag(CNT_SO_UDF) || expr->has_flag(CNT_MATCH_EXPR)) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("partition expr should not contain subquery, aggregate, udf or match expr", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "partition expr contains subquery, aggregate, udf or match expr"); + } else if (ObVarcharType != expr->get_result_type().get_type() + && ObCharType != expr->get_result_type().get_type()) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("partition expr should be char or varchar", K(ret), K(expr->get_result_type().get_type())); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "partition by expr whose result type is not char or varchar"); + } 
else { + into_item.file_partition_expr_ = expr; + } + return ret; +} + +int ObSelectResolver::resolve_size_node(const ParseNode *file_size_node, ObSelectIntoItem &into_item) { int ret = OB_SUCCESS; ParseNode *child = NULL; int64_t parse_int_value = 0; - if (OB_ISNULL(file_size_node) || T_MAX_FILE_SIZE != file_size_node->type_ - || file_size_node->num_child_ != 1 || OB_ISNULL(child = file_size_node->children_[0])) { + if (OB_ISNULL(file_size_node) || file_size_node->num_child_ != 1 + || OB_ISNULL(child = file_size_node->children_[0]) + || (T_MAX_FILE_SIZE != file_size_node->type_ && T_BUFFER_SIZE != file_size_node->type_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected file size node", K(ret)); } else if (T_INT == child->type_) { @@ -5154,11 +5197,13 @@ int ObSelectResolver::resolve_max_file_size_node(const ParseNode *file_size_node LOG_WARN("child of max file size node has wrong type", K(ret)); } if (OB_FAIL(ret)) { - } else if (OB_UNLIKELY(parse_int_value <= 0)) { + } else if (OB_UNLIKELY(parse_int_value < 0)) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("file size value should be positive", K(ret), K(parse_int_value)); - } else { + LOG_WARN("size should not be smaller than 0", K(ret), K(parse_int_value)); + } else if (T_MAX_FILE_SIZE == file_size_node->type_) { into_item.max_file_size_ = parse_int_value; + } else if (T_BUFFER_SIZE == file_size_node->type_) { + into_item.buffer_size_ = parse_int_value; } return ret; } @@ -5212,11 +5257,22 @@ int ObSelectResolver::resolve_into_clause(const ParseNode *node) new(into_item) ObSelectIntoItem(); into_item->into_type_ = node->type_; if (T_INTO_OUTFILE == node->type_) { // into outfile - if (OB_FAIL(resolve_into_const_node(node->children_[0], into_item->outfile_name_))) { //name + if (node->num_child_ > 0 + && OB_FAIL(resolve_into_const_node(node->children_[0], into_item->outfile_name_))) { //name LOG_WARN("resolve into outfile name failed", K(ret)); - } else if (NULL != node->children_[1]) { // charset + } + if 
(OB_SUCC(ret) && node->num_child_ > 1 && NULL != node->children_[1]) { // partition by + if (GET_MIN_CLUSTER_VERSION() < CLUSTER_VERSION_4_3_2_1) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not support to use file partition option during updating", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "use file partition option during updating"); + } else if (OB_FAIL(resolve_file_partition_node(node->children_[1], *into_item))) { + LOG_WARN("resolve file partition node failed", K(ret)); + } + } + if (OB_SUCC(ret) && node->num_child_ > 2 && NULL != node->children_[2]) { // charset ObCharsetType charset_type = CHARSET_INVALID; - ObString charset(node->children_[1]->str_len_, node->children_[1]->str_value_); + ObString charset(node->children_[2]->str_len_, node->children_[2]->str_value_); if (CHARSET_INVALID == (charset_type = ObCharset::charset_type(charset.trim()))) { ret = OB_ERR_UNKNOWN_CHARSET; LOG_USER_ERROR(OB_ERR_UNKNOWN_CHARSET, charset.length(), charset.ptr()); @@ -5228,25 +5284,31 @@ int ObSelectResolver::resolve_into_clause(const ParseNode *node) into_item->cs_type_ = ObCharset::get_default_collation(charset_type); } } - if (OB_SUCC(ret) && NULL != node->children_[2]) { //field - if (OB_FAIL(resolve_into_field_node(node->children_[2], *into_item))) { + if (OB_SUCC(ret) && node->num_child_ > 3 && NULL != node->children_[3]) { //field + if (OB_FAIL(resolve_into_field_node(node->children_[3], *into_item))) { LOG_WARN("reosolve into field node failed", K(ret)); } } - if (OB_SUCC(ret) && NULL != node->children_[3]) { // line - if (OB_FAIL(resolve_into_line_node(node->children_[3], *into_item))) { + if (OB_SUCC(ret) && node->num_child_ > 4 && NULL != node->children_[4]) { // line + if (OB_FAIL(resolve_into_line_node(node->children_[4], *into_item))) { LOG_WARN("reosolve into line node failed", K(ret)); } } - if (OB_SUCC(ret) && NULL != node->children_[4]) { // file: single & max_file_size + // file: single, max_file_size, buffer_size + if (OB_SUCC(ret) && node->num_child_ > 5 
&& NULL != node->children_[5]) { if (GET_MIN_CLUSTER_VERSION() < CLUSTER_VERSION_4_3_1_0) { ret = OB_NOT_SUPPORTED; LOG_WARN("not support to use file option during updating", K(ret)); LOG_USER_ERROR(OB_NOT_SUPPORTED, "use file option during updating"); - } else if (OB_FAIL(resolve_into_file_node(node->children_[4], *into_item))) { + } else if (OB_FAIL(resolve_into_file_node(node->children_[5], *into_item))) { LOG_WARN("reosolve into line node failed", K(ret)); } } + if (OB_SUCC(ret) && into_item->is_single_ && NULL != into_item->file_partition_expr_) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not support to use file partition option when single is true", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "use file partition option when single is true"); + } } else if (T_INTO_DUMPFILE == node->type_) { // into dumpfile if (OB_FAIL(resolve_into_const_node(node->children_[0], into_item->outfile_name_))) { LOG_WARN("resolve into outfile name failed", K(ret)); diff --git a/src/sql/resolver/dml/ob_select_resolver.h b/src/sql/resolver/dml/ob_select_resolver.h index e467507dd2..7c9ac026c9 100644 --- a/src/sql/resolver/dml/ob_select_resolver.h +++ b/src/sql/resolver/dml/ob_select_resolver.h @@ -202,7 +202,8 @@ protected: int resolve_into_line_node(const ParseNode *node, ObSelectIntoItem &into_item); int resolve_into_variable_node(const ParseNode *node, ObSelectIntoItem &into_item); int resolve_into_file_node(const ParseNode *node, ObSelectIntoItem &into_item); - int resolve_max_file_size_node(const ParseNode *file_size_node, ObSelectIntoItem &into_item); + int resolve_file_partition_node(const ParseNode *node, ObSelectIntoItem &into_item); + int resolve_size_node(const ParseNode *file_size_node, ObSelectIntoItem &into_item); int resolve_varchar_file_size(const ParseNode *child, int64_t &parse_int_value) const; // resolve_star related functions int resolve_star_for_table_groups(ObStarExpansionInfo &star_expansion_info); diff --git a/src/sql/resolver/dml/ob_select_stmt.cpp 
b/src/sql/resolver/dml/ob_select_stmt.cpp index 375a627c60..6f99479853 100644 --- a/src/sql/resolver/dml/ob_select_stmt.cpp +++ b/src/sql/resolver/dml/ob_select_stmt.cpp @@ -52,12 +52,43 @@ int SelectItem::deep_copy(ObIRawExprCopier &expr_copier, return ret; } +int ObSelectIntoItem::deep_copy(ObIRawExprCopier &copier, + const ObSelectIntoItem &other) +{ + int ret = OB_SUCCESS; + into_type_ = other.into_type_; + outfile_name_ = other.outfile_name_; + field_str_ = other.field_str_; + line_str_ = other.line_str_; + closed_cht_ = other.closed_cht_; + is_optional_ = other.is_optional_; + is_single_ = other.is_single_; + max_file_size_ = other.max_file_size_; + escaped_cht_ = other.escaped_cht_; + cs_type_ = other.cs_type_; + file_partition_expr_ = other.file_partition_expr_; + buffer_size_ = other.buffer_size_; + user_vars_.assign(other.user_vars_); + if (OB_FAIL(copier.copy(other.file_partition_expr_, file_partition_expr_))) { + LOG_WARN("deep copy file partition expr failed", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < other.pl_vars_.count(); ++i) { + ObRawExpr* pl_var; + if (OB_FAIL(copier.copy(other.pl_vars_.at(i), pl_var))) { + LOG_WARN("failed to copy exprs", K(ret)); + } else if (OB_FAIL(pl_vars_.push_back(pl_var))) { + LOG_WARN("failed to push back group by expr", K(ret)); + } + } + return ret; +} const char* const ObSelectIntoItem::DEFAULT_LINE_TERM_STR = "\n"; const char* const ObSelectIntoItem::DEFAULT_FIELD_TERM_STR = "\t"; const char ObSelectIntoItem::DEFAULT_FIELD_ENCLOSED_CHAR = 0; const bool ObSelectIntoItem::DEFAULT_OPTIONAL_ENCLOSED = false; const bool ObSelectIntoItem::DEFAULT_SINGLE_OPT = true; const int64_t ObSelectIntoItem::DEFAULT_MAX_FILE_SIZE = 256 * 1024 * 1024; +const int64_t ObSelectIntoItem::DEFAULT_BUFFER_SIZE = 1 * 1024 * 1024; const char ObSelectIntoItem::DEFAULT_FIELD_ESCAPED_CHAR = '\\'; //对于select .. 
for update 也认为是被更改 @@ -336,7 +367,7 @@ int ObSelectStmt::deep_copy_stmt_struct(ObIAllocator &allocator, LOG_WARN("failed to allocate select into item", K(ret)); } else { temp_into_item = new(ptr) ObSelectIntoItem(); - if (OB_FAIL(temp_into_item->assign(*other.into_item_))) { + if (OB_FAIL(temp_into_item->deep_copy(expr_copier, *other.into_item_))) { LOG_WARN("deep copy into item failed", K(ret)); } else { into_item_ = temp_into_item; @@ -473,6 +504,10 @@ int ObSelectStmt::iterate_stmt_expr(ObStmtExprVisitor &visitor) LOG_WARN("failed to visit select into", K(ret)); } } + if (OB_SUCC(ret) && into_item_->file_partition_expr_ != NULL + && OB_FAIL(visitor.visit(into_item_->file_partition_expr_, SCOPE_SELECT))) { + LOG_WARN("failed to visit select into", K(ret)); + } } return ret; } diff --git a/src/sql/resolver/dml/ob_select_stmt.h b/src/sql/resolver/dml/ob_select_stmt.h index 88b28e5882..e2ffcaff7a 100644 --- a/src/sql/resolver/dml/ob_select_stmt.h +++ b/src/sql/resolver/dml/ob_select_stmt.h @@ -126,7 +126,9 @@ struct ObSelectIntoItem is_optional_(DEFAULT_OPTIONAL_ENCLOSED), is_single_(DEFAULT_SINGLE_OPT), max_file_size_(DEFAULT_MAX_FILE_SIZE), - escaped_cht_() + escaped_cht_(), + file_partition_expr_(NULL), + buffer_size_(DEFAULT_BUFFER_SIZE) { field_str_.set_varchar(DEFAULT_FIELD_TERM_STR); field_str_.set_collation_type(ObCharset::get_system_collation()); @@ -151,8 +153,12 @@ struct ObSelectIntoItem max_file_size_ = other.max_file_size_; escaped_cht_ = other.escaped_cht_; cs_type_ = other.cs_type_; + file_partition_expr_ = other.file_partition_expr_; + buffer_size_ = other.buffer_size_; return user_vars_.assign(other.user_vars_); } + int deep_copy(ObIRawExprCopier &copier, + const ObSelectIntoItem &other); TO_STRING_KV(K_(into_type), K_(outfile_name), K_(field_str), @@ -162,7 +168,8 @@ struct ObSelectIntoItem K_(is_single), K_(max_file_size), K_(escaped_cht), - K_(cs_type)); + K_(cs_type), + N_EXPR, file_partition_expr_); ObItemType into_type_; common::ObObj 
outfile_name_; common::ObObj field_str_; // field terminated str @@ -175,6 +182,9 @@ struct ObSelectIntoItem int64_t max_file_size_; common::ObObj escaped_cht_; common::ObCollationType cs_type_; + sql::ObRawExpr* file_partition_expr_; + int64_t buffer_size_; + ObPQDistributeMethod::Type dist_method_; static const char* const DEFAULT_FIELD_TERM_STR; static const char* const DEFAULT_LINE_TERM_STR; @@ -182,6 +192,7 @@ struct ObSelectIntoItem static const bool DEFAULT_OPTIONAL_ENCLOSED; static const bool DEFAULT_SINGLE_OPT; static const int64_t DEFAULT_MAX_FILE_SIZE; + static const int64_t DEFAULT_BUFFER_SIZE; static const char DEFAULT_FIELD_ESCAPED_CHAR; }; diff --git a/src/sql/resolver/ob_resolver.cpp b/src/sql/resolver/ob_resolver.cpp index 049d8fff8f..7ecfcde113 100644 --- a/src/sql/resolver/ob_resolver.cpp +++ b/src/sql/resolver/ob_resolver.cpp @@ -1291,7 +1291,8 @@ int ObResolver::resolve(IsPrepared if_prepared, const ParseNode &parse_tree, ObS } } // end switch - if (OB_SUCC(ret) && stmt->is_dml_stmt()) { + // 外表写只放开insert + if (OB_SUCC(ret) && stmt->is_dml_stmt() && !stmt->is_insert_stmt()) { OZ( (static_cast(stmt)->disable_writing_external_table()) ); } diff --git a/src/sql/resolver/ob_resolver_utils.cpp b/src/sql/resolver/ob_resolver_utils.cpp index ee0d1aabcf..b419345026 100644 --- a/src/sql/resolver/ob_resolver_utils.cpp +++ b/src/sql/resolver/ob_resolver_utils.cpp @@ -4783,7 +4783,8 @@ int ObResolverUtils::calc_file_column_idx(const ObString &column_name, uint64_t int32_t PREFIX_LEN = column_name.prefix_match_ci(N_PARTITION_LIST_COL) ? str_length(N_PARTITION_LIST_COL) : column_name.prefix_match_ci(N_EXTERNAL_FILE_COLUMN_PREFIX)? - str_length(N_EXTERNAL_FILE_COLUMN_PREFIX) : -1; + str_length(N_EXTERNAL_FILE_COLUMN_PREFIX) : column_name.prefix_match_ci(N_EXTERNAL_TABLE_COLUMN_PREFIX) ? 
+ str_length(N_EXTERNAL_TABLE_COLUMN_PREFIX) : -1; if (column_name.length() <= PREFIX_LEN) { ret = OB_ERR_UNEXPECTED; } else { @@ -4863,6 +4864,16 @@ int ObResolverUtils::resolve_external_table_column_def(ObRawExprFactory &expr_fa LOG_WARN("fail to build external table file column expr", K(ret)); } } + } else if (ObExternalFileFormat::ODPS_FORMAT == ObResolverUtils::resolve_external_file_column_type(q_name.col_name_)) { + if (OB_FAIL(ObResolverUtils::calc_file_column_idx(q_name.col_name_, file_column_idx))) { + LOG_WARN("fail to calc file column idx", K(ret)); + } else if (nullptr == (file_column_expr = ObResolverUtils::find_file_column_expr( + real_exprs, OB_INVALID_ID, file_column_idx, q_name.col_name_))) { + if (OB_FAIL(ObResolverUtils::build_file_column_expr_for_odps(expr_factory, session_info, + OB_INVALID_ID, ObString(), q_name.col_name_, file_column_idx, gen_col_schema, file_column_expr))) { + LOG_WARN("fail to build external table file column expr", K(ret)); + } + } } else { if (OB_FAIL(ObResolverUtils::build_file_row_expr_for_parquet(expr_factory, session_info, OB_INVALID_ID, ObString(), @@ -4904,6 +4915,8 @@ ObExternalFileFormat::FormatType ObResolverUtils::resolve_external_file_column_t ObExternalFileFormat::FormatType type = ObExternalFileFormat::INVALID_FORMAT; if (name.prefix_match_ci(N_EXTERNAL_FILE_COLUMN_PREFIX)) { type = ObExternalFileFormat::CSV_FORMAT; + } else if (name.prefix_match_ci(N_EXTERNAL_TABLE_COLUMN_PREFIX)) { + type = ObExternalFileFormat::ODPS_FORMAT; } else if (0 == name.case_compare(N_EXTERNAL_FILE_ROW)) { type = ObExternalFileFormat::PARQUET_FORMAT; } @@ -5048,6 +5061,46 @@ int ObResolverUtils::build_file_row_expr_for_parquet( return ret; } +int ObResolverUtils::build_file_column_expr_for_odps(ObRawExprFactory &expr_factory, + const ObSQLSessionInfo &session_info, + const uint64_t table_id, + const ObString &table_name, + const ObString &column_name, + int64_t column_idx, + const ObColumnSchemaV2 *column_schema, + ObRawExpr 
*&expr) +{ + int ret = OB_SUCCESS; + ObPseudoColumnRawExpr *file_column_expr = nullptr; + ObItemType type = T_PSEUDO_EXTERNAL_FILE_COL; + uint64_t extra = column_idx; + if (OB_ISNULL(column_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexcepted null ptr", K(ret)); + } else if (OB_FAIL(expr_factory.create_raw_expr(type, file_column_expr))) { + LOG_WARN("create nextval failed", K(ret)); + } else if (OB_ISNULL(file_column_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("expr is null", K(ret)); + } else { + file_column_expr->set_expr_name(column_name); + file_column_expr->set_table_name(table_name); + file_column_expr->set_table_id(table_id); + file_column_expr->set_explicited_reference(); + file_column_expr->set_extra(extra); + file_column_expr->set_meta_type(column_schema->get_meta_type()); + file_column_expr->set_collation_level(CS_LEVEL_IMPLICIT); + file_column_expr->set_accuracy(column_schema->get_accuracy()); + if (OB_FAIL(file_column_expr->formalize(&session_info))) { + LOG_WARN("failed to extract info", K(ret)); + } else { + expr = file_column_expr; + } + } + + return ret; +} + int ObResolverUtils::build_file_column_expr_for_csv(ObRawExprFactory &expr_factory, const ObSQLSessionInfo &session_info, const uint64_t table_id, @@ -9715,5 +9768,36 @@ int ObResolverUtils::check_schema_valid_for_mview(const ObTableSchema &table_sch return ret; } +bool ObResolverUtils::is_external_pseudo_column(const ObRawExpr &expr) +{ + bool ret = false; + if (T_PSEUDO_EXTERNAL_FILE_COL == expr.get_expr_type() + || T_PSEUDO_EXTERNAL_FILE_URL == expr.get_expr_type() + || T_PSEUDO_EXTERNAL_FILE_ROW == expr.get_expr_type()) { + ret = true; + } + return ret; +} + +int ObResolverUtils::cnt_external_pseudo_column(const ObRawExpr &expr, bool &contain) +{ + int ret = OB_SUCCESS; + contain = false; + if (is_external_pseudo_column(expr)) { + contain = true; + } else { + for (int64_t i = 0; !contain && i < expr.get_children_count(); i++) { + const ObRawExpr *child = expr.get_param_expr(i); 
+ if (OB_ISNULL(child)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid child", K(child)); + } else if (OB_FAIL(SMART_CALL(cnt_external_pseudo_column(*child, contain)))) { + LOG_WARN("failed to check if need to check col duplicate", K(ret)); + } + } + } + return ret; +} + } // namespace sql } // namespace oceanbase diff --git a/src/sql/resolver/ob_resolver_utils.h b/src/sql/resolver/ob_resolver_utils.h index e8675ae230..0ebb450cc3 100644 --- a/src/sql/resolver/ob_resolver_utils.h +++ b/src/sql/resolver/ob_resolver_utils.h @@ -774,6 +774,15 @@ public: int64_t column_idx, const ObString &expr_name); static int calc_file_column_idx(const ObString &column_name, uint64_t &file_column_idx); + static int build_file_column_expr_for_odps( + ObRawExprFactory &expr_factory, + const ObSQLSessionInfo &session_info, + const uint64_t table_id, + const common::ObString &table_name, + const common::ObString &column_name, + int64_t column_idx, + const ObColumnSchemaV2 *column_schema, + ObRawExpr *&expr); static int build_file_column_expr_for_csv( ObRawExprFactory &expr_factory, const ObSQLSessionInfo &session_info, @@ -879,6 +888,8 @@ public: static int64_t get_mysql_max_partition_num(const uint64_t tenant_id); static int check_schema_valid_for_mview(const share::schema::ObTableSchema &table_schema); + static bool is_external_pseudo_column(const ObRawExpr &expr); + static int cnt_external_pseudo_column(const ObRawExpr &expr, bool &contain); private: static int try_convert_to_unsiged(const ObExprResType restype, ObRawExpr& src_expr, diff --git a/unittest/sql/engine/dml/CMakeLists.txt b/unittest/sql/engine/dml/CMakeLists.txt index 051fbbd933..5b3f1f828d 100644 --- a/unittest/sql/engine/dml/CMakeLists.txt +++ b/unittest/sql/engine/dml/CMakeLists.txt @@ -1,2 +1,3 @@ #ob_unittest(test_table_insert) #ob_unittest(test_insert_up) +#ob_unittest(test_odps) diff --git a/unittest/sql/engine/dml/test_odps.cpp b/unittest/sql/engine/dml/test_odps.cpp new file mode 100644 index 
0000000000..6ba706e389 --- /dev/null +++ b/unittest/sql/engine/dml/test_odps.cpp @@ -0,0 +1,98 @@ +#ifdef OB_BUILD_CPP_ODPS +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include +#include +#include +#include + +#include + +using namespace std; +using namespace apsara; +using namespace apsara::odps; +using namespace apsara::odps::sdk; + +namespace test +{ + +class TestODPS: public ::testing::Test +{ +public: + TestODPS() {} + ~TestODPS() {} +}; + +TEST_F(TestODPS, test_odps) +{ + string ep = ""; + string odpsEndpoint = ""; + string project = "ailing_test"; + string table = "odps_table_jim"; + uint32_t blockId = 0; + + Account account(ACCOUNT_ALIYUN, "", ""); + + Configuration conf; + conf.SetAccount(account); + conf.SetTunnelEndpoint(ep); + conf.SetEndpoint(odpsEndpoint); + conf.SetUserAgent("UPLOAD_EXAMPLE"); + + OdpsTunnel dt; + + try + { + std::vector blocks; + dt.Init(conf); + IUploadPtr upload = dt.CreateUpload(project, table, "ds = 2, other = 'b'", "", true); + for (blockId = 0; blockId < 2; blockId++) { + std::cout << upload->GetStatus() << std::endl; + IRecordWriterPtr wr = upload->OpenWriter(blockId); + ODPSTableRecordPtr _r = upload->CreateBufferRecord(); + std::cout << _r->GetSchema()->GetColumnCount() << std::endl; + ODPSTableRecord& r = *_r; + + for (size_t i = 0; i < 10; i++) + { + if (i % 10 == 0) + { + r.SetNullValue(0); + } + else + { + //r.SetBigIntValue(0, i); + r.SetStringValue(0, std::string("hello") + std::to_string(i)); + } + wr->Write(r); + } + wr->Close(); + 
std::cout << upload->GetStatus() << std::endl; + blocks.push_back(blockId); + } + upload->Commit(blocks); + std::cout << upload->GetStatus() << std::endl; + } + catch(OdpsException& e) + { + std::cerr << "OdpsTunnelException:\n" << e.what() << std::endl; + } +} +} + +int main(int argc, char **argv) +{ + ::testing::InitGoogleTest(&argc,argv); + return RUN_ALL_TESTS(); +} +#endif \ No newline at end of file diff --git a/unittest/sql/test_sql_utils.h b/unittest/sql/test_sql_utils.h index d3d8557606..bbe25e0b86 100644 --- a/unittest/sql/test_sql_utils.h +++ b/unittest/sql/test_sql_utils.h @@ -135,7 +135,7 @@ public: ObSchemaGetterGuard &get_schema_guard() { return schema_guard_; } public: //table id - hash::ObHashMap next_user_table_id_map_; + oceanbase::common::hash::ObHashMap next_user_table_id_map_; //user_id uint64_t sys_user_id_; uint64_t next_user_id_;