From 419e922a6925bb02bf44c4d5e806c0fbcfa1fef7 Mon Sep 17 00:00:00 2001 From: daidai <2017501503@qq.com> Date: Fri, 18 Aug 2023 18:23:19 +0800 Subject: [PATCH] [fix](json)Fix the bug that does not stop when reading json files (#23062) * [fix](json)Fix the bug that does not stop when reading json files --- .../vec/exec/format/json/new_json_reader.cpp | 1 + .../external_table_p0/tvf/test_hdfs_tvf.out | 37 +++++++++++++++++ .../tvf/test_hdfs_tvf.groovy | 40 +++++++++++++++++++ 3 files changed, 78 insertions(+) diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp b/be/src/vec/exec/format/json/new_json_reader.cpp index eb5c602238..b02c30807d 100644 --- a/be/src/vec/exec/format/json/new_json_reader.cpp +++ b/be/src/vec/exec/format/json/new_json_reader.cpp @@ -1054,6 +1054,7 @@ Status NewJsonReader::_read_one_message(std::unique_ptr<uint8_t[]>* file_buf, si file_buf->reset(new uint8_t[file_size]); Slice result(file_buf->get(), file_size); RETURN_IF_ERROR(_file_reader->read_at(_current_offset, result, read_size, _io_ctx)); + _current_offset += *read_size; break; } case TFileType::FILE_STREAM: { diff --git a/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out b/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out index 9fb4acdf35..9ae369b977 100644 --- a/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out +++ b/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out @@ -257,6 +257,43 @@ 8 chengdu 2345678 9 xian 2345679 +-- !json_limit1 -- +1 beijing 2345671 +10 hefei 23456710 +11 \N 23456711 +12 hefei \N +2 shanghai 2345672 +3 guangzhou 2345673 +4 shenzhen 2345674 +5 hangzhou 2345675 +6 nanjing 2345676 +7 wuhan 2345677 +8 chengdu 2345678 +9 xian 2345679 + +-- !json_limit2 -- +1 beijing 1454547 +10 hefei 2345676 +2 shanghai 1244264 +3 guangzhou 528369 +4 shenzhen 594201 +5 hangzhou 594201 +6 nanjing 2345672 +7 wuhan 2345673 +8 chengdu 2345674 +9 xian 2345675 + +-- !json_limit3 -- +1 {"id": 1, "city": "beijing", "code": 2345671} +2 {"id": 2, "city": 
"shanghai", "code": 2345672} +3 {"id": 3, "city": "hangzhou", "code": 2345673} +4 {"id": 4, "city": "shenzhen", "code": 2345674} +5 {"id": 5, "city": "guangzhou", "code": 2345675} + +-- !json_limit4 -- +1 {"id": 1, "city": "beijing", "code": 2345671} +2 {"id": 2, "city": "shanghai", "code": 2345672} + -- !json_root -- 1 beijing 2345671 2 shanghai 2345672 diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy b/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy index 06d6c12c14..32315f60e6 100644 --- a/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy +++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy @@ -132,6 +132,46 @@ suite("test_hdfs_tvf","external,hive,tvf,external_docker") { "strip_outer_array" = "false", "read_json_by_line" = "true") order by id; """ + + uri = "${defaultFS}" + "/user/doris/preinstalled_data/json_format_test/simple_object_json.json" + format = "json" + qt_json_limit1 """ select * from HDFS( + "uri" = "${uri}", + "fs.defaultFS"= "${defaultFS}", + "hadoop.username" = "${hdfsUserName}", + "format" = "${format}", + "strip_outer_array" = "false", + "read_json_by_line" = "true") order by id limit 100; """ + + uri = "${defaultFS}" + "/user/doris/preinstalled_data/json_format_test/one_array_json.json" + format = "json" + qt_json_limit2 """ select * from HDFS( + "uri" = "${uri}", + "fs.defaultFS"= "${defaultFS}", + "hadoop.username" = "${hdfsUserName}", + "format" = "${format}", + "strip_outer_array" = "true", + "read_json_by_line" = "false") order by id limit 100; """ + uri = "${defaultFS}" + "/user/doris/preinstalled_data/json_format_test/nest_json.json" + format = "json" + qt_json_limit3 """ select * from HDFS( + "uri" = "${uri}", + "fs.defaultFS"= "${defaultFS}", + "hadoop.username" = "${hdfsUserName}", + "format" = "${format}", + "strip_outer_array" = "false", + "read_json_by_line" = "true") order by no limit 100; """ + uri = "${defaultFS}" + 
"/user/doris/preinstalled_data/json_format_test/nest_json.json" + format = "json" + qt_json_limit4 """ select * from HDFS( + "uri" = "${uri}", + "fs.defaultFS"= "${defaultFS}", + "hadoop.username" = "${hdfsUserName}", + "format" = "${format}", + "strip_outer_array" = "false", + "read_json_by_line" = "true") order by no limit 2; """ + + // test json root uri = "${defaultFS}" + "/user/doris/preinstalled_data/json_format_test/nest_json.json" format = "json"