[fix](json) Fix the bug where the reader does not stop when reading JSON files (#23062)

* [fix](json) Fix the bug where the reader does not stop when reading JSON files
This commit is contained in:
daidai
2023-08-18 18:23:19 +08:00
committed by GitHub
parent 477961dc21
commit 419e922a69
3 changed files with 78 additions and 0 deletions

View File

@ -1054,6 +1054,7 @@ Status NewJsonReader::_read_one_message(std::unique_ptr<uint8_t[]>* file_buf, si
file_buf->reset(new uint8_t[file_size]);
Slice result(file_buf->get(), file_size);
RETURN_IF_ERROR(_file_reader->read_at(_current_offset, result, read_size, _io_ctx));
_current_offset += *read_size;
break;
}
case TFileType::FILE_STREAM: {

View File

@ -257,6 +257,43 @@
8 chengdu 2345678
9 xian 2345679
-- !json_limit1 --
1 beijing 2345671
10 hefei 23456710
11 \N 23456711
12 hefei \N
2 shanghai 2345672
3 guangzhou 2345673
4 shenzhen 2345674
5 hangzhou 2345675
6 nanjing 2345676
7 wuhan 2345677
8 chengdu 2345678
9 xian 2345679
-- !json_limit2 --
1 beijing 1454547
10 hefei 2345676
2 shanghai 1244264
3 guangzhou 528369
4 shenzhen 594201
5 hangzhou 594201
6 nanjing 2345672
7 wuhan 2345673
8 chengdu 2345674
9 xian 2345675
-- !json_limit3 --
1 {"id": 1, "city": "beijing", "code": 2345671}
2 {"id": 2, "city": "shanghai", "code": 2345672}
3 {"id": 3, "city": "hangzhou", "code": 2345673}
4 {"id": 4, "city": "shenzhen", "code": 2345674}
5 {"id": 5, "city": "guangzhou", "code": 2345675}
-- !json_limit4 --
1 {"id": 1, "city": "beijing", "code": 2345671}
2 {"id": 2, "city": "shanghai", "code": 2345672}
-- !json_root --
1 beijing 2345671
2 shanghai 2345672

View File

@ -132,6 +132,46 @@ suite("test_hdfs_tvf","external,hive,tvf,external_docker") {
"strip_outer_array" = "false",
"read_json_by_line" = "true") order by id; """
// json_limit1: query simple_object_json.json through the HDFS table function with
// "strip_outer_array" = "false" and "read_json_by_line" = "true", ordered by id
// and capped with LIMIT 100.
// NOTE(review): presumably a regression case for the JSON reader failing to stop
// at end of input (per the commit title) — confirm against the reader change.
uri = "${defaultFS}" + "/user/doris/preinstalled_data/json_format_test/simple_object_json.json"
format = "json"
qt_json_limit1 """ select * from HDFS(
"uri" = "${uri}",
"fs.defaultFS"= "${defaultFS}",
"hadoop.username" = "${hdfsUserName}",
"format" = "${format}",
"strip_outer_array" = "false",
"read_json_by_line" = "true") order by id limit 100; """
// json_limit2: query one_array_json.json through the HDFS table function with the
// opposite parsing flags from json_limit1 ("strip_outer_array" = "true",
// "read_json_by_line" = "false"), ordered by id and capped with LIMIT 100.
// NOTE(review): presumably exercises the whole-file (non line-by-line) read path
// — confirm which reader branch this configuration reaches.
uri = "${defaultFS}" + "/user/doris/preinstalled_data/json_format_test/one_array_json.json"
format = "json"
qt_json_limit2 """ select * from HDFS(
"uri" = "${uri}",
"fs.defaultFS"= "${defaultFS}",
"hadoop.username" = "${hdfsUserName}",
"format" = "${format}",
"strip_outer_array" = "true",
"read_json_by_line" = "false") order by id limit 100; """
// json_limit3: query nest_json.json through the HDFS table function with
// "strip_outer_array" = "false" and "read_json_by_line" = "true", ordered by the
// `no` column and capped with LIMIT 100.
uri = "${defaultFS}" + "/user/doris/preinstalled_data/json_format_test/nest_json.json"
format = "json"
qt_json_limit3 """ select * from HDFS(
"uri" = "${uri}",
"fs.defaultFS"= "${defaultFS}",
"hadoop.username" = "${hdfsUserName}",
"format" = "${format}",
"strip_outer_array" = "false",
"read_json_by_line" = "true") order by no limit 100; """
// json_limit4: same file and parsing flags as json_limit3 (nest_json.json,
// "strip_outer_array" = "false", "read_json_by_line" = "true"), ordered by the
// `no` column but capped with LIMIT 2 instead of LIMIT 100.
uri = "${defaultFS}" + "/user/doris/preinstalled_data/json_format_test/nest_json.json"
format = "json"
qt_json_limit4 """ select * from HDFS(
"uri" = "${uri}",
"fs.defaultFS"= "${defaultFS}",
"hadoop.username" = "${hdfsUserName}",
"format" = "${format}",
"strip_outer_array" = "false",
"read_json_by_line" = "true") order by no limit 2; """
// test json root
uri = "${defaultFS}" + "/user/doris/preinstalled_data/json_format_test/nest_json.json"
format = "json"