[fix](array-type) fix the invalid format load for stream load (#12424)
This PR fixes how stream load handles rows whose array column has an invalid format.
Before this change, loading a file that contained an invalid array value made the whole load fail with an error.
The original file to load:
1 [1, 2, 3]
2 [4, 5, 6]
3 \N
4 [7, \N, 8]
5 10, 11, 12
[hugo@xafj-palo]$ sh curl_cmd.sh
{
"TxnId": 11035,
"Label": "11c9f111-188e-4616-9a50-aec8b7814513",
"TwoPhaseCommit": "false",
"Status": "Fail",
"Message": "Array does not start with '[' character, found '1'",
"NumberTotalRows": 0,
"NumberLoadedRows": 0,
"NumberFilteredRows": 0,
"NumberUnselectedRows": 0,
"LoadBytes": 55,
"LoadTimeMs": 7,
"BeginTxnTimeMs": 0,
"StreamLoadPutTimeMs": 2,
"ReadDataTimeMs": 0,
"WriteDataTimeMs": 3,
"CommitAndPublishTimeMs": 0
}
3. After this change, the load succeeds, the malformed line is filtered out, and the response includes an ErrorURL that reports the offending line.
[hugo@xafj-palo]$ sh curl_cmd.sh
{
"TxnId": 11046,
"Label": "249808ee-55f4-4c08-b671-b3d82689d614",
"TwoPhaseCommit": "false",
"Status": "Success",
"Message": "OK",
"NumberTotalRows": 5,
"NumberLoadedRows": 4,
"NumberFilteredRows": 1,
"NumberUnselectedRows": 0,
"LoadBytes": 55,
"LoadTimeMs": 39,
"BeginTxnTimeMs": 0,
"StreamLoadPutTimeMs": 2,
"ReadDataTimeMs": 0,
"WriteDataTimeMs": 19,
"CommitAndPublishTimeMs": 16,
"ErrorURL": "http://10.81.85.89:8502/api/_load_error_log?file=__shard_3/error_log_insert_stmt_8d4130f0c18aeb0a-ad7ffd4233c41893_8d4130f0c18aeb0a_ad7ffd4233c41893"
}
The SQL select result:
MySQL [example_db]> select * from array_test06;
+------+--------------+
| k1 | k2 |
+------+--------------+
| 1 | [1, 2, 3] |
| 2 | [4, 5, 6] |
| 3 | NULL |
| 4 | [7, NULL, 8] |
+------+--------------+
4 rows in set (0.019 sec)
The error URL page shows:
"Reason: Invalid format for array column(k2). src line [10, 11, 12]; "
Issue Number: #7570
This commit is contained in:
@ -487,4 +487,38 @@ void BaseScanner::_fill_columns_from_path() {
|
||||
}
|
||||
}
|
||||
|
||||
bool BaseScanner::is_null(const Slice& slice) {
|
||||
return slice.size == 2 && slice.data[0] == '\\' && slice.data[1] == 'N';
|
||||
}
|
||||
|
||||
bool BaseScanner::is_array(const Slice& slice) {
|
||||
return slice.size > 1 && slice.data[0] == '[' && slice.data[slice.size - 1] == ']';
|
||||
}
|
||||
|
||||
bool BaseScanner::check_array_format(std::vector<Slice>& split_values) {
|
||||
// if not the array format, filter this line and return error url
|
||||
auto dest_slot_descs = _dest_tuple_desc->slots();
|
||||
for (int j = 0; j < split_values.size() && j < dest_slot_descs.size(); ++j) {
|
||||
auto dest_slot_desc = dest_slot_descs[j];
|
||||
if (!dest_slot_desc->is_materialized()) {
|
||||
continue;
|
||||
}
|
||||
const Slice& value = split_values[j];
|
||||
if (dest_slot_desc->type().is_array_type() && !is_null(value) && !is_array(value)) {
|
||||
RETURN_IF_ERROR(_state->append_error_msg_to_file(
|
||||
[&]() -> std::string { return std::string(value.data, value.size); },
|
||||
[&]() -> std::string {
|
||||
fmt::memory_buffer err_msg;
|
||||
fmt::format_to(err_msg, "Invalid format for array column({})",
|
||||
dest_slot_desc->col_name());
|
||||
return fmt::to_string(err_msg);
|
||||
},
|
||||
&_scanner_eof));
|
||||
_counter->num_rows_filtered++;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace doris
|
||||
|
||||
Reference in New Issue
Block a user