[fix](csv_reader) fix trim_double_quotes behavior change (#27882)

This commit is contained in:
HHoflittlefish777
2023-12-03 22:57:55 +08:00
committed by GitHub
parent 3ddc8211d1
commit 97d36b4f38
6 changed files with 79 additions and 1 deletions

View File

@ -176,6 +176,26 @@ public:
}
return change;
}
/// Strip one pair of enclosing double quotes ('"') from the slice.
///
/// The slice is narrowed only when it is longer than two bytes and both its
/// first and last byte are '"'; in every other case it is left untouched.
///
/// @note Only the base and bounds of the slice are changed;
///       the data is not modified.
///
/// @note NOTE(review): a slice that is exactly `""` (two bytes) is NOT
///       trimmed, because the length check is `size > 2`. Confirm that
///       leaving an empty quoted field as-is is the intended behavior.
///
/// @return true if the enclosing quotes were dropped, false if the slice
///         was left unchanged.
bool trim_double_quotes() {
    if (size > 2 && data[0] == '"' && data[size - 1] == '"') {
        // Skip the opening quote and drop both quote bytes from the length.
        data += 1;
        size -= 2;
        return true;
    }
    return false;
}
/// Truncate the slice to the given number of bytes.
///
/// @pre n <= size

View File

@ -619,7 +619,7 @@ Status CsvReader::_create_decompressor() {
template <bool from_json>
Status CsvReader::deserialize_nullable_string(IColumn& column, Slice& slice) {
auto& null_column = assert_cast<ColumnNullable&>(column);
if (!(from_json && _options.converted_from_string && slice.trim_quote())) {
if (!(from_json && _options.converted_from_string && slice.trim_double_quotes())) {
if (slice.size == 2 && slice[0] == '\\' && slice[1] == 'N') {
null_column.insert_data(nullptr, 0);
return Status::OK();

View File

@ -19,3 +19,5 @@
7 8 3 abc 2022-12-01 2022-12-01T09:30:31
8 9 3 abc 2022-12-01 2022-12-01T09:30:31
-- !sql_test_single_quote --
'a' 21 'b'

View File

@ -0,0 +1 @@
?"a"?,21,?"b"?
Can't render this file because it contains an unexpected character in line 1 and column 2.

View File

@ -0,0 +1 @@
'a',21,'b'
1 'a' 21 'b'

View File

@ -59,4 +59,58 @@ suite("test_csv_with_double_quotes", "p0") {
sql "sync"
qt_sql "select * from ${tableName} order by k1, k2"
sql """ DROP TABLE IF EXISTS ${tableName} """
// Helper closure: issues a CREATE TABLE statement for a table with the given
// name. The table has three nullable columns (name, age, agent_id), uses
// DUPLICATE KEY(`name`) and is distributed over 10 random buckets.
// testTablex - name of the table to create.
def create_table = { testTablex ->
sql """
CREATE TABLE `${testTablex}` (
`name` varchar(48) NULL,
`age` bigint(20) NULL,
`agent_id` varchar(256) NULL
) ENGINE=OLAP
DUPLICATE KEY(`name`)
COMMENT 'OLAP'
DISTRIBUTED BY RANDOM BUCKETS 10
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"is_being_synced" = "false",
"storage_format" = "V2",
"light_schema_change" = "true",
"disable_auto_compaction" = "false",
"enable_single_replica_compaction" = "false"
);
"""
}
// Case: stream-load 'test_single_quote.csv' with 'trim_double_quotes' enabled
// and then query the table back.
// NOTE(review): presumably this guards against single-quoted fields being
// stripped when only double-quote trimming is requested - confirm against the
// CSV fixture and the expected-output file.
def tableName1 = "test_single_quotes"
try {
// Drop any table left over from a previous run before re-creating it.
sql "DROP TABLE IF EXISTS ${tableName1}"
create_table.call(tableName1)
streamLoad {
table "${tableName1}"
set 'column_separator', ','
set 'trim_double_quotes', 'true'
file 'test_single_quote.csv'
check { result, exception, startTime, endTime ->
// Re-throw an exception handed to the check callback so the case fails.
if (exception != null) {
throw exception
}
log.info("Stream load result: ${result}".toString())
def json = parseJson(result)
// The load must succeed with exactly one row in total and no rows
// filtered or unselected.
assertEquals("success", json.Status.toLowerCase())
assertEquals(1, json.NumberTotalRows)
assertEquals(0, json.NumberFilteredRows)
assertEquals(0, json.NumberUnselectedRows)
}
}
// Presumably compared by the framework against the recorded
// "sql_test_single_quote" section of the expected-output file.
qt_sql_test_single_quote "SELECT * FROM ${tableName1} order by name"
} finally {
// Drop the table whether or not the steps above succeeded.
sql "DROP TABLE IF EXISTS ${tableName1}"
}
}