[fix](broker-scan-node) Remove trailing spaces in broker_scanner to make it consistent with Hive and Trino behavior. (#9190)
Hive and Trino/Presto automatically trim trailing spaces, but Doris does not, which causes query results to differ from Hive's. This commit adds a new session variable, "trim_tailing_spaces_for_external_table_query". When it is set to true, the broker scan node trims the trailing spaces of each column value while reading CSV data.
This commit is contained in:
@ -339,19 +339,20 @@ void BrokerScanner::split_line(const Slice& line) {
|
||||
delete[] ptr;
|
||||
} else {
|
||||
const char* value = line.data;
|
||||
size_t start = 0; // point to the start pos of next col value.
|
||||
size_t curpos = 0; // point to the start pos of separator matching sequence.
|
||||
size_t p1 = 0; // point to the current pos of separator matching sequence.
|
||||
size_t start = 0; // point to the start pos of next col value.
|
||||
size_t curpos = 0; // point to the start pos of separator matching sequence.
|
||||
size_t p1 = 0; // point to the current pos of separator matching sequence.
|
||||
size_t non_space = 0; // point to the last pos of non_space charactor.
|
||||
|
||||
// Separator: AAAA
|
||||
//
|
||||
// curpos
|
||||
// p1
|
||||
// ▼
|
||||
// AAAA
|
||||
// 1000AAAA2000AAAA
|
||||
// ▲ ▲
|
||||
// Start │
|
||||
// p1
|
||||
// curpos
|
||||
|
||||
while (curpos < line.size) {
|
||||
if (*(value + curpos + p1) != _value_separator[p1]) {
|
||||
@ -362,16 +363,30 @@ void BrokerScanner::split_line(const Slice& line) {
|
||||
p1++;
|
||||
if (p1 == _value_separator_length) {
|
||||
// Match a separator
|
||||
_split_values.emplace_back(value + start, curpos - start);
|
||||
non_space = curpos;
|
||||
// Trim trailing spaces to be consistent with the behavior of Hive and Trino.
|
||||
if (_state->trim_tailing_spaces_for_external_table_query()) {
|
||||
while (non_space > start && *(value + non_space - 1) == ' ') {
|
||||
non_space--;
|
||||
}
|
||||
}
|
||||
_split_values.emplace_back(value + start, non_space - start);
|
||||
start = curpos + _value_separator_length;
|
||||
curpos = start;
|
||||
p1 = 0;
|
||||
non_space = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
CHECK(curpos == line.size) << curpos << " vs " << line.size;
|
||||
_split_values.emplace_back(value + start, curpos - start);
|
||||
non_space = curpos;
|
||||
if (_state->trim_tailing_spaces_for_external_table_query()) {
|
||||
while (non_space > start && *(value + non_space - 1) == ' ') {
|
||||
non_space--;
|
||||
}
|
||||
}
|
||||
_split_values.emplace_back(value + start, non_space - start);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -326,6 +326,10 @@ public:
|
||||
|
||||
// Returns the enable_vectorized_engine flag stored in _query_options.
bool enable_vectorized_exec() const { return _query_options.enable_vectorized_engine; }
|
||||
|
||||
// Returns the trim_tailing_spaces_for_external_table_query flag stored in _query_options.
bool trim_tailing_spaces_for_external_table_query() const {
|
||||
return _query_options.trim_tailing_spaces_for_external_table_query;
|
||||
}
|
||||
|
||||
// Returns the return_object_data_as_binary flag stored in _query_options.
bool return_object_data_as_binary() const {
|
||||
return _query_options.return_object_data_as_binary;
|
||||
}
|
||||
|
||||
@ -180,6 +180,8 @@ public class SessionVariable implements Serializable, Writable {
|
||||
|
||||
public static final String ENABLE_PROJECTION = "enable_projection";
|
||||
|
||||
public static final String TRIM_TAILING_SPACES_FOR_EXTERNAL_TABLE_QUERY = "trim_tailing_spaces_for_external_table_query";
|
||||
|
||||
// session origin value
|
||||
public Map<Field, String> sessionOriginValue = new HashMap<Field, String>();
|
||||
// check stmt is or not [select /*+ SET_VAR(...)*/ ...]
|
||||
@ -439,6 +441,9 @@ public class SessionVariable implements Serializable, Writable {
|
||||
@VariableMgr.VarAttr(name = ENABLE_PROJECTION)
|
||||
private boolean enableProjection = true;
|
||||
|
||||
@VariableMgr.VarAttr(name = TRIM_TAILING_SPACES_FOR_EXTERNAL_TABLE_QUERY, needForward = true)
|
||||
public boolean trimTailingSpacesForExternalTableQuery = false;
|
||||
|
||||
/** Returns the current value of the blockEncryptionMode field. */
public String getBlockEncryptionMode() {
|
||||
return blockEncryptionMode;
|
||||
}
|
||||
@ -895,6 +900,14 @@ public class SessionVariable implements Serializable, Writable {
|
||||
return enableProjection;
|
||||
}
|
||||
|
||||
/** Returns the current value of the trimTailingSpacesForExternalTableQuery field. */
public boolean isTrimTailingSpacesForExternalTableQuery() {
|
||||
return trimTailingSpacesForExternalTableQuery;
|
||||
}
|
||||
|
||||
/** Stores the given value in the trimTailingSpacesForExternalTableQuery field. */
public void setTrimTailingSpacesForExternalTableQuery(boolean trimTailingSpacesForExternalTableQuery) {
|
||||
this.trimTailingSpacesForExternalTableQuery = trimTailingSpacesForExternalTableQuery;
|
||||
}
|
||||
|
||||
// Serialize to thrift object
|
||||
// used for rest api
|
||||
public TQueryOptions toThrift() {
|
||||
@ -912,6 +925,7 @@ public class SessionVariable implements Serializable, Writable {
|
||||
tResult.setCodegenLevel(codegenLevel);
|
||||
tResult.setEnableVectorizedEngine(enableVectorizedEngine);
|
||||
tResult.setReturnObjectDataAsBinary(returnObjectDataAsBinary);
|
||||
tResult.setTrimTailingSpacesForExternalTableQuery(trimTailingSpacesForExternalTableQuery);
|
||||
|
||||
tResult.setBatchSize(batchSize);
|
||||
tResult.setDisableStreamPreaggregations(disableStreamPreaggregations);
|
||||
|
||||
@ -160,6 +160,9 @@ struct TQueryOptions {
|
||||
// show bitmap data in result, if use this in mysql cli may make the terminal
|
||||
// output corrupted character
|
||||
43: optional bool return_object_data_as_binary = false
|
||||
|
||||
// trim trailing spaces while querying external table and stream load
|
||||
44: optional bool trim_tailing_spaces_for_external_table_query = false
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user