From 4be589951b57fbd9199e1168087df51865eddb4e Mon Sep 17 00:00:00 2001 From: yiguolei Date: Tue, 7 May 2024 18:03:56 +0800 Subject: [PATCH] Revert "Revert "[fix](csv-reader) fix column split error when there is escape character (#34364)"" This reverts commit d127d67ebe989484bbdf340a4de5b79ded56eecc. --- .../exec/format/file_reader/new_plain_text_line_reader.cpp | 7 +++---- .../exec/format/file_reader/new_plain_text_line_reader.h | 2 ++ 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp b/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp index c395e52f36..fefd5ecae6 100644 --- a/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp +++ b/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp @@ -143,13 +143,12 @@ void EncloseCsvLineReaderContext::_on_normal(const uint8_t* start, size_t& len) } void EncloseCsvLineReaderContext::_on_pre_match_enclose(const uint8_t* start, size_t& len) { - bool should_escape = false; do { do { if (start[_idx] == _escape) [[unlikely]] { - should_escape = !should_escape; - } else if (should_escape) [[unlikely]] { - should_escape = false; + _should_escape = !_should_escape; + } else if (_should_escape) [[unlikely]] { + _should_escape = false; } else if (start[_idx] == _enclose) [[unlikely]] { _state.forward_to(ReaderState::MATCH_ENCLOSE); ++_idx; diff --git a/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.h b/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.h index babfc13641..0b0d9f133f 100644 --- a/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.h +++ b/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.h @@ -135,6 +135,7 @@ public: inline void refresh_impl() { _idx = 0; + _should_escape = false; _result = nullptr; _column_sep_positions.clear(); _state.reset(); @@ -168,6 +169,7 @@ private: const size_t _column_sep_len; size_t _idx = 0; + bool _should_escape = false; 
const std::string _column_sep; std::vector<size_t> _column_sep_positions;