// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #ifndef DORIS_BE_SRC_QUERY_EXEC_CSV_SCAN_NODE_H #define DORIS_BE_SRC_QUERY_EXEC_CSV_SCAN_NODE_H #include #include #include #include "common/config.h" #include "exec/csv_scanner.h" #include "exec/scan_node.h" #include "runtime/descriptors.h" namespace doris { class TextConverter; class Tuple; class TupleDescriptor; class RuntimeState; class MemPool; class Status; class CsvScanNode : public ScanNode { public: CsvScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs); ~CsvScanNode(); virtual Status init(const TPlanNode& tnode); // initialize _csv_scanner, and create _text_converter. virtual Status prepare(RuntimeState* state); // Start CSV scan using _csv_scanner. virtual Status open(RuntimeState* state); // Fill the next row batch by calling next() on the _csv_scanner, // converting text data in CSV cells to binary data. virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos); // Release memory, report 'Counter', and report errors. virtual Status close(RuntimeState* state); // No use in csv scan process virtual Status set_scan_ranges(const std::vector& scan_ranges); // Write debug string of this into out. virtual void debug_string(int indentation_level, std::stringstream* out) const; private: bool check_and_write_text_slot( const std::string& column_name, const TColumnType& column_type, const char* value, int value_length, const SlotDescriptor* slot, RuntimeState* state, std::stringstream* error_msg); // split one line into fields, check every fields, fill every filed into tuple bool split_check_fill(const std::string& line, RuntimeState* state); void fill_fix_length_string( const char* value, int value_length, MemPool* pool, char** new_value, int new_value_length); bool check_decimal_input( const char* value, int value_length, int precision, int scale, std::stringstream* error_msg); void hll_hash(const char* src, int len, std::string* result); bool check_hll_function(TMiniLoadEtlFunction& function); // Tuple id resolved in prepare() to set _tuple_desc; TupleId _tuple_id; std::vector _file_paths; std::string _column_separator; std::map _column_type_map; // mapping function std::map _column_function_map; std::vector _columns; // 'unspecified_columns' is map one-for-one to '_default_values' in the same order std::vector _unspecified_columns; std::vector _default_values; // Map one-for-one to '_columns' in the same order std::vector _column_slot_vec; std::vector _column_type_vec; // Map one-for-one to '_unspecified_columns' in the same order std::vector _unspecified_colomn_slot_vec; std::vector _unspecified_colomn_type_vec; bool _is_init; // Descriptor of tuples read from CSV file. const TupleDescriptor* _tuple_desc; // Tuple index in tuple row. int _slot_num; // Pool for allocating tuple data, including all varying-length slots. boost::scoped_ptr _tuple_pool; // Util class for doing real file reading boost::scoped_ptr _csv_scanner; // Helper class for converting text to other types; boost::scoped_ptr _text_converter; // Current tuple. Tuple* _tuple; // Current RuntimeState RuntimeState* _runtime_state; int64_t _error_row_number = 0L; int64_t _normal_row_number = 0L; RuntimeProfile::Counter* _split_check_timer; RuntimeProfile::Counter* _split_line_timer; // count hll value num int _hll_column_num; std::map _column_slot_map; }; } // end namespace doris #endif // DORIS_BE_SRC_QUERY_EXEC_CSV_SCAN_NODE_H