Files
doris/be/src/olap/push_handler.h
wyb 3f7307d685 [Spark Load]Add spark etl job main class (#3927)
1. Add SparkEtlJob class
2. Remove DppResult comment
3. Support loading from hive table directly

#3433
2020-06-24 13:54:55 +08:00

260 lines
6.6 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef DORIS_BE_SRC_OLAP_PUSH_HANDLER_H
#define DORIS_BE_SRC_OLAP_PUSH_HANDLER_H
#include <map>
#include <string>
#include <vector>
#include "exec/base_scanner.h"
#include "gen_cpp/AgentService_types.h"
#include "gen_cpp/MasterService_types.h"
#include "gen_cpp/PaloInternalService_types.h"
#include "olap/file_helper.h"
#include "olap/merger.h"
#include "olap/olap_common.h"
#include "olap/rowset/rowset.h"
#include "olap/row_cursor.h"
namespace doris {
class BinaryFile;
class BinaryReader;
class ColumnMapping;
class RowCursor;
struct TabletVars {
TabletSharedPtr tablet;
RowsetSharedPtr rowset_to_add;
};
class PushHandler {
public:
typedef std::vector<ColumnMapping> SchemaMapping;
PushHandler() {}
~PushHandler() {}
// Load local data file into specified tablet.
OLAPStatus process_streaming_ingestion(
TabletSharedPtr tablet,
const TPushReq& request,
PushType push_type,
std::vector<TTabletInfo>* tablet_info_vec);
int64_t write_bytes() const { return _write_bytes; }
int64_t write_rows() const { return _write_rows; }
private:
OLAPStatus _convert_v2(
TabletSharedPtr cur_tablet,
TabletSharedPtr new_tablet_vec,
RowsetSharedPtr* cur_rowset,
RowsetSharedPtr* new_rowset);
// Convert local data file to internal formatted delta,
// return new delta's SegmentGroup
OLAPStatus _convert(
TabletSharedPtr cur_tablet,
TabletSharedPtr new_tablet_vec,
RowsetSharedPtr* cur_rowset,
RowsetSharedPtr* new_rowset);
// Only for debug
std::string _debug_version_list(const Versions& versions) const;
void _get_tablet_infos(
const std::vector<TabletVars>& tablet_infos,
std::vector<TTabletInfo>* tablet_info_vec);
OLAPStatus _do_streaming_ingestion(
TabletSharedPtr tablet,
const TPushReq& request,
PushType push_type,
vector<TabletVars>* tablet_vars,
std::vector<TTabletInfo>* tablet_info_vec);
private:
// mainly tablet_id, version and delta file path
TPushReq _request;
int64_t _write_bytes = 0;
int64_t _write_rows = 0;
DISALLOW_COPY_AND_ASSIGN(PushHandler);
};
// package FileHandlerWithBuf to read header of dpp output file
class BinaryFile : public FileHandlerWithBuf {
public:
BinaryFile() {}
virtual ~BinaryFile() {
close();
}
OLAPStatus init(const char* path);
size_t header_size() const {
return _header.size();
}
size_t file_length() const {
return _header.file_length();
}
uint32_t checksum() const {
return _header.checksum();
}
SchemaHash schema_hash() const {
return _header.message().schema_hash();
}
private:
FileHeader<OLAPRawDeltaHeaderMessage, int32_t, FileHandlerWithBuf> _header;
DISALLOW_COPY_AND_ASSIGN(BinaryFile);
};
class IBinaryReader {
public:
static IBinaryReader* create(bool need_decompress);
virtual ~IBinaryReader() {}
virtual OLAPStatus init(TabletSharedPtr tablet, BinaryFile* file) = 0;
virtual OLAPStatus finalize() = 0;
virtual OLAPStatus next(RowCursor* row) = 0;
virtual bool eof() = 0;
// call this function after finalize()
bool validate_checksum() {
return _adler_checksum == _file->checksum();
}
protected:
IBinaryReader()
: _file(NULL),
_content_len(0),
_curr(0),
_adler_checksum(ADLER32_INIT),
_ready(false) {
}
BinaryFile* _file;
TabletSharedPtr _tablet;
size_t _content_len;
size_t _curr;
uint32_t _adler_checksum;
bool _ready;
};
// input file reader for Protobuffer format
class BinaryReader: public IBinaryReader {
public:
explicit BinaryReader();
virtual ~BinaryReader() {
finalize();
}
virtual OLAPStatus init(TabletSharedPtr tablet, BinaryFile* file);
virtual OLAPStatus finalize();
virtual OLAPStatus next(RowCursor* row);
virtual bool eof() {
return _curr >= _content_len;
}
private:
char* _row_buf;
size_t _row_buf_size;
};
class LzoBinaryReader: public IBinaryReader {
public:
explicit LzoBinaryReader();
virtual ~LzoBinaryReader() {
finalize();
}
virtual OLAPStatus init(TabletSharedPtr tablet, BinaryFile* file);
virtual OLAPStatus finalize();
virtual OLAPStatus next(RowCursor* row);
virtual bool eof() {
return _curr >= _content_len && _row_num == 0;
}
private:
OLAPStatus _next_block();
typedef uint32_t RowNumType;
typedef uint64_t CompressedSizeType;
char* _row_buf;
char* _row_compressed_buf;
char* _row_info_buf;
size_t _max_row_num;
size_t _max_row_buf_size;
size_t _max_compressed_buf_size;
size_t _row_num;
size_t _next_row_start;
};
class PushBrokerReader {
public:
PushBrokerReader()
: _ready(false),
_eof(false) {}
~PushBrokerReader() {}
OLAPStatus init(const Schema* schema,
const TBrokerScanRange& t_scan_range,
const TDescriptorTable& t_desc_tbl);
OLAPStatus next(ContiguousRow* row);
void print_profile();
OLAPStatus close() {
_ready = false;
return OLAP_SUCCESS;
}
bool eof() {
return _eof;
}
MemPool* mem_pool() {
return _mem_pool.get();
}
private:
bool _ready;
bool _eof;
TupleDescriptor* _tuple_desc;
Tuple* _tuple;
const Schema* _schema;
std::unique_ptr<RuntimeState> _runtime_state;
RuntimeProfile* _runtime_profile;
std::unique_ptr<MemTracker> _mem_tracker;
std::unique_ptr<MemPool> _mem_pool;
std::unique_ptr<ScannerCounter> _counter;
std::unique_ptr<BaseScanner> _scanner;
};
} // namespace doris
#endif // DORIS_BE_SRC_OLAP_PUSH_HANDLER_H