Files
doris/be/src/runtime/data_spliter.h
chenhao7253886 37b4cafe87 Change variable and namespace name in BE (#268)
Change 'palo' to 'doris'
2018-11-02 10:22:32 +08:00

115 lines
3.5 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef DORIS_BE_RUNTIME_DATA_SPLITER_H
#define DORIS_BE_RUNTIME_DATA_SPLITER_H
#include <unordered_map>
#include <string>
#include <vector>
#include "common/status.h"
#include "exec/data_sink.h"
#include "runtime/dpp_sink_internal.h"
#include "util/runtime_profile.h"
namespace doris {
class RowDescriptor;
class RuntimeState;
class RowBatch;
class ObjectPool;
class TDataSplitSink;
class Expr;
class ExprContext;
class PartitionInfo;
class RollupSchema;
class TupleRow;
class DppSink;
class MemTracker;
// DataSpliter used to split data input to groups of data
// according to partition information, distributed information
// and rollup information.
// TODO(zc): think about this to make it more reusable
class DataSpliter : public DataSink {
public:
// Construct from thrift struct which is generated by FE.
DataSpliter(const RowDescriptor& row_desc);
virtual ~DataSpliter();
virtual Status prepare(RuntimeState* state);
virtual Status open(RuntimeState* state);
virtual Status send(RuntimeState* state, RowBatch* batch);
virtual Status close(RuntimeState* state, Status close_status);
// Returns the runtime profile for the sink.
virtual RuntimeProfile* profile() {
return _profile;
}
static Status from_thrift(ObjectPool* pool,
const TDataSplitSink& t_sink,
DataSpliter* spliter);
private:
int binary_find_partition(const PartRangeKey& key) const;
Status process_partition(
RuntimeState* state, TupleRow* row, PartitionInfo** info, int32_t* part_index);
Status process_distribute(RuntimeState* state, TupleRow* row,
const PartitionInfo* part, uint32_t* mod);
Status send_row(
RuntimeState* state, const TabletDesc& desc, TupleRow* row, DppSink* dpp_sink);
Status process_one_row(RuntimeState* state, TupleRow* row);
boost::scoped_ptr<ObjectPool> _obj_pool;
const RowDescriptor& _row_desc;
// Information used to partition data
// partition exprs
std::vector<ExprContext*> _partition_expr_ctxs;
// map from range value to partition_id
// sorted in ascending orderi by range for binary search
std::vector<PartitionInfo*> _partition_infos;
// Information of rollup
// from name to rollup information.
std::map<std::string, RollupSchema*> _rollup_map;
std::unordered_map<TabletDesc, RowBatch*> _batch_map;
std::unordered_map<TabletDesc, DppSink*> _sink_map;
std::vector<DppSink*> _dpp_sink_vec;
// Allocated from _pool
RuntimeProfile* _profile;
// Wall time senders spend waiting for the recv buffer to have capacity.
RuntimeProfile::Counter* _split_timer;
RuntimeProfile::Counter* _finish_timer;
};
}
#endif