Files
doris/be/src/runtime/dpp_writer.cpp
2019-06-14 23:38:31 +08:00

282 lines
8.4 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "runtime/dpp_writer.h"
#include <stdio.h>
#include <vector>
#include "olap/utils.h"
#include "exprs/expr.h"
#include "util/debug_util.h"
#include "util/types.h"
#include "runtime/primitive_type.h"
#include "runtime/row_batch.h"
#include "runtime/tuple_row.h"
namespace doris {
// Builds a writer that serializes the values of `output_expr_ctxs` into `fp`.
//
// No buffer is allocated here (`_buf`, `_pos` and `_end` start out null); open() does that.
// The constructor only precomputes how many bytes the per-row null bitmap needs:
// one bit per nullable output expression, rounded up to whole bytes.
DppWriter::DppWriter(
        int32_t schema_hash,
        const std::vector<ExprContext*>& output_expr_ctxs,
        FileHandler* fp) :
        _schema_hash(schema_hash),
        _output_expr_ctxs(output_expr_ctxs),
        _fp(fp),
        _buf(nullptr),
        _end(nullptr),
        _pos(nullptr),
        _write_len(0),
        _content_adler32(1) {
    // Count the nullable output columns; each one owns a bit in the row's null bitmap.
    _num_null_slots = 0;
    for (ExprContext* ctx : _output_expr_ctxs) {
        if (ctx->is_nullable()) {
            ++_num_null_slots;
        }
    }

    // Round the bit count up to a whole number of bytes.
    _num_null_bytes = (_num_null_slots + 7) / 8;
}
// Frees the row buffer allocated by open() (or grown by the append helpers).
DppWriter::~DppWriter() {
    // delete[] on a null pointer is a no-op, so a writer that was never opened is fine too.
    delete[] _buf;
}
// Prepares the writer for output: fills in the file header, positions the file just past
// the space the header occupies, resets the content checksum and allocates the row buffer.
//
// Returns Status::OK() on success, or an InternalError status when the header cannot be
// prepared or the file cannot be positioned. In the failure case no buffer is allocated.
Status DppWriter::open() {
    // Fill in the header; it is written to the file later, by write_header().
    _header.mutable_message()->set_schema_hash(_schema_hash);
    // NOTE(review): assumes prepare() reports an OLAPStatus the same way FileHandler::write()
    // does in add_batch() -- confirm against the FileHeader declaration.
    if (_header.prepare(_fp) != OLAP_SUCCESS) {
        return Status::InternalError("prepare file header failed.");
    }

    // Same initial value the constructor uses for the running adler32 of the row data.
    _content_adler32 = 1;

    // Skip over the header area so that row data starts right behind it.
    // NOTE(review): assumes seek() follows the lseek() convention of returning -1 on
    // failure -- confirm against the FileHandler declaration.
    if (_fp->seek(_header.size(), SEEK_SET) == -1) {
        return Status::InternalError("seek to end of file header failed.");
    }

    // Allocate the initial row buffer; the append helpers grow it on demand.
    const int k_buf_len = 16 * 1024;
    // Release a buffer left over from an earlier open() so it is not leaked
    // (delete[] on a null pointer is a no-op).
    delete[] _buf;
    _buf = new char[k_buf_len];
    _pos = _buf;
    _end = _buf + k_buf_len;
    return Status::OK();
}
// Rewinds the write position to the start of the row buffer so the next row overwrites
// the previous one. The buffer itself (and its capacity) is kept.
void DppWriter::reset_buf() {
_pos = _buf;
}
// Appends `len` bytes starting at `ptr` to the row buffer and advances the write position.
//
// When the remaining space is too small the buffer is reallocated to
// max(2 * capacity, capacity + len) bytes and the bytes written so far are carried over.
// Any pointer into the old buffer is invalid after such a growth.
// A non-positive `len` is a no-op, so `ptr` may be null in that case.
void DppWriter::append_to_buf(const void* ptr, int len) {
    // Nothing to copy. This also keeps a null `ptr` with zero length away from memcpy,
    // whose pointer arguments must be valid even when the count is zero.
    if (len <= 0) {
        return;
    }

    // Compare against the remaining space instead of forming `_pos + len`, which could
    // point past the end of the allocation.
    if (len > _end - _pos) {
        // enlarge; sizes are computed in 64 bits so doubling a large buffer cannot
        // overflow a 32-bit int.
        const int64_t used = _pos - _buf;
        const int64_t capacity = _end - _buf;
        const int64_t new_capacity = std::max(2 * capacity, capacity + len);
        char* new_buf = new char[new_capacity];
        if (used > 0) {
            memcpy(new_buf, _buf, used);
        }
        delete[] _buf;
        _buf = new_buf;
        _pos = _buf + used;
        _end = _buf + new_capacity;
    }

    memcpy(_pos, ptr, len);
    _pos += len;
}
// Appends `len` zero bytes to the row buffer and advances the write position.
// append_one_row() uses this to reserve the null bitmap at the start of a row.
//
// When the remaining space is too small the buffer is reallocated to
// max(2 * capacity, capacity + len) bytes and the bytes written so far are carried over.
// Any pointer into the old buffer is invalid after such a growth.
// A non-positive `len` is a no-op.
void DppWriter::increase_buf(int len) {
    // Nothing to reserve (e.g. a schema without nullable columns has zero null bytes).
    if (len <= 0) {
        return;
    }

    // Compare against the remaining space instead of forming `_pos + len`, which could
    // point past the end of the allocation.
    if (len > _end - _pos) {
        // Sizes are computed in 64 bits so doubling a large buffer cannot overflow a
        // 32-bit int.
        const int64_t used = _pos - _buf;
        const int64_t capacity = _end - _buf;
        const int64_t new_capacity = std::max(2 * capacity, capacity + len);
        char* new_buf = new char[new_capacity];
        if (used > 0) {
            memcpy(new_buf, _buf, used);
        }
        delete[] _buf;
        _buf = new_buf;
        _pos = _buf + used;
        _end = _buf + new_capacity;
    }

    memset(_pos, 0, len);
    _pos += len;
}
// Serializes one row into the row buffer, starting at the current write position.
//
// Layout produced:
//   1. `_num_null_bytes` bytes of null bitmap: one bit per nullable output expression, in
//      expression order, most significant bit of each byte first; a set bit means NULL.
//   2. The encoded value of every column that is not NULL, in expression order.
//
// Returns Status::OK() on success and an InternalError status when a value cannot be
// encoded (null value in a non-nullable column, invalid string value, string longer than
// a 16-bit length can express, unknown column type). On error the buffer may hold a
// partially written row.
Status DppWriter::append_one_row(TupleRow* row) {
    const int num_columns = _output_expr_ctxs.size();
    // Remember the bitmap as an offset, not a pointer: every append below may reallocate
    // the buffer, so the bitmap address is recomputed from `_buf` for each column.
    const int bitmap_offset = _pos - _buf;
    // Index of the bitmap bit that belongs to the next nullable column.
    int null_bit = 0;
    increase_buf(_num_null_bytes);

    for (int i = 0; i < num_columns; ++i) {
        ExprContext* const ctx = _output_expr_ctxs[i];
        char* const null_bitmap = _buf + bitmap_offset;
        void* item = ctx->get_value(row);

        if (ctx->is_nullable()) {
            const int mask = 1 << (7 - null_bit % 8);
            char& bitmap_byte = null_bitmap[null_bit / 8];
            ++null_bit;
            if (item == nullptr) {
                // NULL value: flag it in the bitmap and emit no payload bytes.
                bitmap_byte |= mask;
                continue;
            }
            bitmap_byte &= ~mask;
        } else if (item == nullptr) {
            // A non-nullable column has no bitmap bit to record a NULL in, and every
            // encoder below dereferences `item`; fail instead of reading through null.
            std::stringstream ss;
            ss << "Null value in non-nullable column " << i;
            return Status::InternalError(ss.str());
        }

        switch (ctx->root()->type().type) {
        case TYPE_TINYINT:
            append_to_buf(item, 1);
            break;
        case TYPE_SMALLINT:
            append_to_buf(item, 2);
            break;
        case TYPE_INT:
            append_to_buf(item, 4);
            break;
        case TYPE_BIGINT:
            append_to_buf(item, 8);
            break;
        case TYPE_LARGEINT:
            append_to_buf(item, 16);
            break;
        case TYPE_FLOAT:
            append_to_buf(item, 4);
            break;
        case TYPE_DOUBLE:
            append_to_buf(item, 8);
            break;
        case TYPE_DATE: {
            const DateTimeValue* time_val = static_cast<const DateTimeValue*>(item);
            const uint64_t val = time_val->to_olap_date();
            // Only the three low-order bytes are written, least significant byte first.
            const uint8_t date_bytes[3] = {
                    static_cast<uint8_t>(val & 0xff),
                    static_cast<uint8_t>((val >> 8) & 0xff),
                    static_cast<uint8_t>((val >> 16) & 0xff)};
            append_to_buf(date_bytes, 3);
            break;
        }
        case TYPE_DATETIME: {
            const DateTimeValue* time_val = static_cast<const DateTimeValue*>(item);
            uint64_t val = time_val->to_olap_datetime();
            append_to_buf(&val, 8);
            break;
        }
        case TYPE_VARCHAR:
        case TYPE_HLL: {
            const StringValue* str_val = static_cast<const StringValue*>(item);
            if (UNLIKELY(str_val->ptr == nullptr && str_val->len != 0)) {
                return Status::InternalError("String value ptr is null");
            }
            // The length prefix is 16 bits wide; reject strings whose length does not
            // survive the narrowing.
            uint16_t len = str_val->len;
            if (len != str_val->len) {
                std::stringstream ss;
                ss << "length of string is overflow.len=" << str_val->len;
                return Status::InternalError(ss.str());
            }
            // Write the length first, then the bytes.
            append_to_buf(&len, 2);
            // An empty string may carry a null ptr, so do not hand it to a copy.
            if (len != 0) {
                append_to_buf(str_val->ptr, len);
            }
            break;
        }
        case TYPE_CHAR: {
            // Written without a length prefix; empty values are rejected.
            const StringValue* str_val = static_cast<const StringValue*>(item);
            if (UNLIKELY(str_val->ptr == nullptr || str_val->len == 0)) {
                return Status::InternalError("String value ptr is null");
            }
            append_to_buf(str_val->ptr, str_val->len);
            break;
        }
        case TYPE_DECIMAL: {
            // Written as an int64 integer part followed by an int32 fraction part.
            const DecimalValue* decimal_val = reinterpret_cast<const DecimalValue*>(item);
            int64_t int_val = decimal_val->int_value();
            int32_t frac_val = decimal_val->frac_value();
            append_to_buf(&int_val, sizeof(int_val));
            append_to_buf(&frac_val, sizeof(frac_val));
            break;
        }
        case TYPE_DECIMALV2: {
            // Same on-disk form as TYPE_DECIMAL: int64 integer part, int32 fraction part.
            const DecimalV2Value decimal_val(reinterpret_cast<const PackedInt128*>(item)->value);
            int64_t int_val = decimal_val.int_value();
            int32_t frac_val = decimal_val.frac_value();
            append_to_buf(&int_val, sizeof(int_val));
            append_to_buf(&frac_val, sizeof(frac_val));
            break;
        }
        default: {
            std::stringstream ss;
            ss << "Unknown column type " << ctx->root()->type();
            return Status::InternalError(ss.str());
        }
        }
    }

    return Status::OK();
}
// Serializes every row of `batch` and writes it to the output file, one row at a time.
//
// Each row is encoded into the (rewound) row buffer, written through `_fp`, folded into
// the running adler32 checksum and added to the total written length. Stops at the first
// row that fails to encode or to write and returns that error; rows written before the
// failure stay in the file. An empty batch returns Status::OK() without touching the file.
Status DppWriter::add_batch(RowBatch* batch) {
    const int row_count = batch->num_rows();

    for (int row_idx = 0; row_idx < row_count; ++row_idx) {
        // Every row is encoded from the start of the buffer.
        reset_buf();

        Status row_status = append_one_row(batch->get_row(row_idx));
        if (!row_status.ok()) {
            LOG(WARNING) << "convert row to dpp output failed. reason: "
                         << row_status.get_error_msg();
            return row_status;
        }

        const int row_len = _pos - _buf;
        if (_fp->write(_buf, row_len) != OLAP_SUCCESS) {
            return Status::InternalError("write to file failed.");
        }

        // Only bytes that reached the file are counted in the checksum and length.
        _content_adler32 = olap_adler32(_content_adler32, _buf, row_len);
        _write_len += row_len;
    }

    return Status::OK();
}
// Finalizes the file header and writes it through `_fp`.
//
// The recorded file length is the header size plus all row bytes written by add_batch();
// the recorded checksum is the running adler32 over those row bytes.
// Returns Status::OK() on success, or an InternalError status when the header cannot be
// written.
Status DppWriter::write_header() {
    _header.set_file_length(_header.size() + _write_len);
    _header.set_checksum(_content_adler32);
    // Report a failed header write instead of claiming success.
    // NOTE(review): assumes serialize() reports an OLAPStatus the same way
    // FileHandler::write() does in add_batch() -- confirm against the FileHeader declaration.
    if (_header.serialize(_fp) != OLAP_SUCCESS) {
        return Status::InternalError("write file header failed.");
    }
    return Status::OK();
}
// Finishes the output by writing the file header; returns whatever write_header() returns.
// The row buffer is not released here; the destructor does that.
Status DppWriter::close() {
// Write header
return write_header();
}
}