baidu palo

2017-08-11 17:51:21 +08:00
commit e2311f656e
1988 changed files with 586941 additions and 0 deletions
--- a/be/src/runtime/vectorized_row_batch.cpp
+++ b/be/src/runtime/vectorized_row_batch.cpp
@ -0,0 +1,229 @@
+// Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "runtime/vectorized_row_batch.h"
+#include "common/logging.h"
+
+namespace palo {
+
+//VectorizedRowBatch::VectorizedRowBatch(const TupleDescriptor& tuple_desc, int capacity)
+VectorizedRowBatch::VectorizedRowBatch(
+        const std::vector<FieldInfo>& schema, int capacity, MemTracker* mem_tracker)
+    : _schema(schema), _capacity(capacity), _num_cols(schema.size()), _mem_pool(mem_tracker) {
+    _selected_in_use = false;
+    _size = 0;
+    _row_iter = 0;
+    _has_backup = false;
+    _selected = reinterpret_cast<int*>(_mem_pool.allocate(sizeof(int) * _capacity));
+
+    for (int i = 0; i < _num_cols; ++i) {
+        boost::shared_ptr<ColumnVector> col_vec(new ColumnVector(_capacity));
+        _columns.push_back(col_vec);
+    }
+}
+
+bool VectorizedRowBatch::get_next_tuple(Tuple* tuple, const TupleDescriptor& tuple_desc) {
+    if (_row_iter < _size) {
+        std::vector<SlotDescriptor*> slots = tuple_desc.slots();
+        if (_selected_in_use) {
+            for (int i = 0; i < slots.size(); ++i) {
+                void* slot = tuple->get_slot(slots[i]->tuple_offset());
+                memory_copy(slot,
+                        reinterpret_cast<uint8_t*>(_columns[i]->col_data())
+                        + get_slot_size(slots[i]->type().type) * _selected[_row_iter],
+                        get_slot_size(slots[i]->type().type));
+            }
+        } else {
+            for (int i = 0; i < slots.size(); ++i) {
+                void* slot = tuple->get_slot(slots[i]->tuple_offset());
+                memory_copy(slot,
+                        reinterpret_cast<uint8_t*>(_columns[i]->col_data())
+                        + get_slot_size(slots[i]->type().type) * _row_iter,
+                        get_slot_size(slots[i]->type().type));
+            }
+        }
+        ++_row_iter;
+        return true;
+    } else {
+        return false;
+    }
+}
+
+void VectorizedRowBatch::to_row_batch(RowBatch* row_batch, const TupleDescriptor& tuple_desc) {
+    const std::vector<SlotDescriptor*> slots = tuple_desc.slots();
+    int row_remain = row_batch->capacity() - row_batch->num_rows();
+    int size = std::min(row_remain, _size - _row_iter);
+
+    if (size <= 0) {
+        return;
+    }
+
+    int row_index = row_batch->add_rows(size);
+    DCHECK(row_index != RowBatch::INVALID_ROW_INDEX);
+    uint8_t* tuple_buf = row_batch->tuple_data_pool()->allocate(
+                             size * tuple_desc.byte_size());
+    bzero(tuple_buf, size * tuple_desc.byte_size());
+    Tuple* tuple = reinterpret_cast<Tuple*>(tuple_buf);
+
+    if (_selected_in_use) {
+        for (int i = _row_iter; i < _row_iter + size; ++i) {
+            for (int j = 0; j < slots.size(); ++j) {
+                // TODO(hujie01) bad code need optimize
+                if (slots[j]->type().is_string_type()) {
+                    StringValue* src = reinterpret_cast<StringValue*>(
+                                           reinterpret_cast<uint8_t*>(_columns[j]->col_data())
+                                           + slots[j]->type().get_slot_size() * _selected[i]);
+                    uint8_t* v = row_batch->tuple_data_pool()->allocate(src->len);
+                    memory_copy(v, src->ptr, src->len);
+                    // if src->len == 0 then dst->ptr = NULL
+                    StringValue* slot = tuple->get_string_slot(slots[j]->tuple_offset());
+                    slot->ptr = reinterpret_cast<char*>(v);
+                    slot->len = src->len;
+                } else {
+                    void* slot = tuple->get_slot(slots[j]->tuple_offset());
+                    memory_copy(slot,
+                                reinterpret_cast<uint8_t*>(_columns[j]->col_data())
+                                + slots[j]->type().get_slot_size() * _selected[i],
+                                slots[j]->type().get_slot_size());
+                }
+            }
+
+            TupleRow* row = row_batch->get_row(row_index++);
+            row->set_tuple(0, tuple);
+            tuple = reinterpret_cast<Tuple*>(reinterpret_cast<uint8_t*>(tuple) +
+                                             tuple_desc.byte_size());
+        }
+    } else {
+        for (int i = _row_iter; i < _row_iter + size; ++i) {
+            for (int j = 0; j < slots.size(); ++j) {
+                // TODO(hujie01) bad code need optimize
+                if (slots[j]->type().is_string_type()) {
+                    StringValue* slot = tuple->get_string_slot(slots[j]->tuple_offset());
+                    StringValue* src = reinterpret_cast<StringValue*>(
+                                           reinterpret_cast<uint8_t*>(_columns[j]->col_data())
+                                           + slots[j]->type().get_slot_size() * i);
+                    uint8_t* v = row_batch->tuple_data_pool()->allocate(src->len);
+                    memory_copy(v, src->ptr, src->len);
+                    // if src->len == 0 then dst->ptr = NULL
+                    slot->ptr = reinterpret_cast<char*>(v);
+                    slot->len = src->len;
+                } else {
+                    void* slot = tuple->get_slot(slots[j]->tuple_offset());
+                    memory_copy(slot,
+                                reinterpret_cast<uint8_t*>(_columns[j]->col_data())
+                                + slots[j]->type().get_slot_size() * i,
+                                slots[j]->type().get_slot_size());
+                }
+            }
+
+            TupleRow* row = row_batch->get_row(row_index++);
+            row->set_tuple(0, tuple);
+            tuple = reinterpret_cast<Tuple*>(reinterpret_cast<uint8_t*>(tuple) +
+                                             tuple_desc.byte_size());
+        }
+    }
+
+    _row_iter += size;
+    row_batch->commit_rows(size);
+}
+
+#if 0
+void VectorizedRowBatch::reorganized_from_pax(
+        RowBlock
+        const std::vector<FieldInfo>& schema) {
+    for (int i = 0, j = 0; i < _num_cols && j < schema.size(); ++i) {
+        if (_schema[i].unique_id != schema[j].unique_id
+                || column(i)->col_data() != NULL) {
+            continue;
+        }
+        ++j;
+
+        switch (_schema[i].type) {
+        case OLAP_FIELD_TYPE_STRING: {
+            StringValue* value = reinterpret_cast<StringValue*>(
+                                     _mem_pool.allocate(get_slot_size(TYPE_VARCHAR) * _size));
+            int len = column(i)->byte_size() / _size;
+            char* raw = reinterpret_cast<char*>(column(i)->col_data());
+
+            for (int j = 0; j < _size; ++j) {
+                value[j].ptr = raw + len * j;
+                value[j].len = strnlen(value[j].ptr, len);
+            }
+
+            column(i)->set_col_data(value);
+            break;
+        }
+        case OLAP_FIELD_TYPE_VARCHAR:
+             OLAP_FIELD_TYPE_HLL :{
+            typedef uint32_t OffsetValueType;
+            typedef uint16_t LengthValueType;
+            DCHECK_EQ(sizeof(OffsetValueType) * _size, column(i)->byte_size());
+            StringValue* value = reinterpret_cast<StringValue*>(
+                                     _mem_pool.allocate(get_slot_size(TYPE_VARCHAR) * _size));
+            OffsetValueType* offsets = reinterpret_cast<OffsetValueType*>(
+                                           column(i)->col_data());
+            char* raw = reinterpret_cast<char*>(column(i)->col_string_data());
+
+            for (int j = 0; j < _size; ++j) {
+                value[j].len = *reinterpret_cast<LengthValueType*>(raw + offsets[j]);
+                value[j].ptr = raw + offsets[j] + sizeof(LengthValueType);
+            }
+
+            column(i)->set_col_data(value);
+            break;
+        }
+        case OLAP_FIELD_TYPE_DATE: {
+            uint8_t* value = reinterpret_cast<uint8_t*>(
+                                 _mem_pool.allocate(get_slot_size(TYPE_DATE) * _size));
+            uint8_t* raw = reinterpret_cast<uint8_t*>(column(i)->col_data());
+
+            for (int j = 0; j < _size; ++j) {
+                new(value + j * get_slot_size(TYPE_DATE))
+                TimestampValue(raw + j * 3, OLAP_DATETIME);
+            }
+
+            column(i)->set_col_data(value);
+            break;
+        }
+        case OLAP_FIELD_TYPE_DATETIME: {
+            uint8_t* value = reinterpret_cast<uint8_t*>(
+                                 _mem_pool.allocate(get_slot_size(TYPE_DATETIME) * _size));
+            uint8_t* raw = reinterpret_cast<uint8_t*>(column(i)->col_data());
+
+            for (int j = 0; j < _size; ++j) {
+                new(value + j * get_slot_size(TYPE_DATETIME))
+                TimestampValue(raw + j * 8, OLAP_DATE);
+            }
+
+            column(i)->set_col_data(value);
+            break;
+        }
+        default:
+            break;
+        }
+    }
+}
+#endif
+
+//void VectorizedRowBatch::reorganized_from_dsm() {
+
+//}
+}
+
+/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */