[feature](vectorized) support lateral view (#8448)

This commit is contained in:
Pxl
2022-03-17 10:04:24 +08:00
committed by GitHub
parent aadfbcb9c8
commit a824c3e489
18 changed files with 602 additions and 93 deletions

View File

@ -27,8 +27,7 @@ ExplodeSplitTableFunction::ExplodeSplitTableFunction() {
_fn_name = "explode_split";
}
ExplodeSplitTableFunction::~ExplodeSplitTableFunction() {
}
ExplodeSplitTableFunction::~ExplodeSplitTableFunction() {}
Status ExplodeSplitTableFunction::prepare() {
return Status::OK();
@ -43,13 +42,14 @@ Status ExplodeSplitTableFunction::open() {
if (fn_ctx->is_arg_constant(1)) {
_is_delimiter_constant = true;
StringVal* delimiter = reinterpret_cast<StringVal*>(fn_ctx->get_constant_arg(1));
_const_delimter = StringPiece((char*) delimiter->ptr, delimiter->len);
_const_delimter = StringPiece((char*)delimiter->ptr, delimiter->len);
}
return Status::OK();
}
Status ExplodeSplitTableFunction::process(TupleRow* tuple_row) {
CHECK(2 == _expr_context->root()->get_num_children()) << _expr_context->root()->get_num_children();
CHECK(2 == _expr_context->root()->get_num_children())
<< _expr_context->root()->get_num_children();
_is_current_empty = false;
_eos = false;
@ -61,12 +61,14 @@ Status ExplodeSplitTableFunction::process(TupleRow* tuple_row) {
_cur_offset = 0;
} else {
if (_is_delimiter_constant) {
_backup = strings::Split(StringPiece((char*) text.ptr, text.len), _const_delimter);
_backup = strings::Split(StringPiece((char*)text.ptr, text.len), _const_delimter);
} else {
StringVal delimiter = _expr_context->root()->get_child(1)->get_string_val(_expr_context, tuple_row);
_backup = strings::Split(StringPiece((char*) text.ptr, text.len), StringPiece((char*) delimiter.ptr, delimiter.len));
StringVal delimiter =
_expr_context->root()->get_child(1)->get_string_val(_expr_context, tuple_row);
_backup = strings::Split(StringPiece((char*)text.ptr, text.len),
StringPiece((char*)delimiter.ptr, delimiter.len));
}
for (const std::string & str : _backup) {
for (const std::string& str : _backup) {
_data.emplace_back(str);
}
_cur_size = _backup.size();

View File

@ -18,7 +18,6 @@
#pragma once
#include "exprs/table_function/table_function.h"
#include "gutil/strings/stringpiece.h"
#include "runtime/string_value.h"
@ -38,8 +37,7 @@ public:
virtual Status forward(bool* eos) override;
private:
protected:
// The string values split from the source; they are referenced by the
// table function scan node.
// `_backup` holds the actual string data.
@ -50,7 +48,6 @@ private:
// if true, the constant delimiter will be saved in `_const_delimter`
bool _is_delimiter_constant = false;
StringPiece _const_delimter;
};
} // namespace doris

View File

@ -17,14 +17,18 @@
#pragma once
#include <fmt/core.h>
#include <stddef.h>
#include "common/status.h"
#include "vec/core/block.h"
#include "vec/exprs/vexpr_context.h"
namespace doris {
// TODO: think about how to manage the memory consumption of table functions.
// Currently, the memory used by table functions is allocated directly with malloc.
class TableFunctionState {
};
class TableFunctionState {};
class ExprContext;
class TupleRow;
@ -34,24 +38,54 @@ public:
virtual Status prepare() = 0;
virtual Status open() = 0;
virtual Status process(TupleRow* tuple_row) = 0;
// only used for vectorized.
// Default implementation for table functions without a vectorized version:
// `block` is not used, and the call always returns Status::NotSupported with
// a message that names this function (`_fn_name`). Subclasses may override it.
virtual Status process_init(vectorized::Block* block) {
    const auto message =
            fmt::format("vectorized table function {} not supported now.", _fn_name);
    return Status::NotSupported(message);
}
// only used for vectorized.
// Default implementation for table functions without a vectorized version:
// `row_idx` is not used, and the call always returns Status::NotSupported
// with a message that names this function (`_fn_name`).
virtual Status process_row(size_t row_idx) {
    const auto message =
            fmt::format("vectorized table function {} not supported now.", _fn_name);
    return Status::NotSupported(message);
}
// only used for vectorized.
// Default implementation for table functions without a vectorized version:
// always returns Status::NotSupported with a message that names this
// function (`_fn_name`).
virtual Status process_close() {
    const auto message =
            fmt::format("vectorized table function {} not supported now.", _fn_name);
    return Status::NotSupported(message);
}
virtual Status reset() = 0;
virtual Status get_value(void** output) = 0;
// only used for vectorized.
// Default implementation: writes -1 to `*length` and returns Status::OK().
// `length` is dereferenced without a null check.
virtual Status get_value_length(int64_t* length) {
    constexpr int64_t default_length = -1;
    *length = default_length;
    return Status::OK();
}
virtual Status close() = 0;
virtual Status forward(bool *eos) = 0;
virtual Status forward(bool* eos) = 0;
public:
// Returns a copy of the function name stored in `_fn_name`.
std::string name() const { return _fn_name; }
// Returns the current value of the `_eos` flag.
bool eos() const { return _eos; }
void set_expr_context(ExprContext* expr_context) {
_expr_context = expr_context;
void set_expr_context(ExprContext* expr_context) { _expr_context = expr_context; }
// Stores the given vectorized expression context pointer in `_vexpr_context`.
// The pointer is only stored here; it is not dereferenced.
void set_vexpr_context(vectorized::VExprContext* vexpr_context) {
_vexpr_context = vexpr_context;
}
protected:
std::string _fn_name;
ExprContext* _expr_context;
ExprContext* _expr_context = nullptr;
vectorized::VExprContext* _vexpr_context = nullptr;
// true if no more data can be read from this function.
bool _eos = false;
// true means the function's result set for the current row is empty (e.g. the source value is null or empty).

View File

@ -21,28 +21,49 @@
#include "exprs/table_function/explode_bitmap.h"
#include "exprs/table_function/explode_json_array.h"
#include "exprs/table_function/explode_split.h"
#include "exprs/table_function/table_function.h"
#include "vec/exprs/table_function/vexplode_split.h"
namespace doris {
Status TableFunctionFactory::get_fn(const std::string& fn_name, ObjectPool* pool, TableFunction** fn) {
if (fn_name == "explode_split") {
*fn = pool->add(new ExplodeSplitTableFunction());
return Status::OK();
} else if (fn_name == "explode_bitmap") {
*fn = pool->add(new ExplodeBitmapTableFunction());
return Status::OK();
} else if (fn_name == "explode_json_array_int") {
*fn = pool->add(new ExplodeJsonArrayTableFunction(ExplodeJsonArrayType::INT));
return Status::OK();
} else if (fn_name == "explode_json_array_double") {
*fn = pool->add(new ExplodeJsonArrayTableFunction(ExplodeJsonArrayType::DOUBLE));
return Status::OK();
} else if (fn_name == "explode_json_array_string") {
*fn = pool->add(new ExplodeJsonArrayTableFunction(ExplodeJsonArrayType::STRING));
return Status::OK();
} else {
return Status::NotSupported("Unknown table function: " + fn_name);
// Generic creator: a callable that allocates a default-constructed
// TableFunctionType with `new` and returns it as a TableFunction*.
// Nothing is deleted here; the returned raw pointer is handed to the caller.
template <typename TableFunctionType>
struct TableFunctionCreator {
    TableFunction* operator()() {
        TableFunction* created = new TableFunctionType();
        return created;
    }
};
// Specialization for ExplodeJsonArrayTableFunction, whose constructor takes an
// ExplodeJsonArrayType. The creator carries that value in `type` and passes it
// to the constructor on every call.
template <>
struct TableFunctionCreator<ExplodeJsonArrayTableFunction> {
    ExplodeJsonArrayType type;

    TableFunction* operator()() {
        TableFunction* created = new ExplodeJsonArrayTableFunction(type);
        return created;
    }
};
inline auto ExplodeJsonArrayIntCreator =
TableFunctionCreator<ExplodeJsonArrayTableFunction> {ExplodeJsonArrayType::INT};
inline auto ExplodeJsonArrayDoubleCreator =
TableFunctionCreator<ExplodeJsonArrayTableFunction> {ExplodeJsonArrayType::DOUBLE};
inline auto ExplodeJsonArrayStringCreator =
TableFunctionCreator<ExplodeJsonArrayTableFunction> {ExplodeJsonArrayType::STRING};
//{fn_name,is_vectorized}->table_function_creator
// Creator registry consulted by TableFunctionFactory::get_fn.
// The key is {function name, is_vectorized}; the value is a callable that
// returns a newly allocated TableFunction. In this table only "explode_split"
// is registered with is_vectorized == true.
const std::unordered_map<std::pair<std::string, bool>, std::function<TableFunction*()>>
TableFunctionFactory::_function_map {
{{"explode_split", false}, TableFunctionCreator<ExplodeSplitTableFunction>()},
{{"explode_bitmap", false}, TableFunctionCreator<ExplodeBitmapTableFunction>()},
{{"explode_json_array_int", false}, ExplodeJsonArrayIntCreator},
{{"explode_json_array_double", false}, ExplodeJsonArrayDoubleCreator},
{{"explode_json_array_string", false}, ExplodeJsonArrayStringCreator},
{{"explode_split", true}, TableFunctionCreator<VExplodeSplitTableFunction>()}};
// Looks up the creator registered in `_function_map` under {fn_name, is_vectorized}.
//
// On a hit, the creator is invoked, the new table function is passed to
// `pool->add()`, the resulting pointer is written to `*fn`, and Status::OK()
// is returned.
// On a miss, `*fn` is left untouched and Status::NotSupported is returned; the
// message is prefixed with "vectorized " when `is_vectorized` is true.
Status TableFunctionFactory::get_fn(const std::string& fn_name, bool is_vectorized,
                                    ObjectPool* pool, TableFunction** fn) {
    const auto entry = _function_map.find({fn_name, is_vectorized});
    if (entry == _function_map.end()) {
        const std::string mode_prefix = is_vectorized ? "vectorized " : "";
        return Status::NotSupported(mode_prefix + "table function " + fn_name + " not support");
    }

    TableFunction* created = entry->second();
    *fn = pool->add(created);
    return Status::OK();
}
} // namespace doris

View File

@ -17,10 +17,12 @@
#pragma once
#include "exprs/table_function/table_function_factory.h"
#include "exprs/table_function/explode_split.h"
#include <functional>
#include <unordered_map>
#include "common/status.h"
#include "exprs/table_function/explode_split.h"
#include "exprs/table_function/table_function_factory.h"
namespace doris {
@ -30,7 +32,11 @@ class TableFunctionFactory {
public:
// Trivial constructor and destructor. The other members visible in this class
// (get_fn and _function_map) are static.
TableFunctionFactory() {}
~TableFunctionFactory() {}
static Status get_fn(const std::string& fn_name, ObjectPool* pool, TableFunction** fn);
// Creates the table function registered for {fn_name, is_vectorized}, adds it
// to `pool`, and stores the resulting pointer in `*fn`.
// Returns Status::NotSupported if no function is registered under that key.
static Status get_fn(const std::string& fn_name, bool is_vectorized, ObjectPool* pool,
TableFunction** fn);
// Registry used by get_fn: maps {function name, is_vectorized} to a callable
// that returns a newly created TableFunction.
// NOTE(review): the standard library has no std::hash specialization for
// std::pair, so this presumably relies on one provided elsewhere in the
// project — confirm it is visible wherever this header is included.
const static std::unordered_map<std::pair<std::string, bool>, std::function<TableFunction*()>>
_function_map;
};
} // namespace doris