As required by the Apache License, Version 2.0 (APLv2), referenced code must be marked with the path of its original source code.
747 lines
27 KiB
Python
Executable File
747 lines
27 KiB
Python
Executable File
#!/usr/bin/env python
|
|
# encoding: utf-8
|
|
|
|
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
# This file is copied from
|
|
# https://github.com/cloudera/Impala/blob/v0.7refresh/common/function-registry/gen_functions.py
|
|
# and modified by Doris
|
|
|
|
"""
|
|
# This script will generate the implementation of the simple functions for the BE.
|
|
# These include:
|
|
# - Arithmetic functions
|
|
# - Binary functions
|
|
# - Cast functions
|
|
#
|
|
# The script outputs (run: 'src/common/function/gen_functions.py')
|
|
# - header and implementation for above functions:
|
|
# - src/gen_cpp/opcode/functions.[h/cc]
|
|
# - python file that contains the metadata for those functions:
|
|
# - src/gen_cpp/generated_functions.py
|
|
"""
|
|
|
|
import errno
import os
import string
import sys
|
|
|
|
# Generated C++ for a one-operand operator: evaluate the only child, return
# NULL when it is NULL, otherwise store `${native_op} *val` in the result field.
unary_op = string.Template(
    "void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n"
    "Expr* op = e->children()[0];\n"
    "${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n"
    "if (val == NULL) return NULL;\n"
    "e->_result.${result_field} = ${native_op} *val;\n"
    "return &e->_result.${result_field};\n"
    "}\n\n"
)
|
|
|
|
|
|
# Generated C++ for a two-operand operator that divides by its second operand:
# same as the plain binary operator, but additionally returns NULL when the
# second operand, converted to double, equals 0.
binary_op_divid = string.Template(
    "void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n"
    "Expr* op1 = e->children()[0];\n"
    "${native_type1}* val1 = reinterpret_cast<${native_type1}*>(op1->get_value(row));\n"
    "Expr* op2 = e->children()[1];\n"
    "${native_type2}* val2 = reinterpret_cast<${native_type2}*>(op2->get_value(row));\n"
    "if (val1 == NULL || val2 == NULL) return NULL;\n"
    "double value= *val2;\n"
    "if (value == 0) return NULL;\n"
    "e->_result.${result_field} = (*val1 ${native_op} *val2);\n"
    "return &e->_result.${result_field};\n"
    "}\n\n"
)
|
|
|
|
# Generated C++ for a two-operand infix operator: evaluate both children,
# return NULL if either is NULL, otherwise store `*val1 ${native_op} *val2`.
binary_op = string.Template(
    "void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n"
    "Expr* op1 = e->children()[0];\n"
    "${native_type1}* val1 = reinterpret_cast<${native_type1}*>(op1->get_value(row));\n"
    "Expr* op2 = e->children()[1];\n"
    "${native_type2}* val2 = reinterpret_cast<${native_type2}*>(op2->get_value(row));\n"
    "if (val1 == NULL || val2 == NULL) return NULL;\n"
    "e->_result.${result_field} = (*val1 ${native_op} *val2);\n"
    "return &e->_result.${result_field};\n"
    "}\n\n"
)
|
|
|
|
# Generated C++ for a modulo computed with fmod(): returns NULL for NULL
# operands or when the second operand, as a double, equals 0.
double_mod = string.Template(
    "void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n"
    "Expr* op1 = e->children()[0];\n"
    "${native_type1}* val1 = reinterpret_cast<${native_type1}*>(op1->get_value(row));\n"
    "Expr* op2 = e->children()[1];\n"
    "${native_type2}* val2 = reinterpret_cast<${native_type2}*>(op2->get_value(row));\n"
    "if (val1 == NULL || val2 == NULL) return NULL;\n"
    "double value= *val2;\n"
    "if (value == 0) return NULL;\n"
    "e->_result.${result_field} = fmod(*val1, *val2);\n"
    "return &e->_result.${result_field};\n"
    "}\n\n"
)
|
|
|
|
# Generated C++ for a two-operand operation expressed as a member call:
# the result is `val1->${native_func}(*val2)`; NULL operands yield NULL.
binary_func = string.Template(
    "void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n"
    "Expr* op1 = e->children()[0];\n"
    "${native_type1}* val1 = reinterpret_cast<${native_type1}*>(op1->get_value(row));\n"
    "Expr* op2 = e->children()[1];\n"
    "${native_type2}* val2 = reinterpret_cast<${native_type2}*>(op2->get_value(row));\n"
    "if (val1 == NULL || val2 == NULL) return NULL;\n"
    "e->_result.${result_field} = val1->${native_func}(*val2);\n"
    "return &e->_result.${result_field};\n"
    "}\n\n"
)
|
|
|
|
# Generated C++ for a cast that fills the result field by calling
# assign_from_float() on it with the child's value.
float_to_decimal = string.Template(
    "void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n"
    "Expr* op = e->children()[0];\n"
    "${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n"
    "if (val == NULL) return NULL;\n"
    "e->_result.${result_field}.assign_from_float(*val);;\n"
    "return &e->_result.${result_field};\n"
    "}\n\n"
)
|
|
|
|
# Generated C++ for a cast that fills the result field by calling
# assign_from_double() on it with the child's value.
double_to_decimal = string.Template(
    "void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n"
    "Expr* op = e->children()[0];\n"
    "${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n"
    "if (val == NULL) return NULL;\n"
    "e->_result.${result_field}.assign_from_double(*val);;\n"
    "return &e->_result.${result_field};\n"
    "}\n\n"
)
|
|
|
|
# Generated C++ for a cast done by plain assignment: the child's value is
# assigned straight into the result field; a NULL child yields NULL.
cast = string.Template(
    "void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n"
    "Expr* op = e->children()[0];\n"
    "${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n"
    "if (val == NULL) return NULL;\n"
    "e->_result.${result_field} = *val;\n"
    "return &e->_result.${result_field};\n"
    "}\n\n"
)
|
|
|
|
# Generated C++ for parsing a string operand with
# StringParser::string_to_int<${native_type2}>; any parse result other than
# PARSE_SUCCESS makes the function return NULL.
string_to_int = string.Template(
    "void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n"
    "Expr* op = e->children()[0];\n"
    "${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n"
    "if (val == NULL) return NULL;\n"
    "StringParser::ParseResult result;\n"
    # The assignment and the parser call form one generated line.
    "e->_result.${result_field} = "
    "StringParser::string_to_int<${native_type2}>(val->ptr, val->len, &result);\n"
    "if (UNLIKELY(result != StringParser::PARSE_SUCCESS)) return NULL;\n"
    "return &e->_result.${result_field};\n"
    "}\n\n"
)
|
|
|
|
# Generated C++ for parsing a string operand with
# StringParser::string_to_float<${native_type2}>; any parse result other than
# PARSE_SUCCESS makes the function return NULL.
string_to_float = string.Template(
    "void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n"
    "Expr* op = e->children()[0];\n"
    "${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n"
    "if (val == NULL) return NULL;\n"
    "StringParser::ParseResult result;\n"
    # The assignment and the parser call form one generated line.
    "e->_result.${result_field} = "
    "StringParser::string_to_float<${native_type2}>(val->ptr, val->len, &result);\n"
    "if (UNLIKELY(result != StringParser::PARSE_SUCCESS)) return NULL;\n"
    "return &e->_result.${result_field};\n"
    "}\n\n"
)
|
|
|
|
# Generated C++ for numeric -> date: the result DateTimeValue is filled via
# from_date_int64() (NULL on failure) and then cast_to_date() is applied.
numeric_to_date = string.Template(
    "void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n"
    "Expr* op = e->children()[0];\n"
    "${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n"
    "if (val == NULL) return NULL;\n"
    "DateTimeValue *date_val = &e->_result.${result_field};\n"
    "if (!date_val->from_date_int64(*val)) {\n"
    "return NULL;\n"
    "}\n"
    "date_val->cast_to_date();\n"
    "return date_val;\n"
    "}\n\n"
)
|
|
|
|
# Generated C++ for string -> date: the result DateTimeValue is filled via
# from_date_str() (NULL on failure) and then cast_to_date() is applied.
string_to_date = string.Template(
    "void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n"
    "Expr* op = e->children()[0];\n"
    "${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n"
    "if (val == NULL) return NULL;\n"
    "DateTimeValue *date_val = &e->_result.${result_field};\n"
    "if (!date_val->from_date_str(val->ptr, val->len)) {\n"
    "return NULL;\n"
    "}\n"
    "date_val->cast_to_date();\n"
    "return date_val;\n"
    "}\n\n"
)
|
|
|
|
# Generated C++ for datetime -> date: copy the operand into the result
# DateTimeValue, then apply cast_to_date() to the copy.
datetime_to_date = string.Template(
    "void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n"
    "Expr* op = e->children()[0];\n"
    "${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n"
    "if (val == NULL) return NULL;\n"
    "DateTimeValue *date_val = &e->_result.${result_field};\n"
    "*date_val = *val;\n"
    "date_val->cast_to_date();\n"
    "return date_val;\n"
    "}\n\n"
)
|
|
|
|
# Generated C++ for numeric -> datetime: the result DateTimeValue is filled
# via from_date_int64() (NULL on failure) and then to_datetime() is applied.
numeric_to_datetime = string.Template(
    "void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n"
    "Expr* op = e->children()[0];\n"
    "${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n"
    "if (val == NULL) return NULL;\n"
    "DateTimeValue *date_val = &e->_result.${result_field};\n"
    "if (!date_val->from_date_int64(*val)) {\n"
    "return NULL;\n"
    "}\n"
    "date_val->to_datetime();\n"
    "return date_val;\n"
    "}\n\n"
)
|
|
|
|
# Generated C++ for string -> datetime: the result DateTimeValue is filled
# via from_date_str() (NULL on failure) and then to_datetime() is applied.
string_to_datetime = string.Template(
    "void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n"
    "Expr* op = e->children()[0];\n"
    "${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n"
    "if (val == NULL) return NULL;\n"
    "DateTimeValue *date_val = &e->_result.${result_field};\n"
    "if (!date_val->from_date_str(val->ptr, val->len)) {\n"
    "return NULL;\n"
    "}\n"
    "date_val->to_datetime();\n"
    "return date_val;\n"
    "}\n\n"
)
|
|
|
|
# Generated C++ for date -> datetime: copy the operand into the result
# DateTimeValue, then apply to_datetime() to the copy.
date_to_datetime = string.Template(
    "void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n"
    "Expr* op = e->children()[0];\n"
    "${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n"
    "if (val == NULL) return NULL;\n"
    "DateTimeValue *date_val = &e->_result.${result_field};\n"
    "*date_val = *val;\n"
    "date_val->to_datetime();\n"
    "return date_val;\n"
    "}\n\n"
)
|
|
|
|
# Generated C++ for date/datetime -> numeric: the result field receives the
# operand's to_int64() value.
datetime_to_numeric = string.Template(
    "void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n"
    "Expr* op = e->children()[0];\n"
    "${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n"
    "if (val == NULL) return NULL;\n"
    "e->_result.${result_field} = val->to_int64();\n"
    "return &e->_result.${result_field};\n"
    "}\n\n"
)
|
|
|
|
# Generated C++ for decimal -> string: the operand's to_string() output is
# stored through e->_result.set_string_val().
decimal_to_string = string.Template(
    "void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n"
    "Expr* op = e->children()[0];\n"
    "${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n"
    "if (val == NULL) return NULL;\n"
    "e->_result.set_string_val(val->to_string());\n"
    "return &e->_result.${result_field};\n"
    "}\n\n"
)
|
|
|
|
# Generated C++ for date/datetime -> string: the operand is formatted into a
# 64-byte stack buffer with to_string(buf), which is then stored as the result.
datetime_to_string = string.Template(
    "void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n"
    "Expr* op = e->children()[0];\n"
    "${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n"
    "if (val == NULL) return NULL;\n"
    "char buf[64];\n"
    "val->to_string(buf);\n"
    "e->_result.set_string_val(buf);\n"
    "return &e->_result.${result_field};\n"
    "}\n\n"
)
|
|
|
|
# Generated C++ for numeric -> string: the operand is converted with
# std::to_string() and stored through e->_result.set_string_val().
numeric_to_string = string.Template(
    "void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n"
    "Expr* op = e->children()[0];\n"
    "${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n"
    "if (val == NULL) return NULL;\n"
    "e->_result.set_string_val(std::to_string(*val));\n"
    "return &e->_result.${result_field};\n"
    "}\n\n"
)
|
|
|
|
# Generated C++ for largeint -> string: LargeIntValue::to_string() formats the
# operand using a 64-byte buffer and reports the length through `len`; the
# result is built from the returned pointer and that length.
largeint_to_string = string.Template(
    "void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n"
    "Expr* op = e->children()[0];\n"
    "${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n"
    "if (val == NULL) return NULL;\n"
    "char buf[64];\n"
    "int len = 64;\n"
    "char *str = LargeIntValue::to_string(*val, buf, &len);\n"
    "e->_result.set_string_val(std::string(str, len));\n"
    "return &e->_result.${result_field};\n"
    "}\n\n"
)
|
|
|
|
# Generated C++ for float -> string: my_gcvt() with MY_GCVT_ARG_FLOAT formats
# the operand into a 64-byte stack buffer, which becomes the string result.
float_to_string = string.Template(
    "void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n"
    "Expr* op = e->children()[0];\n"
    "${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n"
    "if (val == NULL) return NULL;\n"
    "char buf[64];\n"
    "my_gcvt(*val, MY_GCVT_ARG_FLOAT, 64, buf, NULL);\n"
    "e->_result.set_string_val(buf);\n"
    "return &e->_result.${result_field};\n"
    "}\n\n"
)
|
|
|
|
# Generated C++ for double -> string: my_gcvt() with MY_GCVT_ARG_DOUBLE formats
# the operand into a 64-byte stack buffer, which becomes the string result.
double_to_string = string.Template(
    "void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n"
    "Expr* op = e->children()[0];\n"
    "${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n"
    "if (val == NULL) return NULL;\n"
    "char buf[64];\n"
    "my_gcvt(*val, MY_GCVT_ARG_DOUBLE, 64, buf, NULL);\n"
    "e->_result.set_string_val(buf);\n"
    "return &e->_result.${result_field};\n"
    "}\n\n"
)
|
|
|
|
# tinyint needs its own template: boost treats it as a char and handles it
# differently (e.g. '0' is written as an empty string). The generated code
# therefore parses the string as int16_t with lexical_cast and narrows the
# value to int8_t; a bad_lexical_cast yields NULL.
string_to_tinyint = string.Template(
    "void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n"
    "Expr* op = e->children()[0];\n"
    "${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n"
    "if (val == NULL) return NULL;\n"
    "string tmp(val->ptr, val->len);\n"
    "try {\n"
    "e->_result.${result_field} = static_cast<int8_t>(lexical_cast<int16_t>(tmp));\n"
    "} catch (bad_lexical_cast &) {\n"
    "return NULL;\n"
    "}\n"
    "return &e->_result.${result_field};\n"
    "}\n\n"
)
|
|
|
|
# Generated C++ for tinyint -> string: the operand is first widened to
# int64_t and that value is converted with lexical_cast<string>.
tinyint_to_string = string.Template(
    "void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n"
    "Expr* op = e->children()[0];\n"
    "${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n"
    "if (val == NULL) return NULL;\n"
    "int64_t tmp_val = *val;\n"
    "e->_result.set_string_val(lexical_cast<string>(tmp_val));\n"
    "return &e->_result.${result_field};\n"
    "}\n\n"
)
|
|
|
|
# Generated C++ for a CASE expression with a case operand. Child 0 is the case
# value, followed by (when, then) pairs and an optional trailing else child.
# A NULL case value or no matching `when` returns the else value if there is
# one, otherwise NULL; NULL `when` values are skipped.
case = string.Template(
    "void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n"
    "CaseExpr* expr = static_cast<CaseExpr*>(e);\n"
    "int num_children = e->get_num_children();\n"
    "int loop_end = (expr->has_else_expr()) ? num_children - 1 : num_children;\n"
    "// Make sure we set the right compute function.\n"
    "DCHECK_EQ(expr->has_case_expr(), true);\n"
    "// Need at least case, when and then expr, and optionally an else.\n"
    "DCHECK_GE(num_children, (expr->has_else_expr()) ? 4 : 3);\n"
    "// All case and when exprs return the same type (we guaranteed that during analysis).\n"
    "void* case_val = e->children()[0]->get_value(row);\n"
    "if (case_val == NULL) {\n"
    "if (expr->has_else_expr()) {\n"
    "// Return else value.\n"
    "return e->children()[num_children - 1]->get_value(row);\n"
    "} else {\n"
    "return NULL;\n"
    "}\n"
    "}\n"
    "for (int i = 1; i < loop_end; i += 2) {\n"
    "${native_type1}* when_val =\n"
    "reinterpret_cast<${native_type1}*>(e->children()[i]->get_value(row));\n"
    "if (when_val == NULL) continue;\n"
    "if (*reinterpret_cast<${native_type1}*>(case_val) == *when_val) {\n"
    "// Return then value.\n"
    "return e->children()[i + 1]->get_value(row);\n"
    "}\n"
    "}\n"
    "if (expr->has_else_expr()) {\n"
    "// Return else value.\n"
    "return e->children()[num_children - 1]->get_value(row);\n"
    "}\n"
    "return NULL;\n"
    "}\n\n"
)
|
|
|
|
# One metadata row of the generated Python file: function name, return type,
# argument list, the C++ symbol name, and a trailing empty list.
python_template = string.Template(
    "['${fn_name}', '${return_type}', [${args}], "
    "'ComputeFunctions::${fn_signature}', []], \n"
)
|
|
|
|
# Default template for each operation name; used for entries of `functions`
# that do not name an explicit template themselves.
templates = {
    "Add": binary_op,
    "Subtract": binary_op,
    "Multiply": binary_op,
    "Divide": binary_op_divid,
    "Int_Divide": binary_op_divid,
    "Mod": binary_op_divid,
    "BitAnd": binary_op,
    "BitXor": binary_op,
    "BitOr": binary_op,
    "BitNot": unary_op,
    "Eq": binary_op,
    "Ne": binary_op,
    "Ge": binary_op,
    "Gt": binary_op,
    "Lt": binary_op,
    "Le": binary_op,
    "Cast": cast,
}
|
|
|
|
# Named type groups used when declaring functions. Each key expands to the
# list of concrete column types it stands for; single types map to themselves
# (except STRING, which expands to VARCHAR).
types = {
    "BOOLEAN": ["BOOLEAN"],
    "TINYINT": ["TINYINT"],
    "SMALLINT": ["SMALLINT"],
    "INT": ["INT"],
    "BIGINT": ["BIGINT"],
    "LARGEINT": ["LARGEINT"],
    "FLOAT": ["FLOAT"],
    "DOUBLE": ["DOUBLE"],
    "STRING": ["VARCHAR"],
    "DATE": ["DATE"],
    "DATETIME": ["DATETIME"],
    "DECIMALV2": ["DECIMALV2"],
    "NATIVE_INT_TYPES": ["TINYINT", "SMALLINT", "INT", "BIGINT"],
    "INT_TYPES": ["TINYINT", "SMALLINT", "INT", "BIGINT", "LARGEINT"],
    "FLOAT_TYPES": ["FLOAT", "DOUBLE"],
    "NUMERIC_TYPES": [
        "TINYINT", "SMALLINT", "INT", "BIGINT", "FLOAT", "DOUBLE",
        "LARGEINT", "DECIMALV2",
    ],
    "STRING_TYPES": ["VARCHAR"],
    "DATETIME_TYPES": ["DATE", "DATETIME"],
    "FIXED_TYPES": ["BOOLEAN", "TINYINT", "SMALLINT", "INT", "BIGINT", "LARGEINT"],
    "NATIVE_TYPES": [
        "BOOLEAN", "TINYINT", "SMALLINT", "INT", "BIGINT", "FLOAT", "DOUBLE",
    ],
    "STRCAST_FIXED_TYPES": ["BOOLEAN", "SMALLINT", "INT", "BIGINT"],
    "ALL_TYPES": [
        "BOOLEAN", "TINYINT", "SMALLINT", "INT", "BIGINT", "LARGEINT", "FLOAT",
        "DOUBLE", "VARCHAR", "DATETIME", "DECIMALV2",
    ],
    "MAX_TYPES": ["BIGINT", "LARGEINT", "DOUBLE", "DECIMALV2"],
}
|
|
|
|
# Declarations of every function to generate. Each entry has the form
#   [operation, [return type group], [[arg1 group], ..., [argN group]]]
# with an optional fourth element naming the template to use instead of the
# operation's default from `templates`. Group names are keys of `types`.
functions = [
    # Arithmetic Expr
    ["Add", ["MAX_TYPES"], [["MAX_TYPES"], ["MAX_TYPES"]]],
    ["Subtract", ["MAX_TYPES"], [["MAX_TYPES"], ["MAX_TYPES"]]],
    ["Multiply", ["MAX_TYPES"], [["MAX_TYPES"], ["MAX_TYPES"]]],
    ["Divide", ["MAX_TYPES"], [["MAX_TYPES"], ["MAX_TYPES"]]],
    ["Int_Divide", ["INT_TYPES"], [["INT_TYPES"], ["INT_TYPES"]]],
    ["Mod", ["INT_TYPES"], [["INT_TYPES"], ["INT_TYPES"]]],
    ["Mod", ["DECIMALV2"], [["DECIMALV2"], ["DECIMALV2"]]],
    ["Mod", ["DOUBLE"], [["DOUBLE"], ["DOUBLE"]], double_mod],
    ["BitAnd", ["INT_TYPES"], [["INT_TYPES"], ["INT_TYPES"]]],
    ["BitXor", ["INT_TYPES"], [["INT_TYPES"], ["INT_TYPES"]]],
    ["BitOr", ["INT_TYPES"], [["INT_TYPES"], ["INT_TYPES"]]],
    ["BitNot", ["INT_TYPES"], [["INT_TYPES"]]],

    # BinaryPredicates
    ["Eq", ["BOOLEAN"], [["NATIVE_TYPES"], ["NATIVE_TYPES"]]],
    ["Ne", ["BOOLEAN"], [["NATIVE_TYPES"], ["NATIVE_TYPES"]]],
    ["Gt", ["BOOLEAN"], [["NATIVE_TYPES"], ["NATIVE_TYPES"]]],
    ["Lt", ["BOOLEAN"], [["NATIVE_TYPES"], ["NATIVE_TYPES"]]],
    ["Ge", ["BOOLEAN"], [["NATIVE_TYPES"], ["NATIVE_TYPES"]]],
    ["Le", ["BOOLEAN"], [["NATIVE_TYPES"], ["NATIVE_TYPES"]]],
    ["Eq", ["BOOLEAN"], [["LARGEINT"], ["LARGEINT"]]],
    ["Ne", ["BOOLEAN"], [["LARGEINT"], ["LARGEINT"]]],
    ["Gt", ["BOOLEAN"], [["LARGEINT"], ["LARGEINT"]]],
    ["Lt", ["BOOLEAN"], [["LARGEINT"], ["LARGEINT"]]],
    ["Ge", ["BOOLEAN"], [["LARGEINT"], ["LARGEINT"]]],
    ["Le", ["BOOLEAN"], [["LARGEINT"], ["LARGEINT"]]],
    ["Eq", ["BOOLEAN"], [["STRING"], ["STRING"]], binary_func],
    ["Ne", ["BOOLEAN"], [["STRING"], ["STRING"]], binary_func],
    ["Gt", ["BOOLEAN"], [["STRING"], ["STRING"]], binary_func],
    ["Lt", ["BOOLEAN"], [["STRING"], ["STRING"]], binary_func],
    ["Ge", ["BOOLEAN"], [["STRING"], ["STRING"]], binary_func],
    ["Le", ["BOOLEAN"], [["STRING"], ["STRING"]], binary_func],
    ["Eq", ["BOOLEAN"], [["DATETIME"], ["DATETIME"]]],
    ["Ne", ["BOOLEAN"], [["DATETIME"], ["DATETIME"]]],
    ["Gt", ["BOOLEAN"], [["DATETIME"], ["DATETIME"]]],
    ["Lt", ["BOOLEAN"], [["DATETIME"], ["DATETIME"]]],
    ["Ge", ["BOOLEAN"], [["DATETIME"], ["DATETIME"]]],
    ["Le", ["BOOLEAN"], [["DATETIME"], ["DATETIME"]]],
    ["Eq", ["BOOLEAN"], [["DECIMALV2"], ["DECIMALV2"]]],
    ["Ne", ["BOOLEAN"], [["DECIMALV2"], ["DECIMALV2"]]],
    ["Gt", ["BOOLEAN"], [["DECIMALV2"], ["DECIMALV2"]]],
    ["Lt", ["BOOLEAN"], [["DECIMALV2"], ["DECIMALV2"]]],
    ["Ge", ["BOOLEAN"], [["DECIMALV2"], ["DECIMALV2"]]],
    ["Le", ["BOOLEAN"], [["DECIMALV2"], ["DECIMALV2"]]],

    # Casts
    ["Cast", ["BOOLEAN"], [["NATIVE_TYPES"], ["BOOLEAN"]]],
    ["Cast", ["TINYINT"], [["NATIVE_TYPES"], ["TINYINT"]]],
    ["Cast", ["SMALLINT"], [["NATIVE_TYPES"], ["SMALLINT"]]],
    ["Cast", ["INT"], [["NATIVE_TYPES"], ["INT"]]],
    ["Cast", ["BIGINT"], [["NATIVE_TYPES"], ["BIGINT"]]],
    ["Cast", ["LARGEINT"], [["NATIVE_TYPES"], ["LARGEINT"]]],
    ["Cast", ["LARGEINT"], [["DECIMALV2"], ["LARGEINT"]]],
    ["Cast", ["NATIVE_TYPES"], [["LARGEINT"], ["NATIVE_TYPES"]]],
    ["Cast", ["FLOAT"], [["NATIVE_TYPES"], ["FLOAT"]]],
    ["Cast", ["DOUBLE"], [["NATIVE_TYPES"], ["DOUBLE"]]],
    ["Cast", ["DECIMALV2"], [["FIXED_TYPES"], ["DECIMALV2"]]],
    ["Cast", ["DECIMALV2"], [["FLOAT"], ["DECIMALV2"]], float_to_decimal],
    ["Cast", ["DECIMALV2"], [["DOUBLE"], ["DECIMALV2"]], double_to_decimal],
    ["Cast", ["NATIVE_TYPES"], [["DECIMALV2"], ["NATIVE_TYPES"]]],
    ["Cast", ["NATIVE_INT_TYPES"], [["STRING"], ["NATIVE_INT_TYPES"]], string_to_int],
    ["Cast", ["LARGEINT"], [["STRING"], ["LARGEINT"]], string_to_int],
    ["Cast", ["FLOAT_TYPES"], [["STRING"], ["FLOAT_TYPES"]], string_to_float],
    ["Cast", ["STRING"], [["STRCAST_FIXED_TYPES"], ["STRING"]], numeric_to_string],
    ["Cast", ["STRING"], [["LARGEINT"], ["STRING"]], largeint_to_string],
    ["Cast", ["STRING"], [["FLOAT"], ["STRING"]], float_to_string],
    ["Cast", ["STRING"], [["DOUBLE"], ["STRING"]], double_to_string],
    ["Cast", ["STRING"], [["TINYINT"], ["STRING"]], tinyint_to_string],
    ["Cast", ["STRING"], [["DECIMALV2"], ["STRING"]], decimal_to_string],
    # Datetime cast
    ["Cast", ["DATE"], [["NUMERIC_TYPES"], ["DATE"]], numeric_to_date],
    ["Cast", ["DATETIME"], [["NUMERIC_TYPES"], ["DATETIME"]], numeric_to_datetime],
    ["Cast", ["DATE"], [["STRING_TYPES"], ["DATE"]], string_to_date],
    ["Cast", ["DATETIME"], [["STRING_TYPES"], ["DATETIME"]], string_to_datetime],
    ["Cast", ["DATE"], [["DATETIME"], ["DATE"]], datetime_to_date],
    ["Cast", ["DATETIME"], [["DATE"], ["DATETIME"]], date_to_datetime],
    ["Cast", ["NUMERIC_TYPES"], [["DATETIME"], ["NUMERIC_TYPES"]], datetime_to_numeric],
    ["Cast", ["NUMERIC_TYPES"], [["DATE"], ["NUMERIC_TYPES"]], datetime_to_numeric],
    ["Cast", ["STRING_TYPES"], [["DATE"], ["STRING_TYPES"]], datetime_to_string],
    ["Cast", ["STRING_TYPES"], [["DATETIME"], ["STRING_TYPES"]], datetime_to_string],

    # Case
    # A case expr takes a variable number of arguments, but query analysis
    # guarantees they all share one type, so exactly one argument is listed.
    # The return type given here is only a dummy: it is not necessarily the
    # same as the argument type.
    ["Case", ["ALL_TYPES"], [["ALL_TYPES"]], case],
]
|
|
|
|
# Short C-style type name per column type; initialize_sub() appends these to
# the operation name to build the generated function's signature name.
native_types = {
    "BOOLEAN": "bool",
    "TINYINT": "char",
    "SMALLINT": "short",
    "INT": "int",
    "BIGINT": "long",
    "LARGEINT": "__int128",
    "FLOAT": "float",
    "DOUBLE": "double",
    "VARCHAR": "StringValue",
    "DATE": "Date",
    "DATETIME": "DateTime",
    "TIME": "double",
    "DECIMALV2": "DecimalV2Value",
}
|
|
|
|
# Portable C++ type per column type; this is what the generated function
# bodies use for ${native_type1}/${native_type2}.
implemented_types = {
    "BOOLEAN": "bool",
    "TINYINT": "int8_t",
    "SMALLINT": "int16_t",
    "INT": "int32_t",
    "BIGINT": "int64_t",
    "LARGEINT": "__int128",
    "FLOAT": "float",
    "DOUBLE": "double",
    "VARCHAR": "StringValue",
    "DATE": "DateTimeValue",
    "DATETIME": "DateTimeValue",
    "TIME": "double",
    "DECIMALV2": "DecimalV2Value",
}
|
|
# Name of the `e->_result` member that holds a value of each column type;
# substituted for ${result_field} in the templates.
result_fields = {
    "BOOLEAN": "bool_val",
    "TINYINT": "tinyint_val",
    "SMALLINT": "smallint_val",
    "INT": "int_val",
    "BIGINT": "bigint_val",
    "LARGEINT": "large_int_val",
    "FLOAT": "float_val",
    "DOUBLE": "double_val",
    "VARCHAR": "string_val",
    "DATE": "datetime_val",
    "DATETIME": "datetime_val",
    "TIME": "double_val",
    "DECIMALV2": "decimalv2_val",
}
|
|
|
|
# C++ operator token for each upper-cased operation name; substituted for
# ${native_op} in the operator templates.
native_ops = dict([
    ("BITAND", "&"),
    ("BITNOT", "~"),
    ("BITOR", "|"),
    ("BITXOR", "^"),
    ("DIVIDE", "/"),
    ("EQ", "=="),
    ("GT", ">"),
    ("GE", ">="),
    ("INT_DIVIDE", "/"),
    ("SUBTRACT", "-"),
    ("MOD", "%"),
    ("MULTIPLY", "*"),
    ("LT", "<"),
    ("LE", "<="),
    ("NE", "!="),
    ("ADD", "+"),
])
|
|
|
|
# Member-function name for each upper-cased comparison operation; substituted
# for ${native_func} when the binary_func template is used. Every method name
# is simply the lower-cased operation name.
native_funcs = {
    method.upper(): method
    for method in ("eq", "le", "lt", "ne", "ge", "gt")
}
|
|
|
|
# Text written at the top of the generated functions.cc: banner comment,
# includes, using-directives and the opening of namespace doris.
cc_preamble = (
    '\n'
    '// This is a generated file, DO NOT EDIT.\n'
    '// To add new functions, see impala/common/function-registry/gen_opcodes.py\n'
    '\n'
    '#include "gen_cpp/opcode/functions.h"\n'
    '#include "exprs/expr.h"\n'
    '#include "exprs/case_expr.h"\n'
    '#include "runtime/string_value.hpp"\n'
    '#include "runtime/tuple_row.h"\n'
    '#include "util/mysql_dtoa.h"\n'
    '#include "util/string_parser.hpp"\n'
    '#include <boost/lexical_cast.hpp>\n'
    '\n'
    'using namespace boost;\n'
    'using namespace std;\n'
    '\n'
    'namespace doris { \n'
    '\n'
)

# Text written at the end of functions.cc: closes namespace doris.
cc_epilogue = '}\n'
|
|
|
|
# Text written at the top of the generated functions.h: include guard,
# forward declarations and the opening of class ComputeFunctions.
h_preamble = (
    '\n'
    '#ifndef DORIS_OPCODE_FUNCTIONS_H\n'
    '#define DORIS_OPCODE_FUNCTIONS_H\n'
    '\n'
    'namespace doris {\n'
    'class Expr;\n'
    'class OpcodeRegistry;\n'
    'class TupleRow;\n'
    '\n'
    'class ComputeFunctions {\n'
    'public:\n'
)

# Text written at the end of functions.h: closes the class, the namespace
# and the include guard.
h_epilogue = (
    '};\n'
    '\n'
    '}\n'
    '\n'
    '#endif\n'
)
|
|
|
|
# Text written at the top of generated_functions.py: shebang, banner comment
# and the opening of the `functions` list.
python_preamble = (
    '#!/usr/bin/env python\n'
    '\n'
    '# This is a generated file, DO NOT EDIT IT.\n'
    '# To add new functions, see impala/common/function-registry/gen_opcodes.py\n'
    '\n'
    'functions = [\n'
)

# Closes the `functions` list opened by python_preamble.
python_epilogue = ']'
|
|
|
|
# Declaration emitted into functions.h for every generated function.
header_template = string.Template(
    "static void* ${fn_signature}(Expr* e, TupleRow* row);\n"
)

# Directory (relative to the working directory) that receives functions.h
# and functions.cc.
BE_PATH = "../gen_cpp/opcode/"
|
|
|
|
def initialize_sub(op, return_type, arg_types):
    """
    Build the substitution dictionary shared by all templates in this script.

    op          -- operation name, e.g. 'Add'; also the prefix of fn_signature
    return_type -- concrete return type, a key of result_fields
    arg_types   -- list of concrete argument types, keys of native_types and
                   implemented_types

    The returned dictionary holds fn_name, fn_signature (op followed by
    '_<native type>' for every argument), return_type, result_field, args
    (each argument as "'TYPE', "), native_type1..N, and native_op when the
    upper-cased op is a key of native_ops.
    """
    sub = {
        "fn_name": op,
        "fn_signature": op,
        "return_type": return_type,
        "result_field": result_fields[return_type],
        "args": "",
    }

    op_key = op.upper()
    if op_key in native_ops:
        sub["native_op"] = native_ops[op_key]

    # Argument positions are 1-based in the template placeholders.
    for position, arg in enumerate(arg_types, 1):
        sub["fn_signature"] = "%s_%s" % (sub["fn_signature"], native_types[arg])
        sub["native_type%d" % position] = implemented_types[arg]
        sub["args"] = "%s'%s', " % (sub["args"], arg)
    return sub
|
|
|
|
if __name__ == "__main__":
    # Script entry point: expands every declaration in `functions` into its
    # concrete overloads and writes
    #   - BE_PATH/functions.h          (declarations)
    #   - BE_PATH/functions.cc         (implementations)
    #   - ./generated_functions.py     (metadata rows)
    # Exits with status 1 on a malformed declaration.

    # Create the output directory; an already existing one is fine.
    try:
        os.makedirs(BE_PATH)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    # The context managers close all three outputs even when generation
    # aborts part-way (exception or sys.exit).
    with open(BE_PATH + 'functions.h', 'w') as h_file, \
            open(BE_PATH + 'functions.cc', 'w') as cc_file, \
            open('generated_functions.py', 'w') as python_file:
        h_file.write(h_preamble)
        cc_file.write(cc_preamble)
        python_file.write(python_preamble)

        # Generate functions and headers
        for func_data in functions:
            op = func_data[0]
            # If a specific template has been specified, use that one.
            if len(func_data) >= 4:
                template = func_data[3]
            elif op in templates:
                template = templates[op]
            else:
                # Skip functions with no template (shouldn't be auto-generated)
                continue

            # Expand every type-group name into its concrete types.
            return_types = [t for ret in func_data[1] for t in types[ret]]
            signatures = [[t for arg in args for t in types[arg]]
                          for args in func_data[2]]

            # The number of overloads is the length of the longest expanded list.
            num_functions = max([0, len(return_types)] +
                                [len(args) for args in signatures])

            # Validate the input: every expanded list must either hold a single
            # type (shared by all overloads) or one type per overload.
            for expanded in [return_types] + signatures:
                if len(expanded) != 1 and len(expanded) != num_functions:
                    # func_data is a list, so it must be formatted rather than
                    # concatenated to the message.
                    print("Invalid Declaration: %s" % (func_data,))
                    sys.exit(1)

            # Iterate over every function signature to generate
            for i in range(num_functions):
                if len(return_types) == 1:
                    return_type = return_types[0]
                else:
                    return_type = return_types[i]

                # At this point 'return_type' is a single type and 'arg_types'
                # is a list of single types.
                arg_types = [sig[0] if len(sig) == 1 else sig[i]
                             for sig in signatures]

                sub = initialize_sub(op, return_type, arg_types)
                if template == binary_func:
                    sub["native_func"] = native_funcs[op.upper()]

                h_file.write(header_template.substitute(sub))
                cc_file.write(template.substitute(sub))
                python_file.write(python_template.substitute(sub))

        h_file.write(h_epilogue)
        cc_file.write(cc_epilogue)
        python_file.write(python_epilogue)
|