#!/usr/bin/env python # encoding: utf-8 # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. # This file is copied from # https://github.com/cloudera/Impala/blob/v0.7refresh/common/function-registry/gen_functions.py # and modified by Doris """ # This script will generate the implementation of the simple functions for the BE. # These include: # - Arithmetic functions # - Binary functions # - Cast functions # # The script outputs (run: 'src/common/function/gen_functions.py') # - header and implemention for above functions: # - src/gen_cpp/opcode/functions.[h/cc] # - python file that contains the metadata for those functions: # - src/gen_cpp/generated_functions.py """ import string import os import errno unary_op = string.Template("\ void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\ Expr* op = e->children()[0];\n\ ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\ if (val == NULL) return NULL;\n\ e->_result.${result_field} = ${native_op} *val;\n\ return &e->_result.${result_field};\n\ }\n\n") binary_op_divid = string.Template("\ void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\ Expr* op1 = e->children()[0];\n\ ${native_type1}* val1 = reinterpret_cast<${native_type1}*>(op1->get_value(row));\n\ Expr* op2 = e->children()[1];\n\ ${native_type2}* val2 = reinterpret_cast<${native_type2}*>(op2->get_value(row));\n\ if (val1 == NULL || val2 == NULL) return NULL;\n\ double value= *val2;\n\ if (value == 0) return NULL;\n\ e->_result.${result_field} = (*val1 ${native_op} *val2);\n\ return &e->_result.${result_field};\n\ }\n\n") binary_op = string.Template("\ void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\ Expr* op1 = e->children()[0];\n\ ${native_type1}* val1 = reinterpret_cast<${native_type1}*>(op1->get_value(row));\n\ Expr* op2 = e->children()[1];\n\ ${native_type2}* val2 = reinterpret_cast<${native_type2}*>(op2->get_value(row));\n\ if (val1 == NULL || val2 == NULL) return NULL;\n\ e->_result.${result_field} = (*val1 ${native_op} *val2);\n\ return &e->_result.${result_field};\n\ }\n\n") double_mod = string.Template("\ void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\ Expr* op1 = e->children()[0];\n\ ${native_type1}* val1 = reinterpret_cast<${native_type1}*>(op1->get_value(row));\n\ Expr* op2 = e->children()[1];\n\ ${native_type2}* val2 = reinterpret_cast<${native_type2}*>(op2->get_value(row));\n\ if (val1 == NULL || val2 == NULL) return NULL;\n\ double value= *val2;\n\ if (value == 0) return NULL;\n\ e->_result.${result_field} = fmod(*val1, *val2);\n\ return &e->_result.${result_field};\n\ }\n\n") binary_func = string.Template("\ void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\ Expr* op1 = e->children()[0];\n\ ${native_type1}* val1 = reinterpret_cast<${native_type1}*>(op1->get_value(row));\n\ Expr* op2 = e->children()[1];\n\ ${native_type2}* val2 = reinterpret_cast<${native_type2}*>(op2->get_value(row));\n\ if (val1 == NULL || val2 == NULL) return NULL;\n\ e->_result.${result_field} = val1->${native_func}(*val2);\n\ return &e->_result.${result_field};\n\ }\n\n") float_to_decimal = string.Template("\ void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\ Expr* op = e->children()[0];\n\ ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\ if (val == NULL) return NULL;\n\ e->_result.${result_field}.assign_from_float(*val);;\n\ return &e->_result.${result_field};\n\ }\n\n") double_to_decimal = string.Template("\ void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\ Expr* op = e->children()[0];\n\ ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\ if (val == NULL) return NULL;\n\ e->_result.${result_field}.assign_from_double(*val);;\n\ return &e->_result.${result_field};\n\ }\n\n") cast = string.Template("\ void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\ Expr* op = e->children()[0];\n\ ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\ if (val == NULL) return NULL;\n\ e->_result.${result_field} = *val;\n\ return &e->_result.${result_field};\n\ }\n\n") string_to_int = string.Template("\ void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\ Expr* op = e->children()[0];\n\ ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\ if (val == NULL) return NULL;\n\ StringParser::ParseResult result;\n\ e->_result.${result_field} = \ StringParser::string_to_int<${native_type2}>(val->ptr, val->len, &result);\n\ if (UNLIKELY(result != StringParser::PARSE_SUCCESS)) return NULL;\n\ return &e->_result.${result_field};\n\ }\n\n") string_to_float = string.Template("\ void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\ Expr* op = e->children()[0];\n\ ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\ if (val == NULL) return NULL;\n\ StringParser::ParseResult result;\n\ e->_result.${result_field} = \ StringParser::string_to_float<${native_type2}>(val->ptr, val->len, &result);\n\ if (UNLIKELY(result != StringParser::PARSE_SUCCESS)) return NULL;\n\ return &e->_result.${result_field};\n\ }\n\n") numeric_to_date = string.Template("\ void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\ Expr* op = e->children()[0];\n\ ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\ if (val == NULL) return NULL;\n\ DateTimeValue *date_val = &e->_result.${result_field};\n\ if (!date_val->from_date_int64(*val)) {\n\ return NULL;\n\ }\n\ date_val->cast_to_date();\n\ return date_val;\n\ }\n\n") string_to_date = string.Template("\ void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\ Expr* op = e->children()[0];\n\ ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\ if (val == NULL) return NULL;\n\ DateTimeValue *date_val = &e->_result.${result_field};\n\ if (!date_val->from_date_str(val->ptr, val->len)) {\n\ return NULL;\n\ }\n\ date_val->cast_to_date();\n\ return date_val;\n\ }\n\n") datetime_to_date = string.Template("\ void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\ Expr* op = e->children()[0];\n\ ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\ if (val == NULL) return NULL;\n\ DateTimeValue *date_val = &e->_result.${result_field};\n\ *date_val = *val;\n\ date_val->cast_to_date();\n\ return date_val;\n\ }\n\n") numeric_to_datetime = string.Template("\ void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\ Expr* op = e->children()[0];\n\ ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\ if (val == NULL) return NULL;\n\ DateTimeValue *date_val = &e->_result.${result_field};\n\ if (!date_val->from_date_int64(*val)) {\n\ return NULL;\n\ }\n\ date_val->to_datetime();\n\ return date_val;\n\ }\n\n") string_to_datetime = string.Template("\ void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\ Expr* op = e->children()[0];\n\ ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\ if (val == NULL) return NULL;\n\ DateTimeValue *date_val = &e->_result.${result_field};\n\ if (!date_val->from_date_str(val->ptr, val->len)) {\n\ return NULL;\n\ }\n\ date_val->to_datetime();\n\ return date_val;\n\ }\n\n") date_to_datetime = string.Template("\ void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\ Expr* op = e->children()[0];\n\ ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\ if (val == NULL) return NULL;\n\ DateTimeValue *date_val = &e->_result.${result_field};\n\ *date_val = *val;\n\ date_val->to_datetime();\n\ return date_val;\n\ }\n\n") datetime_to_numeric = string.Template("\ void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\ Expr* op = e->children()[0];\n\ ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\ if (val == NULL) return NULL;\n\ e->_result.${result_field} = val->to_int64();\n\ return &e->_result.${result_field};\n\ }\n\n") decimal_to_string = string.Template("\ void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\ Expr* op = e->children()[0];\n\ ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\ if (val == NULL) return NULL;\n\ e->_result.set_string_val(val->to_string());\n\ return &e->_result.${result_field};\n\ }\n\n") datetime_to_string = string.Template("\ void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\ Expr* op = e->children()[0];\n\ ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\ if (val == NULL) return NULL;\n\ char buf[64];\n\ val->to_string(buf);\n\ e->_result.set_string_val(buf);\n\ return &e->_result.${result_field};\n\ }\n\n") numeric_to_string = string.Template("\ void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\ Expr* op = e->children()[0];\n\ ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\ if (val == NULL) return NULL;\n\ e->_result.set_string_val(std::to_string(*val));\n\ return &e->_result.${result_field};\n\ }\n\n") largeint_to_string = string.Template("\ void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\ Expr* op = e->children()[0];\n\ ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\ if (val == NULL) return NULL;\n\ char buf[64];\n\ int len = 64;\n\ char *str = LargeIntValue::to_string(*val, buf, &len);\n\ e->_result.set_string_val(std::string(str, len));\n\ return &e->_result.${result_field};\n\ }\n\n") float_to_string = string.Template("\ void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\ Expr* op = e->children()[0];\n\ ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\ if (val == NULL) return NULL;\n\ char buf[64];\n\ my_gcvt(*val, MY_GCVT_ARG_FLOAT, 64, buf, NULL);\n\ e->_result.set_string_val(buf);\n\ return &e->_result.${result_field};\n\ }\n\n") double_to_string = string.Template("\ void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\ Expr* op = e->children()[0];\n\ ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\ if (val == NULL) return NULL;\n\ char buf[64];\n\ my_gcvt(*val, MY_GCVT_ARG_DOUBLE, 64, buf, NULL);\n\ e->_result.set_string_val(buf);\n\ return &e->_result.${result_field};\n\ }\n\n") # Need to special case tinyint. boost thinks it is a char and handles it differently. # e.g. '0' is written as an empty string. string_to_tinyint = string.Template("\ void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\ Expr* op = e->children()[0];\n\ ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\ if (val == NULL) return NULL;\n\ string tmp(val->ptr, val->len);\n\ try {\n\ e->_result.${result_field} = static_cast(lexical_cast(tmp));\n\ } catch (bad_lexical_cast &) {\n\ return NULL;\n\ }\n\ return &e->_result.${result_field};\n\ }\n\n") tinyint_to_string = string.Template("\ void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\ Expr* op = e->children()[0];\n\ ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\ if (val == NULL) return NULL;\n\ int64_t tmp_val = *val;\n\ e->_result.set_string_val(lexical_cast(tmp_val));\n\ return &e->_result.${result_field};\n\ }\n\n") case = string.Template("\ void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\ CaseExpr* expr = static_cast(e);\n\ int num_children = e->get_num_children();\n\ int loop_end = (expr->has_else_expr()) ? num_children - 1 : num_children;\n\ // Make sure we set the right compute function.\n\ DCHECK_EQ(expr->has_case_expr(), true);\n\ // Need at least case, when and then expr, and optionally an else.\n\ DCHECK_GE(num_children, (expr->has_else_expr()) ? 4 : 3);\n\ // All case and when exprs return the same type (we guaranteed that during analysis).\n\ void* case_val = e->children()[0]->get_value(row);\n\ if (case_val == NULL) {\n\ if (expr->has_else_expr()) {\n\ // Return else value.\n\ return e->children()[num_children - 1]->get_value(row);\n\ } else {\n\ return NULL;\n\ }\n\ }\n\ for (int i = 1; i < loop_end; i += 2) {\n\ ${native_type1}* when_val =\n\ reinterpret_cast<${native_type1}*>(e->children()[i]->get_value(row));\n\ if (when_val == NULL) continue;\n\ if (*reinterpret_cast<${native_type1}*>(case_val) == *when_val) {\n\ // Return then value.\n\ return e->children()[i + 1]->get_value(row);\n\ }\n\ }\n\ if (expr->has_else_expr()) {\n\ // Return else value.\n\ return e->children()[num_children - 1]->get_value(row);\n\ }\n\ return NULL;\n\ }\n\n") python_template = string.Template("\ ['${fn_name}', '${return_type}', [${args}], 'ComputeFunctions::${fn_signature}', []], \n") # Mapping of function to template templates = { 'Add': binary_op, 'Subtract': binary_op, 'Multiply': binary_op, 'Divide': binary_op_divid, 'Int_Divide': binary_op_divid, 'Mod': binary_op_divid, 'BitAnd': binary_op, 'BitXor': binary_op, 'BitOr': binary_op, 'BitNot': unary_op, 'Eq': binary_op, 'Ne': binary_op, 'Ge': binary_op, 'Gt': binary_op, 'Lt': binary_op, 'Le': binary_op, 'Cast': cast, } # Some aggregate types that are useful for defining functions types = { 'BOOLEAN': ['BOOLEAN'], 'TINYINT': ['TINYINT'], 'SMALLINT': ['SMALLINT'], 'INT': ['INT'], 'BIGINT': ['BIGINT'], 'LARGEINT': ['LARGEINT'], 'FLOAT': ['FLOAT'], 'DOUBLE': ['DOUBLE'], 'STRING': ['VARCHAR'], 'DATE': ['DATE'], 'DATETIME': ['DATETIME'], 'DECIMALV2': ['DECIMALV2'], 'NATIVE_INT_TYPES': ['TINYINT', 'SMALLINT', 'INT', 'BIGINT'], 'INT_TYPES': ['TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'LARGEINT'], 'FLOAT_TYPES': ['FLOAT', 'DOUBLE'], 'NUMERIC_TYPES': ['TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE', \ 'LARGEINT', 'DECIMALV2'], 'STRING_TYPES': ['VARCHAR'], 'DATETIME_TYPES': ['DATE', 'DATETIME'], 'FIXED_TYPES': ['BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'LARGEINT'], 'NATIVE_TYPES': ['BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE'], 'STRCAST_FIXED_TYPES': ['BOOLEAN', 'SMALLINT', 'INT', 'BIGINT'], 'ALL_TYPES': ['BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'LARGEINT', 'FLOAT',\ 'DOUBLE', 'VARCHAR', 'DATETIME', 'DECIMALV2'], 'MAX_TYPES': ['BIGINT', 'LARGEINT', 'DOUBLE', 'DECIMALV2'], } # Operation, [ReturnType], [[Args1], [Args2], ... [ArgsN]] functions = [ # Arithmetic Expr ['Add', ['MAX_TYPES'], [['MAX_TYPES'], ['MAX_TYPES']]], ['Subtract', ['MAX_TYPES'], [['MAX_TYPES'], ['MAX_TYPES']]], ['Multiply', ['MAX_TYPES'], [['MAX_TYPES'], ['MAX_TYPES']]], ['Divide', ['MAX_TYPES'], [['MAX_TYPES'], ['MAX_TYPES']]], ['Int_Divide', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']]], ['Mod', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']]], ['Mod', ['DECIMALV2'], [['DECIMALV2'], ['DECIMALV2']]], ['Mod', ['DOUBLE'], [['DOUBLE'], ['DOUBLE']], double_mod], ['BitAnd', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']]], ['BitXor', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']]], ['BitOr', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']]], ['BitNot', ['INT_TYPES'], [['INT_TYPES']]], # BinaryPredicates ['Eq', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']]], ['Ne', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']]], ['Gt', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']]], ['Lt', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']]], ['Ge', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']]], ['Le', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']]], ['Eq', ['BOOLEAN'], [['LARGEINT'], ['LARGEINT']],], ['Ne', ['BOOLEAN'], [['LARGEINT'], ['LARGEINT']],], ['Gt', ['BOOLEAN'], [['LARGEINT'], ['LARGEINT']],], ['Lt', ['BOOLEAN'], [['LARGEINT'], ['LARGEINT']],], ['Ge', ['BOOLEAN'], [['LARGEINT'], ['LARGEINT']],], ['Le', ['BOOLEAN'], [['LARGEINT'], ['LARGEINT']],], ['Eq', ['BOOLEAN'], [['STRING'], ['STRING']], binary_func], ['Ne', ['BOOLEAN'], [['STRING'], ['STRING']], binary_func], ['Gt', ['BOOLEAN'], [['STRING'], ['STRING']], binary_func], ['Lt', ['BOOLEAN'], [['STRING'], ['STRING']], binary_func], ['Ge', ['BOOLEAN'], [['STRING'], ['STRING']], binary_func], ['Le', ['BOOLEAN'], [['STRING'], ['STRING']], binary_func], ['Eq', ['BOOLEAN'], [['DATETIME'], ['DATETIME']],], ['Ne', ['BOOLEAN'], [['DATETIME'], ['DATETIME']],], ['Gt', ['BOOLEAN'], [['DATETIME'], ['DATETIME']],], ['Lt', ['BOOLEAN'], [['DATETIME'], ['DATETIME']],], ['Ge', ['BOOLEAN'], [['DATETIME'], ['DATETIME']],], ['Le', ['BOOLEAN'], [['DATETIME'], ['DATETIME']],], ['Eq', ['BOOLEAN'], [['DECIMALV2'], ['DECIMALV2']],], ['Ne', ['BOOLEAN'], [['DECIMALV2'], ['DECIMALV2']],], ['Gt', ['BOOLEAN'], [['DECIMALV2'], ['DECIMALV2']],], ['Lt', ['BOOLEAN'], [['DECIMALV2'], ['DECIMALV2']],], ['Ge', ['BOOLEAN'], [['DECIMALV2'], ['DECIMALV2']],], ['Le', ['BOOLEAN'], [['DECIMALV2'], ['DECIMALV2']],], # Casts ['Cast', ['BOOLEAN'], [['NATIVE_TYPES'], ['BOOLEAN']]], ['Cast', ['TINYINT'], [['NATIVE_TYPES'], ['TINYINT']]], ['Cast', ['SMALLINT'], [['NATIVE_TYPES'], ['SMALLINT']]], ['Cast', ['INT'], [['NATIVE_TYPES'], ['INT']]], ['Cast', ['BIGINT'], [['NATIVE_TYPES'], ['BIGINT']]], ['Cast', ['LARGEINT'], [['NATIVE_TYPES'], ['LARGEINT']]], ['Cast', ['LARGEINT'], [['DECIMALV2'], ['LARGEINT']]], ['Cast', ['NATIVE_TYPES'], [['LARGEINT'], ['NATIVE_TYPES']]], ['Cast', ['FLOAT'], [['NATIVE_TYPES'], ['FLOAT']]], ['Cast', ['DOUBLE'], [['NATIVE_TYPES'], ['DOUBLE']]], ['Cast', ['DECIMALV2'], [['FIXED_TYPES'], ['DECIMALV2']]], ['Cast', ['DECIMALV2'], [['FLOAT'], ['DECIMALV2']], float_to_decimal], ['Cast', ['DECIMALV2'], [['DOUBLE'], ['DECIMALV2']], double_to_decimal], ['Cast', ['NATIVE_TYPES'], [['DECIMALV2'], ['NATIVE_TYPES']]], ['Cast', ['NATIVE_INT_TYPES'], [['STRING'], ['NATIVE_INT_TYPES']], string_to_int], ['Cast', ['LARGEINT'], [['STRING'], ['LARGEINT']], string_to_int], ['Cast', ['FLOAT_TYPES'], [['STRING'], ['FLOAT_TYPES']], string_to_float], ['Cast', ['STRING'], [['STRCAST_FIXED_TYPES'], ['STRING']], numeric_to_string], ['Cast', ['STRING'], [['LARGEINT'], ['STRING']], largeint_to_string], ['Cast', ['STRING'], [['FLOAT'], ['STRING']], float_to_string], ['Cast', ['STRING'], [['DOUBLE'], ['STRING']], double_to_string], ['Cast', ['STRING'], [['TINYINT'], ['STRING']], tinyint_to_string], ['Cast', ['STRING'], [['DECIMALV2'], ['STRING']], decimal_to_string], # Datetime cast ['Cast', ['DATE'], [['NUMERIC_TYPES'], ['DATE']], numeric_to_date], ['Cast', ['DATETIME'], [['NUMERIC_TYPES'], ['DATETIME']], numeric_to_datetime], ['Cast', ['DATE'], [['STRING_TYPES'], ['DATE']], string_to_date], ['Cast', ['DATETIME'], [['STRING_TYPES'], ['DATETIME']], string_to_datetime], ['Cast', ['DATE'], [['DATETIME'], ['DATE']], datetime_to_date], ['Cast', ['DATETIME'], [['DATE'], ['DATETIME']], date_to_datetime], ['Cast', ['NUMERIC_TYPES'], [['DATETIME'], ['NUMERIC_TYPES']], datetime_to_numeric], ['Cast', ['NUMERIC_TYPES'], [['DATE'], ['NUMERIC_TYPES']], datetime_to_numeric], ['Cast', ['STRING_TYPES'], [['DATE'], ['STRING_TYPES']], datetime_to_string], ['Cast', ['STRING_TYPES'], [['DATETIME'], ['STRING_TYPES']], datetime_to_string], # Case # The case expr is special because it has a variable number of function args, # but we guarantee that all of them are of the same type during query analysis, # so we just list exactly one here. # In addition, the return type given here is a dummy, because it is # not necessarily the same as the function args type. ['Case', ['ALL_TYPES'], [['ALL_TYPES']], case], ] native_types = { 'BOOLEAN': 'bool', 'TINYINT': 'char', 'SMALLINT': 'short', 'INT': 'int', 'BIGINT': 'long', 'LARGEINT': '__int128', 'FLOAT': 'float', 'DOUBLE': 'double', 'VARCHAR': 'StringValue', 'DATE': 'Date', 'DATETIME': 'DateTime', 'TIME': 'double', 'DECIMALV2': 'DecimalV2Value', } # Portable type used in the function implementation implemented_types = { 'BOOLEAN': 'bool', 'TINYINT': 'int8_t', 'SMALLINT': 'int16_t', 'INT': 'int32_t', 'BIGINT': 'int64_t', 'LARGEINT': '__int128', 'FLOAT': 'float', 'DOUBLE': 'double', 'VARCHAR': 'StringValue', 'DATE': 'DateTimeValue', 'DATETIME': 'DateTimeValue', 'TIME': 'double', 'DECIMALV2': 'DecimalV2Value', } result_fields = { 'BOOLEAN': 'bool_val', 'TINYINT': 'tinyint_val', 'SMALLINT': 'smallint_val', 'INT': 'int_val', 'BIGINT': 'bigint_val', 'LARGEINT': 'large_int_val', 'FLOAT': 'float_val', 'DOUBLE': 'double_val', 'VARCHAR': 'string_val', 'DATE': 'datetime_val', 'DATETIME': 'datetime_val', 'TIME': 'double_val', 'DECIMALV2': 'decimalv2_val', } native_ops = { 'BITAND': '&', 'BITNOT': '~', 'BITOR': '|', 'BITXOR': '^', 'DIVIDE': '/', 'EQ': '==', 'GT': '>', 'GE': '>=', 'INT_DIVIDE': '/', 'SUBTRACT': '-', 'MOD': '%', 'MULTIPLY': '*', 'LT': '<', 'LE': '<=', 'NE': '!=', 'ADD': '+', } native_funcs = { 'EQ': 'eq', 'LE': 'le', 'LT': 'lt', 'NE': 'ne', 'GE': 'ge', 'GT': 'gt', } cc_preamble = '\ \n\ // This is a generated file, DO NOT EDIT.\n\ // To add new functions, see impala/common/function-registry/gen_opcodes.py\n\ \n\ #include "gen_cpp/opcode/functions.h"\n\ #include "exprs/expr.h"\n\ #include "exprs/case_expr.h"\n\ #include "vec/common/string_tmp.h"\n\ #include "runtime/tuple_row.h"\n\ #include "util/mysql_dtoa.h"\n\ #include "util/string_parser.hpp"\n\ #include \n\ \n\ using namespace boost;\n\ using namespace std;\n\ \n\ namespace doris { \n\ \n' cc_epilogue = '\ }\n' h_preamble = '\ \n\ #ifndef DORIS_OPCODE_FUNCTIONS_H\n\ #define DORIS_OPCODE_FUNCTIONS_H\n\ \n\ namespace doris {\n\ class Expr;\n\ class OpcodeRegistry;\n\ class TupleRow;\n\ \n\ class ComputeFunctions {\n\ public:\n' h_epilogue = '\ };\n\ \n\ }\n\ \n\ #endif\n' python_preamble = '\ #!/usr/bin/env python\n\ \n\ # This is a generated file, DO NOT EDIT IT.\n\ # To add new functions, see impala/common/function-registry/gen_opcodes.py\n\ \n\ functions = [\n' python_epilogue = ']' header_template = string.Template("\ static void* ${fn_signature}(Expr* e, TupleRow* row);\n") BE_PATH = "../gen_cpp/opcode/" def initialize_sub(op, return_type, arg_types): """ Expand the signature data for template substitution. Returns a dictionary with all the entries for all the templates used in this script """ sub = {} sub["fn_name"] = op sub["fn_signature"] = op sub["return_type"] = return_type sub["result_field"] = result_fields[return_type] sub["args"] = "" if op.upper() in native_ops: sub["native_op"] = native_ops[op.upper()] for idx in range(0, len(arg_types)): arg = arg_types[idx] sub["fn_signature"] += "_" + native_types[arg] sub["native_type" + repr(idx + 1)] = implemented_types[arg] sub["args"] += "'" + arg + "', " return sub if __name__ == "__main__": try: os.makedirs(BE_PATH) except OSError as e: if e.errno == errno.EEXIST: pass else: raise h_file = open(BE_PATH + 'functions.h', 'w') cc_file = open(BE_PATH + 'functions.cc', 'w') python_file = open('generated_functions.py', 'w') h_file.write(h_preamble) cc_file.write(cc_preamble) python_file.write(python_preamble) # Generate functions and headers for func_data in functions: op = func_data[0] # If a specific template has been specified, use that one. if len(func_data) >= 4: template = func_data[3] else: # Skip functions with no template (shouldn't be auto-generated) if not op in templates: continue template = templates[op] # Expand all arguments return_types = [] for ret in func_data[1]: for t in types[ret]: return_types.append(t) signatures = [] for args in func_data[2]: expanded_arg = [] for arg in args: for t in types[arg]: expanded_arg.append(t) signatures.append(expanded_arg) # Put arguments into substitution structure num_functions = 0 for args in signatures: num_functions = max(num_functions, len(args)) num_functions = max(num_functions, len(return_types)) num_args = len(signatures) # Validate the input is correct if len(return_types) != 1 and len(return_types) != num_functions: print("Invalid Declaration: " + func_data) sys.exit(1) for args in signatures: if len(args) != 1 and len(args) != num_functions: print("Invalid Declaration: " + func_data) sys.exit(1) # Iterate over every function signature to generate for i in range(0, num_functions): if len(return_types) == 1: return_type = return_types[0] else: return_type = return_types[i] arg_types = [] for j in range(0, num_args): if len(signatures[j]) == 1: arg_types.append(signatures[j][0]) else: arg_types.append(signatures[j][i]) # At this point, 'return_type' is a single type and 'arg_types' # is a list of single types sub = initialize_sub(op, return_type, arg_types) if template == binary_func: sub["native_func"] = native_funcs[op.upper()] h_file.write(header_template.substitute(sub)) cc_file.write(template.substitute(sub)) python_file.write(python_template.substitute(sub)) h_file.write(h_epilogue) cc_file.write(cc_epilogue) python_file.write(python_epilogue) h_file.close() cc_file.close() python_file.close()