doris/gensrc/script/gen_functions.py

#!/usr/bin/env python
# encoding: utf-8

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
# This file is copied from
# https://github.com/cloudera/Impala/blob/v0.7refresh/common/function-registry/gen_functions.py
# and modified by Doris

"""
# This script will generate the implementation of the simple functions for the BE.
# These include:
#   - Arithmetic functions
#   - Binary functions
#   - Cast functions
#
# The script outputs (run: 'src/common/function/gen_functions.py')
#   - header and implementation for above functions:
#     - src/gen_cpp/opcode/functions.[h/cc]
#   - python file that contains the metadata for those functions:
#     - src/gen_cpp/generated_functions.py
"""

import errno
import os
import string
import sys

# C++ body for a unary operator.  The generated function evaluates child 0,
# returns NULL when that value is NULL, and otherwise stores
# `${native_op} *val` in the result field and returns the field's address.
unary_op = string.Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
  Expr* op = e->children()[0];\n\
  ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
  if (val == NULL) return NULL;\n\
  e->_result.${result_field} = ${native_op} *val;\n\
  return &e->_result.${result_field};\n\
}\n\n")


# C++ body for division-like binary operators.  Identical to binary_op below,
# except that the right operand is first converted to a double and the
# generated function returns NULL when that value is 0.
binary_op_divid = string.Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
  Expr* op1 = e->children()[0];\n\
  ${native_type1}* val1 = reinterpret_cast<${native_type1}*>(op1->get_value(row));\n\
  Expr* op2 = e->children()[1];\n\
  ${native_type2}* val2 = reinterpret_cast<${native_type2}*>(op2->get_value(row));\n\
  if (val1 == NULL || val2 == NULL) return NULL;\n\
  double value= *val2;\n\
  if (value == 0) return NULL;\n\
  e->_result.${result_field} = (*val1 ${native_op} *val2);\n\
  return &e->_result.${result_field};\n\
}\n\n")

# C++ body for a binary operator.  The generated function evaluates children 0
# and 1, returns NULL when either value is NULL, and otherwise stores
# `*val1 ${native_op} *val2` in the result field and returns its address.
binary_op = string.Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
  Expr* op1 = e->children()[0];\n\
  ${native_type1}* val1 = reinterpret_cast<${native_type1}*>(op1->get_value(row));\n\
  Expr* op2 = e->children()[1];\n\
  ${native_type2}* val2 = reinterpret_cast<${native_type2}*>(op2->get_value(row));\n\
  if (val1 == NULL || val2 == NULL) return NULL;\n\
  e->_result.${result_field} = (*val1 ${native_op} *val2);\n\
  return &e->_result.${result_field};\n\
}\n\n")

# C++ body for the modulo of two operands computed with fmod() instead of a
# native operator.  Like binary_op_divid, it returns NULL when the right
# operand is 0.
double_mod = string.Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
  Expr* op1 = e->children()[0];\n\
  ${native_type1}* val1 = reinterpret_cast<${native_type1}*>(op1->get_value(row));\n\
  Expr* op2 = e->children()[1];\n\
  ${native_type2}* val2 = reinterpret_cast<${native_type2}*>(op2->get_value(row));\n\
  if (val1 == NULL || val2 == NULL) return NULL;\n\
  double value= *val2;\n\
  if (value == 0) return NULL;\n\
  e->_result.${result_field} = fmod(*val1, *val2);\n\
  return &e->_result.${result_field};\n\
}\n\n")

# C++ body for a binary operation implemented as a method call on the left
# operand: the result is `val1->${native_func}(*val2)`.  Returns NULL when
# either operand is NULL.
binary_func = string.Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
  Expr* op1 = e->children()[0];\n\
  ${native_type1}* val1 = reinterpret_cast<${native_type1}*>(op1->get_value(row));\n\
  Expr* op2 = e->children()[1];\n\
  ${native_type2}* val2 = reinterpret_cast<${native_type2}*>(op2->get_value(row));\n\
  if (val1 == NULL || val2 == NULL) return NULL;\n\
  e->_result.${result_field} = val1->${native_func}(*val2);\n\
  return &e->_result.${result_field};\n\
}\n\n")

# C++ body for a cast that fills the result field in place by calling its
# assign_from_float() method with the (non-NULL) child value.
float_to_decimal = string.Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
  Expr* op = e->children()[0];\n\
  ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
  if (val == NULL) return NULL;\n\
  e->_result.${result_field}.assign_from_float(*val);;\n\
  return &e->_result.${result_field};\n\
}\n\n")

# Same as float_to_decimal, but the result field is filled through its
# assign_from_double() method.
double_to_decimal = string.Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
  Expr* op = e->children()[0];\n\
  ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
  if (val == NULL) return NULL;\n\
  e->_result.${result_field}.assign_from_double(*val);;\n\
  return &e->_result.${result_field};\n\
}\n\n")

# C++ body for casts expressed as a plain assignment of the child value to the
# result field; any conversion is left to the C++ assignment itself.  Returns
# NULL when the child value is NULL.
cast = string.Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
  Expr* op = e->children()[0];\n\
  ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
  if (val == NULL) return NULL;\n\
  e->_result.${result_field} = *val;\n\
  return &e->_result.${result_field};\n\
}\n\n")

# C++ body for string -> integer casts.  The child value's (ptr, len) pair is
# parsed with StringParser::string_to_int<${native_type2}>; the generated
# function returns NULL when the child is NULL or when the parse result is not
# PARSE_SUCCESS.  ${native_type2} comes from the second argument type of the
# declaration.
string_to_int = string.Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
  Expr* op = e->children()[0];\n\
  ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
  if (val == NULL) return NULL;\n\
  StringParser::ParseResult result;\n\
  e->_result.${result_field} = \
      StringParser::string_to_int<${native_type2}>(val->ptr, val->len, &result);\n\
  if (UNLIKELY(result != StringParser::PARSE_SUCCESS)) return NULL;\n\
  return &e->_result.${result_field};\n\
}\n\n")

# C++ body for string -> floating point casts.  Same shape as string_to_int,
# but parsing goes through StringParser::string_to_float<${native_type2}>.
string_to_float = string.Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
  Expr* op = e->children()[0];\n\
  ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
  if (val == NULL) return NULL;\n\
  StringParser::ParseResult result;\n\
  e->_result.${result_field} = \
      StringParser::string_to_float<${native_type2}>(val->ptr, val->len, &result);\n\
  if (UNLIKELY(result != StringParser::PARSE_SUCCESS)) return NULL;\n\
  return &e->_result.${result_field};\n\
}\n\n")

# C++ body for numeric -> DATE casts.  The result field is populated with
# from_date_int64(*val); when that call reports failure the generated function
# returns NULL, otherwise the value is narrowed with cast_to_date().
numeric_to_date = string.Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
  Expr* op = e->children()[0];\n\
  ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
  if (val == NULL) return NULL;\n\
  DateTimeValue *date_val = &e->_result.${result_field};\n\
  if (!date_val->from_date_int64(*val)) {\n\
    return NULL;\n\
  }\n\
  date_val->cast_to_date();\n\
  return date_val;\n\
}\n\n")

# C++ body for string -> DATE casts.  Same as numeric_to_date, but the value
# is parsed from the string's (ptr, len) with from_date_str().
string_to_date = string.Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
  Expr* op = e->children()[0];\n\
  ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
  if (val == NULL) return NULL;\n\
  DateTimeValue *date_val = &e->_result.${result_field};\n\
  if (!date_val->from_date_str(val->ptr, val->len)) {\n\
    return NULL;\n\
  }\n\
  date_val->cast_to_date();\n\
  return date_val;\n\
}\n\n")

# C++ body for DATETIME -> DATE casts: copies the child value into the result
# field and then calls cast_to_date() on the copy.
datetime_to_date = string.Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
  Expr* op = e->children()[0];\n\
  ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
  if (val == NULL) return NULL;\n\
  DateTimeValue *date_val = &e->_result.${result_field};\n\
  *date_val = *val;\n\
  date_val->cast_to_date();\n\
  return date_val;\n\
}\n\n")

# C++ body for numeric -> DATETIME casts.  Mirrors numeric_to_date, except
# that the parsed value is finished with to_datetime().
numeric_to_datetime = string.Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
  Expr* op = e->children()[0];\n\
  ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
  if (val == NULL) return NULL;\n\
  DateTimeValue *date_val = &e->_result.${result_field};\n\
  if (!date_val->from_date_int64(*val)) {\n\
    return NULL;\n\
  }\n\
  date_val->to_datetime();\n\
  return date_val;\n\
}\n\n")

# C++ body for string -> DATETIME casts.  Mirrors string_to_date, except that
# the parsed value is finished with to_datetime().
string_to_datetime = string.Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
  Expr* op = e->children()[0];\n\
  ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
  if (val == NULL) return NULL;\n\
  DateTimeValue *date_val = &e->_result.${result_field};\n\
  if (!date_val->from_date_str(val->ptr, val->len)) {\n\
    return NULL;\n\
  }\n\
  date_val->to_datetime();\n\
  return date_val;\n\
}\n\n")

# C++ body for DATE -> DATETIME casts: copies the child value into the result
# field and then calls to_datetime() on the copy.
date_to_datetime = string.Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
  Expr* op = e->children()[0];\n\
  ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
  if (val == NULL) return NULL;\n\
  DateTimeValue *date_val = &e->_result.${result_field};\n\
  *date_val = *val;\n\
  date_val->to_datetime();\n\
  return date_val;\n\
}\n\n")

# C++ body for DATE/DATETIME -> numeric casts: the result field is assigned
# the child's to_int64() value.
datetime_to_numeric = string.Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
  Expr* op = e->children()[0];\n\
  ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
  if (val == NULL) return NULL;\n\
  e->_result.${result_field} = val->to_int64();\n\
  return &e->_result.${result_field};\n\
}\n\n")

# All templates in this group return NULL for a NULL child and otherwise hand
# the formatted text to e->_result.set_string_val() before returning the
# address of the result field.

# Formats the child value with its own to_string() method.
decimal_to_string = string.Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
  Expr* op = e->children()[0];\n\
  ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
  if (val == NULL) return NULL;\n\
  e->_result.set_string_val(val->to_string());\n\
  return &e->_result.${result_field};\n\
}\n\n")

# Formats the child value into a 64-byte stack buffer with to_string(buf).
datetime_to_string = string.Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
  Expr* op = e->children()[0];\n\
  ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
  if (val == NULL) return NULL;\n\
  char buf[64];\n\
  val->to_string(buf);\n\
  e->_result.set_string_val(buf);\n\
  return &e->_result.${result_field};\n\
}\n\n")

# Formats the child value with std::to_string().
numeric_to_string = string.Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
  Expr* op = e->children()[0];\n\
  ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
  if (val == NULL) return NULL;\n\
  e->_result.set_string_val(std::to_string(*val));\n\
  return &e->_result.${result_field};\n\
}\n\n")

# Formats the child value with LargeIntValue::to_string(), which is given a
# 64-byte buffer and its length and yields the start pointer and final length
# of the text.
largeint_to_string = string.Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
  Expr* op = e->children()[0];\n\
  ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
  if (val == NULL) return NULL;\n\
  char buf[64];\n\
  int len = 64;\n\
  char *str = LargeIntValue::to_string(*val, buf, &len);\n\
  e->_result.set_string_val(std::string(str, len));\n\
  return &e->_result.${result_field};\n\
}\n\n")

# Formats the child value into a 64-byte buffer with my_gcvt() in
# MY_GCVT_ARG_FLOAT mode.
float_to_string = string.Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
  Expr* op = e->children()[0];\n\
  ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
  if (val == NULL) return NULL;\n\
  char buf[64];\n\
  my_gcvt(*val, MY_GCVT_ARG_FLOAT, 64, buf, NULL);\n\
  e->_result.set_string_val(buf);\n\
  return &e->_result.${result_field};\n\
}\n\n")

# Formats the child value into a 64-byte buffer with my_gcvt() in
# MY_GCVT_ARG_DOUBLE mode.
double_to_string = string.Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
  Expr* op = e->children()[0];\n\
  ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
  if (val == NULL) return NULL;\n\
  char buf[64];\n\
  my_gcvt(*val, MY_GCVT_ARG_DOUBLE, 64, buf, NULL);\n\
  e->_result.set_string_val(buf);\n\
  return &e->_result.${result_field};\n\
}\n\n")

# Need to special case tinyint.  boost thinks it is a char and handles it differently.
# e.g. '0' is written as an empty string.
#
# string_to_tinyint parses the text as an int16_t with lexical_cast, narrows
# the value to int8_t, and returns NULL when lexical_cast throws
# bad_lexical_cast.
# NOTE(review): no entry of the `functions` table in this file selects this
# template (string -> TINYINT goes through string_to_int) -- confirm whether
# it is still needed.
string_to_tinyint = string.Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
  Expr* op = e->children()[0];\n\
  ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
  if (val == NULL) return NULL;\n\
  string tmp(val->ptr, val->len);\n\
  try {\n\
    e->_result.${result_field} = static_cast<int8_t>(lexical_cast<int16_t>(tmp));\n\
  } catch (bad_lexical_cast &) {\n\
    return NULL;\n\
  }\n\
  return &e->_result.${result_field};\n\
}\n\n")

# tinyint_to_string widens the value to int64_t before handing it to
# lexical_cast<string>, so it is formatted as a number.
tinyint_to_string = string.Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
  Expr* op = e->children()[0];\n\
  ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
  if (val == NULL) return NULL;\n\
  int64_t tmp_val = *val;\n\
  e->_result.set_string_val(lexical_cast<string>(tmp_val));\n\
  return &e->_result.${result_field};\n\
}\n\n")

# C++ body for a CASE expression that has a case operand.  Child 0 is the case
# value, followed by (when, then) pairs and, if has_else_expr() is true, a
# trailing else child.  The generated function:
#   - returns the else value (or NULL without an else) when the case value is
#     NULL;
#   - skips NULL when-values and compares the others to the case value with
#     operator== on ${native_type1};
#   - returns the `then` value of the first match, otherwise the else value,
#     otherwise NULL.
case = string.Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
  CaseExpr* expr = static_cast<CaseExpr*>(e);\n\
  int num_children = e->get_num_children();\n\
  int loop_end = (expr->has_else_expr()) ? num_children - 1 : num_children;\n\
  // Make sure we set the right compute function.\n\
  DCHECK_EQ(expr->has_case_expr(), true);\n\
  // Need at least case, when and then expr, and optionally an else.\n\
  DCHECK_GE(num_children, (expr->has_else_expr()) ? 4 : 3);\n\
  // All case and when exprs return the same type (we guaranteed that during analysis).\n\
  void* case_val = e->children()[0]->get_value(row);\n\
  if (case_val == NULL) {\n\
    if (expr->has_else_expr()) {\n\
      // Return else value.\n\
      return e->children()[num_children - 1]->get_value(row);\n\
    } else {\n\
      return NULL;\n\
    }\n\
  }\n\
  for (int i = 1; i < loop_end; i += 2) {\n\
    ${native_type1}* when_val =\n\
        reinterpret_cast<${native_type1}*>(e->children()[i]->get_value(row));\n\
    if (when_val == NULL) continue;\n\
    if (*reinterpret_cast<${native_type1}*>(case_val) == *when_val) {\n\
      // Return then value.\n\
      return e->children()[i + 1]->get_value(row);\n\
    }\n\
  }\n\
  if (expr->has_else_expr()) {\n\
    // Return else value.\n\
    return e->children()[num_children - 1]->get_value(row);\n\
  }\n\
  return NULL;\n\
}\n\n")

# One row of the generated Python registry: function name, return type,
# argument type list, C++ symbol and a trailing empty list.
python_template = string.Template(
    "  ['${fn_name}', '${return_type}', [${args}], "
    "'ComputeFunctions::${fn_signature}', []], \n")

# Default template for each operation.  A declaration in `functions` that
# carries its own template as a fourth element does not consult this table.
templates = {
  'BitNot': unary_op,
  'Cast': cast,
}
# Division-like operators use the template that returns NULL for a zero
# right operand.
for _op_name in ('Divide', 'Int_Divide', 'Mod'):
    templates[_op_name] = binary_op_divid
# Every remaining arithmetic, bitwise and comparison operator is a plain
# `lhs <op> rhs` expression.
for _op_name in ('Add', 'Subtract', 'Multiply',
                 'BitAnd', 'BitXor', 'BitOr',
                 'Eq', 'Ne', 'Ge', 'Gt', 'Lt', 'Le'):
    templates[_op_name] = binary_op

# Named type groups used when declaring functions.  Each scalar type name
# expands to a one-element list containing itself; the aggregate names expand
# to several types, in the order listed.
_NATIVE_INTS = ['TINYINT', 'SMALLINT', 'INT', 'BIGINT']
_ALL_INTS = _NATIVE_INTS + ['LARGEINT']
_FLOATS = ['FLOAT', 'DOUBLE']

types = {}
for _scalar in ('BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'LARGEINT',
                'FLOAT', 'DOUBLE', 'DATE', 'DATETIME', 'DECIMALV2'):
    types[_scalar] = [_scalar]
# The 'STRING' name expands to VARCHAR.
types['STRING'] = ['VARCHAR']
types.update({
    'NATIVE_INT_TYPES': list(_NATIVE_INTS),
    'INT_TYPES': list(_ALL_INTS),
    'FLOAT_TYPES': list(_FLOATS),
    'NUMERIC_TYPES': _NATIVE_INTS + _FLOATS + ['LARGEINT', 'DECIMALV2'],
    'STRING_TYPES': ['VARCHAR'],
    'DATETIME_TYPES': ['DATE', 'DATETIME'],
    'FIXED_TYPES': ['BOOLEAN'] + _ALL_INTS,
    'NATIVE_TYPES': ['BOOLEAN'] + _NATIVE_INTS + _FLOATS,
    'STRCAST_FIXED_TYPES': ['BOOLEAN', 'SMALLINT', 'INT', 'BIGINT'],
    'ALL_TYPES': ['BOOLEAN'] + _ALL_INTS + _FLOATS +
                 ['VARCHAR', 'DATETIME', 'DECIMALV2'],
    'MAX_TYPES': ['BIGINT', 'LARGEINT', 'DOUBLE', 'DECIMALV2'],
})

# Operation, [ReturnType], [[Args1], [Args2], ... [ArgsN]]
#
# An entry may carry an optional fourth element: the template to use instead
# of the default registered for the operation in `templates`.
#
# The names inside ReturnType and ArgsK are keys of `types`.  After expansion,
# every list must hold either a single type or as many types as the longest
# list of the entry.  The lists are then walked in lock-step (position i of
# every list forms signature i; single-type lists are reused for every
# position), so an entry yields one function per position, not a cross
# product.  The order of the entries is the order of the generated output.
functions = [
  # Arithmetic Expr
  ['Add', ['MAX_TYPES'], [['MAX_TYPES'], ['MAX_TYPES']]],
  ['Subtract', ['MAX_TYPES'], [['MAX_TYPES'], ['MAX_TYPES']]],
  ['Multiply', ['MAX_TYPES'], [['MAX_TYPES'], ['MAX_TYPES']]],
  ['Divide', ['MAX_TYPES'], [['MAX_TYPES'], ['MAX_TYPES']]],
  ['Int_Divide', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']]],
  ['Mod', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']]],
  ['Mod', ['DECIMALV2'], [['DECIMALV2'], ['DECIMALV2']]],
  ['Mod', ['DOUBLE'], [['DOUBLE'], ['DOUBLE']], double_mod],
  ['BitAnd', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']]],
  ['BitXor', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']]],
  ['BitOr', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']]],
  ['BitNot', ['INT_TYPES'], [['INT_TYPES']]],

  # BinaryPredicates
  # Both operands always have the same type; string comparisons use the
  # method-call template instead of a native operator.
  ['Eq', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']]],
  ['Ne', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']]],
  ['Gt', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']]],
  ['Lt', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']]],
  ['Ge', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']]],
  ['Le', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']]],
  ['Eq', ['BOOLEAN'], [['LARGEINT'], ['LARGEINT']],],
  ['Ne', ['BOOLEAN'], [['LARGEINT'], ['LARGEINT']],],
  ['Gt', ['BOOLEAN'], [['LARGEINT'], ['LARGEINT']],],
  ['Lt', ['BOOLEAN'], [['LARGEINT'], ['LARGEINT']],],
  ['Ge', ['BOOLEAN'], [['LARGEINT'], ['LARGEINT']],],
  ['Le', ['BOOLEAN'], [['LARGEINT'], ['LARGEINT']],],
  ['Eq', ['BOOLEAN'], [['STRING'], ['STRING']], binary_func],
  ['Ne', ['BOOLEAN'], [['STRING'], ['STRING']], binary_func],
  ['Gt', ['BOOLEAN'], [['STRING'], ['STRING']], binary_func],
  ['Lt', ['BOOLEAN'], [['STRING'], ['STRING']], binary_func],
  ['Ge', ['BOOLEAN'], [['STRING'], ['STRING']], binary_func],
  ['Le', ['BOOLEAN'], [['STRING'], ['STRING']], binary_func],
  ['Eq', ['BOOLEAN'], [['DATETIME'], ['DATETIME']],],
  ['Ne', ['BOOLEAN'], [['DATETIME'], ['DATETIME']],],
  ['Gt', ['BOOLEAN'], [['DATETIME'], ['DATETIME']],],
  ['Lt', ['BOOLEAN'], [['DATETIME'], ['DATETIME']],],
  ['Ge', ['BOOLEAN'], [['DATETIME'], ['DATETIME']],],
  ['Le', ['BOOLEAN'], [['DATETIME'], ['DATETIME']],],
  ['Eq', ['BOOLEAN'], [['DECIMALV2'], ['DECIMALV2']],],
  ['Ne', ['BOOLEAN'], [['DECIMALV2'], ['DECIMALV2']],],
  ['Gt', ['BOOLEAN'], [['DECIMALV2'], ['DECIMALV2']],],
  ['Lt', ['BOOLEAN'], [['DECIMALV2'], ['DECIMALV2']],],
  ['Ge', ['BOOLEAN'], [['DECIMALV2'], ['DECIMALV2']],],
  ['Le', ['BOOLEAN'], [['DECIMALV2'], ['DECIMALV2']],],

  # Casts
  # A cast is declared with two argument types: the source type followed by
  # the target type, which is also the return type.
  ['Cast', ['BOOLEAN'], [['NATIVE_TYPES'], ['BOOLEAN']]],
  ['Cast', ['TINYINT'], [['NATIVE_TYPES'], ['TINYINT']]],
  ['Cast', ['SMALLINT'], [['NATIVE_TYPES'], ['SMALLINT']]],
  ['Cast', ['INT'], [['NATIVE_TYPES'], ['INT']]],
  ['Cast', ['BIGINT'], [['NATIVE_TYPES'], ['BIGINT']]],
  ['Cast', ['LARGEINT'], [['NATIVE_TYPES'], ['LARGEINT']]],
  ['Cast', ['LARGEINT'], [['DECIMALV2'], ['LARGEINT']]],
  ['Cast', ['NATIVE_TYPES'], [['LARGEINT'], ['NATIVE_TYPES']]],
  ['Cast', ['FLOAT'], [['NATIVE_TYPES'], ['FLOAT']]],
  ['Cast', ['DOUBLE'], [['NATIVE_TYPES'], ['DOUBLE']]],
  ['Cast', ['DECIMALV2'], [['FIXED_TYPES'], ['DECIMALV2']]],
  ['Cast', ['DECIMALV2'], [['FLOAT'], ['DECIMALV2']], float_to_decimal],
  ['Cast', ['DECIMALV2'], [['DOUBLE'], ['DECIMALV2']], double_to_decimal],
  ['Cast', ['NATIVE_TYPES'], [['DECIMALV2'], ['NATIVE_TYPES']]],
  ['Cast', ['NATIVE_INT_TYPES'], [['STRING'], ['NATIVE_INT_TYPES']], string_to_int],
  ['Cast', ['LARGEINT'], [['STRING'], ['LARGEINT']], string_to_int],
  ['Cast', ['FLOAT_TYPES'], [['STRING'], ['FLOAT_TYPES']], string_to_float],
  ['Cast', ['STRING'], [['STRCAST_FIXED_TYPES'], ['STRING']], numeric_to_string],
  ['Cast', ['STRING'], [['LARGEINT'], ['STRING']], largeint_to_string],
  ['Cast', ['STRING'], [['FLOAT'], ['STRING']], float_to_string],
  ['Cast', ['STRING'], [['DOUBLE'], ['STRING']], double_to_string],
  ['Cast', ['STRING'], [['TINYINT'], ['STRING']], tinyint_to_string],
  ['Cast', ['STRING'], [['DECIMALV2'], ['STRING']], decimal_to_string],
  # Datetime cast
  ['Cast', ['DATE'], [['NUMERIC_TYPES'], ['DATE']], numeric_to_date],
  ['Cast', ['DATETIME'], [['NUMERIC_TYPES'], ['DATETIME']], numeric_to_datetime],
  ['Cast', ['DATE'], [['STRING_TYPES'], ['DATE']], string_to_date],
  ['Cast', ['DATETIME'], [['STRING_TYPES'], ['DATETIME']], string_to_datetime],
  ['Cast', ['DATE'], [['DATETIME'], ['DATE']], datetime_to_date],
  ['Cast', ['DATETIME'], [['DATE'], ['DATETIME']], date_to_datetime],
  ['Cast', ['NUMERIC_TYPES'], [['DATETIME'], ['NUMERIC_TYPES']], datetime_to_numeric],
  ['Cast', ['NUMERIC_TYPES'], [['DATE'], ['NUMERIC_TYPES']], datetime_to_numeric],
  ['Cast', ['STRING_TYPES'], [['DATE'], ['STRING_TYPES']], datetime_to_string],
  ['Cast', ['STRING_TYPES'], [['DATETIME'], ['STRING_TYPES']], datetime_to_string],

  # Case
  # The case expr is special because it has a variable number of function args,
  # but we guarantee that all of them are of the same type during query analysis,
  # so we just list exactly one here.
  # In addition, the return type given here is a dummy, because it is
  # not necessarily the same as the function args type.
  ['Case', ['ALL_TYPES'], [['ALL_TYPES']], case],
]

# One row per column type:
#   (type name,
#    C++ type name appended to the generated function name,
#    portable C++ type used inside the generated function body,
#    member of the expression result that receives a value of this type)
_TYPE_ROWS = (
    ('BOOLEAN', 'bool', 'bool', 'bool_val'),
    ('TINYINT', 'char', 'int8_t', 'tinyint_val'),
    ('SMALLINT', 'short', 'int16_t', 'smallint_val'),
    ('INT', 'int', 'int32_t', 'int_val'),
    ('BIGINT', 'long', 'int64_t', 'bigint_val'),
    ('LARGEINT', '__int128', '__int128', 'large_int_val'),
    ('FLOAT', 'float', 'float', 'float_val'),
    ('DOUBLE', 'double', 'double', 'double_val'),
    ('VARCHAR', 'StringValue', 'StringValue', 'string_val'),
    ('DATE', 'Date', 'DateTimeValue', 'datetime_val'),
    ('DATETIME', 'DateTime', 'DateTimeValue', 'datetime_val'),
    ('TIME', 'double', 'double', 'double_val'),
    ('DECIMALV2', 'DecimalV2Value', 'DecimalV2Value', 'decimalv2_val'),
)

# Lookup tables derived from the rows above, each keyed by type name.
native_types = dict((row[0], row[1]) for row in _TYPE_ROWS)
implemented_types = dict((row[0], row[2]) for row in _TYPE_ROWS)
result_fields = dict((row[0], row[3]) for row in _TYPE_ROWS)

# C++ operator token for each operation, keyed by the upper-cased operation
# name.
native_ops = dict(
    ADD='+',
    SUBTRACT='-',
    MULTIPLY='*',
    DIVIDE='/',
    INT_DIVIDE='/',
    MOD='%',
    BITAND='&',
    BITOR='|',
    BITXOR='^',
    BITNOT='~',
    EQ='==',
    NE='!=',
    LT='<',
    LE='<=',
    GT='>',
    GE='>=',
)

# Comparison method substituted for ${native_func}, keyed by the upper-cased
# operation name; each method name is the lower-cased key.
native_funcs = dict(
    (op_key, op_key.lower()) for op_key in ('EQ', 'NE', 'LT', 'LE', 'GT', 'GE'))

# Text written at the top of the generated functions.cc: includes, using
# directives and the opening of namespace doris.
# NOTE(review): the "To add new functions" line emitted here names an Impala
# path (gen_opcodes.py) rather than this script -- confirm whether the
# generated comment should be updated.
cc_preamble = '\
\n\
// This is a generated file, DO NOT EDIT.\n\
// To add new functions, see impala/common/function-registry/gen_opcodes.py\n\
\n\
#include "gen_cpp/opcode/functions.h"\n\
#include "exprs/expr.h"\n\
#include "exprs/case_expr.h"\n\
#include "vec/common/string_tmp.h"\n\
#include "runtime/tuple_row.h"\n\
#include "util/mysql_dtoa.h"\n\
#include "util/string_parser.hpp"\n\
#include <boost/lexical_cast.hpp>\n\
\n\
using namespace boost;\n\
using namespace std;\n\
\n\
namespace doris { \n\
\n'

# Text written at the end of functions.cc: closes namespace doris.
cc_epilogue = '\
}\n'

# Text written at the top of the generated functions.h: include guard,
# forward declarations and the opening of class ComputeFunctions.
h_preamble = '\
\n\
#ifndef DORIS_OPCODE_FUNCTIONS_H\n\
#define DORIS_OPCODE_FUNCTIONS_H\n\
\n\
namespace doris {\n\
class Expr;\n\
class OpcodeRegistry;\n\
class TupleRow;\n\
\n\
class ComputeFunctions {\n\
 public:\n'

# Text written at the end of functions.h: closes the class, the namespace and
# the include guard.
h_epilogue = '\
};\n\
\n\
}\n\
\n\
#endif\n'

# Text written at the top of generated_functions.py: opens the `functions`
# list that the per-function rows are appended to.
python_preamble = '\
#!/usr/bin/env python\n\
\n\
# This is a generated file, DO NOT EDIT IT.\n\
# To add new functions, see impala/common/function-registry/gen_opcodes.py\n\
\n\
functions = [\n'

# Closes the `functions` list of generated_functions.py.
python_epilogue = ']'

# Prototype line emitted into functions.h for every generated function.
header_template = string.Template(
    "  static void* ${fn_signature}(Expr* e, TupleRow* row);\n")

# Output directory of functions.h / functions.cc, relative to the current
# working directory.
BE_PATH = "../gen_cpp/opcode/"

def initialize_sub(op, return_type, arg_types):
    """
    Build the substitution dictionary for one concrete function signature.

    op is the operation name, return_type a single type name and arg_types a
    list of single type names.  The returned dictionary holds:
      fn_name        the operation name
      fn_signature   the operation name followed by "_<native type>" for
                     every argument
      return_type    the return type name
      result_field   the result member associated with the return type
      args           every argument rendered as a quoted name followed by ", "
      native_op      the C++ operator, only when the operation has one
      native_type<N> the implementation type of argument N (1-based)
    """
    op_key = op.upper()
    sub = {
        "fn_name": op,
        "return_type": return_type,
        "result_field": result_fields[return_type],
    }
    if op_key in native_ops:
        sub["native_op"] = native_ops[op_key]

    sub["fn_signature"] = "".join(
        [op] + ["_" + native_types[arg] for arg in arg_types])
    sub["args"] = "".join("'" + arg + "', " for arg in arg_types)
    for position, arg in enumerate(arg_types, 1):
        sub["native_type" + str(position)] = implemented_types[arg]
    return sub

if __name__ == "__main__":
    # Entry point: expand every declaration in `functions` into concrete
    # signatures and emit, for each of them, a prototype into functions.h, a
    # definition into functions.cc and a registry row into
    # generated_functions.py.

    # An already existing output directory is fine; any other failure to
    # create it is re-raised.
    try:
        os.makedirs(BE_PATH)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    # The context managers close all three outputs even when generation
    # aborts on an invalid declaration or a failed template substitution.
    with open(BE_PATH + 'functions.h', 'w') as h_file, \
            open(BE_PATH + 'functions.cc', 'w') as cc_file, \
            open('generated_functions.py', 'w') as python_file:
        h_file.write(h_preamble)
        cc_file.write(cc_preamble)
        python_file.write(python_preamble)

        # Generate functions and headers
        for func_data in functions:
            op = func_data[0]
            if len(func_data) >= 4:
                # If a specific template has been specified, use that one.
                template = func_data[3]
            elif op in templates:
                template = templates[op]
            else:
                # Skip functions with no template (shouldn't be auto-generated)
                continue

            # Expand the type-group names into concrete types.
            return_types = [t for ret in func_data[1] for t in types[ret]]
            signatures = [[t for arg in args for t in types[arg]]
                          for args in func_data[2]]

            # The longest expanded list decides how many functions this
            # declaration produces.
            num_functions = max([len(return_types)] +
                                [len(args) for args in signatures])

            # Validate the input: every expanded list must hold either one
            # type (reused for every function) or one type per function.
            for expanded in [return_types] + signatures:
                if len(expanded) != 1 and len(expanded) != num_functions:
                    # func_data is a list, so it must be formatted explicitly
                    # before being appended to the message.
                    print("Invalid Declaration: " + repr(func_data))
                    sys.exit(1)

            # Iterate over every function signature to generate
            for i in range(num_functions):
                if len(return_types) == 1:
                    return_type = return_types[0]
                else:
                    return_type = return_types[i]
                arg_types = [args[0] if len(args) == 1 else args[i]
                             for args in signatures]

                # At this point, 'return_type' is a single type and
                # 'arg_types' is a list of single types
                sub = initialize_sub(op, return_type, arg_types)
                if template is binary_func:
                    sub["native_func"] = native_funcs[op.upper()]

                h_file.write(header_template.substitute(sub))
                cc_file.write(template.substitute(sub))
                python_file.write(python_template.substitute(sub))

        h_file.write(h_epilogue)
        cc_file.write(cc_epilogue)
        python_file.write(python_epilogue)