In this PR, I have refactored the initialization of the FunctionSet. Previously, all the functions were in one large method which led to the generation of Java code that was too long. This posed a problem for the compiler, as the length of the method exceeded the limit imposed by the Java compiler. To resolve this issue and improve the readability and manageability of our code, I have categorized these functions by type, and created dedicated initialization methods for each type. As such, our code is now not only more readable and understandable, but also each method is of a length that is acceptable to the compiler and can be compiled successfully. Moreover, this change makes it easier for us to add new functions as we can directly locate the right category and add new functions there. This is a significant change aimed at enhancing the maintainability and scalability of our code, while ensuring that our code can be successfully compiled.
241 lines
9.5 KiB
Python
Executable File
241 lines
9.5 KiB
Python
Executable File
#!/usr/bin/env python
|
|
# encoding: utf-8
|
|
|
|
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
"""
|
|
This module is doris builtin functions
|
|
"""
|
|
|
|
import sys
|
|
import os
|
|
import errno
|
|
from string import Template
|
|
import doris_builtins_functions
|
|
|
|
# Header of the generated ScalarBuiltins.java: license banner, "generated
# file" notice, package/imports, and the opening of the class and of its
# initBuiltins(FunctionSet) method. One literal per emitted Java line.
java_registry_preamble = (
    '// Licensed to the Apache Software Foundation (ASF) under one \n'
    '// or more contributor license agreements. See the NOTICE file \n'
    '// distributed with this work for additional information \n'
    '// regarding copyright ownership. The ASF licenses this file \n'
    '// to you under the Apache License, Version 2.0 (the \n'
    '// "License"); you may not use this file except in compliance \n'
    '// with the License. You may obtain a copy of the License at \n'
    '// \n'
    '// http://www.apache.org/licenses/LICENSE-2.0\n'
    '// \n'
    '// Unless required by applicable law or agreed to in writing, software\n'
    '// distributed under the License is distributed on an "AS IS" BASIS,\n'
    '// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n'
    '// See the License for the specific language governing permissions and\n'
    '// limitations under the License.\n'
    '\n'
    '// This is a generated file, DO NOT EDIT.\n'
    '// To add new functions, see the generator at\n'
    '// common/function-registry/gen_builtins_catalog.py or the function list at\n'
    '// common/function-registry/doris_builtins_functions.py.\n'
    '\n'
    'package org.apache.doris.builtins;\n'
    '\n'
    'import org.apache.doris.catalog.ArrayType;\n'
    'import org.apache.doris.catalog.MapType;\n'
    'import org.apache.doris.catalog.StructType;\n'
    'import org.apache.doris.catalog.TemplateType;\n'
    'import org.apache.doris.catalog.Type;\n'
    'import org.apache.doris.catalog.Function;\n'
    'import org.apache.doris.catalog.FunctionSet;\n'
    'import com.google.common.collect.Sets;\n'
    'import java.util.Set;\n'
    '\n'
    'public class ScalarBuiltins { \n'
    'public static void initBuiltins(FunctionSet functionSet) { \n'
)
|
|
|
|
# Final text of the generated file: the brace that closes the
# ScalarBuiltins class opened in java_registry_preamble.
java_registry_epilogue = "}\n"

# Directory the generated Java source is written to. The path is relative,
# so it is resolved against the working directory the script is run from.
FE_PATH = (
    "../../../fe/fe-core/target/generated-sources/build/"
    "org/apache/doris/builtins/"
)
# Echo the target directory when the module is loaded.
print(FE_PATH)
|
|
|
|
# This contains all the metadata to describe all the builtins.
|
|
# Each meta data entry is itself a map to store all the meta data
|
|
# - sql_names, ret_type, args, nullable_mode, template_types(optional)
|
|
|
|
"""
|
|
generate fe data type, support nested ARRAY type.
|
|
for example:
|
|
in[TINYINT] --> out[Type.TINYINT]
|
|
in[INT] --> out[Type.INT]
|
|
in[ARRAY_INT] --> out[new ArrayType(Type.INT)]
|
|
in[MAP_STRING_INT] --> out[new MapType(Type.STRING,Type.INT)]
|
|
"""
|
|
def _split_top_level_types(type_list):
    """Split a comma-separated type list on commas that are not inside '<...>'."""
    parts = []
    depth = 0
    start = 0
    for pos, char in enumerate(type_list):
        if char == '<':
            depth += 1
        elif char == '>':
            depth -= 1
        elif char == ',' and depth == 0:
            parts.append(type_list[start:pos])
            start = pos + 1
    parts.append(type_list[start:])
    return parts


def generate_fe_datatype(str_type, template_types):
    """Translate a type name from the function table into a Java type expression.

    Spaces, tabs and the "..." marker are removed from ``str_type`` first.
    The cleaned name is then resolved in this order:

    1. a name listed in ``template_types`` -> ``new TemplateType("NAME")``;
       a name listed there with a "..." suffix -> ``new TemplateType("NAME", true)``
    2. ``ARRAY<...>``, ``MAP<...,...>``, ``STRUCT<...>`` -> ``new ArrayType(...)``,
       ``new MapType(..., ...)``, ``new StructType(...)``, recursing into the
       parameters
    3. legacy ``ARRAY_X`` / ``MAP_K_V`` spellings
    4. ``DECIMALV2`` -> ``Type.MAX_DECIMALV2_TYPE``
    5. anything else -> ``Type.<NAME>``

    Args:
        str_type: type name as written in the function table.
        template_types: collection of template type names declared for the
            function (may be empty).

    Returns:
        The Java source expression as a string.

    Raises:
        ValueError: if a ``MAP<...>`` type does not have exactly two parameters.
    """
    # Normalise: drop whitespace and the variadic ellipsis.
    str_type = str_type.replace(' ', '').replace('\t', '').replace('...', '')

    # Template placeholders take precedence over concrete type names.
    if str_type in template_types:
        return 'new TemplateType("{0}")'.format(str_type)
    if str_type + "..." in template_types:
        return 'new TemplateType("{0}", true)'.format(str_type)

    # Angle-bracket syntax for Array, Map and Struct.
    template_start = str_type.find('<')
    template_end = str_type.rfind('>')
    if template_start >= 0 and template_end > 0:
        # Text between the outermost '<' and '>'.
        inner = str_type[template_start + 1:template_end]
        if str_type.startswith("ARRAY<"):
            return 'new ArrayType({0})'.format(generate_fe_datatype(inner, template_types))
        if str_type.startswith("MAP<"):
            # Split only on top-level commas so nested parameters such as
            # MAP<STRING,MAP<INT,BIGINT>> are not cut in the middle.
            params = _split_top_level_types(inner)
            if len(params) != 2:
                raise ValueError(
                    "MAP type needs exactly two type parameters: " + repr(str_type))
            return 'new MapType({0}, {1})'.format(
                generate_fe_datatype(params[0], template_types),
                generate_fe_datatype(params[1], template_types))
        if str_type.startswith("STRUCT<"):
            fields = [generate_fe_datatype(field, template_types)
                      for field in _split_top_level_types(inner)]
            return 'new StructType({0})'.format(", ".join(fields))

    # Legacy Array / Map syntax: ARRAY_<elem>, MAP_<key>_<value>.
    if str_type.startswith("ARRAY_"):
        element = str_type[len("ARRAY_"):]
        return "new ArrayType(" + generate_fe_datatype(element, template_types) + ")"
    if str_type.startswith("MAP_"):
        pieces = str_type.split('_', 2)
        # Without a second '_' there is no value type; fall through to Type.<NAME>.
        if len(pieces) > 2:
            return ("new MapType(" + generate_fe_datatype(pieces[1], template_types)
                    + "," + generate_fe_datatype(pieces[2], template_types) + ")")

    # DECIMALV2 is the only scalar whose Java constant is not Type.<NAME>.
    if str_type == "DECIMALV2":
        return "Type.MAX_DECIMALV2_TYPE"
    return "Type." + str_type
|
|
|
|
"""
|
|
Order of params:
|
|
name, user_visible, nullable_mode, ret_type, has_var_args, args
|
|
"""
|
|
def generate_fe_entry(entry, name):
    """Build the Java argument list for registering one builtin under ``name``.

    The returned text is a comma-separated list in this order:
    quoted name, user_visible, ``Function.NullableMode.<mode>``, return type,
    has_var_args, then one Java type expression per argument.

    Args:
        entry: dict with the keys "user_visible", "nullable_mode",
            "ret_type", "template_types" and "args". A trailing "..." in
            ``entry["args"]`` marks the signature as variadic.
        name: SQL name to register.

    Returns:
        The argument list as a string (without surrounding parentheses).
    """
    arg_types = entry["args"]
    # Detect the varargs marker without popping it: the same entry is reused
    # for every SQL alias of a function, so removing "..." in place would
    # make every alias after the first lose its varargs flag.
    has_var_args = bool(arg_types) and arg_types[-1] == "..."
    if has_var_args:
        arg_types = arg_types[:-1]

    template_types = entry["template_types"]
    fields = [
        "\"" + name + "\"",
        "true" if entry["user_visible"] else "false",
        "Function.NullableMode." + entry["nullable_mode"],
        generate_fe_datatype(entry["ret_type"], template_types),
        "true" if has_var_args else "false",
    ]
    fields.extend(generate_fe_datatype(arg, template_types) for arg in arg_types)
    return ", ".join(fields)
|
|
|
|
# Generates the FE builtins init file that registers all the builtins.
|
|
def generate_fe_registry_init(filename):
    """Write the generated Java registry source to ``filename``.

    Output order: the preamble (which opens the class and ``initBuiltins``),
    one ``init<Category>Builtins(functionSet)`` call per key of
    ``doris_builtins_functions.visible_functions``, the name-set
    registrations, the closing brace of ``initBuiltins``, one private
    ``init<Category>Builtins`` method per category, and the epilogue.

    Args:
        filename: path of the Java file to create or overwrite.
    """
    categories = doris_builtins_functions.visible_functions
    null_result_names = doris_builtins_functions.null_result_with_one_null_param_functions

    # The context manager closes the file even if generation fails part-way.
    with open(filename, "w") as java_registry_file:
        java_registry_file.write(java_registry_preamble)

        # Body of initBuiltins: delegate to one init method per category.
        for category in categories:
            java_registry_file.write(" init{}Builtins(functionSet);\n".format(category.capitalize()))

        # Names from null_result_with_one_null_param_functions.
        java_registry_file.write(" Set<String> funcNames = Sets.newHashSet();\n")
        for entry in null_result_names:
            java_registry_file.write(" funcNames.add(\"%s\");\n" % entry)
        java_registry_file.write(" functionSet.buildNullResultWithOneNullParamFunction(funcNames);\n")

        # Names from nondeterministic_functions.
        java_registry_file.write(" Set<String> nondeterministicFuncNames = Sets.newHashSet();\n")
        for entry in doris_builtins_functions.nondeterministic_functions:
            java_registry_file.write(" nondeterministicFuncNames.add(\"%s\");\n" % entry)
        java_registry_file.write(" functionSet.buildNondeterministicFunctions(nondeterministicFuncNames);\n")

        # NOTE(review): the null-result name set is emitted a second time
        # here. It looks redundant with the registration above; it is kept so
        # the generated Java stays unchanged -- TODO confirm and remove.
        java_registry_file.write(" funcNames = Sets.newHashSet();\n")
        for entry in null_result_names:
            java_registry_file.write(" funcNames.add(\"%s\");\n" % entry)
        java_registry_file.write(" functionSet.buildNullResultWithOneNullParamFunction(funcNames);\n")

        # Close initBuiltins.
        java_registry_file.write(" }\n")
        java_registry_file.write("\n")

        # One private init method per category.
        for category, functions in categories.items():
            generate_fe_category(category, functions, java_registry_file, True)

        java_registry_file.write(java_registry_epilogue)
|
|
|
|
def generate_fe_category(category, functions, java_registry_file, user_visible):
    """Write the ``init<Category>Builtins`` Java method for one category.

    Emits the method header, one
    ``functionSet.addScalarAndVectorizedBuiltin(...)`` line per SQL name of
    every function, and the closing brace.

    Args:
        category: category name; ``category.capitalize()`` is used in the
            Java method name.
        functions: iterable of function entries. Each entry is indexed as
            ``[sql_names, ret_type, args, nullable_mode]`` with an optional
            fifth element ``template_types``. An empty ``nullable_mode``
            means ``DEPEND_ON_ARGUMENT``.
        java_registry_file: writable text file object.
        user_visible: value stored under "user_visible" for every entry.

    Raises:
        ValueError: if a function entry has fewer than four elements.
    """
    java_registry_file.write(" private static void init{}Builtins(FunctionSet functionSet) {{\n".format(category.capitalize()))
    for function in functions:
        # Explicit check instead of `assert`, which is skipped under `python -O`.
        if len(function) < 4:
            raise ValueError(
                "Invalid function entry in doris_builtins_functions.py:\n\t" + repr(function))

        sql_names = function[0]
        nullable_mode = function[3] if function[3] != '' else 'DEPEND_ON_ARGUMENT'
        # The fifth element (template types) is optional.
        template_types = function[4] if len(function) >= 5 else []

        for name in sql_names:
            # Each alias gets its own entry with a copy of the argument list,
            # so any in-place change made while rendering one alias cannot
            # affect the next alias or the shared function table.
            entry = {
                "sql_names": sql_names,
                "ret_type": function[1],
                "args": list(function[2]),
                "nullable_mode": nullable_mode,
                "template_types": template_types,
                "user_visible": user_visible,
            }
            java_output = generate_fe_entry(entry, name)
            java_registry_file.write(" functionSet.addScalarAndVectorizedBuiltin(%s);\n" % java_output)

    java_registry_file.write(" }\n")
|
|
|
|
|
|
if __name__ == "__main__":
    # Make sure the output directory exists. An "already exists" error is
    # expected on re-runs and ignored; any other OSError is propagated.
    try:
        os.makedirs(FE_PATH)
    except OSError as err:
        if err.errno != errno.EEXIST:
            raise

    generate_fe_registry_init(FE_PATH + "ScalarBuiltins.java")
|