[fix](multi-catalog)put java udf to custom lib (#35984)

bp #34990
This commit is contained in:
slothever
2024-06-06 22:54:24 +08:00
committed by GitHub
parent 9efc7b63ec
commit c794ea18c8
11 changed files with 382 additions and 168 deletions

View File

@ -248,10 +248,7 @@ public class CreateFunctionStmt extends DdlStmt {
}
userFile = properties.getOrDefault(FILE_KEY, properties.get(OBJECT_FILE_KEY));
if (Strings.isNullOrEmpty(userFile)) {
throw new AnalysisException("No 'file' or 'object_file' in properties");
}
if (binaryType != TFunctionBinaryType.RPC) {
if (!Strings.isNullOrEmpty(userFile) && binaryType != TFunctionBinaryType.RPC) {
try {
computeObjectChecksum();
} catch (IOException | NoSuchAlgorithmException e) {
@ -304,10 +301,15 @@ public class CreateFunctionStmt extends DdlStmt {
private void analyzeUda() throws AnalysisException {
AggregateFunction.AggregateFunctionBuilder builder
= AggregateFunction.AggregateFunctionBuilder.createUdfBuilder();
URI location;
if (!Strings.isNullOrEmpty(userFile)) {
location = URI.create(userFile);
} else {
location = null;
}
builder.name(functionName).argsType(argsDef.getArgTypes()).retType(returnType.getType())
.hasVarArgs(argsDef.isVariadic()).intermediateType(intermediateType.getType())
.location(URI.create(userFile));
.location(location);
String initFnSymbol = properties.get(INIT_KEY);
if (initFnSymbol == null && !(binaryType == TFunctionBinaryType.JAVA_UDF
|| binaryType == TFunctionBinaryType.RPC)) {
@ -353,8 +355,6 @@ public class CreateFunctionStmt extends DdlStmt {
function = builder.initFnSymbol(initFnSymbol).updateFnSymbol(updateFnSymbol).mergeFnSymbol(mergeFnSymbol)
.serializeFnSymbol(serializeFnSymbol).finalizeFnSymbol(finalizeFnSymbol)
.getValueFnSymbol(getValueFnSymbol).removeFnSymbol(removeFnSymbol).symbolName(symbol).build();
URI location = URI.create(userFile);
function.setLocation(location);
function.setBinaryType(binaryType);
function.setChecksum(checksum);
@ -378,7 +378,12 @@ public class CreateFunctionStmt extends DdlStmt {
} else if (binaryType == TFunctionBinaryType.JAVA_UDF) {
analyzeJavaUdf(symbol);
}
URI location = URI.create(userFile);
URI location;
if (!Strings.isNullOrEmpty(userFile)) {
location = URI.create(userFile);
} else {
location = null;
}
function = ScalarFunction.createUdf(binaryType,
functionName, argsDef.getArgTypes(),
returnType.getType(), argsDef.isVariadic(),
@ -391,94 +396,18 @@ public class CreateFunctionStmt extends DdlStmt {
HashMap<String, Method> allMethods = new HashMap<>();
try {
if (Strings.isNullOrEmpty(userFile)) {
try {
ClassLoader cl = this.getClass().getClassLoader();
checkUdafClass(clazz, cl, allMethods);
return;
} catch (ClassNotFoundException e) {
throw new AnalysisException("Class [" + clazz + "] not found in classpath");
}
}
URL[] urls = {new URL("jar:" + userFile + "!/")};
try (URLClassLoader cl = URLClassLoader.newInstance(urls)) {
Class udfClass = cl.loadClass(clazz);
String udfClassName = udfClass.getCanonicalName();
String stateClassName = udfClassName + "$" + STATE_CLASS_NAME;
Class stateClass = cl.loadClass(stateClassName);
for (Method m : udfClass.getMethods()) {
if (!m.getDeclaringClass().equals(udfClass)) {
continue;
}
String name = m.getName();
if (allMethods.containsKey(name)) {
throw new AnalysisException(
String.format("UDF class '%s' has multiple methods with name '%s' ", udfClassName,
name));
}
allMethods.put(name, m);
}
if (allMethods.get(CREATE_METHOD_NAME) == null) {
throw new AnalysisException(
String.format("No method '%s' in class '%s'!", CREATE_METHOD_NAME, udfClassName));
} else {
checkMethodNonStaticAndPublic(CREATE_METHOD_NAME, allMethods.get(CREATE_METHOD_NAME), udfClassName);
checkArgumentCount(allMethods.get(CREATE_METHOD_NAME), 0, udfClassName);
checkReturnJavaType(udfClassName, allMethods.get(CREATE_METHOD_NAME), stateClass);
}
if (allMethods.get(DESTROY_METHOD_NAME) == null) {
throw new AnalysisException(
String.format("No method '%s' in class '%s'!", DESTROY_METHOD_NAME, udfClassName));
} else {
checkMethodNonStaticAndPublic(DESTROY_METHOD_NAME, allMethods.get(DESTROY_METHOD_NAME),
udfClassName);
checkArgumentCount(allMethods.get(DESTROY_METHOD_NAME), 1, udfClassName);
checkReturnJavaType(udfClassName, allMethods.get(DESTROY_METHOD_NAME), void.class);
}
if (allMethods.get(ADD_METHOD_NAME) == null) {
throw new AnalysisException(
String.format("No method '%s' in class '%s'!", ADD_METHOD_NAME, udfClassName));
} else {
checkMethodNonStaticAndPublic(ADD_METHOD_NAME, allMethods.get(ADD_METHOD_NAME), udfClassName);
checkArgumentCount(allMethods.get(ADD_METHOD_NAME), argsDef.getArgTypes().length + 1, udfClassName);
checkReturnJavaType(udfClassName, allMethods.get(ADD_METHOD_NAME), void.class);
for (int i = 0; i < argsDef.getArgTypes().length; i++) {
Parameter p = allMethods.get(ADD_METHOD_NAME).getParameters()[i + 1];
checkUdfType(udfClass, allMethods.get(ADD_METHOD_NAME), argsDef.getArgTypes()[i], p.getType(),
p.getName());
}
}
if (allMethods.get(SERIALIZE_METHOD_NAME) == null) {
throw new AnalysisException(
String.format("No method '%s' in class '%s'!", SERIALIZE_METHOD_NAME, udfClassName));
} else {
checkMethodNonStaticAndPublic(SERIALIZE_METHOD_NAME, allMethods.get(SERIALIZE_METHOD_NAME),
udfClassName);
checkArgumentCount(allMethods.get(SERIALIZE_METHOD_NAME), 2, udfClassName);
checkReturnJavaType(udfClassName, allMethods.get(SERIALIZE_METHOD_NAME), void.class);
}
if (allMethods.get(MERGE_METHOD_NAME) == null) {
throw new AnalysisException(
String.format("No method '%s' in class '%s'!", MERGE_METHOD_NAME, udfClassName));
} else {
checkMethodNonStaticAndPublic(MERGE_METHOD_NAME, allMethods.get(MERGE_METHOD_NAME), udfClassName);
checkArgumentCount(allMethods.get(MERGE_METHOD_NAME), 2, udfClassName);
checkReturnJavaType(udfClassName, allMethods.get(MERGE_METHOD_NAME), void.class);
}
if (allMethods.get(GETVALUE_METHOD_NAME) == null) {
throw new AnalysisException(
String.format("No method '%s' in class '%s'!", GETVALUE_METHOD_NAME, udfClassName));
} else {
checkMethodNonStaticAndPublic(GETVALUE_METHOD_NAME, allMethods.get(GETVALUE_METHOD_NAME),
udfClassName);
checkArgumentCount(allMethods.get(GETVALUE_METHOD_NAME), 1, udfClassName);
checkReturnUdfType(udfClass, allMethods.get(GETVALUE_METHOD_NAME), returnType.getType());
}
if (!Modifier.isPublic(stateClass.getModifiers()) || !Modifier.isStatic(stateClass.getModifiers())) {
throw new AnalysisException(
String.format(
"UDAF '%s' should have one public & static 'State' class to Construction data ",
udfClassName));
}
checkUdafClass(clazz, cl, allMethods);
} catch (ClassNotFoundException e) {
throw new AnalysisException(
"Class [" + clazz + "] or inner class [State] not found in file :" + userFile);
@ -490,6 +419,96 @@ public class CreateFunctionStmt extends DdlStmt {
}
}
/**
 * Loads the UDAF class {@code clazz} through {@code cl} and verifies that it has the
 * methods and nested state class this statement requires.
 *
 * <p>Checks are performed in this order, and the first failure aborts the analysis:
 * <ol>
 *   <li>the class and its nested {@code STATE_CLASS_NAME} class can be loaded;</li>
 *   <li>no two methods declared directly on the class share a name;</li>
 *   <li>the create, destroy, add, serialize, merge and getValue methods exist and pass the
 *       modifier, argument-count and return-type checks delegated to the sibling
 *       {@code check*} helpers;</li>
 *   <li>the nested state class is both {@code public} and {@code static}.</li>
 * </ol>
 *
 * @param clazz      name of the UDAF class, passed as-is to {@link ClassLoader#loadClass(String)}
 * @param cl         class loader used to resolve both the UDAF class and its nested state class
 * @param allMethods in/out map; every method declared directly on the UDAF class is added to it,
 *                   keyed by method name
 * @throws ClassNotFoundException if the UDAF class or its nested state class cannot be loaded
 * @throws AnalysisException      if any of the structural checks listed above fails
 */
private void checkUdafClass(String clazz, ClassLoader cl, HashMap<String, Method> allMethods)
        throws ClassNotFoundException, AnalysisException {
    Class<?> udfClass = cl.loadClass(clazz);
    // The canonical name is only used in user-facing error messages.
    String udfClassName = udfClass.getCanonicalName();
    // ClassLoader.loadClass expects a *binary* name. getCanonicalName() separates nested classes
    // with '.' (and returns null for local/anonymous classes), so building the state class name
    // from it fails whenever the UDAF class is itself nested. getName() yields the binary name
    // and is identical to the canonical name for top-level classes.
    String stateClassName = udfClass.getName() + "$" + STATE_CLASS_NAME;
    Class<?> stateClass = cl.loadClass(stateClassName);

    // Index only the methods declared on the UDAF class itself; inherited ones are ignored.
    for (Method m : udfClass.getMethods()) {
        if (!m.getDeclaringClass().equals(udfClass)) {
            continue;
        }
        String name = m.getName();
        if (allMethods.containsKey(name)) {
            throw new AnalysisException(
                    String.format("UDF class '%s' has multiple methods with name '%s' ", udfClassName,
                            name));
        }
        allMethods.put(name, m);
    }

    Method createMethod = allMethods.get(CREATE_METHOD_NAME);
    if (createMethod == null) {
        throw new AnalysisException(
                String.format("No method '%s' in class '%s'!", CREATE_METHOD_NAME, udfClassName));
    }
    checkMethodNonStaticAndPublic(CREATE_METHOD_NAME, createMethod, udfClassName);
    checkArgumentCount(createMethod, 0, udfClassName);
    checkReturnJavaType(udfClassName, createMethod, stateClass);

    Method destroyMethod = allMethods.get(DESTROY_METHOD_NAME);
    if (destroyMethod == null) {
        throw new AnalysisException(
                String.format("No method '%s' in class '%s'!", DESTROY_METHOD_NAME, udfClassName));
    }
    checkMethodNonStaticAndPublic(DESTROY_METHOD_NAME, destroyMethod, udfClassName);
    checkArgumentCount(destroyMethod, 1, udfClassName);
    checkReturnJavaType(udfClassName, destroyMethod, void.class);

    Method addMethod = allMethods.get(ADD_METHOD_NAME);
    if (addMethod == null) {
        throw new AnalysisException(
                String.format("No method '%s' in class '%s'!", ADD_METHOD_NAME, udfClassName));
    }
    checkMethodNonStaticAndPublic(ADD_METHOD_NAME, addMethod, udfClassName);
    checkArgumentCount(addMethod, argsDef.getArgTypes().length + 1, udfClassName);
    checkReturnJavaType(udfClassName, addMethod, void.class);
    // Parameter 0 is skipped (presumably the state object -- confirm against the UDAF contract);
    // the remaining parameters are matched one-to-one against the declared SQL argument types.
    Parameter[] addParams = addMethod.getParameters();
    for (int i = 0; i < argsDef.getArgTypes().length; i++) {
        Parameter p = addParams[i + 1];
        checkUdfType(udfClass, addMethod, argsDef.getArgTypes()[i], p.getType(), p.getName());
    }

    Method serializeMethod = allMethods.get(SERIALIZE_METHOD_NAME);
    if (serializeMethod == null) {
        throw new AnalysisException(
                String.format("No method '%s' in class '%s'!", SERIALIZE_METHOD_NAME, udfClassName));
    }
    checkMethodNonStaticAndPublic(SERIALIZE_METHOD_NAME, serializeMethod, udfClassName);
    checkArgumentCount(serializeMethod, 2, udfClassName);
    checkReturnJavaType(udfClassName, serializeMethod, void.class);

    Method mergeMethod = allMethods.get(MERGE_METHOD_NAME);
    if (mergeMethod == null) {
        throw new AnalysisException(
                String.format("No method '%s' in class '%s'!", MERGE_METHOD_NAME, udfClassName));
    }
    checkMethodNonStaticAndPublic(MERGE_METHOD_NAME, mergeMethod, udfClassName);
    checkArgumentCount(mergeMethod, 2, udfClassName);
    checkReturnJavaType(udfClassName, mergeMethod, void.class);

    Method getValueMethod = allMethods.get(GETVALUE_METHOD_NAME);
    if (getValueMethod == null) {
        throw new AnalysisException(
                String.format("No method '%s' in class '%s'!", GETVALUE_METHOD_NAME, udfClassName));
    }
    checkMethodNonStaticAndPublic(GETVALUE_METHOD_NAME, getValueMethod, udfClassName);
    checkArgumentCount(getValueMethod, 1, udfClassName);
    checkReturnUdfType(udfClass, getValueMethod, returnType.getType());

    int stateModifiers = stateClass.getModifiers();
    if (!Modifier.isPublic(stateModifiers) || !Modifier.isStatic(stateModifiers)) {
        throw new AnalysisException(
                String.format(
                        "UDAF '%s' should have one public & static 'State' class to Construction data ",
                        udfClassName));
    }
}
private void checkMethodNonStaticAndPublic(String methoName, Method method, String udfClassName)
throws AnalysisException {
if (Modifier.isStatic(method.getModifiers())) {
@ -529,60 +548,18 @@ public class CreateFunctionStmt extends DdlStmt {
private void analyzeJavaUdf(String clazz) throws AnalysisException {
try {
if (Strings.isNullOrEmpty(userFile)) {
try {
ClassLoader cl = this.getClass().getClassLoader();
checkUdfClass(clazz, cl);
return;
} catch (ClassNotFoundException e) {
throw new AnalysisException("Class [" + clazz + "] not found in classpath");
}
}
URL[] urls = {new URL("jar:" + userFile + "!/")};
try (URLClassLoader cl = URLClassLoader.newInstance(urls)) {
Class udfClass = cl.loadClass(clazz);
List<Method> evalList = Arrays.stream(udfClass.getMethods())
.filter(m -> m.getDeclaringClass().equals(udfClass) && EVAL_METHOD_KEY.equals(m.getName()))
.collect(Collectors.toList());
if (evalList.size() == 0) {
throw new AnalysisException(String.format(
"No method '%s' in class '%s'!", EVAL_METHOD_KEY, udfClass.getCanonicalName()));
}
List<Method> evalNonStaticAndPublicList = evalList.stream()
.filter(m -> !Modifier.isStatic(m.getModifiers()) && Modifier.isPublic(m.getModifiers()))
.collect(Collectors.toList());
if (evalNonStaticAndPublicList.size() == 0) {
throw new AnalysisException(
String.format("Method '%s' in class '%s' should be non-static and public", EVAL_METHOD_KEY,
udfClass.getCanonicalName()));
}
List<Method> evalArgLengthMatchList = evalNonStaticAndPublicList.stream().filter(
m -> m.getParameters().length == argsDef.getArgTypes().length).collect(Collectors.toList());
if (evalArgLengthMatchList.size() == 0) {
throw new AnalysisException(
String.format("The number of parameters for method '%s' in class '%s' should be %d",
EVAL_METHOD_KEY, udfClass.getCanonicalName(), argsDef.getArgTypes().length));
} else if (evalArgLengthMatchList.size() == 1) {
Method method = evalArgLengthMatchList.get(0);
checkUdfType(udfClass, method, returnType.getType(), method.getReturnType(), "return");
for (int i = 0; i < method.getParameters().length; i++) {
Parameter p = method.getParameters()[i];
checkUdfType(udfClass, method, argsDef.getArgTypes()[i], p.getType(), p.getName());
}
} else {
// If multiple methods have the same parameters,
// the error message returned cannot be as specific as a single method
boolean hasError = false;
for (Method method : evalArgLengthMatchList) {
try {
checkUdfType(udfClass, method, returnType.getType(), method.getReturnType(), "return");
for (int i = 0; i < method.getParameters().length; i++) {
Parameter p = method.getParameters()[i];
checkUdfType(udfClass, method, argsDef.getArgTypes()[i], p.getType(), p.getName());
}
hasError = false;
break;
} catch (AnalysisException e) {
hasError = true;
}
}
if (hasError) {
throw new AnalysisException(String.format(
"Multi methods '%s' in class '%s' and no one passed parameter matching verification",
EVAL_METHOD_KEY, udfClass.getCanonicalName()));
}
}
checkUdfClass(clazz, cl);
} catch (ClassNotFoundException e) {
throw new AnalysisException("Class [" + clazz + "] not found in file :" + userFile);
} catch (IOException e) {
@ -593,6 +570,61 @@ public class CreateFunctionStmt extends DdlStmt {
}
}
/**
 * Loads the scalar UDF class {@code clazz} through {@code cl} and verifies that it declares
 * an evaluation method (named by {@code EVAL_METHOD_KEY}) compatible with this statement.
 *
 * <p>Candidates are narrowed in three steps: methods declared directly on the class with the
 * expected name, then those that are public and non-static, then those whose parameter count
 * equals the number of declared argument types. With exactly one remaining candidate, its
 * return and parameter types are checked and any failure propagates unchanged. With several
 * candidates, the first one that passes all type checks is accepted; if none passes, a
 * generic error is raised.
 *
 * @param clazz name of the UDF class, passed as-is to {@link ClassLoader#loadClass(String)}
 * @param cl    class loader used to resolve the UDF class
 * @throws ClassNotFoundException if the class cannot be loaded
 * @throws AnalysisException      if no candidate method satisfies the checks above
 */
private void checkUdfClass(String clazz, ClassLoader cl) throws ClassNotFoundException, AnalysisException {
    Class<?> udfClass = cl.loadClass(clazz);

    // Step 1: methods with the expected name that are declared on the class itself.
    List<Method> declaredEvals = Arrays.stream(udfClass.getMethods())
            .filter(m -> m.getDeclaringClass().equals(udfClass) && EVAL_METHOD_KEY.equals(m.getName()))
            .collect(Collectors.toList());
    if (declaredEvals.isEmpty()) {
        throw new AnalysisException(String.format(
                "No method '%s' in class '%s'!", EVAL_METHOD_KEY, udfClass.getCanonicalName()));
    }

    // Step 2: keep only public instance methods.
    List<Method> publicInstanceEvals = declaredEvals.stream()
            .filter(m -> Modifier.isPublic(m.getModifiers()) && !Modifier.isStatic(m.getModifiers()))
            .collect(Collectors.toList());
    if (publicInstanceEvals.isEmpty()) {
        throw new AnalysisException(
                String.format("Method '%s' in class '%s' should be non-static and public", EVAL_METHOD_KEY,
                        udfClass.getCanonicalName()));
    }

    // Step 3: keep only methods whose arity matches the declared argument list.
    int expectedArgCount = argsDef.getArgTypes().length;
    List<Method> sameArityEvals = publicInstanceEvals.stream()
            .filter(m -> m.getParameters().length == expectedArgCount)
            .collect(Collectors.toList());
    if (sameArityEvals.isEmpty()) {
        throw new AnalysisException(
                String.format("The number of parameters for method '%s' in class '%s' should be %d",
                        EVAL_METHOD_KEY, udfClass.getCanonicalName(), expectedArgCount));
    }

    if (sameArityEvals.size() == 1) {
        // A single candidate: let the specific type-mismatch error reach the caller.
        Method only = sameArityEvals.get(0);
        checkUdfType(udfClass, only, returnType.getType(), only.getReturnType(), "return");
        Parameter[] onlyParams = only.getParameters();
        for (int idx = 0; idx < onlyParams.length; idx++) {
            Parameter param = onlyParams[idx];
            checkUdfType(udfClass, only, argsDef.getArgTypes()[idx], param.getType(), param.getName());
        }
        return;
    }

    // If multiple methods have the same parameters,
    // the error message returned cannot be as specific as a single method
    boolean anyMatched = false;
    for (Method candidate : sameArityEvals) {
        try {
            checkUdfType(udfClass, candidate, returnType.getType(), candidate.getReturnType(), "return");
            Parameter[] candidateParams = candidate.getParameters();
            for (int idx = 0; idx < candidateParams.length; idx++) {
                Parameter param = candidateParams[idx];
                checkUdfType(udfClass, candidate, argsDef.getArgTypes()[idx], param.getType(),
                        param.getName());
            }
            anyMatched = true;
            break;
        } catch (AnalysisException ignored) {
            // This overload does not match; fall through and try the next candidate.
        }
    }
    if (!anyMatched) {
        throw new AnalysisException(String.format(
                "Multi methods '%s' in class '%s' and no one passed parameter matching verification",
                EVAL_METHOD_KEY, udfClass.getCanonicalName()));
    }
}
private void checkUdfType(Class clazz, Method method, Type expType, Class pType, String pname)
throws AnalysisException {
Set<Class> javaTypes;

View File

@ -151,7 +151,7 @@ public class JavaUdaf extends AggregateFunction implements ExplicitlyCastableSig
JavaUdaf udaf = new JavaUdaf(fnName, aggregate.getId(), dbName, aggregate.getBinaryType(), sig,
intermediateType,
aggregate.getNullableMode(),
aggregate.getLocation().getLocation(),
aggregate.getLocation() == null ? null : aggregate.getLocation().getLocation(),
aggregate.getSymbolName(),
aggregate.getInitFnSymbol(),
aggregate.getUpdateFnSymbol(),
@ -182,7 +182,7 @@ public class JavaUdaf extends AggregateFunction implements ExplicitlyCastableSig
signature.returnType.toCatalogDataType(),
signature.hasVarArgs,
intermediateType.toCatalogDataType(),
URI.create(objectFile),
objectFile == null ? null : URI.create(objectFile),
initFn,
updateFn,
mergeFn,

View File

@ -129,7 +129,7 @@ public class JavaUdf extends ScalarFunction implements ExplicitlyCastableSignatu
JavaUdf udf = new JavaUdf(fnName, scalar.getId(), dbName, scalar.getBinaryType(), sig,
scalar.getNullableMode(),
scalar.getLocation().getLocation(),
scalar.getLocation() == null ? null : scalar.getLocation().getLocation(),
scalar.getSymbolName(),
scalar.getPrepareFnSymbol(),
scalar.getCloseFnSymbol(),
@ -154,7 +154,7 @@ public class JavaUdf extends ScalarFunction implements ExplicitlyCastableSignatu
signature.argumentsTypes.stream().map(DataType::toCatalogDataType).toArray(Type[]::new),
signature.returnType.toCatalogDataType(),
signature.hasVarArgs,
URI.create(objectFile),
objectFile == null ? null : URI.create(objectFile),
symbol,
prepareFn,
closeFn