[Enhancement](java-udf) java-udf module split to sub modules (#20185)

The java-udf module has become increasingly large and difficult to manage, making it inconvenient to package and use as needed. It needs to be split into multiple sub-modules, such as: java-common, java-udf, jdbc-scanner, hudi-scanner, paimon-scanner.

Co-authored-by: lexluo <lexluo@tencent.com>
This commit is contained in:
lexluo09
2023-06-13 09:41:22 +08:00
committed by GitHub
parent 51bbf17786
commit 57656b2459
64 changed files with 908 additions and 218 deletions

View File

@ -0,0 +1,74 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<!-- POM for the java-udf sub-module, split out of the former monolithic java-udf
     module. Shared executor/utility code lives in the sibling java-common module. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>be-java-extensions</artifactId>
        <groupId>org.apache.doris</groupId>
        <!-- ${revision} is resolved by the flatten/ci-friendly versioning of the parent build. -->
        <version>${revision}</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>
    <artifactId>java-udf</artifactId>
    <properties>
        <!-- UDF jars must stay runnable on Java 8 backends. -->
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.apache.doris</groupId>
            <artifactId>java-common</artifactId>
            <version>${project.version}</version>
        </dependency>
    </dependencies>
    <build>
        <finalName>java-udf</finalName>
        <plugins>
            <!-- Builds the fat jar consumed by the BE; layout comes from package.xml. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <descriptors>
                        <descriptor>src/main/resources/package.xml</descriptor>
                    </descriptors>
                    <archive>
                        <manifest>
                            <!-- Library jar: no entry point. -->
                            <mainClass></mainClass>
                        </manifest>
                    </archive>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>

View File

@ -0,0 +1,331 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.udf;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.Pair;
import org.apache.doris.common.exception.UdfRuntimeException;
import org.apache.doris.common.jni.utils.UdfUtils;
import org.apache.doris.common.jni.utils.UdfUtils.JavaUdfDataType;
import org.apache.doris.thrift.TJavaUdfExecutorCtorParams;
import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
import org.apache.log4j.Logger;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.lang.reflect.Constructor;
import java.lang.reflect.Method;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.HashMap;
/**
 * Executor for Java UDAFs (user-defined aggregate functions).
 *
 * <p>The backend (BE) drives this class through JNI: {@link #add} accumulates rows into
 * per-group state objects, {@link #serialize}/{@link #merge} exchange intermediate state
 * between nodes, and {@link #getValue} materializes the final result into the BE's output
 * column. Aggregate state objects are keyed by the BE-supplied "place" address.
 */
public class UdafExecutor extends BaseExecutor {
    private static final Logger LOG = Logger.getLogger(UdafExecutor.class);

    // Native address of the BE array holding one aggregate-state "place" pointer per row.
    private long inputPlacesPtr;
    // Reflected UDAF entry points (create/add/merge/serialize/...), keyed by method name.
    private HashMap<String, Method> allMethods;
    // Maps a BE "place" address to the Java state object created for that aggregation group.
    private HashMap<Long, Object> stateObjMap;
    // Declared return type of the UDAF's result function; used when storing the result.
    private Class retClass;

    /**
     * Creates an executor from a thrift-serialized {@code TJavaUdfExecutorCtorParams}.
     */
    public UdafExecutor(byte[] thriftParams) throws Exception {
        super(thriftParams);
    }

    /**
     * Releases the reflected method table; state destruction happens in {@link #destroy()}.
     */
    @Override
    public void close() {
        allMethods = null;
        super.close();
    }

    /**
     * Invokes the UDAF's add function for every row in {@code [rowStart, rowEnd)}.
     *
     * @param isSinglePlace true when all rows aggregate into the single place at
     *                      {@code inputPlacesPtr[0]}; false when each row carries its own place
     * @param rowStart      first row index (inclusive)
     * @param rowEnd        last row index (exclusive)
     */
    public void add(boolean isSinglePlace, long rowStart, long rowEnd) throws UdfRuntimeException {
        try {
            long idx = rowStart;
            // A while loop (not do-while) so an empty range processes no rows at all.
            while (idx < rowEnd) {
                Long curPlace;
                if (isSinglePlace) {
                    curPlace = UdfUtils.UNSAFE.getLong(null, UdfUtils.UNSAFE.getLong(null, inputPlacesPtr));
                } else {
                    // places array holds 8-byte pointers, one per row.
                    curPlace = UdfUtils.UNSAFE.getLong(null,
                            UdfUtils.UNSAFE.getLong(null, inputPlacesPtr) + 8L * idx);
                }
                // inputArgs[0] is the state object; the row's column values follow.
                Object[] inputArgs = new Object[argTypes.length + 1];
                Object state = stateObjMap.get(curPlace);
                if (state != null) {
                    inputArgs[0] = state;
                } else {
                    Object newState = createAggState();
                    stateObjMap.put(curPlace, newState);
                    inputArgs[0] = newState;
                }
                // In the single-place case the state lookup above is hoisted and this inner
                // loop consumes the whole batch; otherwise it runs exactly once per row.
                do {
                    Object[] inputObjects = allocateInputObjects(idx, 1);
                    for (int i = 0; i < argTypes.length; ++i) {
                        inputArgs[i + 1] = inputObjects[i];
                    }
                    allMethods.get(UDAF_ADD_FUNCTION).invoke(udf, inputArgs);
                    idx++;
                } while (isSinglePlace && idx < rowEnd);
            }
        } catch (Exception e) {
            // Log the exception itself: e.getCause() may be null for non-reflective
            // failures (e.g. IllegalAccessException) and would NPE here.
            LOG.warn("invoke add function meet some error", e);
            throw new UdfRuntimeException("UDAF failed to add: ", e);
        }
    }

    /**
     * Invokes the user's create function and returns a fresh aggregate state object.
     */
    public Object createAggState() throws UdfRuntimeException {
        try {
            // No-arg invocation; passing no array avoids the varargs-null ambiguity warning.
            return allMethods.get(UDAF_CREATE_FUNCTION).invoke(udf);
        } catch (Exception e) {
            LOG.warn("invoke createAggState function meet some error", e);
            throw new UdfRuntimeException("UDAF failed to create: ", e);
        }
    }

    /**
     * Invokes destroy on every live state object before close. All states are destroyed at once.
     */
    public void destroy() throws UdfRuntimeException {
        try {
            for (Object obj : stateObjMap.values()) {
                allMethods.get(UDAF_DESTROY_FUNCTION).invoke(udf, obj);
            }
            stateObjMap.clear();
        } catch (Exception e) {
            LOG.warn("invoke destroy function meet some error", e);
            throw new UdfRuntimeException("UDAF failed to destroy: ", e);
        }
    }

    /**
     * Serializes the state at {@code place} and returns the bytes for the backend.
     */
    public byte[] serialize(long place) throws UdfRuntimeException {
        try {
            Object[] args = new Object[2];
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            args[0] = stateObjMap.get(place);
            args[1] = new DataOutputStream(baos);
            allMethods.get(UDAF_SERIALIZE_FUNCTION).invoke(udf, args);
            return baos.toByteArray();
        } catch (Exception e) {
            LOG.warn("invoke serialize function meet some error", e);
            throw new UdfRuntimeException("UDAF failed to serialize: ", e);
        }
    }

    /**
     * Invokes the reset function to return the state at {@code place} to its initial value.
     * A place with no state yet is a no-op.
     */
    public void reset(long place) throws UdfRuntimeException {
        try {
            Object[] args = new Object[1];
            args[0] = stateObjMap.get(place);
            if (args[0] == null) {
                return;
            }
            allMethods.get(UDAF_RESET_FUNCTION).invoke(udf, args);
        } catch (Exception e) {
            LOG.warn("invoke reset function meet some error", e);
            throw new UdfRuntimeException("UDAF failed to reset: ", e);
        }
    }

    /**
     * Deserializes {@code data} into a scratch state, then merges it into the state at
     * {@code place} (creating that state if absent).
     */
    public void merge(long place, byte[] data) throws UdfRuntimeException {
        try {
            Object[] args = new Object[2];
            ByteArrayInputStream bins = new ByteArrayInputStream(data);
            args[0] = createAggState();
            args[1] = new DataInputStream(bins);
            allMethods.get(UDAF_DESERIALIZE_FUNCTION).invoke(udf, args);
            // Merge signature is (target, source): the deserialized scratch state
            // becomes the source, the per-place state becomes the target.
            args[1] = args[0];
            Long curPlace = place;
            Object state = stateObjMap.get(curPlace);
            if (state != null) {
                args[0] = state;
            } else {
                Object newState = createAggState();
                stateObjMap.put(curPlace, newState);
                args[0] = newState;
            }
            allMethods.get(UDAF_MERGE_FUNCTION).invoke(udf, args);
        } catch (Exception e) {
            LOG.warn("invoke merge function meet some error", e);
            throw new UdfRuntimeException("UDAF failed to merge: ", e);
        }
    }

    /**
     * Invokes the result function for {@code place} and stores the final value at output
     * row {@code row}.
     *
     * @return whether the value was stored (see {@link #storeUdfResult})
     */
    public boolean getValue(long row, long place) throws UdfRuntimeException {
        try {
            if (stateObjMap.get(place) == null) {
                stateObjMap.put(place, createAggState());
            }
            return storeUdfResult(allMethods.get(UDAF_RESULT_FUNCTION).invoke(udf, stateObjMap.get(place)),
                    row, retClass);
        } catch (Exception e) {
            LOG.warn("invoke getValue function meet some error", e);
            throw new UdfRuntimeException("UDAF failed to result", e);
        }
    }

    @Override
    protected boolean storeUdfResult(Object obj, long row, Class retClass) throws UdfRuntimeException {
        if (obj == null) {
            // A null-map pointer of -1 marks a NOT NULL output column; storing null there
            // is an error. Otherwise the BE has already inserted the default value, so
            // returning true without writing anything is correct.
            if (UdfUtils.UNSAFE.getLong(null, outputNullPtr) == -1) {
                throw new UdfRuntimeException("UDAF failed to store null data to not null column");
            }
            return true;
        }
        return super.storeUdfResult(obj, row, retClass);
    }

    @Override
    protected long getCurrentOutputOffset(long row, boolean isArrayType) {
        // Offsets of the previous row (row - 1): 8-byte entries for arrays, 4-byte for strings.
        if (isArrayType) {
            return Integer.toUnsignedLong(
                    UdfUtils.UNSAFE.getInt(null, UdfUtils.UNSAFE.getLong(null, outputOffsetsPtr) + 8L * (row - 1)));
        } else {
            return Integer.toUnsignedLong(
                    UdfUtils.UNSAFE.getInt(null, UdfUtils.UNSAFE.getLong(null, outputOffsetsPtr) + 4L * (row - 1)));
        }
    }

    /**
     * Loads the UDAF class named by the aggregate function symbol, reflects its entry
     * points into {@code allMethods}, and resolves return/argument type mappings.
     */
    @Override
    protected void init(TJavaUdfExecutorCtorParams request, String jarPath, Type funcRetType,
            Type... parameterTypes) throws UdfRuntimeException {
        String className = request.fn.aggregate_fn.symbol;
        inputPlacesPtr = request.input_places_ptr;
        allMethods = new HashMap<>();
        stateObjMap = new HashMap<>();
        ArrayList<String> signatures = Lists.newArrayList();
        try {
            ClassLoader loader;
            if (jarPath != null) {
                ClassLoader parent = getClass().getClassLoader();
                classLoader = UdfUtils.getClassLoader(jarPath, parent);
                loader = classLoader;
            } else {
                // for test
                loader = ClassLoader.getSystemClassLoader();
            }
            Class<?> c = Class.forName(className, true, loader);
            Constructor<?> ctor = c.getConstructor();
            udf = ctor.newInstance();
            Method[] methods = c.getDeclaredMethods();
            int idx;
            for (idx = 0; idx < methods.length; ++idx) {
                signatures.add(methods[idx].toGenericString());
                switch (methods[idx].getName()) {
                    case UDAF_DESTROY_FUNCTION:
                    case UDAF_CREATE_FUNCTION:
                    case UDAF_MERGE_FUNCTION:
                    case UDAF_SERIALIZE_FUNCTION:
                    case UDAF_RESET_FUNCTION:
                    case UDAF_DESERIALIZE_FUNCTION: {
                        allMethods.put(methods[idx].getName(), methods[idx]);
                        break;
                    }
                    case UDAF_RESULT_FUNCTION: {
                        allMethods.put(methods[idx].getName(), methods[idx]);
                        Pair<Boolean, JavaUdfDataType> returnType = UdfUtils.setReturnType(funcRetType,
                                methods[idx].getReturnType());
                        if (!returnType.first) {
                            LOG.debug("result function set return parameterTypes has error");
                        } else {
                            retType = returnType.second;
                            retClass = methods[idx].getReturnType();
                        }
                        break;
                    }
                    case UDAF_ADD_FUNCTION: {
                        allMethods.put(methods[idx].getName(), methods[idx]);
                        argClass = methods[idx].getParameterTypes();
                        // First parameter of add() is the state object, hence the +1.
                        if (argClass.length != parameterTypes.length + 1) {
                            LOG.debug("add function parameterTypes length not equal " + argClass.length + " "
                                    + parameterTypes.length + " " + methods[idx].getName());
                        }
                        if (!(parameterTypes.length == 0)) {
                            Pair<Boolean, JavaUdfDataType[]> inputType = UdfUtils.setArgTypes(parameterTypes,
                                    argClass, true);
                            if (!inputType.first) {
                                LOG.debug("add function set arg parameterTypes has error");
                            } else {
                                argTypes = inputType.second;
                            }
                        } else {
                            // Special case where the UDF doesn't take any input args
                            argTypes = new JavaUdfDataType[0];
                        }
                        break;
                    }
                    default:
                        break;
                }
            }
            // NOTE(review): the loop above always runs to completion, so idx == methods.length
            // always holds and the "unable to find" error below is unreachable. Kept for
            // behavior compatibility; presence of the required UDAF methods is not validated
            // here — a missing method surfaces later as an NPE on allMethods.get(...).
            if (idx == methods.length) {
                return;
            }
            StringBuilder sb = new StringBuilder();
            sb.append("Unable to find evaluate function with the correct signature: ").append(className + ".evaluate(")
                    .append(Joiner.on(", ").join(parameterTypes)).append(")\n").append("UDF contains: \n    ")
                    .append(Joiner.on("\n    ").join(signatures));
            throw new UdfRuntimeException(sb.toString());
        } catch (MalformedURLException e) {
            throw new UdfRuntimeException("Unable to load jar.", e);
        } catch (SecurityException e) {
            throw new UdfRuntimeException("Unable to load function.", e);
        } catch (ClassNotFoundException e) {
            throw new UdfRuntimeException("Unable to find class.", e);
        } catch (NoSuchMethodException e) {
            throw new UdfRuntimeException("Unable to find constructor with no arguments.", e);
        } catch (IllegalArgumentException e) {
            throw new UdfRuntimeException("Unable to call UDAF constructor with no arguments.", e);
        } catch (Exception e) {
            throw new UdfRuntimeException("Unable to call create UDAF instance.", e);
        }
    }
}

View File

@ -0,0 +1,247 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.udf;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.Pair;
import org.apache.doris.common.exception.UdfRuntimeException;
import org.apache.doris.common.jni.utils.UdfUtils;
import org.apache.doris.common.jni.utils.UdfUtils.JavaUdfDataType;
import org.apache.doris.thrift.TJavaUdfExecutorCtorParams;
import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
import org.apache.log4j.Logger;
import java.lang.reflect.Constructor;
import java.lang.reflect.Method;
import java.net.MalformedURLException;
import java.util.ArrayList;
/**
 * Executor for Java scalar UDFs. The backend serializes constructor parameters via thrift;
 * {@link #evaluate()} then processes one batch of rows per call, reading inputs from and
 * writing results to BE-owned off-heap buffers through {@code UdfUtils.UNSAFE}.
 */
public class UdfExecutor extends BaseExecutor {
    private static final Logger LOG = Logger.getLogger(UdfExecutor.class);

    // setup by init() and cleared by close()
    private Method method;

    // Pre-constructed input objects for the UDF. This minimizes object creation overhead
    // as these objects are reused across calls to evaluate().
    private Object[] inputObjects;

    // Byte offset of the next variable-size value in the output buffer; persists across
    // evaluate() calls so a batch can be resumed after the BE grows the buffer.
    private long outputOffset;
    // Row at which to resume a partially-evaluated batch (variable-size return types only).
    private long rowIdx;
    // Native address where the BE publishes the current batch size (as a 4-byte int).
    private long batchSizePtr;

    /**
     * Create a UdfExecutor, using parameters from a serialized thrift object. Used by
     * the backend.
     */
    public UdfExecutor(byte[] thriftParams) throws Exception {
        super(thriftParams);
    }

    /**
     * Close the class loader we may have created.
     */
    @Override
    public void close() {
        // We are now un-usable (because the class loader has been
        // closed), so null out method_ and classLoader_.
        method = null;
        super.close();
    }

    /**
     * evaluate function called by the backend. The inputs to the UDF have
     * been serialized to 'input'
     */
    public void evaluate() throws UdfRuntimeException {
        int batchSize = UdfUtils.UNSAFE.getInt(null, batchSizePtr);
        try {
            if (retType.equals(JavaUdfDataType.STRING) || retType.equals(JavaUdfDataType.VARCHAR)
                    || retType.equals(JavaUdfDataType.CHAR) || retType.equals(JavaUdfDataType.ARRAY_TYPE)) {
                // If this udf return variable-size type (e.g.) String, we have to allocate output
                // buffer multiple times until buffer size is enough to store output column. So we
                // always begin with the last evaluated row instead of beginning of this batch.
                // NOTE(review): offset +8 into outputIntermediateStatePtr appears to be where
                // the resume row index lives — confirm against the BE-side struct layout.
                rowIdx = UdfUtils.UNSAFE.getLong(null, outputIntermediateStatePtr + 8);
                if (rowIdx == 0) {
                    outputOffset = 0L;
                }
            } else {
                rowIdx = 0;
            }
            for (; rowIdx < batchSize; rowIdx++) {
                inputObjects = allocateInputObjects(rowIdx, 0);
                // `storeUdfResult` is called to store udf result to output column. If true
                // is returned, current value is stored successfully. Otherwise, current result is
                // not processed successfully (e.g. current output buffer is not large enough) so
                // we break this loop directly.
                if (!storeUdfResult(evaluate(inputObjects), rowIdx, method.getReturnType())) {
                    // Record where to resume once the BE reallocates the output buffer.
                    UdfUtils.UNSAFE.putLong(null, outputIntermediateStatePtr + 8, rowIdx);
                    return;
                }
            }
        } catch (Exception e) {
            // Mark the whole batch consumed so the BE does not retry the failing row forever.
            if (retType.equals(JavaUdfDataType.STRING) || retType.equals(JavaUdfDataType.ARRAY_TYPE)) {
                UdfUtils.UNSAFE.putLong(null, outputIntermediateStatePtr + 8, batchSize);
            }
            throw new UdfRuntimeException("UDF::evaluate() ran into a problem.", e);
        }
        if (retType.equals(JavaUdfDataType.STRING) || retType.equals(JavaUdfDataType.ARRAY_TYPE)) {
            UdfUtils.UNSAFE.putLong(null, outputIntermediateStatePtr + 8, rowIdx);
        }
    }

    /**
     * Evaluates the UDF with 'args' as the input to the UDF.
     */
    private Object evaluate(Object... args) throws UdfRuntimeException {
        try {
            return method.invoke(udf, args);
        } catch (Exception e) {
            throw new UdfRuntimeException("UDF failed to evaluate", e);
        }
    }

    /** Returns the resolved user {@code evaluate} method (null after {@link #close()}). */
    public Method getMethod() {
        return method;
    }

    // Sets the result object 'obj' into the outputBufferPtr and outputNullPtr_
    @Override
    protected boolean storeUdfResult(Object obj, long row, Class retClass) throws UdfRuntimeException {
        if (obj == null) {
            // A null-map pointer of -1 marks a NOT NULL output column; null is then an error.
            if (UdfUtils.UNSAFE.getLong(null, outputNullPtr) == -1) {
                throw new UdfRuntimeException("UDF failed to store null data to not null column");
            }
            UdfUtils.UNSAFE.putByte(null, UdfUtils.UNSAFE.getLong(null, outputNullPtr) + row, (byte) 1);
            // Variable-size types still need an offsets entry for the (empty) null value:
            // 4-byte entries for strings, 8-byte entries for arrays.
            if (retType.equals(JavaUdfDataType.STRING)) {
                UdfUtils.UNSAFE.putInt(null, UdfUtils.UNSAFE.getLong(null, outputOffsetsPtr)
                        + 4L * row, Integer.parseUnsignedInt(String.valueOf(outputOffset)));
            } else if (retType.equals(JavaUdfDataType.ARRAY_TYPE)) {
                UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null, outputOffsetsPtr) + 8L * row,
                        Long.parseUnsignedLong(String.valueOf(outputOffset)));
            }
            return true;
        }
        return super.storeUdfResult(obj, row, retClass);
    }

    @Override
    protected long getCurrentOutputOffset(long row, boolean isArrayType) {
        // Scalar UDFs track the offset in a field rather than re-reading the offsets column.
        return outputOffset;
    }

    @Override
    protected void updateOutputOffset(long offset) {
        outputOffset = offset;
    }

    // Preallocate the input objects that will be passed to the underlying UDF.
    // These objects are allocated once and reused across calls to evaluate()
    /**
     * Loads the UDF class named by the scalar function symbol and resolves the user's
     * {@code evaluate} overload whose signature matches {@code parameterTypes}.
     *
     * @throws UdfRuntimeException if the jar/class/method cannot be loaded or no
     *                             overload matches
     */
    @Override
    protected void init(TJavaUdfExecutorCtorParams request, String jarPath, Type funcRetType,
            Type... parameterTypes) throws UdfRuntimeException {
        String className = request.fn.scalar_fn.symbol;
        batchSizePtr = request.batch_size_ptr;
        outputOffset = 0L;
        rowIdx = 0L;
        ArrayList<String> signatures = Lists.newArrayList();
        try {
            LOG.debug("Loading UDF '" + className + "' from " + jarPath);
            ClassLoader loader;
            if (jarPath != null) {
                // Save for cleanup.
                ClassLoader parent = getClass().getClassLoader();
                classLoader = UdfUtils.getClassLoader(jarPath, parent);
                loader = classLoader;
            } else {
                // for test
                loader = ClassLoader.getSystemClassLoader();
            }
            Class<?> c = Class.forName(className, true, loader);
            Constructor<?> ctor = c.getConstructor();
            udf = ctor.newInstance();
            Method[] methods = c.getMethods();
            for (Method m : methods) {
                // By convention, the udf must contain the function "evaluate"
                if (!m.getName().equals(UDF_FUNCTION_NAME)) {
                    continue;
                }
                signatures.add(m.toGenericString());
                argClass = m.getParameterTypes();
                // Try to match the arguments
                if (argClass.length != parameterTypes.length) {
                    continue;
                }
                method = m;
                Pair<Boolean, JavaUdfDataType> returnType;
                if (argClass.length == 0 && parameterTypes.length == 0) {
                    // Special case where the UDF doesn't take any input args
                    returnType = UdfUtils.setReturnType(funcRetType, m.getReturnType());
                    if (!returnType.first) {
                        continue;
                    } else {
                        retType = returnType.second;
                    }
                    argTypes = new JavaUdfDataType[0];
                    LOG.debug("Loaded UDF '" + className + "' from " + jarPath);
                    return;
                }
                returnType = UdfUtils.setReturnType(funcRetType, m.getReturnType());
                if (!returnType.first) {
                    continue;
                } else {
                    retType = returnType.second;
                }
                Pair<Boolean, JavaUdfDataType[]> inputType = UdfUtils.setArgTypes(parameterTypes, argClass, false);
                if (!inputType.first) {
                    continue;
                } else {
                    argTypes = inputType.second;
                }
                LOG.debug("Loaded UDF '" + className + "' from " + jarPath);
                return;
            }
            // No overload matched: report every evaluate signature the class exposes.
            StringBuilder sb = new StringBuilder();
            sb.append("Unable to find evaluate function with the correct signature: ")
                    .append(className + ".evaluate(")
                    .append(Joiner.on(", ").join(parameterTypes))
                    .append(")\n")
                    .append("UDF contains: \n    ")
                    .append(Joiner.on("\n    ").join(signatures));
            throw new UdfRuntimeException(sb.toString());
        } catch (MalformedURLException e) {
            throw new UdfRuntimeException("Unable to load jar.", e);
        } catch (SecurityException e) {
            throw new UdfRuntimeException("Unable to load function.", e);
        } catch (ClassNotFoundException e) {
            throw new UdfRuntimeException("Unable to find class.", e);
        } catch (NoSuchMethodException e) {
            throw new UdfRuntimeException(
                    "Unable to find constructor with no arguments.", e);
        } catch (IllegalArgumentException e) {
            throw new UdfRuntimeException(
                    "Unable to call UDF constructor with no arguments.", e);
        } catch (Exception e) {
            throw new UdfRuntimeException("Unable to call create UDF instance.", e);
        }
    }
}

View File

@ -0,0 +1,41 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<!-- Assembly descriptor producing the java-udf fat jar: every runtime dependency is
     unpacked and merged into a single flat jar for deployment alongside the BE. -->
<assembly xmlns="http://maven.apache.org/ASSEMBLY/2.0.0"
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xsi:schemaLocation="http://maven.apache.org/ASSEMBLY/2.0.0 http://maven.apache.org/xsd/assembly-2.0.0.xsd">
    <id>jar-with-dependencies</id>
    <formats>
        <format>jar</format>
    </formats>
    <includeBaseDirectory>false</includeBaseDirectory>
    <dependencySets>
        <dependencySet>
            <outputDirectory>/</outputDirectory>
            <useProjectArtifact>true</useProjectArtifact>
            <unpack>true</unpack>
            <scope>runtime</scope>
            <unpackOptions>
                <excludes>
                    <!-- Drop per-jar Log4j2 plugin caches; merged copies from multiple
                         dependencies would conflict in the flat jar. -->
                    <exclude>**/Log4j2Plugins.dat</exclude>
                </excludes>
            </unpackOptions>
        </dependencySet>
    </dependencySets>
</assembly>

View File

@ -0,0 +1,24 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.udf;
/** Test UDF that takes no arguments and always yields the constant 1. */
public class ConstantOneUdf {
    /** Returns the fixed value 1, regardless of input rows. */
    public int evaluate() {
        final int constantValue = 1;
        return constantValue;
    }
}

View File

@ -0,0 +1,30 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.udf;
import java.time.LocalDateTime;
/** Test UDF exercising the DATETIME argument mapping. */
public class DateTimeUdf {
    /**
     * Extracts the year component from a datetime value.
     *
     * @param a the input datetime
     * @return the year of {@code a}
     */
    public int evaluate(LocalDateTime a) {
        final int year = a.getYear();
        return year;
    }
}

View File

@ -0,0 +1,31 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.udf;
import java.math.BigDecimal;
/** Test UDF exercising the DECIMAL argument and return mapping. */
public class DecimalUdf {
    /**
     * Adds two decimal values.
     *
     * @param a first addend
     * @param b second addend
     * @return the exact sum of {@code a} and {@code b}
     */
    public BigDecimal evaluate(BigDecimal a, BigDecimal b) {
        BigDecimal sum = a.add(b);
        return sum;
    }
}

View File

@ -0,0 +1,31 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.udf;
import java.math.BigInteger;
/** Test UDF exercising the LARGEINT argument and return mapping. */
public class LargeIntUdf {
    /**
     * Adds two largeint values.
     *
     * @param a first addend
     * @param b second addend
     * @return the exact sum of {@code a} and {@code b}
     */
    public BigInteger evaluate(BigInteger a, BigInteger b) {
        BigInteger sum = a.add(b);
        return sum;
    }
}

View File

@ -0,0 +1,24 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.udf;
/** Test UDF mixing a nullable boxed argument with a primitive one. */
public class SimpleAddUdf {
    /**
     * Adds two ints, propagating null for the nullable first argument.
     *
     * @param a nullable first addend
     * @param b second addend
     * @return {@code a + b}, or null when {@code a} is null
     */
    public Integer evaluate(Integer a, int b) {
        if (a == null) {
            return null;
        }
        return a + b;
    }
}

View File

@ -0,0 +1,24 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.udf;
/** Test UDF exercising the STRING argument and return mapping. */
public class StringConcatUdf {
    /**
     * Concatenates two strings, propagating null when either input is null.
     *
     * @param a first part
     * @param b second part
     * @return {@code a} followed by {@code b}, or null if either is null
     */
    public String evaluate(String a, String b) {
        if (a == null || b == null) {
            return null;
        }
        return a.concat(b);
    }
}

View File

@ -0,0 +1,600 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.udf;
import org.apache.doris.common.jni.utils.UdfUtils;
import org.apache.doris.thrift.TFunction;
import org.apache.doris.thrift.TFunctionBinaryType;
import org.apache.doris.thrift.TFunctionName;
import org.apache.doris.thrift.TJavaUdfExecutorCtorParams;
import org.apache.doris.thrift.TPrimitiveType;
import org.apache.doris.thrift.TScalarFunction;
import org.apache.doris.thrift.TScalarType;
import org.apache.doris.thrift.TTypeDesc;
import org.apache.doris.thrift.TTypeNode;
import org.apache.doris.thrift.TTypeNodeType;
import org.apache.thrift.TSerializer;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.junit.Test;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
public class UdfExecutorTest {
@Test
public void testDateTimeUdf() throws Exception {
    // Describe a scalar Java UDF: DATETIME -> INT (DateTimeUdf.evaluate returns the year).
    TScalarFunction scalarFunction = new TScalarFunction();
    scalarFunction.symbol = "org.apache.doris.udf.DateTimeUdf";
    TFunction fn = new TFunction();
    fn.setBinaryType(TFunctionBinaryType.JAVA_UDF);
    TTypeNode typeNode = new TTypeNode(TTypeNodeType.SCALAR);
    typeNode.setScalarType(new TScalarType(TPrimitiveType.INT));
    fn.setRetType(new TTypeDesc(Collections.singletonList(typeNode)));
    TTypeNode typeNodeArg = new TTypeNode(TTypeNodeType.SCALAR);
    typeNodeArg.setScalarType(new TScalarType(TPrimitiveType.DATETIME));
    TTypeDesc typeDescArg = new TTypeDesc(Collections.singletonList(typeNodeArg));
    fn.arg_types = Arrays.asList(typeDescArg);
    fn.scalar_fn = scalarFunction;
    fn.name = new TFunctionName("DateTimeUdf");
    // Simulate the BE-side batch-size slot: a 4-byte off-heap int.
    long batchSizePtr = UdfUtils.UNSAFE.allocateMemory(4);
    int batchSize = 10;
    UdfUtils.UNSAFE.putInt(batchSizePtr, batchSize);
    TJavaUdfExecutorCtorParams params = new TJavaUdfExecutorCtorParams();
    params.setBatchSizePtr(batchSizePtr);
    params.setFn(fn);
    // Output column: pointer slots, a 4-byte-per-row value buffer, and a 1-byte-per-row null map.
    long outputBufferPtr = UdfUtils.UNSAFE.allocateMemory(8);
    long outputNullPtr = UdfUtils.UNSAFE.allocateMemory(8);
    long outputBuffer = UdfUtils.UNSAFE.allocateMemory(4 * batchSize);
    long outputNull = UdfUtils.UNSAFE.allocateMemory(batchSize);
    UdfUtils.UNSAFE.putLong(outputBufferPtr, outputBuffer);
    UdfUtils.UNSAFE.putLong(outputNullPtr, outputNull);
    params.setOutputBufferPtr(outputBufferPtr);
    params.setOutputNullPtr(outputNullPtr);
    // Single input column: 8 bytes per datetime value plus a 1-byte-per-row null map.
    int numCols = 1;
    long inputBufferPtr = UdfUtils.UNSAFE.allocateMemory(8 * numCols);
    long inputNullPtr = UdfUtils.UNSAFE.allocateMemory(8 * numCols);
    long inputBuffer1 = UdfUtils.UNSAFE.allocateMemory(8 * batchSize);
    long inputNull1 = UdfUtils.UNSAFE.allocateMemory(batchSize);
    UdfUtils.UNSAFE.putLong(inputBufferPtr, inputBuffer1);
    UdfUtils.UNSAFE.putLong(inputNullPtr, inputNull1);
    // Raw Doris packed-datetime encodings; presumably these decode to the same date/time
    // in years 2000..2009 (matching the assertion below) — verify against the codec.
    long[] inputLongDateTime =
            new long[] {562960991655690406L, 563242466632401062L, 563523941609111718L, 563805416585822374L,
                    564086891562533030L, 564368366539243686L, 564649841515954342L, 564931316492664998L,
                    565212791469375654L, 565494266446086310L};
    for (int i = 0; i < batchSize; ++i) {
        UdfUtils.UNSAFE.putLong(null, inputBuffer1 + i * 8, inputLongDateTime[i]);
        // 0 = not null for every row.
        UdfUtils.UNSAFE.putByte(null, inputNull1 + i, (byte) 0);
    }
    params.setInputBufferPtrs(inputBufferPtr);
    params.setInputNullsPtrs(inputNullPtr);
    // Fixed-size input type: no offsets column needed.
    params.setInputOffsetsPtrs(0);
    // Round-trip the params through thrift exactly as the BE would.
    TBinaryProtocol.Factory factory = new TBinaryProtocol.Factory();
    TSerializer serializer = new TSerializer(factory);
    UdfExecutor executor = new UdfExecutor(serializer.serialize(params));
    executor.evaluate();
    // Row i should be non-null with year 2000 + i.
    for (int i = 0; i < batchSize; ++i) {
        assert (UdfUtils.UNSAFE.getByte(outputNull + i) == 0);
        assert (UdfUtils.UNSAFE.getInt(outputBuffer + 4 * i) == (2000 + i));
    }
}
@Test
public void testDecimalUdf() throws Exception {
TScalarFunction scalarFunction = new TScalarFunction();
scalarFunction.symbol = "org.apache.doris.udf.DecimalUdf";
TFunction fn = new TFunction();
fn.binary_type = TFunctionBinaryType.JAVA_UDF;
TTypeNode typeNode = new TTypeNode(TTypeNodeType.SCALAR);
TScalarType scalarType = new TScalarType(TPrimitiveType.DECIMALV2);
scalarType.setScale(9);
scalarType.setPrecision(27);
typeNode.scalar_type = scalarType;
TTypeDesc typeDesc = new TTypeDesc(Collections.singletonList(typeNode));
fn.ret_type = typeDesc;
fn.arg_types = Arrays.asList(typeDesc, typeDesc);
fn.scalar_fn = scalarFunction;
fn.name = new TFunctionName("DecimalUdf");
long batchSizePtr = UdfUtils.UNSAFE.allocateMemory(8);
int batchSize = 10;
UdfUtils.UNSAFE.putInt(batchSizePtr, batchSize);
TJavaUdfExecutorCtorParams params = new TJavaUdfExecutorCtorParams();
params.setBatchSizePtr(batchSizePtr);
params.setFn(fn);
long outputBufferPtr = UdfUtils.UNSAFE.allocateMemory(8);
long outputNullPtr = UdfUtils.UNSAFE.allocateMemory(8);
long outputBuffer = UdfUtils.UNSAFE.allocateMemory(16 * batchSize);
long outputNull = UdfUtils.UNSAFE.allocateMemory(batchSize);
UdfUtils.UNSAFE.putLong(outputBufferPtr, outputBuffer);
UdfUtils.UNSAFE.putLong(outputNullPtr, outputNull);
params.setOutputBufferPtr(outputBufferPtr);
params.setOutputNullPtr(outputNullPtr);
int numCols = 2;
long inputBufferPtr = UdfUtils.UNSAFE.allocateMemory(8 * numCols);
long inputNullPtr = UdfUtils.UNSAFE.allocateMemory(8 * numCols);
long inputBuffer1 = UdfUtils.UNSAFE.allocateMemory(16 * batchSize);
long inputNull1 = UdfUtils.UNSAFE.allocateMemory(batchSize);
long inputBuffer2 = UdfUtils.UNSAFE.allocateMemory(16 * batchSize);
long inputNull2 = UdfUtils.UNSAFE.allocateMemory(batchSize);
UdfUtils.UNSAFE.putLong(inputBufferPtr, inputBuffer1);
UdfUtils.UNSAFE.putLong(inputBufferPtr + 8, inputBuffer2);
UdfUtils.UNSAFE.putLong(inputNullPtr, inputNull1);
UdfUtils.UNSAFE.putLong(inputNullPtr + 8, inputNull2);
long[] inputLong =
new long[] {562960991655690406L, 563242466632401062L, 563523941609111718L, 563805416585822374L,
564086891562533030L, 564368366539243686L, 564649841515954342L, 564931316492664998L,
565212791469375654L, 565494266446086310L};
BigDecimal[] decimalArray = new BigDecimal[10];
for (int i = 0; i < batchSize; ++i) {
BigInteger temp = BigInteger.valueOf(inputLong[i]);
decimalArray[i] = new BigDecimal(temp, 9);
}
BigDecimal decimal2 = new BigDecimal(BigInteger.valueOf(0L), 9);
byte[] intput2 = convertByteOrder(decimal2.unscaledValue().toByteArray());
byte[] value2 = new byte[16];
if (decimal2.signum() == -1) {
Arrays.fill(value2, (byte) -1);
}
for (int index = 0; index < Math.min(intput2.length, value2.length); ++index) {
value2[index] = intput2[index];
}
for (int i = 0; i < batchSize; ++i) {
byte[] intput1 = convertByteOrder(decimalArray[i].unscaledValue().toByteArray());
byte[] value1 = new byte[16];
if (decimalArray[i].signum() == -1) {
Arrays.fill(value1, (byte) -1);
}
for (int index = 0; index < Math.min(intput1.length, value1.length); ++index) {
value1[index] = intput1[index];
}
UdfUtils.copyMemory(value1, UdfUtils.BYTE_ARRAY_OFFSET, null, inputBuffer1 + i * 16, value1.length);
UdfUtils.copyMemory(value2, UdfUtils.BYTE_ARRAY_OFFSET, null, inputBuffer2 + i * 16, value2.length);
UdfUtils.UNSAFE.putByte(null, inputNull1 + i, (byte) 0);
UdfUtils.UNSAFE.putByte(null, inputNull2 + i, (byte) 0);
}
params.setInputBufferPtrs(inputBufferPtr);
params.setInputNullsPtrs(inputNullPtr);
params.setInputOffsetsPtrs(0);
TBinaryProtocol.Factory factory = new TBinaryProtocol.Factory();
TSerializer serializer = new TSerializer(factory);
UdfExecutor udfExecutor = new UdfExecutor(serializer.serialize(params));
udfExecutor.evaluate();
for (int i = 0; i < batchSize; ++i) {
byte[] bytes = new byte[16];
assert (UdfUtils.UNSAFE.getByte(outputNull + i) == 0);
UdfUtils.copyMemory(null, outputBuffer + 16 * i, bytes, UdfUtils.BYTE_ARRAY_OFFSET, bytes.length);
BigInteger integer = new BigInteger(convertByteOrder(bytes));
BigDecimal result = new BigDecimal(integer, 9);
assert (result.equals(decimalArray[i]));
}
}
@Test
public void testConstantOneUdf() throws Exception {
TScalarFunction scalarFunction = new TScalarFunction();
scalarFunction.symbol = "org.apache.doris.udf.ConstantOneUdf";
TFunction fn = new TFunction();
fn.binary_type = TFunctionBinaryType.JAVA_UDF;
TTypeNode typeNode = new TTypeNode(TTypeNodeType.SCALAR);
typeNode.scalar_type = new TScalarType(TPrimitiveType.INT);
fn.ret_type = new TTypeDesc(Collections.singletonList(typeNode));
fn.arg_types = new ArrayList<>();
fn.scalar_fn = scalarFunction;
fn.name = new TFunctionName("ConstantOne");
long batchSizePtr = UdfUtils.UNSAFE.allocateMemory(4);
int batchSize = 10;
UdfUtils.UNSAFE.putInt(batchSizePtr, batchSize);
TJavaUdfExecutorCtorParams params = new TJavaUdfExecutorCtorParams();
params.setBatchSizePtr(batchSizePtr);
params.setFn(fn);
long outputBuffer = UdfUtils.UNSAFE.allocateMemory(4 * batchSize);
long outputNull = UdfUtils.UNSAFE.allocateMemory(batchSize);
long outputBufferPtr = UdfUtils.UNSAFE.allocateMemory(8);
UdfUtils.UNSAFE.putLong(outputBufferPtr, outputBuffer);
long outputNullPtr = UdfUtils.UNSAFE.allocateMemory(8);
UdfUtils.UNSAFE.putLong(outputNullPtr, outputNull);
params.setOutputBufferPtr(outputBufferPtr);
params.setOutputNullPtr(outputNullPtr);
params.setInputBufferPtrs(0);
params.setInputNullsPtrs(0);
params.setInputOffsetsPtrs(0);
TBinaryProtocol.Factory factory =
new TBinaryProtocol.Factory();
TSerializer serializer = new TSerializer(factory);
UdfExecutor executor;
executor = new UdfExecutor(serializer.serialize(params));
executor.evaluate();
for (int i = 0; i < 10; i++) {
assert (UdfUtils.UNSAFE.getByte(outputNull + i) == 0);
assert (UdfUtils.UNSAFE.getInt(outputBuffer + 4 * i) == 1);
}
}
@Test
public void testSimpleAddUdf() throws Exception {
TScalarFunction scalarFunction = new TScalarFunction();
scalarFunction.symbol = "org.apache.doris.udf.SimpleAddUdf";
TFunction fn = new TFunction();
fn.binary_type = TFunctionBinaryType.JAVA_UDF;
TTypeNode typeNode = new TTypeNode(TTypeNodeType.SCALAR);
typeNode.scalar_type = new TScalarType(TPrimitiveType.INT);
TTypeDesc typeDesc = new TTypeDesc(Collections.singletonList(typeNode));
fn.ret_type = typeDesc;
fn.arg_types = Arrays.asList(typeDesc, typeDesc);
fn.scalar_fn = scalarFunction;
fn.name = new TFunctionName("SimpleAdd");
long batchSizePtr = UdfUtils.UNSAFE.allocateMemory(4);
int batchSize = 10;
UdfUtils.UNSAFE.putInt(batchSizePtr, batchSize);
TJavaUdfExecutorCtorParams params = new TJavaUdfExecutorCtorParams();
params.setBatchSizePtr(batchSizePtr);
params.setFn(fn);
long outputBufferPtr = UdfUtils.UNSAFE.allocateMemory(8);
long outputNullPtr = UdfUtils.UNSAFE.allocateMemory(8);
long outputBuffer = UdfUtils.UNSAFE.allocateMemory(4 * batchSize);
long outputNull = UdfUtils.UNSAFE.allocateMemory(batchSize);
UdfUtils.UNSAFE.putLong(outputBufferPtr, outputBuffer);
UdfUtils.UNSAFE.putLong(outputNullPtr, outputNull);
params.setOutputBufferPtr(outputBufferPtr);
params.setOutputNullPtr(outputNullPtr);
int numCols = 2;
long inputBufferPtr = UdfUtils.UNSAFE.allocateMemory(8 * numCols);
long inputNullPtr = UdfUtils.UNSAFE.allocateMemory(8 * numCols);
long inputBuffer1 = UdfUtils.UNSAFE.allocateMemory(4 * batchSize);
long inputNull1 = UdfUtils.UNSAFE.allocateMemory(batchSize);
long inputBuffer2 = UdfUtils.UNSAFE.allocateMemory(4 * batchSize);
long inputNull2 = UdfUtils.UNSAFE.allocateMemory(batchSize);
UdfUtils.UNSAFE.putLong(inputBufferPtr, inputBuffer1);
UdfUtils.UNSAFE.putLong(inputBufferPtr + 8, inputBuffer2);
UdfUtils.UNSAFE.putLong(inputNullPtr, inputNull1);
UdfUtils.UNSAFE.putLong(inputNullPtr + 8, inputNull2);
for (int i = 0; i < batchSize; i++) {
UdfUtils.UNSAFE.putInt(null, inputBuffer1 + i * 4, i);
UdfUtils.UNSAFE.putInt(null, inputBuffer2 + i * 4, i);
if (i % 2 == 0) {
UdfUtils.UNSAFE.putByte(null, inputNull1 + i, (byte) 1);
} else {
UdfUtils.UNSAFE.putByte(null, inputNull1 + i, (byte) 0);
}
UdfUtils.UNSAFE.putByte(null, inputNull2 + i, (byte) 0);
}
params.setInputBufferPtrs(inputBufferPtr);
params.setInputNullsPtrs(inputNullPtr);
params.setInputOffsetsPtrs(0);
TBinaryProtocol.Factory factory =
new TBinaryProtocol.Factory();
TSerializer serializer = new TSerializer(factory);
UdfExecutor executor;
executor = new UdfExecutor(serializer.serialize(params));
executor.evaluate();
for (int i = 0; i < batchSize; i++) {
if (i % 2 == 0) {
assert (UdfUtils.UNSAFE.getByte(outputNull + i) == 1);
} else {
assert (UdfUtils.UNSAFE.getByte(outputNull + i) == 0);
assert (UdfUtils.UNSAFE.getInt(outputBuffer + 4 * i) == i * 2);
}
}
}
@Test
public void testStringConcatUdf() throws Exception {
TScalarFunction scalarFunction = new TScalarFunction();
scalarFunction.symbol = "org.apache.doris.udf.StringConcatUdf";
TFunction fn = new TFunction();
fn.binary_type = TFunctionBinaryType.JAVA_UDF;
TTypeNode typeNode = new TTypeNode(TTypeNodeType.SCALAR);
typeNode.scalar_type = new TScalarType(TPrimitiveType.STRING);
TTypeDesc typeDesc = new TTypeDesc(Collections.singletonList(typeNode));
fn.ret_type = typeDesc;
fn.arg_types = Arrays.asList(typeDesc, typeDesc);
fn.scalar_fn = scalarFunction;
fn.name = new TFunctionName("StringConcat");
long batchSizePtr = UdfUtils.UNSAFE.allocateMemory(32);
int batchSize = 10;
UdfUtils.UNSAFE.putInt(batchSizePtr, batchSize);
TJavaUdfExecutorCtorParams params = new TJavaUdfExecutorCtorParams();
params.setBatchSizePtr(batchSizePtr);
params.setFn(fn);
long outputBufferPtr = UdfUtils.UNSAFE.allocateMemory(8);
long outputNullPtr = UdfUtils.UNSAFE.allocateMemory(8);
long outputOffsetsPtr = UdfUtils.UNSAFE.allocateMemory(8);
long outputIntermediateStatePtr = UdfUtils.UNSAFE.allocateMemory(8 * 2);
String[] input1 = new String[batchSize];
String[] input2 = new String[batchSize];
long[] inputOffsets1 = new long[batchSize];
long[] inputOffsets2 = new long[batchSize];
long inputBufferSize1 = 0;
long inputBufferSize2 = 0;
for (int i = 0; i < batchSize; i++) {
input1[i] = "Input1_" + i;
input2[i] = "Input2_" + i;
inputOffsets1[i] = i == 0 ? input1[i].getBytes(StandardCharsets.UTF_8).length
: inputOffsets1[i - 1] + input1[i].getBytes(StandardCharsets.UTF_8).length;
inputOffsets2[i] = i == 0 ? input2[i].getBytes(StandardCharsets.UTF_8).length
: inputOffsets2[i - 1] + input2[i].getBytes(StandardCharsets.UTF_8).length;
inputBufferSize1 += input1[i].getBytes(StandardCharsets.UTF_8).length;
inputBufferSize2 += input2[i].getBytes(StandardCharsets.UTF_8).length;
}
// In our test case, output buffer is (8 + 1) bytes * batchSize
long outputBuffer = UdfUtils.UNSAFE.allocateMemory(inputBufferSize1 + inputBufferSize2 + batchSize);
long outputNull = UdfUtils.UNSAFE.allocateMemory(batchSize);
long outputOffset = UdfUtils.UNSAFE.allocateMemory(4 * batchSize);
UdfUtils.UNSAFE.putLong(outputBufferPtr, outputBuffer);
UdfUtils.UNSAFE.putLong(outputNullPtr, outputNull);
UdfUtils.UNSAFE.putLong(outputOffsetsPtr, outputOffset);
// reserved buffer size
UdfUtils.UNSAFE.putLong(outputIntermediateStatePtr, inputBufferSize1 + inputBufferSize2 + batchSize);
// current row id
UdfUtils.UNSAFE.putLong(outputIntermediateStatePtr + 8, 0);
params.setOutputBufferPtr(outputBufferPtr);
params.setOutputNullPtr(outputNullPtr);
params.setOutputOffsetsPtr(outputOffsetsPtr);
params.setOutputIntermediateStatePtr(outputIntermediateStatePtr);
int numCols = 2;
long inputBufferPtr = UdfUtils.UNSAFE.allocateMemory(8 * numCols);
long inputNullPtr = UdfUtils.UNSAFE.allocateMemory(8 * numCols);
long inputOffsetsPtr = UdfUtils.UNSAFE.allocateMemory(8 * numCols);
long inputBuffer1 = UdfUtils.UNSAFE.allocateMemory(inputBufferSize1 + batchSize);
long inputOffset1 = UdfUtils.UNSAFE.allocateMemory(4 * batchSize);
long inputBuffer2 = UdfUtils.UNSAFE.allocateMemory(inputBufferSize2 + batchSize);
long inputOffset2 = UdfUtils.UNSAFE.allocateMemory(4 * batchSize);
UdfUtils.UNSAFE.putLong(inputBufferPtr, inputBuffer1);
UdfUtils.UNSAFE.putLong(inputBufferPtr + 8, inputBuffer2);
UdfUtils.UNSAFE.putLong(inputNullPtr, -1);
UdfUtils.UNSAFE.putLong(inputNullPtr + 8, -1);
UdfUtils.UNSAFE.putLong(inputOffsetsPtr, inputOffset1);
UdfUtils.UNSAFE.putLong(inputOffsetsPtr + 8, inputOffset2);
for (int i = 0; i < batchSize; i++) {
if (i == 0) {
UdfUtils.copyMemory(input1[i].getBytes(StandardCharsets.UTF_8),
UdfUtils.BYTE_ARRAY_OFFSET, null, inputBuffer1,
input1[i].getBytes(StandardCharsets.UTF_8).length);
UdfUtils.copyMemory(input2[i].getBytes(StandardCharsets.UTF_8),
UdfUtils.BYTE_ARRAY_OFFSET, null, inputBuffer2,
input2[i].getBytes(StandardCharsets.UTF_8).length);
} else {
UdfUtils.copyMemory(input1[i].getBytes(StandardCharsets.UTF_8),
UdfUtils.BYTE_ARRAY_OFFSET, null, inputBuffer1 + inputOffsets1[i - 1],
input1[i].getBytes(StandardCharsets.UTF_8).length);
UdfUtils.copyMemory(input2[i].getBytes(StandardCharsets.UTF_8),
UdfUtils.BYTE_ARRAY_OFFSET, null, inputBuffer2 + inputOffsets2[i - 1],
input2[i].getBytes(StandardCharsets.UTF_8).length);
}
UdfUtils.UNSAFE.putInt(null, inputOffset1 + 4L * i,
Integer.parseUnsignedInt(String.valueOf(inputOffsets1[i])));
UdfUtils.UNSAFE.putInt(null, inputOffset2 + 4L * i,
Integer.parseUnsignedInt(String.valueOf(inputOffsets2[i])));
}
params.setInputBufferPtrs(inputBufferPtr);
params.setInputNullsPtrs(inputNullPtr);
params.setInputOffsetsPtrs(inputOffsetsPtr);
TBinaryProtocol.Factory factory =
new TBinaryProtocol.Factory();
TSerializer serializer = new TSerializer(factory);
UdfExecutor executor;
executor = new UdfExecutor(serializer.serialize(params));
executor.evaluate();
for (int i = 0; i < batchSize; i++) {
byte[] bytes = new byte[input1[i].getBytes(StandardCharsets.UTF_8).length
+ input2[i].getBytes(StandardCharsets.UTF_8).length];
assert (UdfUtils.UNSAFE.getByte(outputNull + i) == 0);
if (i == 0) {
UdfUtils.copyMemory(null, outputBuffer, bytes, UdfUtils.BYTE_ARRAY_OFFSET,
bytes.length);
} else {
long lastOffset = UdfUtils.UNSAFE.getInt(null, outputOffset + 4 * (i - 1));
UdfUtils.copyMemory(null, outputBuffer + lastOffset, bytes, UdfUtils.BYTE_ARRAY_OFFSET,
bytes.length);
}
assert (new String(bytes, StandardCharsets.UTF_8).equals(input1[i] + input2[i]));
assert (UdfUtils.UNSAFE.getByte(null, outputNull + i) == 0);
}
}
@Test
public void testLargeIntUdf() throws Exception {
TScalarFunction scalarFunction = new TScalarFunction();
scalarFunction.symbol = "org.apache.doris.udf.LargeIntUdf";
TFunction fn = new TFunction();
fn.binary_type = TFunctionBinaryType.JAVA_UDF;
TTypeNode typeNode = new TTypeNode(TTypeNodeType.SCALAR);
typeNode.scalar_type = new TScalarType(TPrimitiveType.LARGEINT);
TTypeDesc typeDesc = new TTypeDesc(Collections.singletonList(typeNode));
fn.ret_type = typeDesc;
fn.arg_types = Arrays.asList(typeDesc, typeDesc);
fn.scalar_fn = scalarFunction;
fn.name = new TFunctionName("LargeIntUdf");
long batchSizePtr = UdfUtils.UNSAFE.allocateMemory(8);
int batchSize = 10;
UdfUtils.UNSAFE.putInt(batchSizePtr, batchSize);
TJavaUdfExecutorCtorParams params = new TJavaUdfExecutorCtorParams();
params.setBatchSizePtr(batchSizePtr);
params.setFn(fn);
long outputBufferPtr = UdfUtils.UNSAFE.allocateMemory(8);
long outputNullPtr = UdfUtils.UNSAFE.allocateMemory(8);
long outputBuffer = UdfUtils.UNSAFE.allocateMemory(16 * batchSize);
long outputNull = UdfUtils.UNSAFE.allocateMemory(batchSize);
UdfUtils.UNSAFE.putLong(outputBufferPtr, outputBuffer);
UdfUtils.UNSAFE.putLong(outputNullPtr, outputNull);
params.setOutputBufferPtr(outputBufferPtr);
params.setOutputNullPtr(outputNullPtr);
int numCols = 2;
long inputBufferPtr = UdfUtils.UNSAFE.allocateMemory(8 * numCols);
long inputNullPtr = UdfUtils.UNSAFE.allocateMemory(8 * numCols);
long inputBuffer1 = UdfUtils.UNSAFE.allocateMemory(16 * batchSize);
long inputNull1 = UdfUtils.UNSAFE.allocateMemory(batchSize);
long inputBuffer2 = UdfUtils.UNSAFE.allocateMemory(16 * batchSize);
long inputNull2 = UdfUtils.UNSAFE.allocateMemory(batchSize);
UdfUtils.UNSAFE.putLong(inputBufferPtr, inputBuffer1);
UdfUtils.UNSAFE.putLong(inputBufferPtr + 8, inputBuffer2);
UdfUtils.UNSAFE.putLong(inputNullPtr, inputNull1);
UdfUtils.UNSAFE.putLong(inputNullPtr + 8, inputNull2);
long[] inputLong =
new long[] {562960991655690406L, 563242466632401062L, 563523941609111718L, 563805416585822374L,
564086891562533030L, 564368366539243686L, 564649841515954342L, 564931316492664998L,
565212791469375654L, 565494266446086310L};
BigInteger[] integerArray = new BigInteger[10];
for (int i = 0; i < batchSize; ++i) {
integerArray[i] = BigInteger.valueOf(inputLong[i]);
}
BigInteger integer2 = BigInteger.valueOf(1L);
byte[] intput2 = convertByteOrder(integer2.toByteArray());
byte[] value2 = new byte[16];
if (integer2.signum() == -1) {
Arrays.fill(value2, (byte) -1);
}
for (int index = 0; index < Math.min(intput2.length, value2.length); ++index) {
value2[index] = intput2[index];
}
for (int i = 0; i < batchSize; ++i) {
byte[] intput1 = convertByteOrder(integerArray[i].toByteArray());
byte[] value1 = new byte[16];
if (integerArray[i].signum() == -1) {
Arrays.fill(value1, (byte) -1);
}
for (int index = 0; index < Math.min(intput1.length, value1.length); ++index) {
value1[index] = intput1[index];
}
UdfUtils.copyMemory(value1, UdfUtils.BYTE_ARRAY_OFFSET, null, inputBuffer1 + i * 16, value1.length);
UdfUtils.copyMemory(value2, UdfUtils.BYTE_ARRAY_OFFSET, null, inputBuffer2 + i * 16, value2.length);
UdfUtils.UNSAFE.putByte(null, inputNull1 + i, (byte) 0);
UdfUtils.UNSAFE.putByte(null, inputNull2 + i, (byte) 0);
}
params.setInputBufferPtrs(inputBufferPtr);
params.setInputNullsPtrs(inputNullPtr);
params.setInputOffsetsPtrs(0);
TBinaryProtocol.Factory factory = new TBinaryProtocol.Factory();
TSerializer serializer = new TSerializer(factory);
UdfExecutor udfExecutor = new UdfExecutor(serializer.serialize(params));
udfExecutor.evaluate();
for (int i = 0; i < batchSize; ++i) {
byte[] bytes = new byte[16];
assert (UdfUtils.UNSAFE.getByte(outputNull + i) == 0);
UdfUtils.copyMemory(null, outputBuffer + 16 * i, bytes, UdfUtils.BYTE_ARRAY_OFFSET, bytes.length);
BigInteger result = new BigInteger(convertByteOrder(bytes));
assert (result.equals(integerArray[i].add(BigInteger.valueOf(1))));
}
}
public byte[] convertByteOrder(byte[] bytes) {
int length = bytes.length;
for (int i = 0; i < length / 2; ++i) {
byte temp = bytes[i];
bytes[i] = bytes[length - 1 - i];
bytes[length - 1 - i] = temp;
}
return bytes;
}
}