[Enhancement](java-udf) java-udf module split to sub modules (#20185)

The java-udf module has become increasingly large and difficult to manage, making it inconvenient to package and use as needed. It needs to be split into multiple sub-modules, such as: java-common, java-udf, jdbc-scanner, hudi-scanner, paimon-scanner.

Co-authored-by: lexluo <lexluo@tencent.com>
This commit is contained in:
lexluo09
2023-06-13 09:41:22 +08:00
committed by GitHub
parent 51bbf17786
commit 57656b2459
64 changed files with 908 additions and 218 deletions

View File

@ -0,0 +1,28 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.common.exception;
/**
 * Checked exception used to signal internal (non-user-facing) errors raised
 * while executing JNI/UDF code on the Java side.
 */
public class InternalException extends Exception {
    // Exception is Serializable; pin the serialized form explicitly so
    // recompilation does not silently change the computed UID.
    private static final long serialVersionUID = 1L;

    /**
     * @param msg   human-readable description of the failure
     * @param cause underlying exception that triggered this one
     */
    public InternalException(String msg, Throwable cause) {
        super(msg, cause);
    }

    /**
     * @param msg human-readable description of the failure
     */
    public InternalException(String msg) {
        super(msg);
    }
}

View File

@ -0,0 +1,28 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.common.exception;
/**
 * Exception thrown for failures that occur while running a UDF.
 *
 * <p>NOTE: despite the name, this extends {@link Exception} (checked), not
 * {@link RuntimeException}; callers must declare or catch it.
 */
public class UdfRuntimeException extends Exception {
    // Exception is Serializable; pin the serialized form explicitly so
    // recompilation does not silently change the computed UID.
    private static final long serialVersionUID = 1L;

    /**
     * @param msg   human-readable description of the failure
     * @param cause underlying exception that triggered this one
     */
    public UdfRuntimeException(String msg, Throwable cause) {
        super(msg, cause);
    }

    /**
     * @param msg human-readable description of the failure
     */
    public UdfRuntimeException(String msg) {
        super(msg);
    }
}

View File

@ -0,0 +1,103 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.common.jni;
import org.apache.doris.common.jni.vec.ColumnType;
import org.apache.doris.common.jni.vec.ColumnValue;
import org.apache.doris.common.jni.vec.ScanPredicate;
import org.apache.doris.common.jni.vec.VectorTable;
import java.io.IOException;
/**
 * Base class for scanners driven over JNI. A concrete implementation fills a
 * {@link VectorTable} batch by batch; the native side consumes each batch
 * through the meta address returned by {@link #getNextBatchMeta()}.
 */
public abstract class JniScanner {
    protected VectorTable vectorTable;
    protected String[] fields;
    protected ColumnType[] types;
    protected ScanPredicate[] predicates;
    protected int batchSize;

    /** Prepare the scanner (open underlying resources). */
    public abstract void open() throws IOException;

    /** Close the scanner and release everything it holds. */
    public abstract void close() throws IOException;

    /**
     * Fill the vector table with the next batch of rows.
     *
     * @return number of rows produced; 0 signals end of data
     */
    protected abstract int getNext() throws IOException;

    /**
     * Record the scan layout: required column types and names, pushed-down
     * predicates, and the maximum number of rows per batch.
     */
    protected void initTableInfo(ColumnType[] requiredTypes, String[] requiredFields, ScanPredicate[] predicates,
            int batchSize) {
        this.batchSize = batchSize;
        this.predicates = predicates;
        this.fields = requiredFields;
        this.types = requiredTypes;
    }

    /** Append one value (null for SQL NULL) to the column at {@code index}. */
    protected void appendData(int index, ColumnValue value) {
        vectorTable.appendData(index, value);
    }

    /** Maximum number of rows per batch, as set by {@link #initTableInfo}. */
    protected int getBatchSize() {
        return batchSize;
    }

    /** Current vector table, or null if none has been created yet. */
    public VectorTable getTable() {
        return vectorTable;
    }

    /**
     * Produce the next batch and return the address of its metadata block,
     * or 0 when the scan is exhausted. The table is created lazily on the
     * first call and released on EOF or on failure.
     */
    public long getNextBatchMeta() throws IOException {
        if (vectorTable == null) {
            vectorTable = new VectorTable(types, fields, predicates, batchSize);
        }
        final int rows;
        try {
            rows = getNext();
        } catch (IOException e) {
            // Do not leak the table when the underlying scan fails.
            releaseTable();
            throw e;
        }
        if (rows == 0) {
            releaseTable();
            return 0;
        }
        vectorTable.setNumRows(rows);
        return vectorTable.getMetaAddress();
    }

    /** Clear the table contents so it can be refilled for the next batch. */
    protected void resetTable() {
        if (vectorTable != null) {
            vectorTable.reset();
        }
    }

    /** Release the backing storage of a single column. */
    protected void releaseColumn(int fieldId) {
        vectorTable.releaseColumn(fieldId);
    }

    /** Close and drop the whole table; safe to call when no table exists. */
    protected void releaseTable() {
        VectorTable table = vectorTable;
        vectorTable = null;
        if (table != null) {
            table.close();
        }
    }
}

View File

@ -0,0 +1,193 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.common.jni;
import org.apache.doris.common.jni.vec.ColumnType;
import org.apache.doris.common.jni.vec.ColumnValue;
import org.apache.doris.common.jni.vec.ScanPredicate;
import org.apache.log4j.Logger;
import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.util.List;
import java.util.Map;
/**
* The demo usage of JniScanner. This class will only be retained during the functional testing phase to
* verify that the communication and data exchange with the BE are correct.
*/
public class MockJniScanner extends JniScanner {
    /**
     * A mutable (row, column) cursor whose getters derive deterministic mock
     * values from the current position. One instance is reused for every
     * non-null cell of a batch to avoid per-cell allocation.
     */
    public static class MockColumnValue implements ColumnValue {
        private int i; // current row index within the batch
        private int j; // current column index
        public MockColumnValue() {
        }
        // Reposition the cursor; all getters below compute from (i, j).
        public void set(int i, int j) {
            this.i = i;
            this.j = j;
        }
        @Override
        public boolean isNull() {
            // Nulls are injected by the scanner (appendData(j, null)), never here.
            return false;
        }
        @Override
        public boolean getBoolean() {
            return (i + j) % 2 == 0;
        }
        @Override
        public byte getByte() {
            return (byte) (i + j);
        }
        @Override
        public short getShort() {
            return (short) (i - j);
        }
        @Override
        public int getInt() {
            return i + j;
        }
        @Override
        public float getFloat() {
            return (float) (j + i - 11) / (i + 1);
        }
        @Override
        public long getLong() {
            return (long) (i - 13) * (j + 1);
        }
        @Override
        public double getDouble() {
            return (double) (j + i - 15) / (i + 1);
        }
        @Override
        public BigInteger getBigInteger() {
            return BigInteger.valueOf(getLong());
        }
        @Override
        public BigDecimal getDecimal() {
            return BigDecimal.valueOf(getDouble());
        }
        @Override
        public String getString() {
            return "row-" + i + "-column-" + j;
        }
        @Override
        public LocalDate getDate() {
            // NOTE(review): not deterministic across runs — uses wall-clock "now".
            return LocalDate.now();
        }
        @Override
        public LocalDateTime getDateTime() {
            // NOTE(review): not deterministic across runs — uses wall-clock "now".
            return LocalDateTime.now();
        }
        @Override
        public byte[] getBytes() {
            return ("row-" + i + "-column-" + j).getBytes(StandardCharsets.UTF_8);
        }
        // Complex types are not mocked; these intentionally produce empty values.
        @Override
        public void unpackArray(List<ColumnValue> values) {
        }
        @Override
        public void unpackMap(List<ColumnValue> keys, List<ColumnValue> values) {
        }
        @Override
        public void unpackStruct(List<Integer> structFieldIndex, List<ColumnValue> values) {
        }
    }
    private static final Logger LOG = Logger.getLogger(MockJniScanner.class);
    // Total number of rows this scanner produces, from the "mock_rows" param.
    private final int mockRows;
    // Number of rows already handed out across previous getNext() calls.
    private int readRows = 0;
    // Single reusable cursor for all non-null cells.
    private final MockColumnValue columnValue = new MockColumnValue();
    /**
     * @param batchSize maximum rows per batch
     * @param params expects "mock_rows" (int), "required_fields"
     *               (comma-separated), "columns_types" ('#'-separated), and
     *               optionally "push_down_predicates" (a native address; 0 = none).
     *               Assumes required_fields and columns_types have equal length
     *               — not validated here.
     */
    public MockJniScanner(int batchSize, Map<String, String> params) {
        mockRows = Integer.parseInt(params.get("mock_rows"));
        String[] requiredFields = params.get("required_fields").split(",");
        String[] types = params.get("columns_types").split("#");
        ColumnType[] columnTypes = new ColumnType[types.length];
        for (int i = 0; i < types.length; i++) {
            columnTypes[i] = ColumnType.parseType(requiredFields[i], types[i]);
        }
        ScanPredicate[] predicates = new ScanPredicate[0];
        if (params.containsKey("push_down_predicates")) {
            long predicatesAddress = Long.parseLong(params.get("push_down_predicates"));
            if (predicatesAddress != 0) {
                predicates = ScanPredicate.parseScanPredicates(predicatesAddress, columnTypes);
                LOG.info("MockJniScanner gets pushed-down predicates: " + ScanPredicate.dump(predicates));
            }
        }
        initTableInfo(columnTypes, requiredFields, predicates, batchSize);
    }
    // Nothing to open for mock data.
    @Override
    public void open() throws IOException {
    }
    // Nothing to release for mock data.
    @Override
    public void close() throws IOException {
    }
    @Override
    protected int getNext() throws IOException {
        if (readRows == mockRows) {
            return 0;
        }
        int rows = Math.min(batchSize, mockRows - readRows);
        for (int i = 0; i < rows; ++i) {
            for (int j = 0; j < types.length; ++j) {
                // Every cell where (i + j) is a multiple of 16 is emitted as
                // NULL to exercise the BE's null handling.
                if ((i + j) % 16 == 0) {
                    appendData(j, null);
                } else {
                    columnValue.set(i, j);
                    appendData(j, columnValue);
                }
            }
        }
        readRows += rows;
        return rows;
    }
}

View File

@ -0,0 +1,282 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/apache/impala/blob/branch-4.0.0/fe/src/main/java/org/apache/impala/util/JMXJsonUtil.java
// and modified by Doris
package org.apache.doris.common.jni.utils;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import org.apache.log4j.Logger;
import java.io.IOException;
import java.io.StringWriter;
import java.lang.management.ManagementFactory;
import java.lang.reflect.Array;
import java.util.Iterator;
import java.util.Set;
import javax.management.AttributeNotFoundException;
import javax.management.InstanceNotFoundException;
import javax.management.IntrospectionException;
import javax.management.MBeanAttributeInfo;
import javax.management.MBeanException;
import javax.management.MBeanInfo;
import javax.management.MBeanServer;
import javax.management.ObjectName;
import javax.management.ReflectionException;
import javax.management.RuntimeErrorException;
import javax.management.RuntimeMBeanException;
import javax.management.openmbean.CompositeData;
import javax.management.openmbean.CompositeType;
import javax.management.openmbean.TabularData;
/**
* Utility class that returns a JSON representation of the JMX beans.
* This is based on hadoop-common's implementation of JMXJsonServlet.
* <p>
* Output format:
* {
* "beans" : [
* {
* "name":"bean-name"
* ...
* }
* ]
* }
* Each bean's attributes will be converted to a JSON object member.
* If the attribute is a boolean, a number, a string, or an array
* it will be converted to the JSON equivalent.
* <p>
* If the value is a {@link CompositeData} then it will be converted
* to a JSON object with the keys as the name of the JSON member and
* the value is converted following these same rules.
* If the value is a {@link TabularData} then it will be converted
* to an array of the {@link CompositeData} elements that it contains.
* All other objects will be converted to a string and output as such.
* The bean's name and modelerType will be returned for all beans.
*/
public class JMXJsonUtil {
    // Platform MBean server used to enumerate and query all registered beans.
    protected static transient MBeanServer mBeanServer =
            ManagementFactory.getPlatformMBeanServer();
    private static final Logger LOG = Logger.getLogger(JMXJsonUtil.class);

    /**
     * Returns all JMX beans of this JVM serialized as a JSON string
     * ({"beans": [...]}). On I/O failure the error is logged and whatever
     * was written so far is returned.
     */
    public static String getJMXJson() {
        StringWriter writer = new StringWriter();
        try {
            JsonGenerator jg = null;
            try {
                JsonFactory jsonFactory = new JsonFactory();
                // NOTE(review): createJsonGenerator is deprecated in Jackson 2.x;
                // prefer createGenerator once the Jackson baseline allows it.
                jg = jsonFactory.createJsonGenerator(writer);
                // Keep the StringWriter open; it is closed in the finally block.
                jg.disable(JsonGenerator.Feature.AUTO_CLOSE_TARGET);
                jg.writeStartObject();
                if (mBeanServer == null) {
                    jg.writeStringField("result", "ERROR");
                    jg.writeStringField("message", "No MBeanServer could be found");
                    jg.close();
                    LOG.error("No MBeanServer could be found.");
                    return writer.toString();
                }
                listBeans(jg);
            } finally {
                if (jg != null) {
                    // Closing also flushes and emits the matching end tokens.
                    jg.close();
                }
                // NOTE(review): writer is never null here; this check is redundant.
                if (writer != null) {
                    writer.close();
                }
            }
        } catch (IOException e) {
            LOG.error("Caught an exception while processing JMX request", e);
        }
        return writer.toString();
    }

    // Utility method that lists all the mbeans and writes them using the supplied
    // JsonGenerator. Beans that cannot be introspected are skipped.
    private static void listBeans(JsonGenerator jg) throws IOException {
        Set<ObjectName> names;
        names = mBeanServer.queryNames(null, null);
        jg.writeArrayFieldStart("beans");
        Iterator<ObjectName> it = names.iterator();
        while (it.hasNext()) {
            ObjectName oname = it.next();
            MBeanInfo minfo;
            String code = "";
            try {
                minfo = mBeanServer.getMBeanInfo(oname);
                code = minfo.getClassName();
                String prs = "";
                try {
                    // Commons-modeler beans report their real type via the
                    // "modelerType" attribute rather than the class name.
                    if ("org.apache.commons.modeler.BaseModelMBean".equals(code)) {
                        prs = "modelerType";
                        code = (String) mBeanServer.getAttribute(oname, prs);
                    }
                } catch (AttributeNotFoundException e) {
                    // If the modelerType attribute was not found, the class name is used
                    // instead.
                    LOG.error("getting attribute " + prs + " of " + oname
                            + " threw an exception", e);
                } catch (MBeanException e) {
                    // The code inside the attribute getter threw an exception so log it,
                    // and fall back on the class name.
                    LOG.error("getting attribute " + prs + " of " + oname
                            + " threw an exception", e);
                } catch (RuntimeException e) {
                    // For some reason even with an MBeanException available to them
                    // Runtime exceptions can still find their way through, so treat them
                    // the same as MBeanException.
                    LOG.error("getting attribute " + prs + " of " + oname
                            + " threw an exception", e);
                } catch (ReflectionException e) {
                    // This happens when the code inside the JMX bean (setter?? from the
                    // java docs) threw an exception, so log it and fall back on the
                    // class name.
                    LOG.error("getting attribute " + prs + " of " + oname
                            + " threw an exception", e);
                }
            } catch (InstanceNotFoundException e) {
                // Ignored: the bean was not found (perhaps unregistered in the
                // meantime), so don't output it.
                continue;
            } catch (IntrospectionException | ReflectionException e) {
                // This is an internal error, something odd happened with reflection so
                // log it and don't output the bean.
                LOG.error("Problem while trying to process JMX query with MBean " + oname, e);
                continue;
            }
            jg.writeStartObject();
            jg.writeStringField("name", oname.toString());
            jg.writeStringField("modelerType", code);
            MBeanAttributeInfo[] attrs = minfo.getAttributes();
            for (int i = 0; i < attrs.length; i++) {
                writeAttribute(jg, oname, attrs[i]);
            }
            jg.writeEndObject();
        }
        jg.writeEndArray();
    }

    // Utility method to write one mBean attribute as a JSON member. Unreadable,
    // unfetchable, or unrepresentable attributes are silently skipped so a
    // single bad attribute never breaks the whole dump.
    private static void writeAttribute(JsonGenerator jg, ObjectName oname,
                                       MBeanAttributeInfo attr) throws IOException {
        if (!attr.isReadable()) {
            return;
        }
        String attName = attr.getName();
        if ("modelerType".equals(attName)) {
            return;
        }
        // Names containing '=', ':' or ' ' would clash with JMX/JSON syntax.
        if (attName.indexOf("=") >= 0 || attName.indexOf(":") >= 0
                || attName.indexOf(" ") >= 0) {
            return;
        }
        Object value = null;
        try {
            value = mBeanServer.getAttribute(oname, attName);
        } catch (RuntimeMBeanException e) {
            // UnsupportedOperationExceptions happen in the normal course of business,
            // so no need to log them as errors all the time.
            if (e.getCause() instanceof UnsupportedOperationException) {
                LOG.trace("getting attribute " + attName + " of " + oname + " threw an exception", e);
            } else {
                LOG.error("getting attribute " + attName + " of " + oname + " threw an exception", e);
            }
            return;
        } catch (RuntimeErrorException e) {
            // RuntimeErrorException happens when an unexpected failure occurs in getAttribute
            // for example https://issues.apache.org/jira/browse/DAEMON-120
            LOG.debug("getting attribute " + attName + " of " + oname + " threw an exception", e);
            return;
        } catch (AttributeNotFoundException e) {
            // Ignored: the attribute was not found, which should never happen because the bean
            // just told us that it has this attribute, but if this happens just don't output
            // the attribute.
            return;
        } catch (MBeanException e) {
            // The code inside the attribute getter threw an exception so log it, and
            // skip outputting the attribute.
            LOG.error("getting attribute " + attName + " of " + oname + " threw an exception", e);
            return;
        } catch (RuntimeException e) {
            // For some reason even with an MBeanException available to them Runtime exceptions
            // can still find their way through, so treat them the same as MBeanException.
            LOG.error("getting attribute " + attName + " of " + oname + " threw an exception", e);
            return;
        } catch (ReflectionException e) {
            // This happens when the code inside the JMX bean (setter?? from the java docs)
            // threw an exception, so log it and skip outputting the attribute.
            LOG.error("getting attribute " + attName + " of " + oname + " threw an exception", e);
            return;
        } catch (InstanceNotFoundException e) {
            // Ignored: the mbean itself was not found, which should never happen because we
            // just accessed it (perhaps something unregistered in-between) but if this
            // happens just don't output the attribute.
            return;
        }
        writeAttribute(jg, attName, value);
    }

    // Writes a named JSON member whose value is rendered by writeObject.
    private static void writeAttribute(JsonGenerator jg, String attName, Object value)
            throws IOException {
        jg.writeFieldName(attName);
        writeObject(jg, value);
    }

    // Recursively renders a JMX attribute value as JSON: arrays -> JSON arrays,
    // CompositeData -> objects, TabularData -> arrays of objects, numbers and
    // booleans natively, everything else via toString().
    private static void writeObject(JsonGenerator jg, Object value) throws IOException {
        if (value == null) {
            jg.writeNull();
        } else {
            Class<?> c = value.getClass();
            if (c.isArray()) {
                jg.writeStartArray();
                int len = Array.getLength(value);
                for (int j = 0; j < len; j++) {
                    Object item = Array.get(value, j);
                    writeObject(jg, item);
                }
                jg.writeEndArray();
            } else if (value instanceof Number) {
                Number n = (Number) value;
                // Written via toString so arbitrary Number subclasses keep their
                // exact textual representation.
                jg.writeNumber(n.toString());
            } else if (value instanceof Boolean) {
                Boolean b = (Boolean) value;
                jg.writeBoolean(b);
            } else if (value instanceof CompositeData) {
                CompositeData cds = (CompositeData) value;
                CompositeType comp = cds.getCompositeType();
                Set<String> keys = comp.keySet();
                jg.writeStartObject();
                for (String key : keys) {
                    writeAttribute(jg, key, cds.get(key));
                }
                jg.writeEndObject();
            } else if (value instanceof TabularData) {
                TabularData tds = (TabularData) value;
                jg.writeStartArray();
                for (Object entry : tds.values()) {
                    writeObject(jg, entry);
                }
                jg.writeEndArray();
            } else {
                jg.writeString(value.toString());
            }
        }
    }
}

View File

@ -0,0 +1,38 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.common.jni.utils;
/**
* Native method in doris::JavaNativeMethods.
*/
public class JNINativeMethod {
    /**
     * Resize string column and return the new column address in off heap.
     *
     * @param columnAddr current off-heap address of the string column
     * @param byteSize   requested size in bytes
     * @return the (possibly moved) off-heap address after resizing
     */
    public static native long resizeStringColumn(long columnAddr, int byteSize);
    /**
     * Allocate memory in off heap, which will be tracked by memory tracker.
     *
     * @param size number of bytes to allocate
     * @return address of the allocated off-heap memory
     */
    public static native long memoryTrackerMalloc(long size);
    /**
     * Free memory in off heap, which will be tracked by memory tracker.
     * Must only be passed addresses obtained from {@link #memoryTrackerMalloc}.
     *
     * @param address off-heap address to free
     */
    public static native void memoryTrackerFree(long address);
}

View File

@ -0,0 +1,272 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.common.jni.utils;
import org.apache.doris.common.exception.InternalException;
import org.apache.doris.thrift.TGetJMXJsonResponse;
import org.apache.doris.thrift.TGetJvmMemoryMetricsResponse;
import org.apache.doris.thrift.TGetJvmThreadsInfoRequest;
import org.apache.doris.thrift.TGetJvmThreadsInfoResponse;
import org.apache.doris.thrift.TJvmMemoryPool;
import org.apache.doris.thrift.TJvmThreadInfo;
import com.google.common.base.Joiner;
import org.apache.thrift.TBase;
import org.apache.thrift.TDeserializer;
import org.apache.thrift.TException;
import org.apache.thrift.TSerializer;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.protocol.TProtocolFactory;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.lang.management.GarbageCollectorMXBean;
import java.lang.management.ManagementFactory;
import java.lang.management.MemoryMXBean;
import java.lang.management.MemoryPoolMXBean;
import java.lang.management.MemoryUsage;
import java.lang.management.RuntimeMXBean;
import java.lang.management.ThreadInfo;
import java.lang.management.ThreadMXBean;
import java.util.ArrayList;
import java.util.Map;
/**
* Utility class with methods intended for JNI clients
*/
public class JniUtil {
    // Shared binary-protocol factory used for all thrift (de)serialization.
    private static final TBinaryProtocol.Factory protocolFactory_ = new TBinaryProtocol.Factory();

    /**
     * Initializes the JvmPauseMonitor instance.
     */
    public static void initPauseMonitor(long deadlockCheckIntervalS) {
        JvmPauseMonitor.INSTANCE.initPauseMonitor(deadlockCheckIntervalS);
    }

    /**
     * Returns a formatted string containing the simple exception name and the
     * exception message without the full stack trace. Includes the
     * chain of causes each in a separate line.
     */
    public static String throwableToString(Throwable t) {
        StringWriter output = new StringWriter();
        output.write(String.format("%s: %s", t.getClass().getSimpleName(),
                t.getMessage()));
        // Follow the chain of exception causes and print them as well.
        Throwable cause = t;
        while ((cause = cause.getCause()) != null) {
            output.write(String.format("\nCAUSED BY: %s: %s",
                    cause.getClass().getSimpleName(), cause.getMessage()));
        }
        return output.toString();
    }

    /**
     * Returns the stack trace of the Throwable object.
     */
    public static String throwableToStackTrace(Throwable t) {
        Writer output = new StringWriter();
        t.printStackTrace(new PrintWriter(output));
        return output.toString();
    }

    /**
     * Serializes input into a byte[] using the default protocol factory.
     *
     * @throws InternalException if thrift serialization fails; the original
     *         TException is preserved as the cause
     */
    public static <T extends TBase<?, ?>> byte[] serializeToThrift(T input) throws InternalException {
        try {
            TSerializer serializer = new TSerializer(protocolFactory_);
            return serializer.serialize(input);
        } catch (TException e) {
            // Keep the original exception as the cause so the stack trace survives.
            throw new InternalException(e.getMessage(), e);
        }
    }

    /**
     * Serializes input into a byte[] using a given protocol factory.
     *
     * @throws InternalException if thrift serialization fails; the original
     *         TException is preserved as the cause
     */
    public static <T extends TBase<?, ?>, F extends TProtocolFactory> byte[] serializeToThrift(
            T input, F protocolFactory) throws InternalException {
        try {
            TSerializer serializer = new TSerializer(protocolFactory);
            return serializer.serialize(input);
        } catch (TException e) {
            // Keep the original exception as the cause so the stack trace survives.
            throw new InternalException(e.getMessage(), e);
        }
    }

    /** Deserializes thriftData into result using the default protocol factory. */
    public static <T extends TBase<?, ?>> void deserializeThrift(
            T result, byte[] thriftData) throws InternalException {
        deserializeThrift(protocolFactory_, result, thriftData);
    }

    /**
     * Deserialize a serialized form of a Thrift data structure to its object form.
     *
     * @throws InternalException if thrift deserialization fails; the original
     *         TException is preserved as the cause
     */
    public static <T extends TBase<?, ?>, F extends TProtocolFactory> void deserializeThrift(
            F protocolFactory, T result, byte[] thriftData) throws InternalException {
        // TODO: avoid creating deserializer for each query?
        try {
            TDeserializer deserializer = new TDeserializer(protocolFactory);
            deserializer.deserialize(result, thriftData);
        } catch (TException e) {
            // Keep the original exception as the cause so the stack trace survives.
            throw new InternalException(e.getMessage(), e);
        }
    }

    /**
     * Collect the JVM's memory statistics into a thrift structure for translation into
     * Doris metrics by the backend. A synthetic 'total' memory pool is included with
     * aggregate statistics for all real pools. Metrics for the JvmPauseMonitor
     * and Garbage Collection are also included.
     */
    public static byte[] getJvmMemoryMetrics() throws InternalException {
        TGetJvmMemoryMetricsResponse jvmMetrics = new TGetJvmMemoryMetricsResponse();
        jvmMetrics.setMemoryPools(new ArrayList<TJvmMemoryPool>());
        // Synthetic pool aggregating all real pools below.
        TJvmMemoryPool totalUsage = new TJvmMemoryPool();
        totalUsage.setName("total");
        jvmMetrics.getMemoryPools().add(totalUsage);
        for (MemoryPoolMXBean memBean : ManagementFactory.getMemoryPoolMXBeans()) {
            TJvmMemoryPool usage = new TJvmMemoryPool();
            MemoryUsage beanUsage = memBean.getUsage();
            usage.setCommitted(beanUsage.getCommitted());
            usage.setInit(beanUsage.getInit());
            usage.setMax(beanUsage.getMax());
            usage.setUsed(beanUsage.getUsed());
            usage.setName(memBean.getName());
            totalUsage.committed += beanUsage.getCommitted();
            totalUsage.init += beanUsage.getInit();
            totalUsage.max += beanUsage.getMax();
            totalUsage.used += beanUsage.getUsed();
            MemoryUsage peakUsage = memBean.getPeakUsage();
            usage.setPeakCommitted(peakUsage.getCommitted());
            usage.setPeakInit(peakUsage.getInit());
            usage.setPeakMax(peakUsage.getMax());
            usage.setPeakUsed(peakUsage.getUsed());
            totalUsage.peak_committed += peakUsage.getCommitted();
            totalUsage.peak_init += peakUsage.getInit();
            totalUsage.peak_max += peakUsage.getMax();
            totalUsage.peak_used += peakUsage.getUsed();
            jvmMetrics.getMemoryPools().add(usage);
        }
        // Populate heap usage; the MemoryMXBean exposes no peak values, so
        // the peak fields are reported as 0.
        MemoryMXBean mBean = ManagementFactory.getMemoryMXBean();
        TJvmMemoryPool heap = new TJvmMemoryPool();
        MemoryUsage heapUsage = mBean.getHeapMemoryUsage();
        heap.setCommitted(heapUsage.getCommitted());
        heap.setInit(heapUsage.getInit());
        heap.setMax(heapUsage.getMax());
        heap.setUsed(heapUsage.getUsed());
        heap.setName("heap");
        heap.setPeakCommitted(0);
        heap.setPeakInit(0);
        heap.setPeakMax(0);
        heap.setPeakUsed(0);
        jvmMetrics.getMemoryPools().add(heap);
        // Populate non-heap usage (same caveat on peak values).
        TJvmMemoryPool nonHeap = new TJvmMemoryPool();
        MemoryUsage nonHeapUsage = mBean.getNonHeapMemoryUsage();
        nonHeap.setCommitted(nonHeapUsage.getCommitted());
        nonHeap.setInit(nonHeapUsage.getInit());
        nonHeap.setMax(nonHeapUsage.getMax());
        nonHeap.setUsed(nonHeapUsage.getUsed());
        nonHeap.setName("non-heap");
        nonHeap.setPeakCommitted(0);
        nonHeap.setPeakInit(0);
        nonHeap.setPeakMax(0);
        nonHeap.setPeakUsed(0);
        jvmMetrics.getMemoryPools().add(nonHeap);
        // Populate JvmPauseMonitor metrics
        jvmMetrics.setGcNumWarnThresholdExceeded(
                JvmPauseMonitor.INSTANCE.getNumGcWarnThresholdExceeded());
        jvmMetrics.setGcNumInfoThresholdExceeded(
                JvmPauseMonitor.INSTANCE.getNumGcInfoThresholdExceeded());
        jvmMetrics.setGcTotalExtraSleepTimeMillis(
                JvmPauseMonitor.INSTANCE.getTotalGcExtraSleepTime());
        // And Garbage Collector metrics aggregated across all collectors.
        long gcCount = 0;
        long gcTimeMillis = 0;
        for (GarbageCollectorMXBean bean : ManagementFactory.getGarbageCollectorMXBeans()) {
            gcCount += bean.getCollectionCount();
            gcTimeMillis += bean.getCollectionTime();
        }
        jvmMetrics.setGcCount(gcCount);
        jvmMetrics.setGcTimeMillis(gcTimeMillis);
        return serializeToThrift(jvmMetrics, protocolFactory_);
    }

    /**
     * Get information about the live JVM threads. The thrift-serialized request
     * controls whether a full per-thread dump is included.
     */
    public static byte[] getJvmThreadsInfo(byte[] argument) throws InternalException {
        TGetJvmThreadsInfoRequest request = new TGetJvmThreadsInfoRequest();
        JniUtil.deserializeThrift(protocolFactory_, request, argument);
        TGetJvmThreadsInfoResponse response = new TGetJvmThreadsInfoResponse();
        ThreadMXBean threadBean = ManagementFactory.getThreadMXBean();
        response.setTotalThreadCount(threadBean.getThreadCount());
        response.setDaemonThreadCount(threadBean.getDaemonThreadCount());
        response.setPeakThreadCount(threadBean.getPeakThreadCount());
        if (request.get_complete_info) {
            for (ThreadInfo threadInfo : threadBean.dumpAllThreads(true, true)) {
                TJvmThreadInfo tThreadInfo = new TJvmThreadInfo();
                long id = threadInfo.getThreadId();
                tThreadInfo.setSummary(threadInfo.toString());
                tThreadInfo.setCpuTimeInNs(threadBean.getThreadCpuTime(id));
                tThreadInfo.setUserTimeInNs(threadBean.getThreadUserTime(id));
                tThreadInfo.setBlockedCount(threadInfo.getBlockedCount());
                tThreadInfo.setBlockedTimeInMs(threadInfo.getBlockedTime());
                tThreadInfo.setIsInNative(threadInfo.isInNative());
                response.addToThreads(tThreadInfo);
            }
        }
        return serializeToThrift(response, protocolFactory_);
    }

    /** Returns the JMX bean dump as a thrift-serialized response. */
    public static byte[] getJMXJson() throws InternalException {
        TGetJMXJsonResponse response = new TGetJMXJsonResponse(JMXJsonUtil.getJMXJson());
        return serializeToThrift(response, protocolFactory_);
    }

    /**
     * Get Java version, input arguments and system properties.
     */
    public static String getJavaVersion() {
        RuntimeMXBean runtime = ManagementFactory.getRuntimeMXBean();
        StringBuilder sb = new StringBuilder();
        sb.append("Java Input arguments:\n");
        sb.append(Joiner.on(" ").join(runtime.getInputArguments()));
        sb.append("\nJava System properties:\n");
        for (Map.Entry<String, String> entry : runtime.getSystemProperties().entrySet()) {
            sb.append(entry.getKey() + ":" + entry.getValue() + "\n");
        }
        return sb.toString();
    }
}

View File

@ -0,0 +1,321 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/apache/impala/blob/branch-4.0.0/fe/src/main/java/org/apache/impala/util/JvmPauseMonitor.java
// and modified by Doris
package org.apache.doris.common.jni.utils;
import com.google.common.base.Joiner;
import com.google.common.base.Stopwatch;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.log4j.Logger;
import java.lang.management.GarbageCollectorMXBean;
import java.lang.management.ManagementFactory;
import java.lang.management.ThreadInfo;
import java.lang.management.ThreadMXBean;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
/**
* Class which sets up a simple thread which runs in a loop sleeping
* for a short interval of time. If the sleep takes significantly longer
* than its target time, it implies that the JVM or host machine has
* paused processing, which may cause other problems. If such a pause is
* detected, the thread logs a message.
*/
public class JvmPauseMonitor {
    private static final Logger LOG = Logger.getLogger(JvmPauseMonitor.class);

    // The target sleep time. The monitor loop sleeps this long and measures
    // how much longer the sleep actually took.
    private static final long SLEEP_INTERVAL_MS = 500;

    // Check for Java deadlocks at this interval. Set by init(). 0 or negative means that
    // the deadlock checks are disabled.
    private long deadlockCheckIntervalS = 0;

    // log WARN if we detect a pause longer than this threshold.
    private long warnThresholdMs;
    private static final long WARN_THRESHOLD_MS = 10000;

    // log INFO if we detect a pause longer than this threshold.
    private long infoThresholdMs;
    private static final long INFO_THRESHOLD_MS = 1000;

    // Overall metrics
    // Volatile to allow populating metrics concurrently with the values
    // being updated without staleness (but with no other synchronization
    // guarantees).
    private volatile long numGcWarnThresholdExceeded = 0;
    private volatile long numGcInfoThresholdExceeded = 0;
    private volatile long totalGcExtraSleepTime = 0;

    // Daemon thread running the pause monitor loop.
    private Thread monitorThread;

    // Loop flag read by the monitor thread. Never cleared anywhere in this
    // class; volatile so a future stop path would become visible to the loop.
    private volatile boolean shouldRun = true;

    // Singleton instance of this pause monitor.
    // NOTE(review): public mutable static field — callers could reassign it;
    // confirm it is treated as read-only outside this class.
    public static JvmPauseMonitor INSTANCE = new JvmPauseMonitor();

    // Initializes the pause monitor. No-op if called multiple times.
    // NOTE(review): not synchronized; two concurrent first calls could both
    // pass isStarted() and start two monitor threads — confirm this is only
    // invoked from single-threaded startup.
    public static void initPauseMonitor(long deadlockCheckIntervalS) {
        if (INSTANCE.isStarted()) {
            return;
        }
        INSTANCE.init(deadlockCheckIntervalS);
    }

    private JvmPauseMonitor() {
        this(INFO_THRESHOLD_MS, WARN_THRESHOLD_MS);
    }

    private JvmPauseMonitor(long infoThresholdMs, long warnThresholdMs) {
        this.infoThresholdMs = infoThresholdMs;
        this.warnThresholdMs = warnThresholdMs;
    }

    // Starts the daemon monitor thread. Deadlock checks run only when
    // deadlockCheckIntervalS > 0.
    protected void init(long deadlockCheckIntervalS) {
        this.deadlockCheckIntervalS = deadlockCheckIntervalS;
        monitorThread = new Thread(new Monitor(), "JVM pause monitor");
        monitorThread.setDaemon(true);
        monitorThread.start();
    }

    // True once init() has launched the monitor thread.
    public boolean isStarted() {
        return monitorThread != null;
    }

    public long getNumGcWarnThresholdExceeded() {
        return numGcWarnThresholdExceeded;
    }

    public long getNumGcInfoThresholdExceeded() {
        return numGcInfoThresholdExceeded;
    }

    public long getTotalGcExtraSleepTime() {
        return totalGcExtraSleepTime;
    }

    /**
     * Helper method that formats the message to be logged, along with
     * the GC metrics: the approximate pause length plus a per-GC-pool diff of
     * collection counts/times across the sleep.
     */
    private String formatMessage(long extraSleepTime,
            Map<String, GcTimes> gcTimesAfterSleep,
            Map<String, GcTimes> gcTimesBeforeSleep) {
        // Only pools present in both snapshots can be diffed.
        Set<String> gcBeanNames = Sets.intersection(
                gcTimesAfterSleep.keySet(),
                gcTimesBeforeSleep.keySet());
        List<String> gcDiffs = Lists.newArrayList();
        for (String name : gcBeanNames) {
            GcTimes diff = gcTimesAfterSleep.get(name).subtract(
                    gcTimesBeforeSleep.get(name));
            if (diff.gcCount != 0) {
                gcDiffs.add("GC pool '" + name + "' had collection(s): " + diff);
            }
        }
        String ret = "Detected pause in JVM or host machine (eg GC): "
                + "pause of approximately " + extraSleepTime + "ms\n";
        if (gcDiffs.isEmpty()) {
            ret += "No GCs detected";
        } else {
            ret += Joiner.on("\n").join(gcDiffs);
        }
        return ret;
    }

    // Snapshot of (collection count, collection time) for every GC MXBean,
    // keyed by bean name.
    private Map<String, GcTimes> getGcTimes() {
        Map<String, GcTimes> map = Maps.newHashMap();
        List<GarbageCollectorMXBean> gcBeans =
                ManagementFactory.getGarbageCollectorMXBeans();
        for (GarbageCollectorMXBean gcBean : gcBeans) {
            map.put(gcBean.getName(), new GcTimes(gcBean));
        }
        return map;
    }

    // Pair of (collection count, cumulative collection time) for one GC pool.
    private static class GcTimes {
        private GcTimes(GarbageCollectorMXBean gcBean) {
            gcCount = gcBean.getCollectionCount();
            gcTimeMillis = gcBean.getCollectionTime();
        }

        private GcTimes(long count, long time) {
            this.gcCount = count;
            this.gcTimeMillis = time;
        }

        // Difference of this snapshot minus an earlier one.
        private GcTimes subtract(GcTimes other) {
            return new GcTimes(this.gcCount - other.gcCount,
                    this.gcTimeMillis - other.gcTimeMillis);
        }

        @Override
        public String toString() {
            return "count=" + gcCount + " time=" + gcTimeMillis + "ms";
        }

        private long gcCount;
        private long gcTimeMillis;
    }

    /**
     * Runnable instance of the pause monitor loop. Launched from serviceStart().
     */
    private class Monitor implements Runnable {
        @Override
        public void run() {
            Stopwatch sw = Stopwatch.createUnstarted();
            Stopwatch timeSinceDeadlockCheck = Stopwatch.createStarted();
            Map<String, GcTimes> gcTimesBeforeSleep = getGcTimes();
            LOG.info("Starting JVM pause monitor");
            while (shouldRun) {
                sw.reset().start();
                try {
                    Thread.sleep(SLEEP_INTERVAL_MS);
                } catch (InterruptedException ie) {
                    // Interruption terminates the monitor permanently.
                    LOG.error("JVM pause monitor interrupted", ie);
                    return;
                }
                sw.stop();
                // Any time beyond the requested sleep is attributed to a JVM or
                // host pause (GC, swapping, VM suspension, ...).
                long extraSleepTime = sw.elapsed(TimeUnit.MILLISECONDS) - SLEEP_INTERVAL_MS;
                Map<String, GcTimes> gcTimesAfterSleep = getGcTimes();
                if (extraSleepTime > warnThresholdMs) {
                    ++numGcWarnThresholdExceeded;
                    LOG.warn(formatMessage(
                            extraSleepTime, gcTimesAfterSleep, gcTimesBeforeSleep));
                } else if (extraSleepTime > infoThresholdMs) {
                    ++numGcInfoThresholdExceeded;
                    LOG.info(formatMessage(
                            extraSleepTime, gcTimesAfterSleep, gcTimesBeforeSleep));
                }
                totalGcExtraSleepTime += extraSleepTime;
                gcTimesBeforeSleep = gcTimesAfterSleep;
                // Periodic deadlock scan, disabled when the interval is <= 0.
                if (deadlockCheckIntervalS > 0
                        && timeSinceDeadlockCheck.elapsed(TimeUnit.SECONDS) >= deadlockCheckIntervalS) {
                    checkForDeadlocks();
                    timeSinceDeadlockCheck.reset().start();
                }
            }
        }

        /**
         * Check for deadlocks between Java threads using the JVM's deadlock detector.
         * If a deadlock is found, log info about the deadlocked threads and exit the
         * process.
         * <p>
         * We choose to exit the process this situation because the deadlock will likely
         * cause hangs and other forms of service unavailability and there is no way to
         * recover from the deadlock except by restarting the process.
         */
        private void checkForDeadlocks() {
            ThreadMXBean threadMx = ManagementFactory.getThreadMXBean();
            long[] deadlockedTids = threadMx.findDeadlockedThreads();
            if (deadlockedTids != null) {
                ThreadInfo[] deadlockedThreads =
                        threadMx.getThreadInfo(deadlockedTids, true, true);
                // Log diagnostics with error before aborting the process with a FATAL log.
                LOG.error("Found " + deadlockedThreads.length + " threads in deadlock: ");
                for (ThreadInfo thread : deadlockedThreads) {
                    // Defensively check for null in case the thread somehow disappeared between
                    // findDeadlockedThreads() and getThreadInfo().
                    if (thread != null) {
                        LOG.error(thread.toString());
                    }
                }
                LOG.warn("All threads:");
                for (ThreadInfo thread : threadMx.dumpAllThreads(true, true)) {
                    LOG.error(thread.toString());
                }
                // In the context of an Doris service, LOG.fatal calls glog's fatal, which
                // aborts the process, which will produce a coredump if coredumps are enabled.
                LOG.fatal("Aborting because of deadlocked threads in JVM.");
                System.exit(1);
            }
        }
    }

    /**
     * Helper for manual testing that causes a deadlock between java threads.
     * Thread 1 locks obj2 then obj1; the main thread locks obj1 then obj2 —
     * the opposite order, so they eventually deadlock.
     */
    private static void causeDeadlock() {
        final Object obj1 = new Object();
        final Object obj2 = new Object();
        new Thread(new Runnable() {
            @Override
            public void run() {
                while (true) {
                    synchronized (obj2) {
                        synchronized (obj1) {
                            System.err.println("Thread 1 got locks");
                        }
                    }
                }
            }
        }).start();
        while (true) {
            synchronized (obj1) {
                synchronized (obj2) {
                    System.err.println("Thread 2 got locks");
                }
            }
        }
    }

    /**
     * This function just leaks memory into a list. Running this function
     * with a 1GB heap will very quickly go into "GC hell" and result in
     * log messages about the GC pauses.
     */
    private static void allocateMemory() {
        List<String> list = Lists.newArrayList();
        int i = 0;
        while (true) {
            list.add(String.valueOf(i++));
        }
    }

    /**
     * Simple 'main' to facilitate manual testing of the pause monitor.
     * Usage: pass "gc" to simulate GC pressure or "deadlock" to simulate a
     * thread deadlock; both run until the process is killed (or aborted).
     */
    @SuppressWarnings("resource")
    public static void main(String[] args) throws Exception {
        JvmPauseMonitor monitor = new JvmPauseMonitor();
        monitor.init(60);
        if (args[0].equals("gc")) {
            allocateMemory();
        } else if (args[0].equals("deadlock")) {
            causeDeadlock();
        } else {
            System.err.println("Unknown mode");
        }
    }
}

View File

@ -0,0 +1,183 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.common.jni.utils;
import sun.misc.Unsafe;
import java.lang.reflect.Field;
import java.security.AccessController;
import java.security.PrivilegedAction;
/**
* Reference to Apache Spark with some customization.
* Default call native method to allocate and release memory, which will be tracked by memory tracker in BE.
* Call {@link OffHeap#setTesting()} in test scenario.
*/
public class OffHeap {
    // Copy large regions in bounded chunks; a single huge Unsafe.copyMemory
    // call would keep the thread in native code for its whole duration.
    private static final long UNSAFE_COPY_THRESHOLD = 1024L * 1024L;

    // When true, allocate/free directly via Unsafe instead of routing through
    // the BE's native memory tracker. Enabled by setTesting().
    private static boolean IS_TESTING = false;

    private static final Unsafe UNSAFE;

    // Base offsets of the first element of each primitive array type, used to
    // read/write Java arrays through Unsafe.
    public static final int BOOLEAN_ARRAY_OFFSET;
    public static final int BYTE_ARRAY_OFFSET;
    public static final int SHORT_ARRAY_OFFSET;
    public static final int INT_ARRAY_OFFSET;
    public static final int LONG_ARRAY_OFFSET;
    public static final int FLOAT_ARRAY_OFFSET;
    public static final int DOUBLE_ARRAY_OFFSET;

    static {
        UNSAFE = (Unsafe) AccessController.doPrivileged(
                (PrivilegedAction<Object>) () -> {
                    try {
                        Field f = Unsafe.class.getDeclaredField("theUnsafe");
                        f.setAccessible(true);
                        return f.get(null);
                    } catch (NoSuchFieldException | IllegalAccessException e) {
                        throw new Error();
                    }
                });
        BOOLEAN_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(boolean[].class);
        BYTE_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(byte[].class);
        SHORT_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(short[].class);
        INT_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(int[].class);
        LONG_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(long[].class);
        FLOAT_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(float[].class);
        DOUBLE_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(double[].class);
    }

    /** Switch allocation to plain Unsafe (no native memory tracker); for tests only. */
    public static void setTesting() {
        IS_TESTING = true;
    }

    // ---- Primitive accessors: thin wrappers over Unsafe get/put. Pass a Java
    // array (with the matching *_ARRAY_OFFSET) or null with an absolute address.

    public static int getInt(Object object, long offset) {
        return UNSAFE.getInt(object, offset);
    }

    public static void putInt(Object object, long offset, int value) {
        UNSAFE.putInt(object, offset, value);
    }

    public static boolean getBoolean(Object object, long offset) {
        return UNSAFE.getBoolean(object, offset);
    }

    public static void putBoolean(Object object, long offset, boolean value) {
        UNSAFE.putBoolean(object, offset, value);
    }

    public static byte getByte(Object object, long offset) {
        return UNSAFE.getByte(object, offset);
    }

    public static void putByte(Object object, long offset, byte value) {
        UNSAFE.putByte(object, offset, value);
    }

    public static short getShort(Object object, long offset) {
        return UNSAFE.getShort(object, offset);
    }

    public static void putShort(Object object, long offset, short value) {
        UNSAFE.putShort(object, offset, value);
    }

    public static long getLong(Object object, long offset) {
        return UNSAFE.getLong(object, offset);
    }

    public static void putLong(Object object, long offset, long value) {
        UNSAFE.putLong(object, offset, value);
    }

    public static float getFloat(Object object, long offset) {
        return UNSAFE.getFloat(object, offset);
    }

    public static void putFloat(Object object, long offset, float value) {
        UNSAFE.putFloat(object, offset, value);
    }

    public static double getDouble(Object object, long offset) {
        return UNSAFE.getDouble(object, offset);
    }

    public static void putDouble(Object object, long offset, double value) {
        UNSAFE.putDouble(object, offset, value);
    }

    /**
     * Fill {@code size} bytes starting at {@code address} with {@code value}.
     * Note the parameter order differs from {@link Unsafe#setMemory(long, long, byte)}
     * (which takes the byte count before the value); this wrapper swaps them.
     */
    public static void setMemory(long address, byte value, long size) {
        UNSAFE.setMemory(address, size, value);
    }

    /**
     * Allocate {@code size} bytes of off-heap memory. In production the call is
     * routed through the BE's native memory tracker; in testing mode it goes
     * straight to Unsafe.
     */
    public static long allocateMemory(long size) {
        if (IS_TESTING) {
            return UNSAFE.allocateMemory(size);
        } else {
            return JNINativeMethod.memoryTrackerMalloc(size);
        }
    }

    /** Release memory previously obtained from {@link #allocateMemory(long)}. */
    public static void freeMemory(long address) {
        if (IS_TESTING) {
            UNSAFE.freeMemory(address);
        } else {
            JNINativeMethod.memoryTrackerFree(address);
        }
    }

    /**
     * Allocate a new region of {@code newSize} bytes, copy the old contents into
     * it and free the old region.
     *
     * @return the address of the new region
     */
    public static long reallocateMemory(long address, long oldSize, long newSize) {
        long newMemory = allocateMemory(newSize);
        // Copy only what fits in the destination: copying oldSize bytes into a
        // smaller allocation would write past the end of the new buffer.
        copyMemory(null, address, null, newMemory, Math.min(oldSize, newSize));
        freeMemory(address);
        return newMemory;
    }

    /**
     * memmove-style copy of {@code length} bytes in chunks of at most
     * {@code UNSAFE_COPY_THRESHOLD}. The copy direction is chosen so that
     * overlapping source/destination ranges are handled correctly.
     */
    public static void copyMemory(Object src, long srcOffset, Object dst, long dstOffset, long length) {
        // Check if dstOffset is before or after srcOffset to determine if we should copy
        // forward or backwards. This is necessary in case src and dst overlap.
        if (dstOffset < srcOffset) {
            while (length > 0) {
                long size = Math.min(length, UNSAFE_COPY_THRESHOLD);
                UNSAFE.copyMemory(src, srcOffset, dst, dstOffset, size);
                length -= size;
                srcOffset += size;
                dstOffset += size;
            }
        } else {
            // Copy back-to-front so earlier chunks never clobber unread bytes.
            srcOffset += length;
            dstOffset += length;
            while (length > 0) {
                long size = Math.min(length, UNSAFE_COPY_THRESHOLD);
                srcOffset -= size;
                dstOffset -= size;
                UNSAFE.copyMemory(src, srcOffset, dst, dstOffset, size);
                length -= size;
            }
        }
    }
}

View File

@ -0,0 +1,132 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.common.jni.utils;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.math.RoundingMode;
import java.time.DateTimeException;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.util.Arrays;
public class TypeNativeBytes {
    /**
     * Reverse {@code bytes} in place and return the same array.
     * The JVM is big-endian while x86 backends are little-endian, so values
     * exchanged with the native side must have their byte order flipped.
     */
    public static byte[] convertByteOrder(byte[] bytes) {
        // In-place reversal with two converging indices; callers rely on the
        // argument array itself being reversed, so no copy is made.
        for (int lo = 0, hi = bytes.length - 1; lo < hi; ++lo, --hi) {
            byte tmp = bytes[lo];
            bytes[lo] = bytes[hi];
            bytes[hi] = tmp;
        }
        return bytes;
    }

    /**
     * Encode a {@link BigInteger} as little-endian two's-complement bytes.
     */
    public static byte[] getBigIntegerBytes(BigInteger v) {
        byte[] raw = v.toByteArray();
        // Drop the extra sign byte BigInteger prepends to non-negative values
        // whose top bit is set, keeping the magnitude as compact as possible.
        if (v.signum() >= 0 && raw[0] == 0) {
            raw = Arrays.copyOfRange(raw, 1, raw.length);
        }
        return convertByteOrder(raw);
    }

    /**
     * Decode little-endian bytes into a {@link BigInteger}.
     * NOTE(review): a set top bit is always widened with a zero byte, so the
     * result is never negative — a negative value written by
     * {@link #getBigIntegerBytes(BigInteger)} does not round-trip; confirm
     * callers only pass unsigned payloads here.
     */
    public static BigInteger getBigInteger(byte[] bytes) {
        byte[] bigEndian = convertByteOrder(bytes);
        if ((bigEndian[0] & 0x80) != 0) {
            byte[] widened = new byte[bigEndian.length + 1];
            System.arraycopy(bigEndian, 0, widened, 1, bigEndian.length);
            bigEndian = widened;
        }
        return new BigInteger(bigEndian);
    }

    /**
     * Encode a decimal as a fixed-size little-endian two's-complement buffer,
     * rounding to {@code scale} with HALF_EVEN.
     */
    public static byte[] getDecimalBytes(BigDecimal v, int scale, int size) {
        BigInteger unscaled = v.setScale(scale, RoundingMode.HALF_EVEN).unscaledValue();
        byte[] littleEndian = convertByteOrder(unscaled.toByteArray());
        byte[] out = new byte[size];
        if (unscaled.signum() == -1) {
            // Sign-extend: negative two's-complement values pad with 0xFF.
            Arrays.fill(out, (byte) -1);
        }
        System.arraycopy(littleEndian, 0, out, 0, Math.min(littleEndian.length, out.length));
        return out;
    }

    /** Decode a fixed-size little-endian buffer back into a decimal of the given scale. */
    public static BigDecimal getDecimal(byte[] bytes, int scale) {
        BigInteger unscaled = new BigInteger(convertByteOrder(bytes));
        return new BigDecimal(unscaled, scale);
    }

    /** Pack (year, month, day) into the backend DATEV2 layout: year<<9 | month<<5 | day. */
    public static int convertToDateV2(int year, int month, int day) {
        return (int) (((long) year << 9) | ((long) month << 5) | day);
    }

    /** Pack date-time components (no microseconds) into the DATETIMEV2 layout. */
    public static long convertToDateTimeV2(int year, int month, int day, int hour, int minute, int second) {
        // Identical to the 7-argument overload with a zero microsecond field.
        return convertToDateTimeV2(year, month, day, hour, minute, second, 0);
    }

    /** Pack date-time components including microseconds into the DATETIMEV2 layout. */
    public static long convertToDateTimeV2(int year, int month, int day, int hour, int minute, int second,
            int microsecond) {
        return (long) microsecond | (long) second << 20 | (long) minute << 26 | (long) hour << 32
                | (long) day << 37 | (long) month << 42 | (long) year << 46;
    }

    /**
     * Unpack a DATEV2 value into a {@link LocalDate}; out-of-range components
     * yield {@link LocalDate#MAX} instead of throwing.
     */
    public static LocalDate convertToJavaDate(int date) {
        int year = date >> 9;
        int month = (date >> 5) & 0XF;
        int day = date & 0X1F;
        try {
            return LocalDate.of(year, month, day);
        } catch (DateTimeException e) {
            return LocalDate.MAX;
        }
    }

    /**
     * Unpack a DATETIMEV2 value into a {@link LocalDateTime}; out-of-range
     * components yield {@link LocalDateTime#MAX} instead of throwing.
     */
    public static LocalDateTime convertToJavaDateTime(long time) {
        int year = (int) (time >> 46);
        int month = (int) ((time >> 42) & 0XF);
        int day = (int) ((time >> 37) & 0X1F);
        int hour = (int) ((time >> 32) & 0X1F);
        int minute = (int) ((time >> 26) & 0X3F);
        int second = (int) ((time >> 20) & 0X3F);
        int microsecond = (int) (time & 0XFFFFF);
        try {
            return LocalDateTime.of(year, month, day, hour, minute, second, microsecond * 1000);
        } catch (DateTimeException e) {
            return LocalDateTime.MAX;
        }
    }
}

View File

@ -0,0 +1,656 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.common.jni.utils;
import org.apache.doris.catalog.ArrayType;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.ScalarType;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.Pair;
import org.apache.doris.common.exception.InternalException;
import org.apache.doris.thrift.TPrimitiveType;
import org.apache.doris.thrift.TScalarType;
import org.apache.doris.thrift.TTypeDesc;
import org.apache.doris.thrift.TTypeNode;
import com.google.common.base.Preconditions;
import com.google.common.collect.Sets;
import com.vesoft.nebula.client.graph.data.DateTimeWrapper;
import com.vesoft.nebula.client.graph.data.DateWrapper;
import com.vesoft.nebula.client.graph.data.ValueWrapper;
import org.apache.log4j.Logger;
import sun.misc.Unsafe;
import java.io.File;
import java.io.FileNotFoundException;
import java.lang.reflect.Field;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLClassLoader;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.time.DateTimeException;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.util.Set;
public class UdfUtils {
private static final Logger LOG = Logger.getLogger(UdfUtils.class);
public static final Unsafe UNSAFE;
private static final long UNSAFE_COPY_THRESHOLD = 1024L * 1024L;
public static final long BYTE_ARRAY_OFFSET;
public static final long INT_ARRAY_OFFSET;
static {
UNSAFE = (Unsafe) AccessController.doPrivileged(
(PrivilegedAction<Object>) () -> {
try {
Field f = Unsafe.class.getDeclaredField("theUnsafe");
f.setAccessible(true);
return f.get(null);
} catch (NoSuchFieldException | IllegalAccessException e) {
throw new Error();
}
});
BYTE_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(byte[].class);
INT_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(int[].class);
}
// Data types that are supported as return or argument types in Java UDFs.
public enum JavaUdfDataType {
    INVALID_TYPE("INVALID_TYPE", TPrimitiveType.INVALID_TYPE, 0),
    BOOLEAN("BOOLEAN", TPrimitiveType.BOOLEAN, 1),
    TINYINT("TINYINT", TPrimitiveType.TINYINT, 1),
    SMALLINT("SMALLINT", TPrimitiveType.SMALLINT, 2),
    INT("INT", TPrimitiveType.INT, 4),
    BIGINT("BIGINT", TPrimitiveType.BIGINT, 8),
    FLOAT("FLOAT", TPrimitiveType.FLOAT, 4),
    DOUBLE("DOUBLE", TPrimitiveType.DOUBLE, 8),
    CHAR("CHAR", TPrimitiveType.CHAR, 0),
    VARCHAR("VARCHAR", TPrimitiveType.VARCHAR, 0),
    STRING("STRING", TPrimitiveType.STRING, 0),
    DATE("DATE", TPrimitiveType.DATE, 8),
    DATETIME("DATETIME", TPrimitiveType.DATETIME, 8),
    LARGEINT("LARGEINT", TPrimitiveType.LARGEINT, 16),
    DECIMALV2("DECIMALV2", TPrimitiveType.DECIMALV2, 16),
    DATEV2("DATEV2", TPrimitiveType.DATEV2, 4),
    DATETIMEV2("DATETIMEV2", TPrimitiveType.DATETIMEV2, 8),
    DECIMAL32("DECIMAL32", TPrimitiveType.DECIMAL32, 4),
    DECIMAL64("DECIMAL64", TPrimitiveType.DECIMAL64, 8),
    DECIMAL128("DECIMAL128", TPrimitiveType.DECIMAL128I, 16),
    ARRAY_TYPE("ARRAY_TYPE", TPrimitiveType.ARRAY, 0);

    // Human-readable name; also returned by toString().
    private final String description;
    // Matching thrift primitive type, used to compare against catalog types.
    private final TPrimitiveType thriftType;
    // Fixed byte width of the native representation; 0 for variable-length types.
    private final int len;
    // NOTE(review): precision/scale/itemType are mutable state stored on the
    // shared enum constants (populated by setReturnType/setArgTypes below);
    // concurrent UDF loads with different decimal/array types would race on
    // them — confirm usage is confined to a single load path.
    private int precision;
    private int scale;
    private Type itemType;

    JavaUdfDataType(String description, TPrimitiveType thriftType, int len) {
        this.description = description;
        this.thriftType = thriftType;
        this.len = len;
    }

    @Override
    public String toString() {
        return description;
    }

    public TPrimitiveType getPrimitiveType() {
        return thriftType;
    }

    public int getLen() {
        return len;
    }

    // Map a Java class (from a UDF method signature) to the catalog types it
    // may represent; several entries when one Java type serves multiple
    // catalog types (e.g. BigDecimal -> all decimal variants).
    public static Set<JavaUdfDataType> getCandidateTypes(Class<?> c) {
        if (c == boolean.class || c == Boolean.class) {
            return Sets.newHashSet(JavaUdfDataType.BOOLEAN);
        } else if (c == byte.class || c == Byte.class) {
            return Sets.newHashSet(JavaUdfDataType.TINYINT);
        } else if (c == short.class || c == Short.class) {
            return Sets.newHashSet(JavaUdfDataType.SMALLINT);
        } else if (c == int.class || c == Integer.class) {
            return Sets.newHashSet(JavaUdfDataType.INT);
        } else if (c == long.class || c == Long.class) {
            return Sets.newHashSet(JavaUdfDataType.BIGINT);
        } else if (c == float.class || c == Float.class) {
            return Sets.newHashSet(JavaUdfDataType.FLOAT);
        } else if (c == double.class || c == Double.class) {
            return Sets.newHashSet(JavaUdfDataType.DOUBLE);
        } else if (c == char.class || c == Character.class) {
            return Sets.newHashSet(JavaUdfDataType.CHAR);
        } else if (c == String.class) {
            return Sets.newHashSet(JavaUdfDataType.STRING);
        } else if (Type.DATE_SUPPORTED_JAVA_TYPE.contains(c)) {
            return Sets.newHashSet(JavaUdfDataType.DATE, JavaUdfDataType.DATEV2);
        } else if (Type.DATETIME_SUPPORTED_JAVA_TYPE.contains(c)) {
            return Sets.newHashSet(JavaUdfDataType.DATETIME, JavaUdfDataType.DATETIMEV2);
        } else if (c == BigInteger.class) {
            return Sets.newHashSet(JavaUdfDataType.LARGEINT);
        } else if (c == BigDecimal.class) {
            return Sets.newHashSet(JavaUdfDataType.DECIMALV2, JavaUdfDataType.DECIMAL32, JavaUdfDataType.DECIMAL64,
                    JavaUdfDataType.DECIMAL128);
        } else if (c == java.util.ArrayList.class) {
            return Sets.newHashSet(JavaUdfDataType.ARRAY_TYPE);
        }
        return Sets.newHashSet(JavaUdfDataType.INVALID_TYPE);
    }

    // True when the catalog type maps to some (non-INVALID) enum constant.
    public static boolean isSupported(Type t) {
        for (JavaUdfDataType javaType : JavaUdfDataType.values()) {
            if (javaType == JavaUdfDataType.INVALID_TYPE) {
                continue;
            }
            if (javaType.getPrimitiveType() == t.getPrimitiveType().toThrift()) {
                return true;
            }
        }
        return false;
    }

    public int getPrecision() {
        return precision;
    }

    public void setPrecision(int precision) {
        this.precision = precision;
    }

    // DECIMALV2 has a fixed scale of 9 in the backend; other types use the
    // scale recorded by setScale().
    public int getScale() {
        return this.thriftType == TPrimitiveType.DECIMALV2 ? 9 : scale;
    }

    public void setScale(int scale) {
        this.scale = scale;
    }

    public Type getItemType() {
        return itemType;
    }

    public void setItemType(Type type) {
        this.itemType = type;
    }
}
/**
 * Decode the thrift type node at {@code nodeIdx} of {@code typeDesc} into a
 * catalog {@link Type}. Array types recurse into the following node(s).
 *
 * @param typeDesc flattened thrift type tree
 * @param nodeIdx  index of the node to decode
 * @return pair of (decoded type, index of the last node consumed)
 * @throws InternalException for node kinds other than SCALAR and ARRAY
 */
public static Pair<Type, Integer> fromThrift(TTypeDesc typeDesc, int nodeIdx) throws InternalException {
    TTypeNode node = typeDesc.getTypes().get(nodeIdx);
    Type type = null;
    switch (node.getType()) {
        case SCALAR: {
            Preconditions.checkState(node.isSetScalarType());
            TScalarType scalarType = node.getScalarType();
            if (scalarType.getType() == TPrimitiveType.CHAR) {
                Preconditions.checkState(scalarType.isSetLen());
                type = ScalarType.createCharType(scalarType.getLen());
            } else if (scalarType.getType() == TPrimitiveType.VARCHAR) {
                Preconditions.checkState(scalarType.isSetLen());
                type = ScalarType.createVarcharType(scalarType.getLen());
            } else if (scalarType.getType() == TPrimitiveType.DECIMALV2) {
                Preconditions.checkState(scalarType.isSetPrecision()
                        && scalarType.isSetScale());
                type = ScalarType.createDecimalType(scalarType.getPrecision(),
                        scalarType.getScale());
            } else if (scalarType.getType() == TPrimitiveType.DECIMAL32
                    || scalarType.getType() == TPrimitiveType.DECIMAL64
                    || scalarType.getType() == TPrimitiveType.DECIMAL128I) {
                Preconditions.checkState(scalarType.isSetPrecision()
                        && scalarType.isSetScale());
                type = ScalarType.createDecimalV3Type(scalarType.getPrecision(),
                        scalarType.getScale());
            } else {
                // All remaining scalar types carry no extra parameters.
                type = ScalarType.createType(
                        PrimitiveType.fromThrift(scalarType.getType()));
            }
            break;
        }
        case ARRAY: {
            // The element type is encoded in the next node(s); recurse and
            // advance nodeIdx past everything the element consumed.
            Preconditions.checkState(nodeIdx + 1 < typeDesc.getTypesSize());
            Pair<Type, Integer> childType = fromThrift(typeDesc, nodeIdx + 1);
            type = new ArrayType(childType.first);
            nodeIdx = childType.second;
            break;
        }
        default:
            throw new InternalException("Return type " + node.getType() + " is not supported now!");
    }
    return Pair.of(type, nodeIdx);
}
/**
 * Address of slot {@code offset} in a sequence of 8-byte slots starting at
 * {@code base}.
 */
public static long getAddressAtOffset(long base, int offset) {
    return base + ((long) offset << 3);
}
/**
 * Copy {@code length} bytes between (object, offset) locations via Unsafe, in
 * chunks of at most {@code UNSAFE_COPY_THRESHOLD} bytes. Behaves like
 * {@code memmove}: the copy direction is chosen so overlapping ranges are
 * handled correctly.
 */
public static void copyMemory(
        Object src, long srcOffset, Object dst, long dstOffset, long length) {
    // Check if dstOffset is before or after srcOffset to determine if we should copy
    // forward or backwards. This is necessary in case src and dst overlap.
    if (dstOffset < srcOffset) {
        // Destination starts below source: copy front-to-back.
        while (length > 0) {
            long size = Math.min(length, UNSAFE_COPY_THRESHOLD);
            UNSAFE.copyMemory(src, srcOffset, dst, dstOffset, size);
            length -= size;
            srcOffset += size;
            dstOffset += size;
        }
    } else {
        // Destination starts at/above source: copy back-to-front so earlier
        // chunks never clobber bytes that are still to be read.
        srcOffset += length;
        dstOffset += length;
        while (length > 0) {
            long size = Math.min(length, UNSAFE_COPY_THRESHOLD);
            srcOffset -= size;
            dstOffset -= size;
            UNSAFE.copyMemory(src, srcOffset, dst, dstOffset, size);
            length -= size;
        }
    }
}
/**
 * Build a {@link URLClassLoader} over a local jar file.
 *
 * @param jarPath path of a jar on the local filesystem
 * @param parent  parent class loader for delegation
 * @throws FileNotFoundException if {@code jarPath} does not exist
 * @throws MalformedURLException if the path cannot be converted to a URL
 */
public static URLClassLoader getClassLoader(String jarPath, ClassLoader parent)
        throws MalformedURLException, FileNotFoundException {
    File localJar = new File(jarPath);
    if (!localJar.exists()) {
        throw new FileNotFoundException("Can not find local file: " + jarPath);
    }
    return URLClassLoader.newInstance(new URL[] {localJar.toURI().toURL()}, parent);
}
/**
 * Sets the return type of a Java UDF. Returns true if the return type is compatible
 * with the return type from the function definition. Throws an InternalException
 * if the return type is not supported.
 *
 * @param retType       catalog return type declared in the function definition
 * @param udfReturnType Java return type of the loaded evaluate() method
 * @return pair of (compatible?, resolved data type). When no candidate matches,
 *         the first candidate is still returned alongside {@code false}.
 * @throws InternalException if {@code retType} is not representable in Java UDFs
 */
public static Pair<Boolean, JavaUdfDataType> setReturnType(Type retType, Class<?> udfReturnType)
        throws InternalException {
    if (!JavaUdfDataType.isSupported(retType)) {
        throw new InternalException("Unsupported return type: " + retType.toSql());
    }
    Set<JavaUdfDataType> javaTypes = JavaUdfDataType.getCandidateTypes(udfReturnType);
    // Check if the evaluate method return type is compatible with the return type from
    // the function definition. This happens when both of them map to the same primitive
    // type.
    Object[] res = javaTypes.stream().filter(
            t -> t.getPrimitiveType() == retType.getPrimitiveType().toThrift()).toArray();
    JavaUdfDataType result = res.length == 0 ? javaTypes.iterator().next() : (JavaUdfDataType) res[0];
    // NOTE(review): the setters below mutate shared enum constants (see
    // JavaUdfDataType field comments) — confirm this is never reached
    // concurrently for different decimal/array types.
    if (retType.isDecimalV3() || retType.isDatetimeV2()) {
        result.setPrecision(retType.getPrecision());
        result.setScale(((ScalarType) retType).getScalarScale());
    } else if (retType.isArrayType()) {
        ArrayType arrType = (ArrayType) retType;
        result.setItemType(arrType.getItemType());
        if (arrType.getItemType().isDatetimeV2() || arrType.getItemType().isDecimalV3()) {
            result.setPrecision(arrType.getItemType().getPrecision());
            result.setScale(((ScalarType) arrType.getItemType()).getScalarScale());
        }
    }
    return Pair.of(res.length != 0, result);
}
/**
 * Sets the argument types of a Java UDF or UDAF. Returns true if the argument types specified
 * in the UDF are compatible with the argument types of the evaluate() function loaded
 * from the associated JAR file.
 *
 * @param parameterTypes catalog argument types from the function definition
 * @param udfArgTypes    Java parameter types of the loaded evaluate() method
 * @param isUdaf         when true, skip the first Java parameter (the UDAF state)
 * @return pair of (all compatible?, resolved per-argument types). On the first
 *         incompatible argument the partially-filled array is returned with
 *         {@code false}.
 */
public static Pair<Boolean, JavaUdfDataType[]> setArgTypes(Type[] parameterTypes, Class<?>[] udfArgTypes,
        boolean isUdaf) {
    JavaUdfDataType[] inputArgTypes = new JavaUdfDataType[parameterTypes.length];
    int firstPos = isUdaf ? 1 : 0;
    for (int i = 0; i < parameterTypes.length; ++i) {
        Set<JavaUdfDataType> javaTypes = JavaUdfDataType.getCandidateTypes(udfArgTypes[i + firstPos]);
        int finalI = i;
        // Pick the candidate whose thrift primitive type matches the declared
        // catalog type; fall back to the first candidate when none matches.
        Object[] res = javaTypes.stream().filter(
                t -> t.getPrimitiveType() == parameterTypes[finalI].getPrimitiveType().toThrift()).toArray();
        inputArgTypes[i] = res.length == 0 ? javaTypes.iterator().next() : (JavaUdfDataType) res[0];
        // NOTE(review): these setters mutate shared enum constants (see
        // JavaUdfDataType field comments).
        if (parameterTypes[finalI].isDecimalV3() || parameterTypes[finalI].isDatetimeV2()) {
            inputArgTypes[i].setPrecision(parameterTypes[finalI].getPrecision());
            inputArgTypes[i].setScale(((ScalarType) parameterTypes[finalI]).getScalarScale());
        } else if (parameterTypes[finalI].isArrayType()) {
            ArrayType arrType = (ArrayType) parameterTypes[finalI];
            inputArgTypes[i].setItemType(arrType.getItemType());
        }
        if (res.length == 0) {
            return Pair.of(false, inputArgTypes);
        }
    }
    return Pair.of(true, inputArgTypes);
}
/**
 * Unpack a backend DATETIMEV2 bit-packed value into the requested date-time
 * class. Supported targets: {@link LocalDateTime}, joda {@code DateTime} and
 * joda {@code LocalDateTime}; any other class yields {@code null}.
 */
public static Object convertDateTimeV2ToJavaDateTime(long date, Class clz) {
    int year = (int) (date >> 46);
    int month = (int) ((date >> 42) & 0XF);
    int day = (int) ((date >> 37) & 0X1F);
    int hour = (int) ((date >> 32) & 0X1F);
    int minute = (int) ((date >> 26) & 0X3F);
    int second = (int) ((date >> 20) & 0X3F);
    // The remaining low bits (sub-second / flag fields) are intentionally ignored.
    if (org.joda.time.DateTime.class.equals(clz)) {
        return convertToJodaDateTime(year, month, day, hour, minute, second);
    }
    if (org.joda.time.LocalDateTime.class.equals(clz)) {
        return convertToJodaLocalDateTime(year, month, day, hour, minute, second);
    }
    if (LocalDateTime.class.equals(clz)) {
        return convertToLocalDateTime(year, month, day, hour, minute, second);
    }
    return null;
}
/**
 * input is a 64bit num from backend, and then get year, month, day, hour, minus, second by the order of bits.
 * Decodes the legacy DATETIME packing: year in bits 48+, month in bits 40-47,
 * day in bits 32-39, then hour/minute/second inside the low 32 bits.
 * Returns an instance of the requested class, or null for unsupported classes.
 */
public static Object convertDateTimeToJavaDateTime(long date, Class clz) {
    int year = (int) (date >> 48);
    int yearMonth = (int) (date >> 40);
    int yearMonthDay = (int) (date >> 32);
    int month = (yearMonth & 0XFF);
    int day = (yearMonthDay & 0XFF);
    // (1 << 31) overflows to Integer.MIN_VALUE; for non-negative inputs the
    // remainder still extracts the low 31 bits of the packed value.
    // NOTE(review): behavior for negative 'date' inputs is unclear — confirm
    // the backend only sends non-negative packed values.
    int hourMinuteSecond = (int) (date % (1 << 31));
    int minuteTypeNeg = (hourMinuteSecond % (1 << 16));
    int hour = (hourMinuteSecond >> 24);
    int minute = ((hourMinuteSecond >> 16) & 0XFF);
    int second = (minuteTypeNeg >> 4);
    //here don't need those bits are type = ((minus_type_neg >> 1) & 0x7);
    if (LocalDateTime.class.equals(clz)) {
        return convertToLocalDateTime(year, month, day, hour, minute, second);
    } else if (org.joda.time.DateTime.class.equals(clz)) {
        return convertToJodaDateTime(year, month, day, hour, minute, second);
    } else if (org.joda.time.LocalDateTime.class.equals(clz)) {
        return convertToJodaLocalDateTime(year, month, day, hour, minute, second);
    } else {
        return null;
    }
}
/**
 * Decodes a DATEV2 value (year << 9 | month << 5 | day) into a Java date object.
 *
 * @param date packed 32-bit DATEV2 value from the backend
 * @param clz  target class: java.time.LocalDate, java.util.Date or org.joda.time.LocalDate
 * @return the converted object, or null if clz is not a supported date class
 */
public static Object convertDateV2ToJavaDate(int date, Class clz) {
    int y = date >> 9;
    int m = (date >> 5) & 0XF;
    int d = date & 0X1F;
    if (LocalDate.class.equals(clz)) {
        return convertToLocalDate(y, m, d);
    }
    if (java.util.Date.class.equals(clz)) {
        return convertToJavaDate(y, m, d);
    }
    if (org.joda.time.LocalDate.class.equals(clz)) {
        return convertToJodaDate(y, m, d);
    }
    return null;
}
/**
 * Builds a java.time.LocalDateTime from the given fields, returning null when
 * they do not form a valid date-time (e.g. month 0 from a corrupt packed value).
 */
public static LocalDateTime convertToLocalDateTime(int year, int month, int day,
        int hour, int minute, int second) {
    LocalDateTime value = null;
    try {
        value = LocalDateTime.of(year, month, day, hour, minute, second);
    } catch (DateTimeException e) {
        // Pass the exception as the last argument (no placeholder) so the logger
        // keeps the full stack trace; a "{}" placeholder would swallow it and
        // only print e.toString().
        LOG.warn("Error occurs when parsing date time value", e);
    }
    return value;
}
/**
 * Builds an org.joda.time.DateTime from the given fields; returns null instead
 * of throwing when the fields are out of range.
 */
public static org.joda.time.DateTime convertToJodaDateTime(int year, int month, int day,
        int hour, int minute, int second) {
    org.joda.time.DateTime result;
    try {
        result = new org.joda.time.DateTime(year, month, day, hour, minute, second);
    } catch (Exception e) {
        result = null;
    }
    return result;
}
/**
 * Builds an org.joda.time.LocalDateTime from the given fields; returns null
 * instead of throwing when the fields are out of range.
 */
public static org.joda.time.LocalDateTime convertToJodaLocalDateTime(int year, int month, int day,
        int hour, int minute, int second) {
    org.joda.time.LocalDateTime result;
    try {
        result = new org.joda.time.LocalDateTime(year, month, day, hour, minute, second);
    } catch (Exception e) {
        result = null;
    }
    return result;
}
/**
 * Decodes a legacy DATE value packed by the backend into a Java date object.
 * Uses the same high bits as the legacy DATETIME layout: year (>> 48),
 * month ((>> 40) & 0xFF), day ((>> 32) & 0xFF); the time bits are unused.
 *
 * @param date packed 64-bit DATE value from the backend
 * @param clz  target class: java.time.LocalDate, java.util.Date or org.joda.time.LocalDate
 * @return the converted object, or null if clz is not a supported date class
 */
public static Object convertDateToJavaDate(long date, Class clz) {
    int year = (int) (date >> 48);
    int yearMonth = (int) (date >> 40);
    int yearMonthDay = (int) (date >> 32);
    int month = (yearMonth & 0XFF);
    int day = (yearMonthDay & 0XFF);
    if (LocalDate.class.equals(clz)) {
        return convertToLocalDate(year, month, day);
    } else if (java.util.Date.class.equals(clz)) {
        return convertToJavaDate(year, month, day);
    } else if (org.joda.time.LocalDate.class.equals(clz)) {
        return convertToJodaDate(year, month, day);
    } else {
        return null;
    }
}
/**
 * Builds a java.time.LocalDate from the given fields, returning null when they
 * do not form a valid date (e.g. month 0 from a corrupt packed value).
 */
public static LocalDate convertToLocalDate(int year, int month, int day) {
    LocalDate value = null;
    try {
        value = LocalDate.of(year, month, day);
    } catch (DateTimeException e) {
        // Pass the exception as the last argument (no placeholder) so the logger
        // keeps the full stack trace; a "{}" placeholder would swallow it and
        // only print e.toString().
        LOG.warn("Error occurs when parsing date value", e);
    }
    return value;
}
/**
 * Builds an org.joda.time.LocalDate from the given fields; returns null instead
 * of throwing when the fields are out of range.
 */
public static org.joda.time.LocalDate convertToJodaDate(int year, int month, int day) {
    org.joda.time.LocalDate result;
    try {
        result = new org.joda.time.LocalDate(year, month, day);
    } catch (Exception e) {
        result = null;
    }
    return result;
}
/**
 * Builds a legacy java.util.Date via its deprecated (year - 1900, month - 1, day)
 * constructor; returns null if construction fails.
 */
public static java.util.Date convertToJavaDate(int year, int month, int day) {
    java.util.Date result;
    try {
        result = new java.util.Date(year - 1900, month - 1, day);
    } catch (Exception e) {
        result = null;
    }
    return result;
}
/**
 * Packs a Java date-time object into the backend's legacy 64-bit DATETIME layout
 * by extracting year/month/day/hour/minute/second and delegating to
 * convertToDateTime(int, int, int, int, int, int, boolean).
 *
 * @param obj a LocalDateTime, org.joda.time.DateTime or org.joda.time.LocalDateTime
 * @param clz runtime class of obj used to pick the conversion
 * @return packed 64-bit value, or 0 if clz is unsupported
 */
public static long convertToDateTime(Object obj, Class clz) {
    if (LocalDateTime.class.equals(clz)) {
        LocalDateTime date = (LocalDateTime) obj;
        return convertToDateTime(date.getYear(), date.getMonthValue(), date.getDayOfMonth(), date.getHour(),
                date.getMinute(), date.getSecond(), false);
    } else if (org.joda.time.DateTime.class.equals(clz)) {
        org.joda.time.DateTime date = (org.joda.time.DateTime) obj;
        return convertToDateTime(date.getYear(), date.getMonthOfYear(), date.getDayOfMonth(), date.getHourOfDay(),
                date.getMinuteOfHour(), date.getSecondOfMinute(), false);
    } else if (org.joda.time.LocalDateTime.class.equals(clz)) {
        org.joda.time.LocalDateTime date = (org.joda.time.LocalDateTime) obj;
        return convertToDateTime(date.getYear(), date.getMonthOfYear(), date.getDayOfMonth(), date.getHourOfDay(),
                date.getMinuteOfHour(), date.getSecondOfMinute(), false);
    } else {
        return 0;
    }
}
/**
 * Packs a Java date object into the backend's legacy DATE layout
 * (convertToDateTime with zero time fields and isDate = true).
 *
 * @param obj a LocalDate, java.util.Date or org.joda.time.LocalDate
 * @param clz runtime class of obj used to pick the conversion
 * @return packed 64-bit value, or 0 if clz is unsupported
 */
public static long convertToDate(Object obj, Class clz) {
    if (LocalDate.class.equals(clz)) {
        LocalDate date = (LocalDate) obj;
        return convertToDateTime(date.getYear(), date.getMonthValue(), date.getDayOfMonth(), 0,
                0, 0, true);
    } else if (java.util.Date.class.equals(clz)) {
        java.util.Date date = (java.util.Date) obj;
        // java.util.Date: getMonth() is 0-based and getDay() is the day of week.
        // Use getMonth() + 1 and getDate() so this branch produces the same
        // 1-based month / day-of-month as the other branches.
        return convertToDateTime(date.getYear() + 1900, date.getMonth() + 1, date.getDate(), 0,
                0, 0, true);
    } else if (org.joda.time.LocalDate.class.equals(clz)) {
        org.joda.time.LocalDate date = (org.joda.time.LocalDate) obj;
        // was getDayOfMonth() passed twice; the second argument must be the month
        return convertToDateTime(date.getYear(), date.getMonthOfYear(), date.getDayOfMonth(), 0,
                0, 0, true);
    } else {
        return 0;
    }
}
/**
 * Packs date/time fields into the backend's legacy 64-bit layout, high to low:
 * year | month(8) | day(8) | hour(8) | minute(8) | second(12) | type(3) | neg(1).
 * The type field is 2 for DATE values and 3 for DATETIME values; neg is always 0.
 */
public static long convertToDateTime(int year, int month, int day, int hour, int minute, int second,
        boolean isDate) {
    int type = isDate ? 2 : 3;
    long packed = year;
    packed = (packed << 8) + month;
    packed = (packed << 8) + day;
    packed = (packed << 8) + hour;
    packed = (packed << 8) + minute;
    packed = (packed << 12) + second;
    packed = (packed << 3) + type;
    // lowest bit is the neg flag, always 0
    return packed << 1;
}
/**
 * Packs date-time fields into the DATETIMEV2 bit layout with zero microseconds:
 * year << 46 | month << 42 | day << 37 | hour << 32 | minute << 26 | second << 20.
 */
public static long convertToDateTimeV2(int year, int month, int day, int hour, int minute, int second) {
    long packed = (long) year << 46;
    packed |= (long) month << 42;
    packed |= (long) day << 37;
    packed |= (long) hour << 32;
    packed |= (long) minute << 26;
    packed |= (long) second << 20;
    return packed;
}
/**
 * Packs date-time fields plus microseconds into the DATETIMEV2 bit layout:
 * microsecond | second << 20 | minute << 26 | hour << 32 | day << 37
 * | month << 42 | year << 46.
 */
public static long convertToDateTimeV2(
        int year, int month, int day, int hour, int minute, int second, int microsecond) {
    long packed = (long) year << 46;
    packed |= (long) month << 42;
    packed |= (long) day << 37;
    packed |= (long) hour << 32;
    packed |= (long) minute << 26;
    packed |= (long) second << 20;
    packed |= (long) microsecond;
    return packed;
}
/**
 * Packs a Java date-time object into the DATETIMEV2 bit layout.
 *
 * @param obj a LocalDateTime, org.joda.time.DateTime or org.joda.time.LocalDateTime
 * @param clz runtime class of obj used to pick the conversion
 * @return packed 64-bit DATETIMEV2 value, or 0 if clz is unsupported
 */
public static long convertToDateTimeV2(Object obj, Class clz) {
    if (LocalDateTime.class.equals(clz)) {
        LocalDateTime date = (LocalDateTime) obj;
        // note: the LocalDateTime's fractional seconds are dropped here
        // (6-arg overload packs 0 microseconds)
        return convertToDateTimeV2(date.getYear(), date.getMonthValue(), date.getDayOfMonth(), date.getHour(),
                date.getMinute(), date.getSecond());
    } else if (org.joda.time.DateTime.class.equals(clz)) {
        org.joda.time.DateTime date = (org.joda.time.DateTime) obj;
        // joda exposes millisecond precision; scale to microseconds for the packed layout
        return convertToDateTimeV2(date.getYear(), date.getMonthOfYear(), date.getDayOfMonth(), date.getHourOfDay(),
                date.getMinuteOfHour(), date.getSecondOfMinute(), date.getMillisOfSecond() * 1000);
    } else if (org.joda.time.LocalDateTime.class.equals(clz)) {
        org.joda.time.LocalDateTime date = (org.joda.time.LocalDateTime) obj;
        return convertToDateTimeV2(date.getYear(), date.getMonthOfYear(), date.getDayOfMonth(), date.getHourOfDay(),
                date.getMinuteOfHour(), date.getSecondOfMinute(), date.getMillisOfSecond() * 1000);
    } else {
        return 0;
    }
}
/**
 * Packs date fields into the 32-bit DATEV2 layout: year << 9 | month << 5 | day.
 */
public static int convertToDateV2(int year, int month, int day) {
    long packed = (long) year << 9;
    packed |= (long) month << 5;
    packed |= day;
    return (int) packed;
}
/**
 * Packs a Java date object into the 32-bit DATEV2 layout
 * (see convertToDateV2(int, int, int)).
 *
 * @param obj a LocalDate, java.util.Date or org.joda.time.LocalDate
 * @param clz runtime class of obj used to pick the conversion
 * @return packed 32-bit DATEV2 value, or 0 if clz is unsupported
 */
public static int convertToDateV2(Object obj, Class clz) {
    if (LocalDate.class.equals(clz)) {
        LocalDate date = (LocalDate) obj;
        return convertToDateV2(date.getYear(), date.getMonthValue(), date.getDayOfMonth());
    } else if (java.util.Date.class.equals(clz)) {
        java.util.Date date = (java.util.Date) obj;
        // java.util.Date: getYear() is offset from 1900, getMonth() is 0-based and
        // getDay() is the day of week. Use getYear() + 1900, getMonth() + 1 and
        // getDate() so this branch matches the other branches.
        return convertToDateV2(date.getYear() + 1900, date.getMonth() + 1, date.getDate());
    } else if (org.joda.time.LocalDate.class.equals(clz)) {
        org.joda.time.LocalDate date = (org.joda.time.LocalDate) obj;
        // was getDayOfMonth() passed twice; the second argument must be the month
        return convertToDateV2(date.getYear(), date.getMonthOfYear(), date.getDayOfMonth());
    } else {
        return 0;
    }
}
/**
* Change the order of the bytes, Because JVM is Big-Endian , x86 is Little-Endian.
*/
/**
 * Reverses the byte array in place (and returns the same array) to translate
 * between the JVM's big-endian representation and x86's little-endian layout.
 */
public static byte[] convertByteOrder(byte[] bytes) {
    int lo = 0;
    int hi = bytes.length - 1;
    while (lo < hi) {
        byte tmp = bytes[lo];
        bytes[lo] = bytes[hi];
        bytes[hi] = tmp;
        lo++;
        hi--;
    }
    return bytes;
}
// only used by nebula-graph
// Converts a nebula ValueWrapper into a plain Java object that can be copied to
// the block. Scalars map to their natural Java types; time/vertex/edge/path/
// list/set/map values are flattened to their string form. Best-effort: any
// unsupported value or decode error yields null.
public static Object convertObject(ValueWrapper value) {
    try {
        if (value.isLong()) {
            return value.asLong();
        }
        if (value.isBoolean()) {
            return value.asBoolean();
        }
        if (value.isDouble()) {
            return value.asDouble();
        }
        if (value.isString()) {
            return value.asString();
        }
        if (value.isTime()) {
            return value.asTime().toString();
        }
        if (value.isDate()) {
            DateWrapper date = value.asDate();
            return LocalDate.of(date.getYear(), date.getMonth(), date.getDay());
        }
        if (value.isDateTime()) {
            DateTimeWrapper dateTime = value.asDateTime();
            // nebula exposes microseconds; LocalDateTime.of takes nanoseconds
            return LocalDateTime.of(dateTime.getYear(), dateTime.getMonth(), dateTime.getDay(),
                    dateTime.getHour(), dateTime.getMinute(), dateTime.getSecond(), dateTime.getMicrosec() * 1000);
        }
        if (value.isVertex()) {
            return value.asNode().toString();
        }
        if (value.isEdge()) {
            return value.asRelationship().toString();
        }
        if (value.isPath()) {
            return value.asPath().toString();
        }
        if (value.isList()) {
            return value.asList().toString();
        }
        if (value.isSet()) {
            return value.asSet().toString();
        }
        if (value.isMap()) {
            return value.asMap().toString();
        }
        return null;
    } catch (Exception e) {
        // best-effort conversion: swallow decode errors and return null
        return null;
    }
}
}

View File

@ -0,0 +1,373 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.common.jni.vec;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Column type for fields in vector table. Supports complex nested types.
 * date & datetime are deprecated, use datev2 & datetimev2 only.
 * If decimalv2 is deprecated, we can unify decimal32 & decimal64 & decimal128 into decimal.
 */
public class ColumnType {
    public static final int MAX_DECIMAL32_PRECISION = 9;
    public static final int MAX_DECIMAL64_PRECISION = 18;
    public static final int MAX_DECIMAL128_PRECISION = 38;

    public enum Type {
        UNSUPPORTED(-1),
        BYTE(1), // only for string, generated as array<byte>
        BOOLEAN(1),
        TINYINT(1),
        SMALLINT(2),
        INT(4),
        BIGINT(8),
        LARGEINT(16),
        FLOAT(4),
        DOUBLE(8),
        DATEV2(4),
        DATETIMEV2(8),
        CHAR(-1),
        VARCHAR(-1),
        BINARY(-1),
        DECIMALV2(16),
        DECIMAL32(4),
        DECIMAL64(8),
        DECIMAL128(16),
        STRING(-1),
        ARRAY(-1),
        MAP(-1),
        STRUCT(-1);

        // fixed byte size of one value; -1 for variable-length and nested types
        public final int size;

        Type(int size) {
            this.size = size;
        }
    }

    private final Type type;
    private final String name;
    // child field names; only set for struct types
    private List<String> childNames;
    // child types; set for array (1 element), map (key, value) and struct types
    private List<ColumnType> childTypes;
    private List<Integer> fieldIndex;
    // only used in char & varchar
    private final int length;
    // only used in decimal
    private final int precision;
    private final int scale;

    public ColumnType(String name, Type type) {
        this.name = name;
        this.type = type;
        this.length = -1;
        this.precision = -1;
        this.scale = -1;
    }

    public ColumnType(String name, Type type, int length) {
        this.name = name;
        this.type = type;
        this.length = length;
        this.precision = -1;
        this.scale = -1;
    }

    public ColumnType(String name, Type type, int precision, int scale) {
        this.name = name;
        this.type = type;
        this.length = -1;
        this.precision = precision;
        this.scale = scale;
    }

    public ColumnType(String name, Type type, int length, int precision, int scale) {
        this.name = name;
        this.type = type;
        this.length = length;
        this.precision = precision;
        this.scale = scale;
    }

    public List<String> getChildNames() {
        return childNames;
    }

    public void setChildNames(List<String> childNames) {
        this.childNames = childNames;
    }

    public List<ColumnType> getChildTypes() {
        return childTypes;
    }

    public void setChildTypes(List<ColumnType> childTypes) {
        this.childTypes = childTypes;
    }

    public List<Integer> getFieldIndex() {
        return fieldIndex;
    }

    public void setFieldIndex(List<Integer> fieldIndex) {
        this.fieldIndex = fieldIndex;
    }

    public int getTypeSize() {
        return type.size;
    }

    public boolean isUnsupported() {
        return type == Type.UNSUPPORTED;
    }

    public boolean isStringType() {
        return type == Type.STRING || type == Type.BINARY || type == Type.CHAR || type == Type.VARCHAR;
    }

    public boolean isComplexType() {
        return type == Type.ARRAY || type == Type.MAP || type == Type.STRUCT;
    }

    public boolean isArray() {
        return type == Type.ARRAY;
    }

    public boolean isMap() {
        return type == Type.MAP;
    }

    public boolean isStruct() {
        return type == Type.STRUCT;
    }

    public Type getType() {
        return type;
    }

    public String getName() {
        return name;
    }

    public int getLength() {
        return length;
    }

    public int getPrecision() {
        return precision;
    }

    public int getScale() {
        return scale;
    }

    /**
     * Number of meta slots this column occupies in the native meta block,
     * counting recursively for nested types.
     */
    public int metaSize() {
        switch (type) {
            case UNSUPPORTED:
                // set nullMap address as 0.
                return 1;
            case ARRAY:
            case MAP:
            case STRUCT:
                // array & map : [nullMap | offsets | ... ]
                // struct : [nullMap | ... ]
                int size = 2;
                if (type == Type.STRUCT) {
                    size = 1;
                }
                for (ColumnType c : childTypes) {
                    size += c.metaSize();
                }
                return size;
            case STRING:
            case BINARY:
            case CHAR:
            case VARCHAR:
                // [nullMap | offsets | data ]
                return 3;
            default:
                // [nullMap | data]
                return 2;
        }
    }

    // matches the first run of digits, e.g. the length in "varchar(65533)"
    private static final Pattern digitPattern = Pattern.compile("(\\d+)");

    /**
     * Returns the index of the first top-level ',' in commaSplitFields,
     * ignoring commas nested inside '<...>' or '(...)'; returns the string
     * length when there is no top-level comma.
     */
    private static int findNextNestedField(String commaSplitFields) {
        int numLess = 0;
        int numBracket = 0;
        for (int i = 0; i < commaSplitFields.length(); i++) {
            char c = commaSplitFields.charAt(i);
            if (c == '<') {
                numLess++;
            } else if (c == '>') {
                numLess--;
            } else if (c == '(') {
                numBracket++;
            } else if (c == ')') {
                numBracket--;
            } else if (c == ',' && numLess == 0 && numBracket == 0) {
                return i;
            }
        }
        return commaSplitFields.length();
    }

    /**
     * Parses a type string (e.g. "int", "decimal(10,2)", "array<int>",
     * "map<string,int>", "struct<a:int,b:string>") into a ColumnType tree.
     * Matching is case-insensitive; anything unrecognized (including malformed
     * nested types) yields a column of Type.UNSUPPORTED.
     */
    public static ColumnType parseType(String columnName, String hiveType) {
        String lowerCaseType = hiveType.toLowerCase();
        Type type = Type.UNSUPPORTED;
        int length = -1;
        int precision = -1;
        int scale = -1;
        switch (lowerCaseType) {
            case "boolean":
                type = Type.BOOLEAN;
                break;
            case "tinyint":
                type = Type.TINYINT;
                break;
            case "smallint":
                type = Type.SMALLINT;
                break;
            case "int":
                type = Type.INT;
                break;
            case "bigint":
                type = Type.BIGINT;
                break;
            case "largeint":
                type = Type.LARGEINT;
                break;
            case "float":
                type = Type.FLOAT;
                break;
            case "double":
                type = Type.DOUBLE;
                break;
            case "date":
                type = Type.DATEV2;
                break;
            case "binary":
                type = Type.BINARY;
                break;
            case "string":
                type = Type.STRING;
                break;
            default:
                if (lowerCaseType.startsWith("timestamp")) {
                    type = Type.DATETIMEV2;
                    precision = 6; // default
                    Matcher match = digitPattern.matcher(lowerCaseType);
                    if (match.find()) {
                        precision = Integer.parseInt(match.group(1));
                    }
                } else if (lowerCaseType.startsWith("char")) {
                    Matcher match = digitPattern.matcher(lowerCaseType);
                    if (match.find()) {
                        type = Type.CHAR;
                        length = Integer.parseInt(match.group(1));
                    }
                } else if (lowerCaseType.startsWith("varchar")) {
                    Matcher match = digitPattern.matcher(lowerCaseType);
                    if (match.find()) {
                        type = Type.VARCHAR;
                        length = Integer.parseInt(match.group(1));
                    }
                } else if (lowerCaseType.startsWith("decimal")) {
                    // "decimal[v2|32|64|128](precision,scale)"; a bare "decimal"
                    // without parentheses stays UNSUPPORTED
                    int s = lowerCaseType.indexOf('(');
                    int e = lowerCaseType.indexOf(')');
                    if (s != -1 && e != -1) {
                        String[] ps = lowerCaseType.substring(s + 1, e).split(",");
                        precision = Integer.parseInt(ps[0]);
                        scale = Integer.parseInt(ps[1]);
                        if (lowerCaseType.startsWith("decimalv2")) {
                            type = Type.DECIMALV2;
                        } else if (lowerCaseType.startsWith("decimal32")) {
                            type = Type.DECIMAL32;
                        } else if (lowerCaseType.startsWith("decimal64")) {
                            type = Type.DECIMAL64;
                        } else if (lowerCaseType.startsWith("decimal128")) {
                            type = Type.DECIMAL128;
                        } else {
                            // plain "decimal(p,s)": pick the narrowest storage
                            // that can hold the precision
                            if (precision <= MAX_DECIMAL32_PRECISION) {
                                type = Type.DECIMAL32;
                            } else if (precision <= MAX_DECIMAL64_PRECISION) {
                                type = Type.DECIMAL64;
                            } else {
                                type = Type.DECIMAL128;
                            }
                        }
                    }
                } else if (lowerCaseType.startsWith("array")) {
                    // "array<element-type>"; the element column is named "element"
                    if (lowerCaseType.indexOf("<") == 5
                            && lowerCaseType.lastIndexOf(">") == lowerCaseType.length() - 1) {
                        ColumnType nestedType = parseType("element",
                                lowerCaseType.substring(6, lowerCaseType.length() - 1));
                        ColumnType arrayType = new ColumnType(columnName, Type.ARRAY);
                        arrayType.setChildTypes(Collections.singletonList(nestedType));
                        return arrayType;
                    }
                } else if (lowerCaseType.startsWith("map")) {
                    // "map<key-type,value-type>"; split on the first top-level comma
                    if (lowerCaseType.indexOf("<") == 3
                            && lowerCaseType.lastIndexOf(">") == lowerCaseType.length() - 1) {
                        String keyValue = lowerCaseType.substring(4, lowerCaseType.length() - 1);
                        int index = findNextNestedField(keyValue);
                        if (index != keyValue.length() && index != 0) {
                            ColumnType keyType = parseType("key", keyValue.substring(0, index));
                            ColumnType valueType = parseType("value", keyValue.substring(index + 1));
                            ColumnType mapType = new ColumnType(columnName, Type.MAP);
                            mapType.setChildTypes(Arrays.asList(keyType, valueType));
                            return mapType;
                        }
                    }
                } else if (lowerCaseType.startsWith("struct")) {
                    // "struct<name1:type1,name2:type2,...>"; consume one
                    // "name:type" field per iteration
                    if (lowerCaseType.indexOf("<") == 6
                            && lowerCaseType.lastIndexOf(">") == lowerCaseType.length() - 1) {
                        String listFields = lowerCaseType.substring(7, lowerCaseType.length() - 1);
                        ArrayList<ColumnType> fields = new ArrayList<>();
                        ArrayList<String> names = new ArrayList<>();
                        while (listFields.length() > 0) {
                            int index = findNextNestedField(listFields);
                            int pivot = listFields.indexOf(':');
                            if (pivot > 0 && pivot < listFields.length() - 1) {
                                fields.add(parseType(listFields.substring(0, pivot),
                                        listFields.substring(pivot + 1, index)));
                                names.add(listFields.substring(0, pivot));
                                listFields = listFields.substring(Math.min(index + 1, listFields.length()));
                            } else {
                                break;
                            }
                        }
                        // only valid if every field was consumed
                        if (listFields.isEmpty()) {
                            ColumnType structType = new ColumnType(columnName, Type.STRUCT);
                            structType.setChildTypes(fields);
                            structType.setChildNames(names);
                            return structType;
                        }
                    }
                }
                break;
        }
        return new ColumnType(columnName, type, length, precision, scale);
    }
}

View File

@ -0,0 +1,66 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.common.jni.vec;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.util.List;
/**
 * Column value in vector column. Read-side accessor for a single cell: callers
 * invoke the getter that matches the column's declared type.
 */
public interface ColumnValue {
    // true if the cell is null; the typed getters are only meaningful when false
    boolean isNull();

    boolean getBoolean();

    // tinyint
    byte getByte();

    // smallint
    short getShort();

    int getInt();

    float getFloat();

    // bigint
    long getLong();

    double getDouble();

    // largeint
    BigInteger getBigInteger();

    // decimalv2 / decimal32 / decimal64 / decimal128
    BigDecimal getDecimal();

    String getString();

    LocalDate getDate();

    LocalDateTime getDateTime();

    byte[] getBytes();

    // appends the array elements to 'values'
    void unpackArray(List<ColumnValue> values);

    // appends map entries as parallel key/value lists
    void unpackMap(List<ColumnValue> keys, List<ColumnValue> values);

    // appends the struct fields selected by structFieldIndex to 'values'
    void unpackStruct(List<Integer> structFieldIndex, List<ColumnValue> values);
}

View File

@ -0,0 +1,288 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.common.jni.vec;
import org.apache.doris.common.jni.utils.OffHeap;
import org.apache.doris.common.jni.utils.TypeNativeBytes;
import org.apache.doris.common.jni.vec.ColumnType.Type;
import org.apache.commons.lang3.StringUtils;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Reference to doris::JniConnector::ScanPredicate.
 * Java-side view of a pushed-down filter predicate that the backend serialized
 * into off-heap memory; see the constructor for the binary layout.
 */
public class ScanPredicate {
    public enum FilterOp {
        FILTER_LARGER(">"),
        FILTER_LARGER_OR_EQUAL(">="),
        FILTER_LESS("<"),
        FILTER_LESS_OR_EQUAL("<="),
        FILTER_IN("in"),
        FILTER_NOT_IN("not in");

        // SQL-style spelling used by dump()
        public final String op;

        FilterOp(String op) {
            this.op = op;
        }
    }

    // Maps the backend's integer op code to a FilterOp; any code > 4 is
    // treated as FILTER_NOT_IN.
    private static FilterOp parseFilterOp(int op) {
        switch (op) {
            case 0:
                return FilterOp.FILTER_LARGER;
            case 1:
                return FilterOp.FILTER_LARGER_OR_EQUAL;
            case 2:
                return FilterOp.FILTER_LESS;
            case 3:
                return FilterOp.FILTER_LESS_OR_EQUAL;
            case 4:
                return FilterOp.FILTER_IN;
            default:
                return FilterOp.FILTER_NOT_IN;
        }
    }

    /**
     * One comparison value of a predicate, decoded lazily from the raw bytes
     * the backend wrote (native byte order for fixed-size numeric types).
     */
    public static class PredicateValue implements ColumnValue {
        private final byte[] valueBytes;
        private final ColumnType.Type type;
        private final int scale;

        public PredicateValue(byte[] valueBytes, ColumnType.Type type, int scale) {
            this.valueBytes = valueBytes;
            this.type = type;
            this.scale = scale;
        }

        // Decodes valueBytes according to the column type. The bytes are
        // copied and byte-order swapped first so the original buffer is
        // never mutated (ByteBuffer reads big-endian by default).
        private Object inspectObject() {
            ByteBuffer byteBuffer = ByteBuffer.wrap(
                    TypeNativeBytes.convertByteOrder(Arrays.copyOf(valueBytes, valueBytes.length)));
            switch (type) {
                case BOOLEAN:
                    return byteBuffer.get() == 1;
                case TINYINT:
                    return byteBuffer.get();
                case SMALLINT:
                    return byteBuffer.getShort();
                case INT:
                    return byteBuffer.getInt();
                case BIGINT:
                    return byteBuffer.getLong();
                case LARGEINT:
                    return TypeNativeBytes.getBigInteger(Arrays.copyOf(valueBytes, valueBytes.length));
                case FLOAT:
                    return byteBuffer.getFloat();
                case DOUBLE:
                    return byteBuffer.getDouble();
                case DECIMALV2:
                case DECIMAL32:
                case DECIMAL64:
                case DECIMAL128:
                    return TypeNativeBytes.getDecimal(Arrays.copyOf(valueBytes, valueBytes.length), scale);
                case CHAR:
                case VARCHAR:
                case STRING:
                    return new String(valueBytes, StandardCharsets.UTF_8);
                case BINARY:
                    return valueBytes;
                default:
                    return new Object();
            }
        }

        @Override
        public String toString() {
            return inspectObject().toString();
        }

        // predicate values are never null
        @Override
        public boolean isNull() {
            return false;
        }

        @Override
        public boolean getBoolean() {
            return (boolean) inspectObject();
        }

        @Override
        public byte getByte() {
            return (byte) inspectObject();
        }

        @Override
        public short getShort() {
            return (short) inspectObject();
        }

        @Override
        public int getInt() {
            return (int) inspectObject();
        }

        @Override
        public float getFloat() {
            return (float) inspectObject();
        }

        @Override
        public long getLong() {
            return (long) inspectObject();
        }

        @Override
        public double getDouble() {
            return (double) inspectObject();
        }

        @Override
        public BigInteger getBigInteger() {
            return (BigInteger) inspectObject();
        }

        @Override
        public BigDecimal getDecimal() {
            return (BigDecimal) inspectObject();
        }

        @Override
        public String getString() {
            return toString();
        }

        // NOTE(review): returns the current date rather than decoding
        // valueBytes — date predicate values appear unsupported; confirm
        // callers never read date predicates through this path.
        @Override
        public LocalDate getDate() {
            return LocalDate.now();
        }

        // NOTE(review): same as getDate() — returns now() instead of a
        // decoded value; confirm this path is unused for datetime predicates.
        @Override
        public LocalDateTime getDateTime() {
            return LocalDateTime.now();
        }

        @Override
        public byte[] getBytes() {
            return (byte[]) inspectObject();
        }

        @Override
        public void unpackArray(List<ColumnValue> values) {
        }

        @Override
        public void unpackMap(List<ColumnValue> keys, List<ColumnValue> values) {
        }

        @Override
        public void unpackStruct(List<Integer> structFieldIndex, List<ColumnValue> values) {
        }
    }

    // total number of bytes this predicate occupied in native memory,
    // used by parseScanPredicates to advance to the next predicate
    private final long bytesLength;
    // (sic) field name kept as-is for API compatibility
    public final String columName;
    public final ColumnType.Type type;
    public final FilterOp op;
    private final byte[][] values;
    public final int scale;

    /**
     * Reads one predicate from native memory. Layout written by the backend:
     * [nameLength(int) | nameBytes | op(int) | scale(int) | numValues(int)
     *  | (valueLength(int) | valueBytes) * numValues].
     * The column type is looked up by name; unknown columns get UNSUPPORTED.
     */
    private ScanPredicate(long predicateAddress, Map<String, ColumnType.Type> nameToType) {
        long address = predicateAddress;
        int length = OffHeap.getInt(null, address);
        address += 4;
        byte[] nameBytes = new byte[length];
        OffHeap.copyMemory(null, address, nameBytes, OffHeap.BYTE_ARRAY_OFFSET, length);
        columName = new String(nameBytes, StandardCharsets.UTF_8);
        type = nameToType.getOrDefault(columName, Type.UNSUPPORTED);
        address += length;
        op = parseFilterOp(OffHeap.getInt(null, address));
        address += 4;
        scale = OffHeap.getInt(null, address);
        address += 4;
        int numValues = OffHeap.getInt(null, address);
        address += 4;
        values = new byte[numValues][];
        for (int i = 0; i < numValues; i++) {
            int valueLength = OffHeap.getInt(null, address);
            address += 4;
            byte[] valueBytes = new byte[valueLength];
            OffHeap.copyMemory(null, address, valueBytes, OffHeap.BYTE_ARRAY_OFFSET, valueLength);
            address += valueLength;
            values[i] = valueBytes;
        }
        bytesLength = address - predicateAddress;
    }

    /**
     * Wraps the raw comparison values in typed PredicateValue accessors.
     */
    public PredicateValue[] predicateValues() {
        PredicateValue[] result = new PredicateValue[values.length];
        for (int i = 0; i < values.length; i++) {
            result[i] = new PredicateValue(values[i], type, scale);
        }
        return result;
    }

    /**
     * Reads all predicates from native memory. Layout:
     * [numPredicates(int) | predicate * numPredicates]; each predicate's size
     * is discovered while parsing it (see bytesLength).
     */
    public static ScanPredicate[] parseScanPredicates(long predicatesAddress, ColumnType[] types) {
        Map<String, ColumnType.Type> nameToType = new HashMap<>();
        for (ColumnType columnType : types) {
            nameToType.put(columnType.getName(), columnType.getType());
        }
        int numPredicates = OffHeap.getInt(null, predicatesAddress);
        long nextPredicateAddress = predicatesAddress + 4;
        ScanPredicate[] predicates = new ScanPredicate[numPredicates];
        for (int i = 0; i < numPredicates; i++) {
            predicates[i] = new ScanPredicate(nextPredicateAddress, nameToType);
            nextPredicateAddress += predicates[i].bytesLength;
        }
        return predicates;
    }

    /**
     * Appends a SQL-like rendering of this predicate, e.g. "a > 1" or
     * "a in (1, 2)".
     */
    public void dump(StringBuilder sb) {
        sb.append(columName).append(' ').append(op.op).append(' ');
        if (op == FilterOp.FILTER_IN || op == FilterOp.FILTER_NOT_IN) {
            sb.append('(').append(StringUtils.join(predicateValues(), ", ")).append(')');
        } else {
            sb.append(predicateValues()[0]);
        }
    }

    /**
     * Renders all predicates joined with " and ".
     */
    public static String dump(ScanPredicate[] scanPredicates) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < scanPredicates.length; i++) {
            if (i != 0) {
                sb.append(" and ");
            }
            scanPredicates[i].dump(sb);
        }
        return sb.toString();
    }
}

View File

@ -0,0 +1,665 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.common.jni.vec;
import org.apache.doris.common.jni.utils.OffHeap;
import org.apache.doris.common.jni.utils.TypeNativeBytes;
import org.apache.doris.common.jni.vec.ColumnType.Type;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.util.List;
/**
* Reference to Apache Spark
* see <a href="https://github.com/apache/spark/blob/master/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java">WritableColumnVector</a>
*/
public class VectorColumn {
// String is stored as array<byte>
// The default string length to initialize the capacity.
private static final int DEFAULT_STRING_LENGTH = 4;
// NullMap column address
private long nullMap;
// Data column address
private long data;
// For String / Array / Map.
private long offsets;
// Number of elements in vector column
private int capacity;
// Upper limit for the maximum capacity for this column.
private static final int MAX_CAPACITY = Integer.MAX_VALUE - 15;
private final ColumnType columnType;
private int numNulls;
private int appendIndex;
// For nested column type: String / Array/ Map / Struct
private VectorColumn[] childColumns;
/**
 * Creates a writable column and allocates off-heap storage for {@code capacity}
 * rows (capacity starts at 0 and is grown by reserveCapacity). Complex types get
 * one child column per child type; string types get a single "#stringBytes"
 * child that stores the raw bytes.
 */
public VectorColumn(ColumnType columnType, int capacity) {
    this.columnType = columnType;
    this.capacity = 0;
    this.nullMap = 0;
    this.data = 0;
    this.offsets = 0;
    this.numNulls = 0;
    this.appendIndex = 0;
    if (columnType.isComplexType()) {
        List<ColumnType> children = columnType.getChildTypes();
        childColumns = new VectorColumn[children.size()];
        for (int i = 0; i < children.size(); ++i) {
            childColumns[i] = new VectorColumn(children.get(i), capacity);
        }
    } else if (columnType.isStringType()) {
        childColumns = new VectorColumn[1];
        // assume DEFAULT_STRING_LENGTH bytes per string for the initial allocation
        childColumns[0] = new VectorColumn(new ColumnType("#stringBytes", Type.BYTE),
                capacity * DEFAULT_STRING_LENGTH);
    }
    reserveCapacity(capacity);
}
// restore the child of string column & restore meta column
/**
 * Wraps an existing native buffer as a read-only column view: no null map,
 * no offsets, and the append position already at {@code capacity}. The memory
 * at {@code address} is owned by the caller/backend, not allocated here.
 */
public VectorColumn(long address, int capacity, ColumnType columnType) {
    this.columnType = columnType;
    this.capacity = capacity;
    this.nullMap = 0;
    this.data = address;
    this.offsets = 0;
    this.numNulls = 0;
    this.appendIndex = capacity;
}
// restore block column
/**
 * Restores a column from the native meta block written by the backend.
 * Meta layout: [nullMap address | offsets address (string types only)
 * | data address]. Complex types are not handled yet.
 *
 * @throws RuntimeException if the type is unsupported or complex
 */
public VectorColumn(ColumnType columnType, int numRows, long columnMetaAddress) {
    if (columnType.isUnsupported()) {
        throw new RuntimeException("Unsupported type for column: " + columnType.getName());
    }
    long address = columnMetaAddress;
    this.capacity = numRows;
    this.columnType = columnType;
    this.nullMap = OffHeap.getLong(null, address);
    address += 8;
    this.numNulls = 0;
    if (this.nullMap != 0) {
        // count nulls eagerly so numNulls/hasNull() are accurate for restored data
        for (int i = 0; i < numRows; ++i) {
            if (isNullAt(i)) {
                this.numNulls++;
            }
        }
    }
    this.appendIndex = numRows;
    if (columnType.isComplexType()) {
        // todo: support complex type
        throw new RuntimeException("Unhandled type: " + columnType);
    } else if (columnType.isStringType()) {
        this.offsets = OffHeap.getLong(null, address);
        address += 8;
        this.data = 0;
        // the last offset entry is the total byte length of the string data
        int length = OffHeap.getInt(null, this.offsets + (numRows - 1) * 4L);
        childColumns = new VectorColumn[1];
        childColumns[0] = new VectorColumn(OffHeap.getLong(null, address), length,
                new ColumnType("#stringBytes", Type.BYTE));
    } else {
        this.data = OffHeap.getLong(null, address);
        this.offsets = 0;
    }
}
// native address of the null map buffer (0 when absent)
public long nullMapAddress() {
    return nullMap;
}
// native address of the fixed-size data buffer (0 for string columns,
// whose bytes live in the "#stringBytes" child)
public long dataAddress() {
    return data;
}
// native address of the offsets buffer (string/array/map types only; 0 otherwise)
public long offsetAddress() {
    return offsets;
}
// (sic) method name misses the final 'e'; kept for API compatibility
public ColumnType.Type getColumnTyp() {
    return columnType.getType();
}
/**
 * Release columns and meta information: recursively closes child columns and
 * frees the off-heap nullMap/data/offsets buffers, then resets all state so
 * the column is empty. Safe to call on an already-closed column (addresses
 * are zeroed after freeing).
 */
public void close() {
    if (childColumns != null) {
        for (int i = 0; i < childColumns.length; i++) {
            childColumns[i].close();
            childColumns[i] = null;
        }
        childColumns = null;
    }

    if (nullMap != 0) {
        OffHeap.freeMemory(nullMap);
    }
    if (data != 0) {
        OffHeap.freeMemory(data);
    }
    if (offsets != 0) {
        OffHeap.freeMemory(offsets);
    }
    nullMap = 0;
    data = 0;
    offsets = 0;
    capacity = 0;
    numNulls = 0;
    appendIndex = 0;
}
/**
 * Wraps an off-heap allocation failure (or a negative capacity caused by
 * integer overflow) in a RuntimeException with a readable message.
 */
private void throwReserveException(int requiredCapacity, Throwable cause) {
    // close the '(' opened in the prefix in both branches (the "requested"
    // branch previously left it unbalanced)
    String message = "Cannot reserve enough bytes in off heap memory ("
            + (requiredCapacity >= 0 ? "requested " + requiredCapacity + " bytes)." : "integer overflow).");
    throw new RuntimeException(message, cause);
}
/**
 * Ensures the column can hold at least requiredCapacity rows, growing to
 * roughly double the requirement (capped at MAX_CAPACITY). A negative
 * requiredCapacity means the caller's size computation overflowed.
 */
private void reserve(int requiredCapacity) {
    if (requiredCapacity < 0) {
        throwReserveException(requiredCapacity, null);
    } else if (requiredCapacity > capacity) {
        int newCapacity = (int) Math.min(MAX_CAPACITY, requiredCapacity * 2L);
        if (requiredCapacity <= newCapacity) {
            try {
                reserveCapacity(newCapacity);
            } catch (OutOfMemoryError outOfMemoryError) {
                throwReserveException(requiredCapacity, outOfMemoryError);
            }
        } else {
            // overflow
            throwReserveException(requiredCapacity, null);
        }
    }
}
/**
 * Grows (or initially allocates) the off-heap buffers to hold newCapacity rows.
 * Fixed-size types reallocate the data buffer; string types reallocate the
 * 4-byte-per-row offsets buffer (the bytes live in the "#stringBytes" child).
 * The null map is reallocated and zero-filled for all columns except the raw
 * "#stringBytes" child, which carries no null map of its own.
 */
private void reserveCapacity(int newCapacity) {
    long oldCapacity = capacity;
    long oldOffsetSize = capacity * 4L;
    long newOffsetSize = newCapacity * 4L;
    long typeSize = columnType.getTypeSize();
    if (columnType.isUnsupported()) {
        // do nothing
        return;
    } else if (typeSize != -1) {
        this.data = OffHeap.reallocateMemory(data, oldCapacity * typeSize, newCapacity * typeSize);
    } else if (columnType.isStringType()) {
        this.offsets = OffHeap.reallocateMemory(offsets, oldOffsetSize, newOffsetSize);
    } else {
        throw new RuntimeException("Unhandled type: " + columnType);
    }
    // todo: support complex type
    if (!"#stringBytes".equals(columnType.getName())) {
        this.nullMap = OffHeap.reallocateMemory(nullMap, oldCapacity, newCapacity);
        OffHeap.setMemory(nullMap + oldCapacity, (byte) 0, newCapacity - oldCapacity);
    }
    capacity = newCapacity;
}
/**
 * Rewinds this column (and recursively its children) for reuse without
 * releasing any off-heap memory: the append cursor goes back to 0 and, if any
 * NULLs were recorded, the null bitmap is cleared.
 */
public void reset() {
    if (childColumns != null) {
        for (int i = 0; i < childColumns.length; i++) {
            childColumns[i].reset();
        }
    }
    appendIndex = 0;
    if (hasNull()) {
        putNotNulls(0, capacity);
        numNulls = 0;
    }
}
/**
 * Returns true if the given row is marked NULL in the null bitmap. A column
 * whose bitmap was never allocated (address 0) contains no NULLs.
 */
public boolean isNullAt(int rowId) {
if (nullMap == 0) {
return false;
} else {
return OffHeap.getByte(null, nullMap + rowId) == 1;
}
}
/**
 * Returns true if at least one NULL has been appended to this column.
 */
public boolean hasNull() {
return numNulls > 0;
}
/**
 * Clears the null marks for {@code count} rows starting at {@code rowId}.
 * No-op when the column has no recorded NULLs.
 */
private void putNotNulls(int rowId, int count) {
    if (!hasNull()) {
        return;
    }
    // Bulk memset instead of the original byte-at-a-time loop; this matches
    // the existing OffHeap.setMemory usage in reserveCapacity().
    OffHeap.setMemory(nullMap + rowId, (byte) 0, count);
}
/**
 * Appends a SQL NULL: marks the row in the null bitmap, then appends a
 * type-appropriate placeholder value so the data/offset buffers stay aligned
 * with the row count.
 *
 * @param typeValue the column's logical type, used to pick the placeholder
 * @return the row index the NULL was written at
 */
public int appendNull(ColumnType.Type typeValue) {
reserve(appendIndex + 1);
putNull(appendIndex);
// append default value
switch (typeValue) {
case BOOLEAN:
return appendBoolean(false);
case TINYINT:
return appendByte((byte) 0);
case SMALLINT:
return appendShort((short) 0);
case INT:
return appendInt(0);
case BIGINT:
return appendLong(0);
case LARGEINT:
return appendBigInteger(BigInteger.ZERO);
case FLOAT:
return appendFloat(0);
case DOUBLE:
return appendDouble(0);
case DECIMALV2:
case DECIMAL32:
case DECIMAL64:
case DECIMAL128:
return appendDecimal(new BigDecimal(0));
case DATEV2:
return appendDate(LocalDate.MIN);
case DATETIMEV2:
return appendDateTime(LocalDateTime.MIN);
case CHAR:
case VARCHAR:
case STRING:
case BINARY:
return appendBytesAndOffset(new byte[0]);
default:
throw new RuntimeException("Unknown type value: " + typeValue);
}
}
/**
 * Marks one row as NULL in the bitmap and bumps the null counter. Callers
 * must have reserved capacity for the row first.
 */
private void putNull(int rowId) {
OffHeap.putByte(null, nullMap + rowId, (byte) 1);
++numNulls;
}
// ---- Fixed-width primitive accessors ----
// Each appendX() reserves one slot, writes the value at the current append
// cursor and returns the row index written; putX()/getX() address the data
// buffer at rowId * sizeof(type).
// boolean: stored as one byte (1 = true, 0 = false).
public int appendBoolean(boolean v) {
reserve(appendIndex + 1);
putBoolean(appendIndex, v);
return appendIndex++;
}
private void putBoolean(int rowId, boolean value) {
OffHeap.putByte(null, data + rowId, (byte) ((value) ? 1 : 0));
}
public boolean getBoolean(int rowId) {
return OffHeap.getByte(null, data + rowId) == 1;
}
// tinyint: one byte per value.
public int appendByte(byte v) {
reserve(appendIndex + 1);
putByte(appendIndex, v);
return appendIndex++;
}
public void putByte(int rowId, byte value) {
OffHeap.putByte(null, data + (long) rowId, value);
}
public byte getByte(int rowId) {
return OffHeap.getByte(null, data + (long) rowId);
}
// smallint: two bytes per value.
public int appendShort(short v) {
reserve(appendIndex + 1);
putShort(appendIndex, v);
return appendIndex++;
}
private void putShort(int rowId, short value) {
OffHeap.putShort(null, data + 2L * rowId, value);
}
public short getShort(int rowId) {
return OffHeap.getShort(null, data + 2L * rowId);
}
// int: four bytes per value.
public int appendInt(int v) {
reserve(appendIndex + 1);
putInt(appendIndex, v);
return appendIndex++;
}
private void putInt(int rowId, int value) {
OffHeap.putInt(null, data + 4L * rowId, value);
}
public int getInt(int rowId) {
return OffHeap.getInt(null, data + 4L * rowId);
}
// float: four bytes per value.
public int appendFloat(float v) {
reserve(appendIndex + 1);
putFloat(appendIndex, v);
return appendIndex++;
}
private void putFloat(int rowId, float value) {
OffHeap.putFloat(null, data + rowId * 4L, value);
}
public float getFloat(int rowId) {
return OffHeap.getFloat(null, data + rowId * 4L);
}
// bigint: eight bytes per value.
public int appendLong(long v) {
reserve(appendIndex + 1);
putLong(appendIndex, v);
return appendIndex++;
}
private void putLong(int rowId, long value) {
OffHeap.putLong(null, data + 8L * rowId, value);
}
public long getLong(int rowId) {
return OffHeap.getLong(null, data + 8L * rowId);
}
// double: eight bytes per value.
public int appendDouble(double v) {
reserve(appendIndex + 1);
putDouble(appendIndex, v);
return appendIndex++;
}
private void putDouble(int rowId, double value) {
OffHeap.putDouble(null, data + rowId * 8L, value);
}
public double getDouble(int rowId) {
return OffHeap.getDouble(null, data + rowId * 8L);
}
// largeint: encoded to the column's fixed typeSize bytes via TypeNativeBytes.
public int appendBigInteger(BigInteger v) {
reserve(appendIndex + 1);
putBigInteger(appendIndex, v);
return appendIndex++;
}
private void putBigInteger(int rowId, BigInteger v) {
int typeSize = columnType.getTypeSize();
byte[] bytes = TypeNativeBytes.getBigIntegerBytes(v);
OffHeap.copyMemory(bytes, OffHeap.BYTE_ARRAY_OFFSET, null, data + (long) rowId * typeSize, typeSize);
}
// Returns the raw typeSize-byte native encoding of the largeint at rowId.
public byte[] getBigIntegerBytes(int rowId) {
int typeSize = columnType.getTypeSize();
byte[] bytes = new byte[typeSize];
OffHeap.copyMemory(null, data + (long) rowId * typeSize, bytes, OffHeap.BYTE_ARRAY_OFFSET, typeSize);
return bytes;
}
public BigInteger getBigInteger(int rowId) {
return TypeNativeBytes.getBigInteger(getBigIntegerBytes(rowId));
}
// decimal: encoded with the column's scale into typeSize bytes.
public int appendDecimal(BigDecimal v) {
reserve(appendIndex + 1);
putDecimal(appendIndex, v);
return appendIndex++;
}
private void putDecimal(int rowId, BigDecimal v) {
int typeSize = columnType.getTypeSize();
byte[] bytes = TypeNativeBytes.getDecimalBytes(v, columnType.getScale(), typeSize);
OffHeap.copyMemory(bytes, OffHeap.BYTE_ARRAY_OFFSET, null, data + (long) rowId * typeSize, typeSize);
}
// Returns the raw typeSize-byte native encoding of the decimal at rowId.
public byte[] getDecimalBytes(int rowId) {
int typeSize = columnType.getTypeSize();
byte[] bytes = new byte[typeSize];
OffHeap.copyMemory(null, data + (long) rowId * typeSize, bytes, OffHeap.BYTE_ARRAY_OFFSET, typeSize);
return bytes;
}
public BigDecimal getDecimal(int rowId) {
return TypeNativeBytes.getDecimal(getDecimalBytes(rowId), columnType.getScale());
}
// datev2: packed into a 4-byte native representation by TypeNativeBytes.
public int appendDate(LocalDate v) {
reserve(appendIndex + 1);
putDate(appendIndex, v);
return appendIndex++;
}
private void putDate(int rowId, LocalDate v) {
int date = TypeNativeBytes.convertToDateV2(v.getYear(), v.getMonthValue(), v.getDayOfMonth());
OffHeap.putInt(null, data + rowId * 4L, date);
}
public LocalDate getDate(int rowId) {
int date = OffHeap.getInt(null, data + rowId * 4L);
return TypeNativeBytes.convertToJavaDate(date);
}
// datetimev2: packed into an 8-byte native representation, microsecond precision.
public int appendDateTime(LocalDateTime v) {
reserve(appendIndex + 1);
putDateTime(appendIndex, v);
return appendIndex++;
}
public LocalDateTime getDateTime(int rowId) {
long time = OffHeap.getLong(null, data + rowId * 8L);
return TypeNativeBytes.convertToJavaDateTime(time);
}
private void putDateTime(int rowId, LocalDateTime v) {
// Nanoseconds are truncated to microseconds (/ 1000) for the native format.
long time = TypeNativeBytes.convertToDateTimeV2(v.getYear(), v.getMonthValue(), v.getDayOfMonth(), v.getHour(),
v.getMinute(), v.getSecond(), v.getNano() / 1000);
OffHeap.putLong(null, data + rowId * 8L, time);
}
// Raw byte accessors. The data buffer is byte-addressed here (rowId is a byte
// position) — presumably only used on the one-byte-per-element "#stringBytes"
// child column; confirm against appendBytesAndOffset/getBytesWithOffset.
private void putBytes(int rowId, byte[] src, int offset, int length) {
OffHeap.copyMemory(src, OffHeap.BYTE_ARRAY_OFFSET + offset, null, data + rowId, length);
}
private byte[] getBytes(int rowId, int length) {
byte[] array = new byte[length];
OffHeap.copyMemory(null, data + rowId, array, OffHeap.BYTE_ARRAY_OFFSET, length);
return array;
}
/**
 * Appends {@code length} raw bytes and returns the starting byte position of
 * the appended run (not a row index — the cursor advances by length).
 */
public int appendBytes(byte[] src, int offset, int length) {
reserve(appendIndex + length);
int result = appendIndex;
putBytes(appendIndex, src, offset, length);
appendIndex += length;
return result;
}
/**
 * Appends a string's UTF-8 bytes to the raw byte buffer (no offset entry);
 * returns the starting byte position. See appendStringAndOffset for the
 * row-level string append.
 */
public int appendString(String str) {
byte[] bytes = str.getBytes(StandardCharsets.UTF_8);
return appendBytes(bytes, 0, bytes.length);
}
public int appendBytesAndOffset(byte[] src) {
return appendBytesAndOffset(src, 0, src.length);
}
/**
 * Appends one variable-length value: the bytes go into the child byte column,
 * and this column records the END offset of the value (start is implied by
 * the previous row's end). Returns the row index written.
 */
public int appendBytesAndOffset(byte[] src, int offset, int length) {
int startOffset = childColumns[0].appendBytes(src, offset, length);
reserve(appendIndex + 1);
OffHeap.putInt(null, offsets + 4L * appendIndex, startOffset + length);
return appendIndex++;
}
// Appends one string row, encoded as UTF-8.
public int appendStringAndOffset(String str) {
byte[] bytes = str.getBytes(StandardCharsets.UTF_8);
return appendBytesAndOffset(bytes, 0, bytes.length);
}
/**
 * Reads the variable-length bytes of row {@code rowId} from the byte child
 * column. Only end offsets are stored, so the start of a row is the previous
 * row's end (or 0 for the first row).
 */
public byte[] getBytesWithOffset(int rowId) {
    long endAddress = offsets + 4L * rowId;
    int end = OffHeap.getInt(null, endAddress);
    int begin = 0;
    if (rowId != 0) {
        begin = OffHeap.getInt(null, endAddress - 4);
    }
    return childColumns[0].getBytes(begin, end - begin);
}

/**
 * Decodes the bytes of row {@code rowId} as a UTF-8 string.
 */
public String getStringWithOffset(int rowId) {
    return new String(getBytesWithOffset(rowId), StandardCharsets.UTF_8);
}
/**
 * Serializes this column's off-heap buffer addresses into the meta column so
 * the native side can locate them. Layout per column kind:
 * unsupported -> single 0; string -> nullMap, offsets, child data address;
 * complex -> nullMap, [offsets for array/map], then each child recursively;
 * fixed-width -> nullMap, data. Must stay in sync with ColumnType.metaSize().
 */
public void updateMeta(VectorColumn meta) {
if (columnType.isUnsupported()) {
meta.appendLong(0);
} else if (columnType.isStringType()) {
meta.appendLong(nullMap);
meta.appendLong(offsets);
meta.appendLong(childColumns[0].data);
} else if (columnType.isComplexType()) {
meta.appendLong(nullMap);
if (columnType.isArray() || columnType.isMap()) {
meta.appendLong(offsets);
}
for (VectorColumn c : childColumns) {
c.updateMeta(meta);
}
} else {
meta.appendLong(nullMap);
meta.appendLong(data);
}
}
/**
 * Appends one boxed value, dispatching on this column's logical type. A null
 * reference or a value reporting isNull() is stored as SQL NULL.
 *
 * @param o the value to append; may be null
 * @throws RuntimeException if the column's type is not handled here
 */
public void appendValue(ColumnValue o) {
ColumnType.Type typeValue = columnType.getType();
if (o == null || o.isNull()) {
appendNull(typeValue);
return;
}
switch (typeValue) {
case BOOLEAN:
appendBoolean(o.getBoolean());
break;
case TINYINT:
appendByte(o.getByte());
break;
case SMALLINT:
appendShort(o.getShort());
break;
case INT:
appendInt(o.getInt());
break;
case BIGINT:
appendLong(o.getLong());
break;
case LARGEINT:
appendBigInteger(o.getBigInteger());
break;
case FLOAT:
appendFloat(o.getFloat());
break;
case DOUBLE:
appendDouble(o.getDouble());
break;
case DECIMALV2:
case DECIMAL32:
case DECIMAL64:
case DECIMAL128:
appendDecimal(o.getDecimal());
break;
case DATEV2:
appendDate(o.getDate());
break;
case DATETIMEV2:
appendDateTime(o.getDateTime());
break;
case CHAR:
case VARCHAR:
case STRING:
appendStringAndOffset(o.getString());
break;
case BINARY:
appendBytesAndOffset(o.getBytes());
break;
default:
throw new RuntimeException("Unknown type value: " + typeValue);
}
}
// for test only.
/**
 * Appends a human-readable rendering of row {@code i} to {@code sb}; NULL
 * rows print as the literal "NULL". Binary values are rendered as UTF-8 text.
 */
public void dump(StringBuilder sb, int i) {
if (isNullAt(i)) {
sb.append("NULL");
return;
}
ColumnType.Type typeValue = columnType.getType();
switch (typeValue) {
case BOOLEAN:
sb.append(getBoolean(i));
break;
case TINYINT:
sb.append(getByte(i));
break;
case SMALLINT:
sb.append(getShort(i));
break;
case INT:
sb.append(getInt(i));
break;
case BIGINT:
sb.append(getLong(i));
break;
case LARGEINT:
sb.append(getBigInteger(i));
break;
case FLOAT:
sb.append(getFloat(i));
break;
case DOUBLE:
sb.append(getDouble(i));
break;
case DECIMALV2:
case DECIMAL32:
case DECIMAL64:
case DECIMAL128:
sb.append(getDecimal(i));
break;
case DATEV2:
sb.append(getDate(i));
break;
case DATETIMEV2:
sb.append(getDateTime(i));
break;
case CHAR:
case VARCHAR:
case STRING:
case BINARY:
sb.append(getStringWithOffset(i));
break;
default:
throw new RuntimeException("Unknown type value: " + typeValue);
}
}
}

View File

@ -0,0 +1,146 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.common.jni.vec;
import org.apache.doris.common.jni.utils.OffHeap;
import org.apache.doris.common.jni.vec.ColumnType.Type;
/**
* Store a batch of data as vector table.
*/
public class VectorTable {
// Data columns, one per field.
private final VectorColumn[] columns;
// Logical type of each column, parallel to columns.
private final ColumnType[] columnTypes;
// Field names, parallel to columns.
private final String[] fields;
// Pushed-down scan predicates; empty for restored tables.
private final ScanPredicate[] predicates;
// BIGINT column holding [numRows, per-column buffer addresses...] for the
// native side; see VectorColumn.updateMeta.
private final VectorColumn meta;
private int numRows;
// True when this table was rebuilt from a native meta address and therefore
// does not own its off-heap buffers (append/reset/close are disallowed).
private final boolean isRestoreTable;
/**
 * Creates a writable table that owns its off-heap buffers.
 *
 * @param capacity initial per-column element capacity
 */
public VectorTable(ColumnType[] types, String[] fields, ScanPredicate[] predicates, int capacity) {
this.columnTypes = types;
this.fields = fields;
this.columns = new VectorColumn[types.length];
this.predicates = predicates;
int metaSize = 1; // number of rows
for (int i = 0; i < types.length; i++) {
columns[i] = new VectorColumn(types[i], capacity);
metaSize += types[i].metaSize();
}
this.meta = new VectorColumn(new ColumnType("#meta", Type.BIGINT), metaSize);
this.numRows = 0;
this.isRestoreTable = false;
}
/**
 * Restores a read-only view of a table from a native meta address. The first
 * 8 bytes at the address hold the row count, followed by each column's
 * buffer addresses (metaSize() longs per column). The restored table does not
 * own the memory; it is released by the originating table.
 */
public VectorTable(ColumnType[] types, String[] fields, long metaAddress) {
long address = metaAddress;
this.columnTypes = types;
this.fields = fields;
this.columns = new VectorColumn[types.length];
this.predicates = new ScanPredicate[0];
this.numRows = (int) OffHeap.getLong(null, address);
address += 8;
int metaSize = 1; // number of rows
for (int i = 0; i < types.length; i++) {
columns[i] = new VectorColumn(types[i], numRows, address);
metaSize += types[i].metaSize();
address += types[i].metaSize() * 8L;
}
this.meta = new VectorColumn(metaAddress, metaSize, new ColumnType("#meta", Type.BIGINT));
this.isRestoreTable = true;
}
// Appends one value to the given column. Writable tables only.
public void appendData(int fieldId, ColumnValue o) {
assert (!isRestoreTable);
columns[fieldId].appendValue(o);
}
public VectorColumn[] getColumns() {
return columns;
}
public VectorColumn getColumn(int fieldId) {
return columns[fieldId];
}
public ColumnType[] getColumnTypes() {
return columnTypes;
}
public String[] getFields() {
return fields;
}
// Frees one column's off-heap buffers. Writable tables only.
public void releaseColumn(int fieldId) {
assert (!isRestoreTable);
columns[fieldId].close();
}
public void setNumRows(int numRows) {
this.numRows = numRows;
}
public int getNumRows() {
return this.numRows;
}
/**
 * Returns the address of the meta block. For a writable table the meta is
 * rebuilt first (row count + every column's current buffer addresses); a
 * restored table returns the address it was created from.
 */
public long getMetaAddress() {
if (!isRestoreTable) {
meta.reset();
meta.appendLong(numRows);
for (VectorColumn c : columns) {
c.updateMeta(meta);
}
}
return meta.dataAddress();
}
// Rewinds all columns and the meta for reuse; memory is kept. Writable only.
public void reset() {
assert (!isRestoreTable);
for (VectorColumn column : columns) {
column.reset();
}
meta.reset();
}
// Frees all columns and the meta. Writable tables only.
public void close() {
assert (!isRestoreTable);
for (int i = 0; i < columns.length; i++) {
releaseColumn(i);
}
meta.close();
}
// for test only.
/**
 * Renders up to {@code rowLimit} rows as comma-separated lines.
 */
public String dump(int rowLimit) {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < rowLimit && i < numRows; i++) {
for (int j = 0; j < columns.length; j++) {
if (j != 0) {
sb.append(", ");
}
columns[j].dump(sb, i);
}
sb.append('\n');
}
return sb.toString();
}
}

View File

@ -0,0 +1,26 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
log4j.rootLogger=INFO, RollingFile
log4j.appender.RollingFile=org.apache.log4j.RollingFileAppender
log4j.appender.RollingFile.Threshold=INFO
log4j.appender.RollingFile.File=${logPath}
log4j.appender.RollingFile.Append=true
log4j.appender.RollingFile.MaxFileSize=10MB
log4j.appender.RollingFile.MaxBackupIndex=5
log4j.appender.RollingFile.layout=org.apache.log4j.PatternLayout
log4j.appender.RollingFile.layout.ConversionPattern= %d{yyyy-MM-dd HH:mm:ss} %5p %t %-5l - %m%n

View File

@ -0,0 +1,41 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<assembly xmlns="http://maven.apache.org/ASSEMBLY/2.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/ASSEMBLY/2.0.0 http://maven.apache.org/xsd/assembly-2.0.0.xsd">
<id>jar-with-dependencies</id>
<formats>
<format>jar</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<dependencySets>
<dependencySet>
<outputDirectory>/</outputDirectory>
<useProjectArtifact>true</useProjectArtifact>
<unpack>true</unpack>
<scope>runtime</scope>
<unpackOptions>
<excludes>
<exclude>**/Log4j2Plugins.dat</exclude>
</excludes>
</unpackOptions>
</dependencySet>
</dependencySets>
</assembly>

View File

@ -0,0 +1,61 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.common.jni;
import org.apache.doris.common.jni.utils.OffHeap;
import org.apache.doris.common.jni.vec.VectorTable;
import org.junit.Assert;
import org.junit.Test;
import java.io.IOException;
import java.util.HashMap;
public class JniScannerTest {
    /**
     * Drives MockJniScanner end to end: opens it, drains every 32-row batch,
     * rebuilds each batch as a restored VectorTable from the meta address and
     * prints it, then releases the scanner.
     */
    @Test
    public void testMockJniScanner() throws IOException {
        OffHeap.setTesting();
        HashMap<String, String> params = new HashMap<String, String>();
        params.put("mock_rows", "128");
        params.put("required_fields", "boolean,tinyint,smallint,int,bigint,largeint,float,double,"
                + "date,timestamp,char,varchar,string,decimalv2,decimal64");
        params.put("columns_types", "boolean#tinyint#smallint#int#bigint#largeint#float#double#"
                + "date#timestamp#char(10)#varchar(10)#string#decimalv2(12,4)#decimal64(10,3)");
        MockJniScanner scanner = new MockJniScanner(32, params);
        scanner.open();
        while (true) {
            long metaAddress = scanner.getNextBatchMeta();
            if (metaAddress != 0) {
                long rows = OffHeap.getLong(null, metaAddress);
                Assert.assertEquals(32, rows);
                VectorTable restoreTable = new VectorTable(scanner.getTable().getColumnTypes(),
                        scanner.getTable().getFields(), metaAddress);
                System.out.println(restoreTable.dump((int) rows));
                // The restored table is released by the origin table.
            }
            // Matches the original do-while: resetTable() also runs once after
            // the final (zero) meta address before the loop exits.
            scanner.resetTable();
            if (metaAddress == 0) {
                break;
            }
        }
        scanner.releaseTable();
        scanner.close();
    }
}