[feature-wip][UDF][DIP-1] Support variable-size input and output for Java UDF (#8678)
This feature is proposed in DSIP-1. This PR support variable-length input and output Java UDF.
This commit is contained in:
@ -76,27 +76,36 @@ private:
|
||||
jmethodID executor_evaluate_id_;
|
||||
jmethodID executor_close_id_;
|
||||
|
||||
struct IntermediateState {
|
||||
size_t buffer_size;
|
||||
size_t row_idx;
|
||||
};
|
||||
|
||||
struct JniContext {
|
||||
JavaFunctionCall* parent = nullptr;
|
||||
|
||||
jobject executor = nullptr;
|
||||
|
||||
int64_t input_values_buffer_ptr;
|
||||
int64_t input_nulls_buffer_ptr;
|
||||
int64_t input_byte_offsets_ptr;
|
||||
int64_t output_value_buffer;
|
||||
int64_t output_null_value;
|
||||
int64_t batch_size_ptr;
|
||||
std::unique_ptr<int64_t[]> input_values_buffer_ptr;
|
||||
std::unique_ptr<int64_t[]> input_nulls_buffer_ptr;
|
||||
std::unique_ptr<int64_t[]> input_offsets_ptrs;
|
||||
std::unique_ptr<int64_t> output_value_buffer;
|
||||
std::unique_ptr<int64_t> output_null_value;
|
||||
std::unique_ptr<int64_t> output_offsets_ptr;
|
||||
std::unique_ptr<int32_t> batch_size_ptr;
|
||||
// intermediate_state includes two parts: reserved / used buffer size and rows
|
||||
std::unique_ptr<IntermediateState> output_intermediate_state_ptr;
|
||||
|
||||
JniContext(int64_t num_args, JavaFunctionCall* parent):
|
||||
parent(parent) {
|
||||
input_values_buffer_ptr = (int64_t) new int64_t[num_args];
|
||||
input_nulls_buffer_ptr = (int64_t) new int64_t[num_args];
|
||||
input_byte_offsets_ptr = (int64_t) new int64_t[num_args];
|
||||
|
||||
output_value_buffer = (int64_t) malloc(sizeof(int64_t));
|
||||
output_null_value = (int64_t) malloc(sizeof(int64_t));
|
||||
batch_size_ptr = (int64_t) malloc(sizeof(int32_t));
|
||||
input_values_buffer_ptr.reset(new int64_t[num_args]);
|
||||
input_nulls_buffer_ptr.reset(new int64_t[num_args]);
|
||||
input_offsets_ptrs.reset(new int64_t[num_args]);
|
||||
output_value_buffer.reset((int64_t*) malloc(sizeof(int64_t)));
|
||||
output_null_value.reset((int64_t*) malloc(sizeof(int64_t)));
|
||||
batch_size_ptr.reset((int32_t*) malloc(sizeof(int32_t)));
|
||||
output_offsets_ptr.reset((int64_t*) malloc(sizeof(int64_t)));
|
||||
output_intermediate_state_ptr.reset((IntermediateState*) malloc(sizeof(IntermediateState)));
|
||||
}
|
||||
|
||||
~JniContext() {
|
||||
@ -109,12 +118,6 @@ private:
|
||||
Status s = JniUtil::GetJniExceptionMsg(env);
|
||||
if (!s.ok()) LOG(WARNING) << s.get_error_msg();
|
||||
env->DeleteGlobalRef(executor);
|
||||
delete[] ((int64*) input_values_buffer_ptr);
|
||||
delete[] ((int64*) input_nulls_buffer_ptr);
|
||||
delete[] ((int64*) input_byte_offsets_ptr);
|
||||
free((int64*) output_value_buffer);
|
||||
free((int64*) output_null_value);
|
||||
free((int32*) batch_size_ptr);
|
||||
}
|
||||
|
||||
/// These functions are cross-compiled to IR and used by codegen.
|
||||
@ -122,6 +125,12 @@ private:
|
||||
JniContext* jni_ctx, int index, uint8_t value);
|
||||
static uint8_t* GetInputValuesBufferAtOffset(JniContext* jni_ctx, int offset);
|
||||
};
|
||||
|
||||
static const int32_t INITIAL_RESERVED_BUFFER_SIZE = 1024;
|
||||
// TODO: we need a heuristic strategy to increase buffer size for variable-size output.
|
||||
static inline int32_t IncreaseReservedBufferSize(int n) {
|
||||
return INITIAL_RESERVED_BUFFER_SIZE << n;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace vectorized
|
||||
|
||||
Reference in New Issue
Block a user