[feature-wip](java-udf) support java UDF with fixed-length input and output (#8516)
This feature is propsoed in [DSIP-1](https://cwiki.apache.org/confluence/display/DORIS/DSIP-001%3A+Java+UDF). This PR support fixed-length input and output Java UDF. Phase I in DIP-1 is done after this PR. To support Java UDF effeciently, I use no data copy in JNI call and all compute operations are off-heap in Java. To achieve that, I use a UdfExecutor instead. For users, a UDF class must have a public evaluate method.
This commit is contained in:
@ -28,6 +28,7 @@
|
||||
#include "http/http_client.h"
|
||||
#include "util/dynamic_util.h"
|
||||
#include "util/file_utils.h"
|
||||
#include "util/jni-util.h"
|
||||
#include "util/md5.h"
|
||||
#include "util/spinlock.h"
|
||||
|
||||
@ -37,8 +38,9 @@ static const int kLibShardNum = 128;
|
||||
|
||||
// function cache entry, store information for
|
||||
struct UserFunctionCacheEntry {
|
||||
UserFunctionCacheEntry(int64_t fid_, const std::string& checksum_, const std::string& lib_file_)
|
||||
: function_id(fid_), checksum(checksum_), lib_file(lib_file_) {}
|
||||
UserFunctionCacheEntry(int64_t fid_, const std::string& checksum_, const std::string& lib_file_,
|
||||
LibType type)
|
||||
: function_id(fid_), checksum(checksum_), lib_file(lib_file_), type(type) {}
|
||||
~UserFunctionCacheEntry();
|
||||
|
||||
void ref() { _refs.fetch_add(1); }
|
||||
@ -75,6 +77,8 @@ struct UserFunctionCacheEntry {
|
||||
// from symbol_name to function pointer
|
||||
std::unordered_map<std::string, void*> fptr_map;
|
||||
|
||||
LibType type;
|
||||
|
||||
private:
|
||||
std::atomic<int> _refs{0};
|
||||
};
|
||||
@ -141,7 +145,7 @@ Status UserFunctionCache::_load_entry_from_lib(const std::string& dir, const std
|
||||
}
|
||||
// create a cache entry and put it into entry map
|
||||
UserFunctionCacheEntry* entry =
|
||||
new UserFunctionCacheEntry(function_id, checksum, dir + "/" + file);
|
||||
new UserFunctionCacheEntry(function_id, checksum, dir + "/" + file, LibType::SO);
|
||||
entry->is_downloaded = true;
|
||||
|
||||
entry->ref();
|
||||
@ -199,7 +203,7 @@ Status UserFunctionCache::get_function_ptr(int64_t fid, const std::string& orig_
|
||||
if (output_entry != nullptr && *output_entry != nullptr) {
|
||||
entry = *output_entry;
|
||||
} else {
|
||||
RETURN_IF_ERROR(_get_cache_entry(fid, url, checksum, &entry));
|
||||
RETURN_IF_ERROR(_get_cache_entry(fid, url, checksum, &entry, LibType::SO));
|
||||
need_unref_entry = true;
|
||||
}
|
||||
|
||||
@ -237,7 +241,8 @@ Status UserFunctionCache::get_function_ptr(int64_t fid, const std::string& orig_
|
||||
|
||||
Status UserFunctionCache::_get_cache_entry(int64_t fid, const std::string& url,
|
||||
const std::string& checksum,
|
||||
UserFunctionCacheEntry** output_entry) {
|
||||
UserFunctionCacheEntry** output_entry,
|
||||
LibType type) {
|
||||
UserFunctionCacheEntry* entry = nullptr;
|
||||
{
|
||||
std::lock_guard<std::mutex> l(_cache_lock);
|
||||
@ -245,7 +250,7 @@ Status UserFunctionCache::_get_cache_entry(int64_t fid, const std::string& url,
|
||||
if (it != _entry_map.end()) {
|
||||
entry = it->second;
|
||||
} else {
|
||||
entry = new UserFunctionCacheEntry(fid, checksum, _make_lib_file(fid, checksum));
|
||||
entry = new UserFunctionCacheEntry(fid, checksum, _make_lib_file(fid, checksum, type), type);
|
||||
|
||||
entry->ref();
|
||||
_entry_map.emplace(fid, entry);
|
||||
@ -292,7 +297,14 @@ Status UserFunctionCache::_load_cache_entry(const std::string& url, UserFunction
|
||||
RETURN_IF_ERROR(_download_lib(url, entry));
|
||||
}
|
||||
|
||||
RETURN_IF_ERROR(_load_cache_entry_internal(entry));
|
||||
if (entry->type == LibType::SO) {
|
||||
RETURN_IF_ERROR(_load_cache_entry_internal(entry));
|
||||
} else if (entry->type == LibType::JAR) {
|
||||
RETURN_IF_ERROR(_add_to_classpath(entry));
|
||||
} else {
|
||||
return Status::InvalidArgument(
|
||||
"Unsupported lib type! Make sure your lib type is one of 'so' and 'jar'!");
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -356,10 +368,38 @@ Status UserFunctionCache::_load_cache_entry_internal(UserFunctionCacheEntry* ent
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
std::string UserFunctionCache::_make_lib_file(int64_t function_id, const std::string& checksum) {
|
||||
Status UserFunctionCache::_add_to_classpath(UserFunctionCacheEntry* entry) {
|
||||
#ifdef LIBJVM
|
||||
const std::string path = "file://" + entry->lib_file;
|
||||
LOG(INFO) << "Add jar " << path << " to classpath";
|
||||
JNIEnv* env;
|
||||
RETURN_IF_ERROR(JniUtil::GetJNIEnv(&env));
|
||||
jclass class_class_loader = env->FindClass("java/lang/ClassLoader");
|
||||
jmethodID method_get_system_class_loader =
|
||||
env->GetStaticMethodID(class_class_loader, "getSystemClassLoader", "()Ljava/lang/ClassLoader;");
|
||||
jobject class_loader = env->CallStaticObjectMethod(class_class_loader, method_get_system_class_loader);
|
||||
jclass class_url_class_loader = env->FindClass("java/net/URLClassLoader");
|
||||
jmethodID method_add_url = env->GetMethodID(class_url_class_loader, "addURL", "(Ljava/net/URL;)V");
|
||||
jclass class_url = env->FindClass("java/net/URL");
|
||||
jmethodID url_ctor = env->GetMethodID(class_url, "<init>", "(Ljava/lang/String;)V");
|
||||
jobject urlInstance = env->NewObject(class_url, url_ctor, env->NewStringUTF(path.c_str()));
|
||||
env->CallVoidMethod(class_loader, method_add_url, urlInstance);
|
||||
return Status::OK();
|
||||
#else
|
||||
return Status::InternalError("No libjvm is found!");
|
||||
#endif
|
||||
}
|
||||
|
||||
std::string UserFunctionCache::_make_lib_file(int64_t function_id, const std::string& checksum,
|
||||
LibType type) {
|
||||
int shard = function_id % kLibShardNum;
|
||||
std::stringstream ss;
|
||||
ss << _lib_dir << '/' << shard << '/' << function_id << '.' << checksum << ".so";
|
||||
ss << _lib_dir << '/' << shard << '/' << function_id << '.' << checksum;
|
||||
if (type == LibType::JAR) {
|
||||
ss << ".jar";
|
||||
} else {
|
||||
ss << ".so";
|
||||
}
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
@ -372,4 +412,12 @@ void UserFunctionCache::release_entry(UserFunctionCacheEntry* entry) {
|
||||
}
|
||||
}
|
||||
|
||||
Status UserFunctionCache::get_jarpath(int64_t fid, const std::string& url, const std::string& checksum,
|
||||
std::string* libpath) {
|
||||
UserFunctionCacheEntry* entry = nullptr;
|
||||
RETURN_IF_ERROR(_get_cache_entry(fid, url, checksum, &entry, LibType::JAR));
|
||||
*libpath = entry->lib_file;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
} // namespace doris
|
||||
|
||||
Reference in New Issue
Block a user