[enhancement](udf) add more info when downloading a jar package fails (#21440)

When downloading a jar package, the checksum is sometimes reported as not equal,
but the root cause is unknown. This change adds more detail to the error message when the download fails.
This commit is contained in:
zhangstar333
2023-07-04 20:35:35 +08:00
committed by GitHub
parent 3b73604f74
commit 122f5f6c2d

View File

@ -25,6 +25,7 @@
#include <unistd.h>
#include <atomic>
#include <cstdint>
#include <memory>
#include <ostream>
#include <regex>
@ -55,6 +56,15 @@ struct UserFunctionCacheEntry {
: function_id(fid_), checksum(checksum_), lib_file(lib_file_), type(type) {}
~UserFunctionCacheEntry();
std::string debug_string() {
fmt::memory_buffer error_msg;
fmt::format_to(error_msg,
" the info of UserFunctionCacheEntry save in BE, function_id:{}, "
"checksum:{}, lib_file:{}, is_downloaded:{}. ",
function_id, checksum, lib_file, is_downloaded);
return fmt::to_string(error_msg);
}
int64_t function_id = 0;
// used to check if this library is valid.
std::string checksum;
@ -136,13 +146,17 @@ Status UserFunctionCache::_load_entry_from_lib(const std::string& dir, const std
} else if (ends_with(file, ".jar")) {
lib_type = LibType::JAR;
} else {
return Status::InternalError("unknown library file format: " + file);
return Status::InternalError(
"unknown library file format. the file type is not end with xxx.jar or xxx.so : " +
file);
}
std::vector<std::string> split_parts = strings::Split(file, ".");
if (split_parts.size() != 3 && split_parts.size() != 4) {
return Status::InternalError(
"user function's name should be function_id.checksum[.file_name].file_type");
"user function's name should be function_id.checksum[.file_name].file_type, now "
"the all split parts are by delimiter(.): " +
file);
}
int64_t function_id = std::stol(split_parts[0]);
std::string checksum = split_parts[1];
@ -150,7 +164,7 @@ Status UserFunctionCache::_load_entry_from_lib(const std::string& dir, const std
if (it != _entry_map.end()) {
LOG(WARNING) << "meet a same function id user function library, function_id=" << function_id
<< ", one_checksum=" << checksum
<< ", other_checksum=" << it->second->checksum;
<< ", other_checksum info: = " << it->second->debug_string();
return Status::InternalError("duplicate function id");
}
// create a cache entry and put it into entry map
@ -213,7 +227,7 @@ Status UserFunctionCache::_get_cache_entry(int64_t fid, const std::string& url,
}
auto st = _load_cache_entry(url, entry);
if (!st.ok()) {
LOG(WARNING) << "fail to load cache entry, fid=" << fid;
LOG(WARNING) << "fail to load cache entry, fid=" << fid << " " << file_name << " " << url;
// if we load a cache entry failed, I think we should delete this entry cache
// even if this cache was valid before.
_destroy_cache_entry(entry);
@ -271,10 +285,13 @@ Status UserFunctionCache::_download_lib(const std::string& url,
Md5Digest digest;
HttpClient client;
int64_t file_size = 0;
RETURN_IF_ERROR(client.init(real_url));
Status status;
auto download_cb = [&status, &tmp_file, &fp, &digest](const void* data, size_t length) {
auto download_cb = [&status, &tmp_file, &fp, &digest, &file_size](const void* data,
size_t length) {
digest.update(data, length);
file_size = file_size + length;
auto res = fwrite(data, length, 1, fp.get());
if (res != 1) {
LOG(WARNING) << "fail to write data to file, file=" << tmp_file
@ -288,9 +305,15 @@ Status UserFunctionCache::_download_lib(const std::string& url,
RETURN_IF_ERROR(status);
digest.digest();
if (!iequal(digest.hex(), entry->checksum)) {
LOG(WARNING) << "UDF's checksum is not equal, one=" << digest.hex()
<< ", other=" << entry->checksum;
return Status::InternalError("UDF's library checksum is not match");
fmt::memory_buffer error_msg;
fmt::format_to(
error_msg,
" The checksum is not equal of {} ({}). The init info of first create entry is:"
"{} But download file check_sum is: {}, file_size is: {}.",
url, real_url, entry->debug_string(), digest.hex(), file_size);
std::string error(fmt::to_string(error_msg));
LOG(WARNING) << error;
return Status::InternalError(error);
}
// close this file
fp.reset();