[cherry-pick](branch-2.1) fix http error when downloading variant inverted index file #35668 (#36061)

Pick from master [#35668](https://github.com/apache/doris/pull/35668)
This commit is contained in:
Sun Chenyang
2024-06-11 14:09:05 +08:00
committed by GitHub
parent 4a277affdc
commit 0dccc4e6e4
4 changed files with 299 additions and 2 deletions

View File

@ -17,6 +17,7 @@
#include "olap/task/engine_clone_task.h"
#include <curl/curl.h>
#include <fcntl.h>
#include <fmt/format.h>
#include <gen_cpp/AgentService_types.h>
@ -512,8 +513,26 @@ Status EngineCloneTask::_download_files(DataDir* data_dir, const std::string& re
uint64_t total_file_size = 0;
MonotonicStopWatch watch;
watch.start();
auto curl = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>(curl_easy_init(),
&curl_easy_cleanup);
if (!curl) {
return Status::InternalError("engine clone task init curl failed");
}
for (auto& file_name : file_name_list) {
auto remote_file_url = remote_url_prefix + file_name;
// The inverted index file name of a variant column contains a literal '%',
// such as: 020000000000003f624c4c322c568271060f9b5b274a4a95_0_10133@properties%2Emessage.idx
// {rowset_id}_{seg_num}_{index_id}@{variant_column_name}{%2E}{extracted_column_name}.idx
// The file name must be percent-encoded before it is appended to the URL, otherwise the request fails with an HTTP 404 error.
// Because the percent ("%") character serves as the indicator for percent-encoded octets,
// it must be percent-encoded as "%25" for that octet to be used as data within a URI.
// https://datatracker.ietf.org/doc/html/rfc3986
auto output = std::unique_ptr<char, decltype(&curl_free)>(
curl_easy_escape(curl.get(), file_name.c_str(), file_name.length()), &curl_free);
if (!output) {
return Status::InternalError("escape file name failed, file name={}", file_name);
}
std::string encoded_filename(output.get());
auto remote_file_url = remote_url_prefix + encoded_filename;
// get file length
uint64_t file_size = 0;