[SQL][Function] Fix from/to_base64 may return incorrect value (#4183)
from_base64/to_base64 may return an incorrect value when the input is NULL (#4130). This commit also removes the duplicated base64 code and fixes the wrongly computed length of the base64-encoded string, which could cause memory errors.
This commit is contained in:
@ -22,7 +22,6 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/exprs")
|
||||
set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/exprs")
|
||||
|
||||
add_library(Exprs
|
||||
base64.cpp
|
||||
encryption_functions.cpp
|
||||
aggregate_functions.cpp
|
||||
agg_fn_evaluator.cpp
|
||||
|
||||
@ -1,169 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include "base64.h"
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <string>
|
||||
|
||||
// Base64 alphabet: maps a 6-bit value (0..63) to its output character.
// base64_encode2 indexes this table with each 6-bit group of the input.
static char s_encoding_table[] = {
|
||||
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
|
||||
'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
|
||||
'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
|
||||
'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
|
||||
'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
|
||||
'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
|
||||
'w', 'x', 'y', 'z', '0', '1', '2', '3',
|
||||
'4', '5', '6', '7', '8', '9', '+', '/'
|
||||
};
|
||||
|
||||
// Padding character of the encoding; base64_decode2 compares each input byte against it.
static const char base64_pad = '=';
|
||||
|
||||
// Reverse lookup used by base64_decode2, indexed by the input byte value:
//   0..63  the 6-bit value of a base64 alphabet character,
//   -1     a separator (tab, LF, CR, space) that the decoder skips,
//   -2     any other byte, which makes the decoder return -1.
static short s_decoding_table[256] = {
|
||||
-2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -2, -2, -1, -2, -2,
|
||||
-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
|
||||
-1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, 62, -2, -2, -2, 63,
|
||||
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -2, -2, -2, -2, -2, -2,
|
||||
-2, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
|
||||
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -2, -2, -2, -2, -2,
|
||||
-2, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
|
||||
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -2, -2, -2, -2, -2,
|
||||
-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
|
||||
-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
|
||||
-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
|
||||
-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
|
||||
-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
|
||||
-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
|
||||
-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
|
||||
-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2
|
||||
};
|
||||
|
||||
// Number of trailing '=' characters base64_encode2 writes, indexed by (input length % 3).
static int s_mod_table[] = {0, 2, 1};
|
||||
|
||||
namespace doris {
|
||||
|
||||
size_t base64_encode2(const unsigned char *data,
|
||||
size_t length,
|
||||
unsigned char *encoded_data) {
|
||||
size_t output_length = (size_t) (4.0 * ceil((double) length / 3.0));
|
||||
|
||||
if (encoded_data == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0, j = 0; i < length;) {
|
||||
uint32_t octet_a = i < length ? data[i++] : 0;
|
||||
uint32_t octet_b = i < length ? data[i++] : 0;
|
||||
uint32_t octet_c = i < length ? data[i++] : 0;
|
||||
uint32_t triple = (octet_a << 0x10) + (octet_b << 0x08) + octet_c;
|
||||
|
||||
encoded_data[j++] = s_encoding_table[(triple >> 3 * 6) & 0x3F];
|
||||
encoded_data[j++] = s_encoding_table[(triple >> 2 * 6) & 0x3F];
|
||||
encoded_data[j++] = s_encoding_table[(triple >> 1 * 6) & 0x3F];
|
||||
encoded_data[j++] = s_encoding_table[(triple >> 0 * 6) & 0x3F];
|
||||
}
|
||||
|
||||
for (int i = 0; i < s_mod_table[length % 3]; i++) {
|
||||
encoded_data[output_length - 1 - i] = '=';
|
||||
}
|
||||
|
||||
return output_length;
|
||||
}
|
||||
|
||||
int64_t base64_decode2(
|
||||
const char *data,
|
||||
size_t length,
|
||||
char *decoded_data) {
|
||||
const char *current = data;
|
||||
int ch = 0;
|
||||
int i = 0;
|
||||
int j = 0;
|
||||
int k = 0;
|
||||
|
||||
// run through the whole string, converting as we go
|
||||
while ((ch = *current++) != '\0' && length-- > 0) {
|
||||
if (ch == base64_pad) {
|
||||
if (*current != '=' && (i % 4) == 1) {
|
||||
return -1;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
ch = s_decoding_table[ch];
|
||||
// a space or some other separator character, we simply skip over
|
||||
if (ch == -1) {
|
||||
continue;
|
||||
} else if (ch == -2) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
switch (i % 4) {
|
||||
case 0:
|
||||
decoded_data[j] = ch << 2;
|
||||
break;
|
||||
case 1:
|
||||
decoded_data[j++] |= ch >> 4;
|
||||
decoded_data[j] = (ch & 0x0f) << 4;
|
||||
break;
|
||||
case 2:
|
||||
decoded_data[j++] |= ch >>2;
|
||||
decoded_data[j] = (ch & 0x03) << 6;
|
||||
break;
|
||||
case 3:
|
||||
decoded_data[j++] |= ch;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
k = j;
|
||||
/* mop things up if we ended on a boundary */
|
||||
if (ch == base64_pad) {
|
||||
switch (i % 4) {
|
||||
case 1:
|
||||
return 0;
|
||||
case 2:
|
||||
k++;
|
||||
case 3:
|
||||
decoded_data[k] = 0;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
decoded_data[j] = '\0';
|
||||
|
||||
return j;
|
||||
}
|
||||
|
||||
/*bool base64_decode2(const std::string& in, std::string* out) {
|
||||
char* tmp = new char[in.length()];
|
||||
|
||||
int64_t len = base64_decode(in.c_str(), in.length(), tmp);
|
||||
if (len < 0) {
|
||||
delete[] tmp;
|
||||
return false;
|
||||
}
|
||||
out->assign(tmp, len);
|
||||
delete[] tmp;
|
||||
return true;
|
||||
}
|
||||
*/
|
||||
}
|
||||
@ -1,36 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#ifndef DORIS_BE_EXPRS_BASE64_H
|
||||
#define DORIS_BE_EXPRS_BASE64_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
namespace doris {
|
||||
|
||||
// Decodes base64 text from `data` (at most `length` bytes) into `decoded_data`.
// Tab, LF, CR, space and '=' padding are skipped; a NUL terminator is written
// after the decoded bytes, so `decoded_data` needs room for one extra byte.
// Returns the number of decoded bytes, or -1 if the input contains a byte that
// is not part of the base64 alphabet.
int64_t base64_decode2(
|
||||
const char *data,
|
||||
size_t length,
|
||||
char *decoded_data);
|
||||
|
||||
// Encodes `length` bytes of `data` as '='-padded base64 text into `encoded_data`.
// No NUL terminator is written; the caller must provide 4 * ceil(length / 3) bytes.
// Returns the number of characters written, or 0 if `encoded_data` is NULL.
size_t base64_encode2(const unsigned char *data,
|
||||
size_t length,
|
||||
unsigned char *encoded_data);
|
||||
|
||||
}
|
||||
#endif
|
||||
@ -23,7 +23,7 @@
|
||||
#include "exprs/expr.h"
|
||||
#include "util/debug_util.h"
|
||||
#include "runtime/tuple_row.h"
|
||||
#include "exprs/base64.h"
|
||||
#include "util/url_coding.h"
|
||||
#include <boost/smart_ptr.hpp>
|
||||
#include "runtime/string_value.h"
|
||||
|
||||
@ -69,7 +69,7 @@ StringVal EncryptionFunctions::aes_decrypt(FunctionContext* ctx,
|
||||
}
|
||||
|
||||
StringVal EncryptionFunctions::from_base64(FunctionContext* ctx, const StringVal &src) {
|
||||
if (src.len == 0) {
|
||||
if (src.len == 0 || src.is_null) {
|
||||
return StringVal::null();
|
||||
}
|
||||
|
||||
@ -77,7 +77,7 @@ StringVal EncryptionFunctions::from_base64(FunctionContext* ctx, const StringVal
|
||||
boost::scoped_array<char> p;
|
||||
p.reset(new char[cipher_len]);
|
||||
|
||||
int ret_code = base64_decode2((const char *)src.ptr, src.len, p.get());
|
||||
int ret_code = base64_decode((const char *)src.ptr, src.len, p.get());
|
||||
if (ret_code < 0) {
|
||||
return StringVal::null();
|
||||
}
|
||||
@ -85,15 +85,15 @@ StringVal EncryptionFunctions::from_base64(FunctionContext* ctx, const StringVal
|
||||
}
|
||||
|
||||
StringVal EncryptionFunctions::to_base64(FunctionContext* ctx, const StringVal &src) {
|
||||
if (src.len == 0) {
|
||||
if (src.len == 0 || src.is_null) {
|
||||
return StringVal::null();
|
||||
}
|
||||
|
||||
int cipher_len = src.len * 4 / 3 + 1;
|
||||
int cipher_len = (size_t) (4.0 * ceil((double) src.len / 3.0));
|
||||
boost::scoped_array<char> p;
|
||||
p.reset(new char[cipher_len]);
|
||||
|
||||
int ret_code = base64_encode2((unsigned char *)src.ptr, src.len, (unsigned char *)p.get());
|
||||
int ret_code = base64_encode((unsigned char *)src.ptr, src.len, (unsigned char *)p.get());
|
||||
if (ret_code < 0) {
|
||||
return StringVal::null();
|
||||
}
|
||||
|
||||
@ -25,8 +25,6 @@
|
||||
#include <openssl/evp.h>
|
||||
#include <openssl/err.h>
|
||||
|
||||
#include "exprs/base64.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
static const int AES_MAX_KEY_LENGTH = 256;
|
||||
|
||||
@ -184,7 +184,7 @@ size_t base64_encode(const unsigned char* data, size_t length, unsigned char* en
|
||||
return output_length;
|
||||
}
|
||||
|
||||
static inline int64_t base64_decode(const char* data, size_t length, char* decoded_data) {
|
||||
int64_t base64_decode(const char* data, size_t length, char* decoded_data) {
|
||||
const char* current = data;
|
||||
int ch = 0;
|
||||
int i = 0;
|
||||
|
||||
@ -39,13 +39,15 @@ void url_encode(const std::vector<uint8_t>& in, std::string* out);
|
||||
// certain characters like ' '.
|
||||
bool url_decode(const std::string& in, std::string* out);
|
||||
|
||||
void base64url_encode(const std::string& in, std::string* out);
|
||||
void base64_encode(const std::string& in, std::string* out);
|
||||
void base64url_encode(const std::string& in, std::string *out);
|
||||
void base64_encode(const std::string& in, std::string *out);
|
||||
size_t base64_encode(const unsigned char *data, size_t length, unsigned char *encoded_data);
|
||||
|
||||
// Utility method to decode base64 encoded strings. Also not extremely
|
||||
// performant.
|
||||
// Returns true unless the string could not be correctly decoded.
|
||||
bool base64_decode(const std::string& in, std::string* out);
|
||||
int64_t base64_decode(const char *data, size_t length, char *decoded_data);
|
||||
|
||||
// Replaces &, < and > with &, < and > respectively. This is
|
||||
// not the full set of required encodings, but one that should be
|
||||
|
||||
@ -31,4 +31,5 @@ ADD_BE_TEST(timestamp_functions_test)
|
||||
ADD_BE_TEST(percentile_approx_test)
|
||||
ADD_BE_TEST(bitmap_function_test)
|
||||
ADD_BE_TEST(hll_function_test)
|
||||
ADD_BE_TEST(encryption_functions_test)
|
||||
#ADD_BE_TEST(in-predicate-test)
|
||||
|
||||
84
be/test/exprs/encryption_functions_test.cpp
Normal file
84
be/test/exprs/encryption_functions_test.cpp
Normal file
@ -0,0 +1,84 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include "exprs/encryption_functions.h"
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include "exprs/anyval_util.h"
|
||||
#include "testutil/function_utils.h"
|
||||
#include "util/logging.h"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
namespace doris {
|
||||
class EncryptionFunctionsTest : public testing::Test {
|
||||
public:
|
||||
EncryptionFunctionsTest() = default;
|
||||
|
||||
void SetUp() {
|
||||
utils = new FunctionUtils();
|
||||
ctx = utils->get_fn_ctx();
|
||||
}
|
||||
void TearDown() { delete utils; }
|
||||
|
||||
private:
|
||||
FunctionUtils* utils;
|
||||
FunctionContext* ctx;
|
||||
};
|
||||
|
||||
// from_base64 must decode valid base64 text and map a NULL input to a NULL result.
TEST_F(EncryptionFunctionsTest, from_base64) {
    std::unique_ptr<doris_udf::FunctionContext> context(new doris_udf::FunctionContext());

    // Valid input: "aGVsbG8=" is the encoding of "hello".
    StringVal decoded =
            EncryptionFunctions::from_base64(context.get(), doris_udf::StringVal("aGVsbG8="));
    StringVal expected_text = doris_udf::StringVal("hello");
    ASSERT_EQ(expected_text, decoded);

    // NULL input yields NULL output.
    StringVal decoded_null =
            EncryptionFunctions::from_base64(context.get(), doris_udf::StringVal::null());
    StringVal expected_null = doris_udf::StringVal::null();
    ASSERT_EQ(expected_null, decoded_null);
}
|
||||
|
||||
// to_base64 must encode plain text and map a NULL input to a NULL result.
TEST_F(EncryptionFunctionsTest, to_base64) {
    std::unique_ptr<doris_udf::FunctionContext> context(new doris_udf::FunctionContext());

    // "hello" encodes to "aGVsbG8=" (one padding character).
    StringVal encoded =
            EncryptionFunctions::to_base64(context.get(), doris_udf::StringVal("hello"));
    StringVal expected_text = doris_udf::StringVal("aGVsbG8=");
    ASSERT_EQ(expected_text, encoded);

    // NULL input yields NULL output.
    StringVal encoded_null =
            EncryptionFunctions::to_base64(context.get(), doris_udf::StringVal::null());
    StringVal expected_null = doris_udf::StringVal::null();
    ASSERT_EQ(expected_null, encoded_null);
}
|
||||
|
||||
} // namespace doris
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
std::string conffile = std::string(getenv("DORIS_HOME")) + "/conf/be.conf";
|
||||
if (!doris::config::init(conffile.c_str(), false)) {
|
||||
fprintf(stderr, "error read config file. \n");
|
||||
return -1;
|
||||
}
|
||||
::testing::InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
@ -21,7 +21,7 @@
|
||||
#include <memory>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "exprs/base64.h"
|
||||
#include "util/url_coding.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
@ -64,7 +64,7 @@ TEST_F(AesUtilTest, aes_test_by_case) {
|
||||
std::string source_2 = "doris test";
|
||||
|
||||
std::unique_ptr<char[]> encrypt_1(new char[case_1.length()]);
|
||||
int length_1 = base64_decode2(case_1.c_str(), case_1.length(), encrypt_1.get());
|
||||
int length_1 = base64_decode(case_1.c_str(), case_1.length(), encrypt_1.get());
|
||||
std::unique_ptr<char[]> decrypted_1(new char[case_1.length()]);
|
||||
int ret_code = AesUtil::decrypt(AES_128_ECB, (unsigned char *)encrypt_1.get(), length_1,
|
||||
(unsigned char *)_aes_key.c_str(), _aes_key.length(), NULL, true, (unsigned char *)decrypted_1.get());
|
||||
@ -73,7 +73,7 @@ TEST_F(AesUtilTest, aes_test_by_case) {
|
||||
ASSERT_EQ(source_1, decrypted_content_1);
|
||||
|
||||
std::unique_ptr<char[]> encrypt_2(new char[case_2.length()]);
|
||||
int length_2 = base64_decode2(case_2.c_str(), case_2.length(), encrypt_2.get());
|
||||
int length_2 = base64_decode(case_2.c_str(), case_2.length(), encrypt_2.get());
|
||||
std::unique_ptr<char[]> decrypted_2(new char[case_2.length()]);
|
||||
ret_code = AesUtil::decrypt(AES_128_ECB, (unsigned char *)encrypt_2.get(), length_2,
|
||||
(unsigned char *)_aes_key.c_str(), _aes_key.length(), NULL, true, (unsigned char *)decrypted_2.get());
|
||||
|
||||
@ -190,6 +190,7 @@ ${DORIS_TEST_BINARY_DIR}/exprs/timestamp_functions_test
|
||||
${DORIS_TEST_BINARY_DIR}/exprs/percentile_approx_test
|
||||
${DORIS_TEST_BINARY_DIR}/exprs/bitmap_function_test
|
||||
${DORIS_TEST_BINARY_DIR}/exprs/hll_function_test
|
||||
${DORIS_TEST_BINARY_DIR}/exprs/encryption_functions_test
|
||||
|
||||
## Running geo unit test
|
||||
${DORIS_TEST_BINARY_DIR}/geo/geo_functions_test
|
||||
|
||||
Reference in New Issue
Block a user