diff --git a/be/src/exprs/CMakeLists.txt b/be/src/exprs/CMakeLists.txt index b4520619c7..a11c786085 100644 --- a/be/src/exprs/CMakeLists.txt +++ b/be/src/exprs/CMakeLists.txt @@ -22,7 +22,6 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/exprs") set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/exprs") add_library(Exprs - base64.cpp encryption_functions.cpp aggregate_functions.cpp agg_fn_evaluator.cpp diff --git a/be/src/exprs/base64.cpp b/be/src/exprs/base64.cpp deleted file mode 100644 index ca902b1e03..0000000000 --- a/be/src/exprs/base64.cpp +++ /dev/null @@ -1,169 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "base64.h" -#include -#include -#include - -static char s_encoding_table[] = { - 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', - 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', - 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', - 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', - 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', - 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', - 'w', 'x', 'y', 'z', '0', '1', '2', '3', - '4', '5', '6', '7', '8', '9', '+', '/' -}; - -static const char base64_pad = '='; - -static short s_decoding_table[256] = { - -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -2, -2, -1, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, 62, -2, -2, -2, 63, - 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -2, -2, -2, -2, -2, -2, - -2, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -2, -2, -2, -2, -2, - -2, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, - 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2 -}; - -static int s_mod_table[] = {0, 2, 1}; - -namespace doris { - -size_t base64_encode2(const unsigned char *data, - size_t length, - unsigned char *encoded_data) { - size_t output_length = (size_t) (4.0 * ceil((double) length / 3.0)); - - if (encoded_data == NULL) { - return 0; - } - - for (uint32_t i = 0, j = 0; i < length;) { - uint32_t octet_a = i < length ? data[i++] : 0; - uint32_t octet_b = i < length ? 
data[i++] : 0; - uint32_t octet_c = i < length ? data[i++] : 0; - uint32_t triple = (octet_a << 0x10) + (octet_b << 0x08) + octet_c; - - encoded_data[j++] = s_encoding_table[(triple >> 3 * 6) & 0x3F]; - encoded_data[j++] = s_encoding_table[(triple >> 2 * 6) & 0x3F]; - encoded_data[j++] = s_encoding_table[(triple >> 1 * 6) & 0x3F]; - encoded_data[j++] = s_encoding_table[(triple >> 0 * 6) & 0x3F]; - } - - for (int i = 0; i < s_mod_table[length % 3]; i++) { - encoded_data[output_length - 1 - i] = '='; - } - - return output_length; -} - -int64_t base64_decode2( - const char *data, - size_t length, - char *decoded_data) { - const char *current = data; - int ch = 0; - int i = 0; - int j = 0; - int k = 0; - - // run through the whole string, converting as we go - while ((ch = *current++) != '\0' && length-- > 0) { - if (ch == base64_pad) { - if (*current != '=' && (i % 4) == 1) { - return -1; - } - continue; - } - - ch = s_decoding_table[ch]; - // a space or some other separator character, we simply skip over - if (ch == -1) { - continue; - } else if (ch == -2) { - return -1; - } - - switch (i % 4) { - case 0: - decoded_data[j] = ch << 2; - break; - case 1: - decoded_data[j++] |= ch >> 4; - decoded_data[j] = (ch & 0x0f) << 4; - break; - case 2: - decoded_data[j++] |= ch >>2; - decoded_data[j] = (ch & 0x03) << 6; - break; - case 3: - decoded_data[j++] |= ch; - break; - default: - break; - } - - i++; - } - - k = j; - /* mop things up if we ended on a boundary */ - if (ch == base64_pad) { - switch (i % 4) { - case 1: - return 0; - case 2: - k++; - case 3: - decoded_data[k] = 0; - default: - break; - } - } - - decoded_data[j] = '\0'; - - return j; -} - -/*bool base64_decode2(const std::string& in, std::string* out) { - char* tmp = new char[in.length()]; - - int64_t len = base64_decode(in.c_str(), in.length(), tmp); - if (len < 0) { - delete[] tmp; - return false; - } - out->assign(tmp, len); - delete[] tmp; - return true; -} -*/ -} diff --git a/be/src/exprs/base64.h 
b/be/src/exprs/base64.h deleted file mode 100644 index 736724e214..0000000000 --- a/be/src/exprs/base64.h +++ /dev/null @@ -1,36 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef DORIS_BE_EXPRS_BASE64_H -#define DORIS_BE_EXPRS_BASE64_H - -#include -#include - -namespace doris { - -int64_t base64_decode2( - const char *data, - size_t length, - char *decoded_data); - -size_t base64_encode2(const unsigned char *data, - size_t length, - unsigned char *encoded_data); - -} -#endif diff --git a/be/src/exprs/encryption_functions.cpp b/be/src/exprs/encryption_functions.cpp index 3ca2fe038c..d3ba87fdbb 100644 --- a/be/src/exprs/encryption_functions.cpp +++ b/be/src/exprs/encryption_functions.cpp @@ -23,7 +23,7 @@ #include "exprs/expr.h" #include "util/debug_util.h" #include "runtime/tuple_row.h" -#include "exprs/base64.h" +#include "util/url_coding.h" #include #include "runtime/string_value.h" @@ -69,7 +69,7 @@ StringVal EncryptionFunctions::aes_decrypt(FunctionContext* ctx, } StringVal EncryptionFunctions::from_base64(FunctionContext* ctx, const StringVal &src) { - if (src.len == 0) { + if (src.len == 0 || src.is_null) { return StringVal::null(); } @@ -77,7 +77,7 @@ StringVal 
EncryptionFunctions::from_base64(FunctionContext* ctx, const StringVal boost::scoped_array p; p.reset(new char[cipher_len]); - int ret_code = base64_decode2((const char *)src.ptr, src.len, p.get()); + int ret_code = base64_decode((const char *)src.ptr, src.len, p.get()); if (ret_code < 0) { return StringVal::null(); } @@ -85,15 +85,15 @@ StringVal EncryptionFunctions::from_base64(FunctionContext* ctx, const StringVal } StringVal EncryptionFunctions::to_base64(FunctionContext* ctx, const StringVal &src) { - if (src.len == 0) { + if (src.len == 0 || src.is_null) { return StringVal::null(); } - int cipher_len = src.len * 4 / 3 + 1; + int cipher_len = (size_t) (4.0 * ceil((double) src.len / 3.0)); boost::scoped_array p; p.reset(new char[cipher_len]); - int ret_code = base64_encode2((unsigned char *)src.ptr, src.len, (unsigned char *)p.get()); + int ret_code = base64_encode((unsigned char *)src.ptr, src.len, (unsigned char *)p.get()); if (ret_code < 0) { return StringVal::null(); } diff --git a/be/src/util/aes_util.cpp b/be/src/util/aes_util.cpp index 9c5b67b739..feed2f9695 100644 --- a/be/src/util/aes_util.cpp +++ b/be/src/util/aes_util.cpp @@ -25,8 +25,6 @@ #include #include -#include "exprs/base64.h" - namespace doris { static const int AES_MAX_KEY_LENGTH = 256; diff --git a/be/src/util/url_coding.cpp b/be/src/util/url_coding.cpp index 7e2624f997..3e960c02d8 100644 --- a/be/src/util/url_coding.cpp +++ b/be/src/util/url_coding.cpp @@ -184,7 +184,7 @@ size_t base64_encode(const unsigned char* data, size_t length, unsigned char* en return output_length; } -static inline int64_t base64_decode(const char* data, size_t length, char* decoded_data) { +int64_t base64_decode(const char* data, size_t length, char* decoded_data) { const char* current = data; int ch = 0; int i = 0; diff --git a/be/src/util/url_coding.h b/be/src/util/url_coding.h index 37ca4a7fbf..8dc03795fe 100644 --- a/be/src/util/url_coding.h +++ b/be/src/util/url_coding.h @@ -39,13 +39,15 @@ void 
url_encode(const std::vector& in, std::string* out); // certain characters like ' '. bool url_decode(const std::string& in, std::string* out); -void base64url_encode(const std::string& in, std::string* out); -void base64_encode(const std::string& in, std::string* out); +void base64url_encode(const std::string& in, std::string *out); +void base64_encode(const std::string& in, std::string *out); +size_t base64_encode(const unsigned char *data, size_t length, unsigned char *encoded_data); // Utility method to decode base64 encoded strings. Also not extremely // performant. // Returns true unless the string could not be correctly decoded. bool base64_decode(const std::string& in, std::string* out); +int64_t base64_decode(const char *data, size_t length, char *decoded_data); // Replaces &, < and > with &amp;, &lt; and &gt; respectively. This is // not the full set of required encodings, but one that should be diff --git a/be/test/exprs/CMakeLists.txt b/be/test/exprs/CMakeLists.txt index d7af9962a0..0a6f6ec7a6 100644 --- a/be/test/exprs/CMakeLists.txt +++ b/be/test/exprs/CMakeLists.txt @@ -31,4 +31,5 @@ ADD_BE_TEST(timestamp_functions_test) ADD_BE_TEST(percentile_approx_test) ADD_BE_TEST(bitmap_function_test) ADD_BE_TEST(hll_function_test) +ADD_BE_TEST(encryption_functions_test) #ADD_BE_TEST(in-predicate-test) diff --git a/be/test/exprs/encryption_functions_test.cpp b/be/test/exprs/encryption_functions_test.cpp new file mode 100644 index 0000000000..9d92e545a8 --- /dev/null +++ b/be/test/exprs/encryption_functions_test.cpp @@ -0,0 +1,84 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "exprs/encryption_functions.h" +#include +#include +#include "exprs/anyval_util.h" +#include "testutil/function_utils.h" +#include "util/logging.h" + +#include + +namespace doris { +class EncryptionFunctionsTest : public testing::Test { +public: + EncryptionFunctionsTest() = default; + + void SetUp() { + utils = new FunctionUtils(); + ctx = utils->get_fn_ctx(); + } + void TearDown() { delete utils; } + +private: + FunctionUtils* utils; + FunctionContext* ctx; +}; + +TEST_F(EncryptionFunctionsTest, from_base64) { + std::unique_ptr context(new doris_udf::FunctionContext()); + { + StringVal result = EncryptionFunctions::from_base64(context.get(), doris_udf::StringVal("aGVsbG8=")); + StringVal expected = doris_udf::StringVal("hello"); + ASSERT_EQ(expected, result); + } + + { + StringVal result = EncryptionFunctions::from_base64(context.get(), doris_udf::StringVal::null()); + StringVal expected = doris_udf::StringVal::null(); + ASSERT_EQ(expected, result); + } +} + +TEST_F(EncryptionFunctionsTest, to_base64) { + std::unique_ptr context(new doris_udf::FunctionContext()); + + { + StringVal result = EncryptionFunctions::to_base64(context.get(), doris_udf::StringVal("hello")); + StringVal expected = doris_udf::StringVal("aGVsbG8="); + ASSERT_EQ(expected, result); + } + { + StringVal result = EncryptionFunctions::to_base64(context.get(), doris_udf::StringVal::null()); + StringVal expected = doris_udf::StringVal::null(); + ASSERT_EQ(expected, result); + } + +} + +} // namespace doris + +int main(int argc, char** argv) { + std::string conffile = 
std::string(getenv("DORIS_HOME")) + "/conf/be.conf"; + if (!doris::config::init(conffile.c_str(), false)) { + fprintf(stderr, "error read config file. \n"); + return -1; + } + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/be/test/util/aes_util_test.cpp b/be/test/util/aes_util_test.cpp index 979d88630b..c9e1ee0212 100644 --- a/be/test/util/aes_util_test.cpp +++ b/be/test/util/aes_util_test.cpp @@ -21,7 +21,7 @@ #include #include -#include "exprs/base64.h" +#include "util/url_coding.h" namespace doris { @@ -64,7 +64,7 @@ TEST_F(AesUtilTest, aes_test_by_case) { std::string source_2 = "doris test"; std::unique_ptr encrypt_1(new char[case_1.length()]); - int length_1 = base64_decode2(case_1.c_str(), case_1.length(), encrypt_1.get()); + int length_1 = base64_decode(case_1.c_str(), case_1.length(), encrypt_1.get()); std::unique_ptr decrypted_1(new char[case_1.length()]); int ret_code = AesUtil::decrypt(AES_128_ECB, (unsigned char *)encrypt_1.get(), length_1, (unsigned char *)_aes_key.c_str(), _aes_key.length(), NULL, true, (unsigned char *)decrypted_1.get()); @@ -73,7 +73,7 @@ TEST_F(AesUtilTest, aes_test_by_case) { ASSERT_EQ(source_1, decrypted_content_1); std::unique_ptr encrypt_2(new char[case_2.length()]); - int length_2 = base64_decode2(case_2.c_str(), case_2.length(), encrypt_2.get()); + int length_2 = base64_decode(case_2.c_str(), case_2.length(), encrypt_2.get()); std::unique_ptr decrypted_2(new char[case_2.length()]); ret_code = AesUtil::decrypt(AES_128_ECB, (unsigned char *)encrypt_2.get(), length_2, (unsigned char *)_aes_key.c_str(), _aes_key.length(), NULL, true, (unsigned char *)decrypted_2.get()); diff --git a/run-ut.sh b/run-ut.sh index f657058d8b..6c9fdd6bb9 100755 --- a/run-ut.sh +++ b/run-ut.sh @@ -190,6 +190,7 @@ ${DORIS_TEST_BINARY_DIR}/exprs/timestamp_functions_test ${DORIS_TEST_BINARY_DIR}/exprs/percentile_approx_test ${DORIS_TEST_BINARY_DIR}/exprs/bitmap_function_test 
${DORIS_TEST_BINARY_DIR}/exprs/hll_function_test +${DORIS_TEST_BINARY_DIR}/exprs/encryption_functions_test ## Running geo unit test ${DORIS_TEST_BINARY_DIR}/geo/geo_functions_test