// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsCodingIP.cpp
// and modified by Doris

#pragma once
|
|
#include <glog/logging.h>
|
|
|
|
#include "vec/columns/column.h"
|
|
#include "vec/columns/column_string.h"
|
|
#include "vec/columns/column_vector.h"
|
|
#include "vec/common/format_ip.h"
|
|
#include "vec/core/column_with_type_and_name.h"
|
|
#include "vec/data_types/data_type_ipv6.h"
|
|
#include "vec/data_types/data_type_number.h"
|
|
#include "vec/data_types/data_type_string.h"
|
|
#include "vec/functions/function.h"
|
|
#include "vec/functions/function_helpers.h"
|
|
#include "vec/functions/simple_function_factory.h"
|
|
|
|
namespace doris::vectorized {
|
|
|
|
/** If mask_tail_octets > 0, the last specified number of octets will be filled with "xxx".
|
|
*/
|
|
template <size_t mask_tail_octets, typename Name>
|
|
class FunctionIPv4NumToString : public IFunction {
|
|
private:
|
|
template <typename ArgType>
|
|
Status execute_type(Block& block, const ColumnWithTypeAndName& argument, size_t result) const {
|
|
using ColumnType = ColumnVector<ArgType>;
|
|
const ColumnPtr& column = argument.column;
|
|
|
|
if (const ColumnType* col = typeid_cast<const ColumnType*>(column.get())) {
|
|
const typename ColumnType::Container& vec_in = col->get_data();
|
|
auto col_res = ColumnString::create();
|
|
|
|
ColumnString::Chars& vec_res = col_res->get_chars();
|
|
ColumnString::Offsets& offsets_res = col_res->get_offsets();
|
|
|
|
vec_res.resize(vec_in.size() *
|
|
(IPV4_MAX_TEXT_LENGTH + 1)); /// the longest value is: 255.255.255.255\0
|
|
offsets_res.resize(vec_in.size());
|
|
char* begin = reinterpret_cast<char*>(vec_res.data());
|
|
char* pos = begin;
|
|
|
|
auto null_map = ColumnUInt8::create(vec_in.size(), 0);
|
|
size_t src_size = std::min(sizeof(ArgType), (unsigned long)4);
|
|
for (size_t i = 0; i < vec_in.size(); ++i) {
|
|
auto value = vec_in[i];
|
|
if (value < IPV4_MIN_NUM_VALUE || value > IPV4_MAX_NUM_VALUE) {
|
|
offsets_res[i] = pos - begin;
|
|
null_map->get_data()[i] = 1;
|
|
} else {
|
|
formatIPv4(reinterpret_cast<const unsigned char*>(&vec_in[i]), src_size, pos,
|
|
mask_tail_octets, "xxx");
|
|
offsets_res[i] = pos - begin;
|
|
}
|
|
}
|
|
|
|
vec_res.resize(pos - begin);
|
|
block.replace_by_position(
|
|
result, ColumnNullable::create(std::move(col_res), std::move(null_map)));
|
|
return Status::OK();
|
|
} else
|
|
return Status::RuntimeError("Illegal column {} of argument of function {}",
|
|
argument.column->get_name(), get_name());
|
|
}
|
|
|
|
public:
|
|
static constexpr auto name = "ipv4numtostring";
|
|
static FunctionPtr create() {
|
|
return std::make_shared<FunctionIPv4NumToString<mask_tail_octets, Name>>();
|
|
}
|
|
|
|
String get_name() const override { return name; }
|
|
|
|
size_t get_number_of_arguments() const override { return 1; }
|
|
|
|
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
|
|
return make_nullable(std::make_shared<DataTypeString>());
|
|
}
|
|
|
|
bool use_default_implementation_for_nulls() const override { return true; }
|
|
|
|
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
|
|
size_t result, size_t input_rows_count) const override {
|
|
ColumnWithTypeAndName& argument = block.get_by_position(arguments[0]);
|
|
|
|
switch (argument.type->get_type_id()) {
|
|
case TypeIndex::Int8:
|
|
return execute_type<Int8>(block, argument, result);
|
|
case TypeIndex::Int16:
|
|
return execute_type<Int16>(block, argument, result);
|
|
case TypeIndex::Int32:
|
|
return execute_type<Int32>(block, argument, result);
|
|
case TypeIndex::Int64:
|
|
return execute_type<Int64>(block, argument, result);
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return Status::RuntimeError(
|
|
"Illegal column {} of argument of function {}, expected Int8 or Int16 or Int32 or "
|
|
"Int64",
|
|
argument.name, get_name());
|
|
}
|
|
};
|
|
|
|
/// Selects how convertToIPv4 reacts to a row that fails to parse as IPv4:
/// Throw raises an Exception, Default stores 0, Null stores 0 and marks the row as NULL.
enum class IPStringToNumExceptionMode : uint8_t { Throw, Default, Null };
|
|
|
|
static inline bool tryParseIPv4(const char* pos, Int64& result_value) {
|
|
return parseIPv4whole(pos, reinterpret_cast<unsigned char*>(&result_value));
|
|
}
|
|
|
|
template <IPStringToNumExceptionMode exception_mode, typename ToColumn>
|
|
ColumnPtr convertToIPv4(ColumnPtr column, const PaddedPODArray<UInt8>* null_map = nullptr) {
|
|
const ColumnString* column_string = check_and_get_column<ColumnString>(column.get());
|
|
|
|
if (!column_string) {
|
|
throw Exception(ErrorCode::INVALID_ARGUMENT,
|
|
"Illegal column {} of argument of function {}, expected String",
|
|
column->get_name());
|
|
}
|
|
|
|
size_t column_size = column_string->size();
|
|
|
|
ColumnUInt8::MutablePtr col_null_map_to;
|
|
ColumnUInt8::Container* vec_null_map_to = nullptr;
|
|
|
|
if constexpr (exception_mode == IPStringToNumExceptionMode::Null) {
|
|
col_null_map_to = ColumnUInt8::create(column_size, false);
|
|
vec_null_map_to = &col_null_map_to->get_data();
|
|
}
|
|
|
|
auto col_res = ToColumn::create();
|
|
|
|
auto& vec_res = col_res->get_data();
|
|
vec_res.resize(column_size);
|
|
|
|
const ColumnString::Chars& vec_src = column_string->get_chars();
|
|
const ColumnString::Offsets& offsets_src = column_string->get_offsets();
|
|
size_t prev_offset = 0;
|
|
|
|
for (size_t i = 0; i < vec_res.size(); ++i) {
|
|
if (null_map && (*null_map)[i]) {
|
|
vec_res[i] = 0;
|
|
prev_offset = offsets_src[i];
|
|
if constexpr (exception_mode == IPStringToNumExceptionMode::Null) {
|
|
(*vec_null_map_to)[i] = true;
|
|
}
|
|
continue;
|
|
}
|
|
|
|
const char* src_start = reinterpret_cast<const char*>(&vec_src[prev_offset]);
|
|
size_t src_length = (i < vec_res.size() - 1) ? (offsets_src[i] - prev_offset)
|
|
: (vec_src.size() - prev_offset);
|
|
std::string src(src_start, src_length);
|
|
bool parse_result = tryParseIPv4(src.c_str(), vec_res[i]);
|
|
|
|
if (!parse_result) {
|
|
if constexpr (exception_mode == IPStringToNumExceptionMode::Throw) {
|
|
throw Exception(ErrorCode::INVALID_ARGUMENT, "Invalid IPv4 value");
|
|
} else if constexpr (exception_mode == IPStringToNumExceptionMode::Default) {
|
|
vec_res[i] = 0;
|
|
} else if constexpr (exception_mode == IPStringToNumExceptionMode::Null) {
|
|
(*vec_null_map_to)[i] = true;
|
|
vec_res[i] = 0;
|
|
}
|
|
}
|
|
|
|
prev_offset = offsets_src[i];
|
|
}
|
|
|
|
if constexpr (exception_mode == IPStringToNumExceptionMode::Null)
|
|
return ColumnNullable::create(std::move(col_res), std::move(col_null_map_to));
|
|
|
|
return col_res;
|
|
}
|
|
|
|
template <IPStringToNumExceptionMode exception_mode>
|
|
class FunctionIPv4StringToNum : public IFunction {
|
|
public:
|
|
static constexpr auto name = exception_mode == IPStringToNumExceptionMode::Throw
|
|
? "ipv4stringtonum"
|
|
: (exception_mode == IPStringToNumExceptionMode::Default
|
|
? "ipv4stringtonumordefault"
|
|
: "ipv4stringtonumornull");
|
|
|
|
static FunctionPtr create() {
|
|
return std::make_shared<FunctionIPv4StringToNum<exception_mode>>();
|
|
}
|
|
|
|
String get_name() const override { return name; }
|
|
|
|
size_t get_number_of_arguments() const override { return 1; }
|
|
|
|
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
|
|
if (!is_string(remove_nullable(arguments[0]))) {
|
|
throw Exception(ErrorCode::INVALID_ARGUMENT,
|
|
"Illegal type {} of argument of function {}", arguments[0]->get_name(),
|
|
get_name());
|
|
}
|
|
auto result_type = std::make_shared<DataTypeInt64>();
|
|
|
|
if constexpr (exception_mode == IPStringToNumExceptionMode::Null) {
|
|
return make_nullable(result_type);
|
|
}
|
|
|
|
return arguments[0]->is_nullable() ? make_nullable(result_type) : result_type;
|
|
}
|
|
|
|
bool use_default_implementation_for_nulls() const override { return false; }
|
|
|
|
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
|
|
size_t result, size_t input_rows_count) const override {
|
|
ColumnPtr column = block.get_by_position(arguments[0]).column;
|
|
ColumnPtr null_map_column;
|
|
const NullMap* null_map = nullptr;
|
|
if (column->is_nullable()) {
|
|
const auto* column_nullable = assert_cast<const ColumnNullable*>(column.get());
|
|
column = column_nullable->get_nested_column_ptr();
|
|
null_map_column = column_nullable->get_null_map_column_ptr();
|
|
null_map = &column_nullable->get_null_map_data();
|
|
}
|
|
|
|
auto col_res = convertToIPv4<exception_mode, ColumnInt64>(column, null_map);
|
|
|
|
if (null_map && !col_res->is_nullable()) {
|
|
block.replace_by_position(result,
|
|
ColumnNullable::create(IColumn::mutate(col_res),
|
|
IColumn::mutate(null_map_column)));
|
|
return Status::OK();
|
|
}
|
|
|
|
block.replace_by_position(result, col_res);
|
|
return Status::OK();
|
|
}
|
|
};
|
|
|
|
template <typename T>
|
|
void process_ipv6_column(const ColumnPtr& column, size_t input_rows_count,
|
|
ColumnString::Chars& vec_res, ColumnString::Offsets& offsets_res,
|
|
ColumnUInt8::MutablePtr& null_map, unsigned char* ipv6_address_data) {
|
|
auto* begin = reinterpret_cast<char*>(vec_res.data());
|
|
auto* pos = begin;
|
|
|
|
const auto* col = check_and_get_column<T>(column.get());
|
|
|
|
for (size_t i = 0; i < input_rows_count; ++i) {
|
|
bool is_empty = false;
|
|
|
|
if constexpr (std::is_same_v<T, ColumnIPv6>) {
|
|
const auto& vec_in = col->get_data();
|
|
memcpy(ipv6_address_data, reinterpret_cast<const unsigned char*>(&vec_in[i]),
|
|
IPV6_BINARY_LENGTH);
|
|
} else {
|
|
const auto str_ref = col->get_data_at(i);
|
|
const char* value = str_ref.data;
|
|
size_t value_size = str_ref.size;
|
|
|
|
if (value_size > IPV6_BINARY_LENGTH || value == nullptr || value_size == 0) {
|
|
is_empty = true;
|
|
} else {
|
|
memcpy(ipv6_address_data, value, value_size);
|
|
memset(ipv6_address_data + value_size, 0, IPV6_BINARY_LENGTH - value_size);
|
|
}
|
|
}
|
|
|
|
if (is_empty) {
|
|
offsets_res[i] = pos - begin;
|
|
null_map->get_data()[i] = 1;
|
|
} else {
|
|
formatIPv6(ipv6_address_data, pos);
|
|
offsets_res[i] = pos - begin;
|
|
}
|
|
}
|
|
}
|
|
|
|
class FunctionIPv6NumToString : public IFunction {
|
|
public:
|
|
static constexpr auto name = "ipv6numtostring";
|
|
static FunctionPtr create() { return std::make_shared<FunctionIPv6NumToString>(); }
|
|
|
|
String get_name() const override { return name; }
|
|
|
|
size_t get_number_of_arguments() const override { return 1; }
|
|
|
|
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
|
|
const auto* arg_string = check_and_get_data_type<DataTypeString>(arguments[0].get());
|
|
const auto* arg_ipv6 = check_and_get_data_type<DataTypeIPv6>(arguments[0].get());
|
|
if (!arg_ipv6 && !(arg_string))
|
|
throw Exception(ErrorCode::INVALID_ARGUMENT,
|
|
"Illegal type {} of argument of function {}, expected IPv6 or String",
|
|
arguments[0]->get_name(), get_name());
|
|
|
|
return make_nullable(std::make_shared<DataTypeString>());
|
|
}
|
|
|
|
bool use_default_implementation_for_nulls() const override { return true; }
|
|
|
|
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
|
|
size_t result, size_t input_rows_count) const override {
|
|
const ColumnPtr& column = block.get_by_position(arguments[0]).column;
|
|
const auto* col_ipv6 = check_and_get_column<ColumnIPv6>(column.get());
|
|
const auto* col_string = check_and_get_column<ColumnString>(column.get());
|
|
|
|
if (!col_ipv6 && !col_string)
|
|
throw Exception(ErrorCode::INVALID_ARGUMENT,
|
|
"Illegal column {} of argument of function {}, expected IPv6 or String",
|
|
column->get_name(), get_name());
|
|
|
|
auto col_res = ColumnString::create();
|
|
ColumnString::Chars& vec_res = col_res->get_chars();
|
|
ColumnString::Offsets& offsets_res = col_res->get_offsets();
|
|
vec_res.resize(input_rows_count * (IPV6_MAX_TEXT_LENGTH + 1));
|
|
offsets_res.resize(input_rows_count);
|
|
|
|
auto null_map = ColumnUInt8::create(input_rows_count, 0);
|
|
|
|
unsigned char ipv6_address_data[IPV6_BINARY_LENGTH];
|
|
|
|
if (col_ipv6) {
|
|
process_ipv6_column<ColumnIPv6>(column, input_rows_count, vec_res, offsets_res,
|
|
null_map, ipv6_address_data);
|
|
} else {
|
|
process_ipv6_column<ColumnString>(column, input_rows_count, vec_res, offsets_res,
|
|
null_map, ipv6_address_data);
|
|
}
|
|
|
|
block.replace_by_position(result,
|
|
ColumnNullable::create(std::move(col_res), std::move(null_map)));
|
|
return Status::OK();
|
|
}
|
|
};
|
|
|
|
} // namespace doris::vectorized
|