branch-2.1: [enhancement]Optimize GeoFunctions for const columns #34396 (#50067)

Cherry-picked from #34396

Co-authored-by: koarz <66543806+koarz@users.noreply.github.com>
This commit is contained in:
github-actions[bot]
2025-04-16 14:05:46 +08:00
committed by GitHub
parent fe634555bd
commit aa4b54952c
2 changed files with 289 additions and 116 deletions

View File

@ -44,33 +44,70 @@ struct StPoint {
DCHECK_EQ(arguments.size(), 2);
auto return_type = block.get_data_type(result);
auto column_x =
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
auto column_y =
block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
const auto& [left_column, left_const] =
unpack_if_const(block.get_by_position(arguments[0]).column);
const auto& [right_column, right_const] =
unpack_if_const(block.get_by_position(arguments[1]).column);
const auto size = column_x->size();
const auto size = std::max(left_column->size(), right_column->size());
MutableColumnPtr res = return_type->create_column();
GeoPoint point;
std::string buf;
for (int row = 0; row < size; ++row) {
auto cur_res = point.from_coord(column_x->operator[](row).get<Float64>(),
column_y->operator[](row).get<Float64>());
if (cur_res != GEO_PARSE_OK) {
res->insert_data(nullptr, 0);
continue;
}
buf.clear();
point.encode_to(&buf);
res->insert_data(buf.data(), buf.size());
if (left_const) {
const_vector(left_column, right_column, res, size, point, buf);
} else if (right_const) {
vector_const(left_column, right_column, res, size, point, buf);
} else {
vector_vector(left_column, right_column, res, size, point, buf);
}
block.replace_by_position(result, std::move(res));
return Status::OK();
}
// Appends the outcome of one coordinate parse to `res`.
//   cur_res - status returned by GeoPoint::from_coord for the current row.
//   res     - result column being built.
//   point   - the point that from_coord just filled in.
//   buf     - scratch string reused across rows to hold the encoded point.
// A status other than GEO_PARSE_OK appends an entry with nullptr data and
// length 0 (presumably a NULL in a nullable result column - TODO confirm);
// otherwise the encoded bytes of `point` are appended.
static void loop_do(GeoParseStatus& cur_res, MutableColumnPtr& res, GeoPoint& point,
                    std::string& buf) {
    if (cur_res == GEO_PARSE_OK) {
        // Reuse the caller's buffer instead of allocating a string per row.
        buf.clear();
        point.encode_to(&buf);
        res->insert_data(buf.data(), buf.size());
    } else {
        res->insert_data(nullptr, 0);
    }
}
static void const_vector(const ColumnPtr& left_column, const ColumnPtr& right_column,
MutableColumnPtr& res, const size_t size, GeoPoint& point,
std::string& buf) {
double x = left_column->operator[](0).get<Float64>();
for (int row = 0; row < size; ++row) {
auto cur_res = point.from_coord(x, right_column->operator[](row).get<Float64>());
loop_do(cur_res, res, point, buf);
}
}
// Builds one point per row when the y argument is a constant: y is read once
// from row 0 of `right_column`, while x is read from `left_column` for every
// row in [0, size). Each parse result is appended to `res` through loop_do.
//
// The per-row x value must come from `left_column`. `right_column` is only
// read at row 0 here; reading it at `row` would feed the y column in as x and
// index beyond row 0 of the column that holds the constant.
static void vector_const(const ColumnPtr& left_column, const ColumnPtr& right_column,
                         MutableColumnPtr& res, const size_t size, GeoPoint& point,
                         std::string& buf) {
    const double y = right_column->operator[](0).get<Float64>();
    // size_t index: avoids the signed/unsigned comparison against `size`.
    for (size_t row = 0; row < size; ++row) {
        auto cur_res = point.from_coord(left_column->operator[](row).get<Float64>(), y);
        loop_do(cur_res, res, point, buf);
    }
}
static void vector_vector(const ColumnPtr& left_column, const ColumnPtr& right_column,
MutableColumnPtr& res, const size_t size, GeoPoint& point,
std::string& buf) {
for (int row = 0; row < size; ++row) {
auto cur_res = point.from_coord(left_column->operator[](row).get<Float64>(),
right_column->operator[](row).get<Float64>());
loop_do(cur_res, res, point, buf);
}
}
};
struct StAsTextName {
@ -304,38 +341,79 @@ struct StAzimuth {
auto return_type = block.get_data_type(result);
MutableColumnPtr res = return_type->create_column();
auto p1 = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
auto p2 = block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
const auto size = p1->size();
const auto& [left_column, left_const] =
unpack_if_const(block.get_by_position(arguments[0]).column);
const auto& [right_column, right_const] =
unpack_if_const(block.get_by_position(arguments[1]).column);
const auto size = std::max(left_column->size(), right_column->size());
GeoPoint point1;
GeoPoint point2;
for (int row = 0; row < size; ++row) {
auto shape_value1 = p1->get_data_at(row);
auto pt1 = point1.decode_from(shape_value1.data, shape_value1.size);
if (!pt1) {
res->insert_default();
continue;
}
auto shape_value2 = p2->get_data_at(row);
auto pt2 = point2.decode_from(shape_value2.data, shape_value2.size);
if (!pt2) {
res->insert_default();
continue;
}
double angle = 0;
if (!GeoPoint::ComputeAzimuth(&point1, &point2, &angle)) {
res->insert_default();
continue;
}
res->insert_data(const_cast<const char*>((char*)&angle), 0);
if (left_const) {
const_vector(left_column, right_column, res, size, point1, point2);
} else if (right_const) {
vector_const(left_column, right_column, res, size, point1, point2);
} else {
vector_vector(left_column, right_column, res, size, point1, point2);
}
block.replace_by_position(result, std::move(res));
return Status::OK();
}
// Appends the azimuth result for one row to `res`.
//   pt1, pt2       - whether point1 / point2 were decoded successfully.
//   point1, point2 - the decoded points.
// A default value is appended when either decode failed or when
// GeoPoint::ComputeAzimuth reports failure; otherwise the computed angle is
// appended. ComputeAzimuth is only invoked when both decodes succeeded.
static void loop_do(bool& pt1, bool& pt2, GeoPoint& point1, GeoPoint& point2,
                    MutableColumnPtr& res) {
    double angle = 0;
    const bool both_decoded = pt1 && pt2;
    if (both_decoded && GeoPoint::ComputeAzimuth(&point1, &point2, &angle)) {
        // Length argument is passed as 0; presumably the result column
        // ignores it for fixed-width values - TODO confirm.
        res->insert_data(reinterpret_cast<const char*>(&angle), 0);
        return;
    }
    res->insert_default();
}
// Azimuth with a constant first point: row 0 of `left_column` is decoded into
// `point1` once, then every row in [0, size) of `right_column` is decoded into
// `point2` and handed to loop_do, which appends the result to `res`.
static void const_vector(const ColumnPtr& left_column, const ColumnPtr& right_column,
                         MutableColumnPtr& res, size_t size, GeoPoint& point1,
                         GeoPoint& point2) {
    const auto const_encoded = left_column->get_data_at(0);
    bool first_ok = point1.decode_from(const_encoded.data, const_encoded.size);
    for (int row = 0; row < size; ++row) {
        const auto encoded = right_column->get_data_at(row);
        bool second_ok = point2.decode_from(encoded.data, encoded.size);
        loop_do(first_ok, second_ok, point1, point2, res);
    }
}
// Azimuth with a constant second point: row 0 of `right_column` is decoded
// into `point2` once, then every row in [0, size) of `left_column` is decoded
// into `point1` and handed to loop_do, which appends the result to `res`.
static void vector_const(const ColumnPtr& left_column, const ColumnPtr& right_column,
                         MutableColumnPtr& res, size_t size, GeoPoint& point1,
                         GeoPoint& point2) {
    const auto const_encoded = right_column->get_data_at(0);
    bool second_ok = point2.decode_from(const_encoded.data, const_encoded.size);
    for (int row = 0; row < size; ++row) {
        const auto encoded = left_column->get_data_at(row);
        bool first_ok = point1.decode_from(encoded.data, encoded.size);
        loop_do(first_ok, second_ok, point1, point2, res);
    }
}
// Azimuth with no constant argument: for every row in [0, size) the row of
// `left_column` is decoded into `point1`, then the row of `right_column` into
// `point2`, and loop_do appends the result to `res`.
static void vector_vector(const ColumnPtr& left_column, const ColumnPtr& right_column,
                          MutableColumnPtr& res, size_t size, GeoPoint& point1,
                          GeoPoint& point2) {
    for (int row = 0; row < size; ++row) {
        const auto first_encoded = left_column->get_data_at(row);
        bool first_ok = point1.decode_from(first_encoded.data, first_encoded.size);

        const auto second_encoded = right_column->get_data_at(row);
        bool second_ok = point2.decode_from(second_encoded.data, second_encoded.size);

        loop_do(first_ok, second_ok, point1, point2, res);
    }
}
};
struct StAreaSquareMeters {
@ -463,36 +541,78 @@ struct StContains {
size_t result) {
DCHECK_EQ(arguments.size(), 2);
auto return_type = block.get_data_type(result);
auto shape1 = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
auto shape2 = block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
const auto& [left_column, left_const] =
unpack_if_const(block.get_by_position(arguments[0]).column);
const auto& [right_column, right_const] =
unpack_if_const(block.get_by_position(arguments[1]).column);
const auto size = std::max(left_column->size(), right_column->size());
const auto size = shape1->size();
MutableColumnPtr res = return_type->create_column();
int i;
std::vector<std::shared_ptr<GeoShape>> shapes = {nullptr, nullptr};
for (int row = 0; row < size; ++row) {
auto lhs_value = shape1->get_data_at(row);
auto rhs_value = shape2->get_data_at(row);
StringRef* strs[2] = {&lhs_value, &rhs_value};
for (i = 0; i < 2; ++i) {
shapes[i] = std::shared_ptr<GeoShape>(
GeoShape::from_encoded(strs[i]->data, strs[i]->size));
if (shapes[i] == nullptr) {
res->insert_default();
break;
}
}
if (i == 2) {
auto contains_value = shapes[0]->contains(shapes[1].get());
res->insert_data(const_cast<const char*>((char*)&contains_value), 0);
}
if (left_const) {
const_vector(left_column, right_column, res, size);
} else if (right_const) {
vector_const(left_column, right_column, res, size);
} else {
vector_vector(left_column, right_column, res, size);
}
block.replace_by_position(result, std::move(res));
return Status::OK();
}
// Evaluates "contains" for one row and appends the outcome to `res`.
//   lhs_value, rhs_value - encoded shapes for the left and right argument.
//   shapes               - two-slot scratch vector that receives the decoded shapes.
//   i                    - out-parameter: index of the first shape that failed
//                          to decode, or 2 when both decoded.
// Decoding stops at the first failure, in which case a default value is
// appended; otherwise the result of shapes[0]->contains(shapes[1]) is appended.
static void loop_do(StringRef& lhs_value, StringRef& rhs_value,
                    std::vector<std::shared_ptr<GeoShape>>& shapes, int& i,
                    MutableColumnPtr& res) {
    StringRef* const encoded[2] = {&lhs_value, &rhs_value};
    i = 0;
    while (i < 2) {
        shapes[i] = std::shared_ptr<GeoShape>(
                GeoShape::from_encoded(encoded[i]->data, encoded[i]->size));
        if (shapes[i] == nullptr) {
            // Leave `i` at the failing index, exactly as a `break` would.
            res->insert_default();
            return;
        }
        ++i;
    }
    auto contains_value = shapes[0]->contains(shapes[1].get());
    // Length argument is passed as 0; presumably the result column ignores
    // it for fixed-width values - TODO confirm.
    res->insert_data(reinterpret_cast<const char*>(&contains_value), 0);
}
// "Contains" with a constant left shape: the encoded left value is fetched
// once from row 0 of `left_column`; for every row in [0, size) the right value
// is fetched from `right_column` and both are passed to loop_do, which decodes
// them and appends the result to `res`.
static void const_vector(const ColumnPtr& left_column, const ColumnPtr& right_column,
                         MutableColumnPtr& res, const size_t size) {
    std::vector<std::shared_ptr<GeoShape>> shapes = {nullptr, nullptr};
    auto lhs_value = left_column->get_data_at(0);
    int i; // written by loop_do on every call
    for (int row = 0; row < size; ++row) {
        auto rhs_value = right_column->get_data_at(row);
        loop_do(lhs_value, rhs_value, shapes, i, res);
    }
}
// "Contains" with a constant right shape: the encoded right value is fetched
// once from row 0 of `right_column`; for every row in [0, size) the left value
// is fetched from `left_column` and both are passed to loop_do, which decodes
// them and appends the result to `res`.
static void vector_const(const ColumnPtr& left_column, const ColumnPtr& right_column,
                         MutableColumnPtr& res, const size_t size) {
    std::vector<std::shared_ptr<GeoShape>> shapes = {nullptr, nullptr};
    auto rhs_value = right_column->get_data_at(0);
    int i; // written by loop_do on every call
    for (int row = 0; row < size; ++row) {
        auto lhs_value = left_column->get_data_at(row);
        loop_do(lhs_value, rhs_value, shapes, i, res);
    }
}
// "Contains" with no constant argument: for every row in [0, size) the left
// and right encoded values are fetched from their columns and passed to
// loop_do, which decodes them and appends the result to `res`.
static void vector_vector(const ColumnPtr& left_column, const ColumnPtr& right_column,
                          MutableColumnPtr& res, const size_t size) {
    std::vector<std::shared_ptr<GeoShape>> shapes = {nullptr, nullptr};
    int i; // written by loop_do on every call
    for (int row = 0; row < size; ++row) {
        auto lhs_value = left_column->get_data_at(row);
        auto rhs_value = right_column->get_data_at(row);
        loop_do(lhs_value, rhs_value, shapes, i, res);
    }
}
// Open hook: performs no work for either argument and always reports success.
static Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) {
    const Status ok = Status::OK();
    return ok;
}
@ -638,7 +758,7 @@ struct StAsBinary {
auto return_type = block.get_data_type(result);
MutableColumnPtr res = return_type->create_column();
auto col = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
auto col = block.get_by_position(arguments[0]).column;
const auto size = col->size();
std::unique_ptr<GeoShape> shape;

View File

@ -40,21 +40,39 @@ using namespace ut_type;
// Exercises st_point with both arguments as plain Float64 inputs and with
// either argument declared as Consted.
TEST(VGeoFunctionsTest, function_geo_st_point_test) {
    std::string func_name = "st_point";

    // Expected encoding of the point (24.7, 56.7). Built once and shared by
    // every scenario below instead of being redeclared (and shadowed) inside
    // the first scope.
    GeoPoint point;
    auto cur_res = point.from_coord(24.7, 56.7);
    EXPECT_TRUE(cur_res == GEO_PARSE_OK);
    std::string buf;
    point.encode_to(&buf);

    DataSet data_set = {{{(double)24.7, (double)56.7}, buf},
                        {{Null(), (double)5}, Null()},
                        {{(double)5, Null()}, Null()}};

    {
        // Both arguments are plain Float64 inputs.
        InputTypeSet input_types = {TypeIndex::Float64, TypeIndex::Float64};

        static_cast<void>(check_function<DataTypeString, true>(func_name, input_types, data_set));
    }
    {
        // First argument Consted; each row is checked as its own data set.
        InputTypeSet input_types = {Consted {TypeIndex::Float64}, TypeIndex::Float64};

        for (const auto& line : data_set) {
            DataSet const_dataset = {line};
            static_cast<void>(
                    check_function<DataTypeString, true>(func_name, input_types, const_dataset));
        }
    }
    {
        // Second argument Consted; each row is checked as its own data set.
        InputTypeSet input_types = {TypeIndex::Float64, Consted {TypeIndex::Float64}};

        for (const auto& line : data_set) {
            DataSet const_dataset = {line};
            static_cast<void>(
                    check_function<DataTypeString, true>(func_name, input_types, const_dataset));
        }
    }
}
TEST(VGeoFunctionsTest, function_geo_st_as_text) {
@ -199,62 +217,97 @@ TEST(VGeoFunctionsTest, function_geo_st_angle) {
// Exercises st_azimuth with both arguments as plain String inputs and with
// either argument declared as Consted.
TEST(VGeoFunctionsTest, function_geo_st_azimuth) {
    std::string func_name = "st_azimuth";

    // Encoded points (0, 0) and (1, 0). Built once and shared by every
    // scenario below instead of being redeclared (and shadowed) inside the
    // first scope.
    GeoPoint point1;
    auto cur_res1 = point1.from_coord(0, 0);
    EXPECT_TRUE(cur_res1 == GEO_PARSE_OK);
    GeoPoint point2;
    auto cur_res2 = point2.from_coord(1, 0);
    EXPECT_TRUE(cur_res2 == GEO_PARSE_OK);

    std::string buf1;
    point1.encode_to(&buf1);
    std::string buf2;
    point2.encode_to(&buf2);

    DataSet data_set = {{{buf1, buf2}, (double)1.5707963267948966},
                        {{buf1, Null()}, Null()},
                        {{Null(), buf2}, Null()}};

    {
        // Both arguments are plain String inputs.
        InputTypeSet input_types = {TypeIndex::String, TypeIndex::String};

        static_cast<void>(check_function<DataTypeFloat64, true>(func_name, input_types, data_set));
    }
    {
        // Second argument Consted; each row is checked as its own data set.
        InputTypeSet input_types = {TypeIndex::String, Consted {TypeIndex::String}};

        for (const auto& line : data_set) {
            DataSet const_dataset = {line};
            static_cast<void>(
                    check_function<DataTypeFloat64, true>(func_name, input_types, const_dataset));
        }
    }
    {
        // First argument Consted; each row is checked as its own data set.
        InputTypeSet input_types = {Consted {TypeIndex::String}, TypeIndex::String};

        for (const auto& line : data_set) {
            DataSet const_dataset = {line};
            static_cast<void>(
                    check_function<DataTypeFloat64, true>(func_name, input_types, const_dataset));
        }
    }
}
// Exercises st_contains with both arguments as plain String inputs and with
// either argument declared as Consted.
TEST(VGeoFunctionsTest, function_geo_st_contains) {
    std::string func_name = "st_contains";

    // Fixtures are built once and shared by every scenario below instead of
    // being redeclared (and shadowed) inside the first scope:
    //   buf1 - encoded polygon with corners (0,0), (10,0), (10,10), (0,10)
    //   buf2 - encoded point (5, 5)
    //   buf3 - encoded point (50, 50)
    std::string buf1;
    std::string buf2;
    std::string buf3;
    GeoParseStatus status;

    std::string shape1 = std::string("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))");
    std::unique_ptr<GeoShape> shape(GeoShape::from_wkt(shape1.data(), shape1.size(), &status));
    EXPECT_TRUE(status == GEO_PARSE_OK);
    EXPECT_TRUE(shape != nullptr);
    shape->encode_to(&buf1);

    GeoPoint point1;
    status = point1.from_coord(5, 5);
    EXPECT_TRUE(status == GEO_PARSE_OK);
    point1.encode_to(&buf2);

    GeoPoint point2;
    status = point2.from_coord(50, 50);
    EXPECT_TRUE(status == GEO_PARSE_OK);
    point2.encode_to(&buf3);

    DataSet data_set = {{{buf1, buf2}, (uint8_t)1},
                        {{buf1, buf3}, (uint8_t)0},
                        {{buf1, Null()}, Null()},
                        {{Null(), buf3}, Null()}};

    {
        // Both arguments are plain String inputs.
        InputTypeSet input_types = {TypeIndex::String, TypeIndex::String};

        static_cast<void>(check_function<DataTypeUInt8, true>(func_name, input_types, data_set));
    }
    {
        // First argument Consted; each row is checked as its own data set.
        InputTypeSet input_types = {Consted {TypeIndex::String}, TypeIndex::String};

        for (const auto& line : data_set) {
            DataSet const_dataset = {line};
            static_cast<void>(
                    check_function<DataTypeUInt8, true>(func_name, input_types, const_dataset));
        }
    }
    {
        // Second argument Consted; each row is checked as its own data set.
        InputTypeSet input_types = {TypeIndex::String, Consted {TypeIndex::String}};

        for (const auto& line : data_set) {
            DataSet const_dataset = {line};
            static_cast<void>(
                    check_function<DataTypeUInt8, true>(func_name, input_types, const_dataset));
        }
    }
}
TEST(VGeoFunctionsTest, function_geo_st_circle) {