branch-2.1: [fix](func) Fix precision loss in ST_GeometryFromWKB coordinate parsing #46661 (#47263)

Cherry-picked from #46661

Co-authored-by: lw112 <131352377+felixwluo@users.noreply.github.com>
This commit is contained in:
github-actions[bot]
2025-01-24 17:34:37 +08:00
committed by GitHub
parent ad6737796f
commit 3b7cc67ec2
9 changed files with 176 additions and 84 deletions

View File

@ -57,7 +57,7 @@ GeoCircle::~GeoCircle() = default;
void print_s2point(std::ostream& os, const S2Point& point) {
S2LatLng coord(point);
os << std::setprecision(12) << coord.lng().degrees() << " " << coord.lat().degrees();
os << std::setprecision(15) << coord.lng().degrees() << " " << coord.lat().degrees();
}
static inline bool is_valid_lng_lat(double lng, double lat) {

View File

@ -122,108 +122,169 @@ WkbParseContext* WkbParse::read(std::istream& is, WkbParseContext* ctx) {
auto size = is.tellg();
is.seekg(0, std::ios::beg);
std::vector<unsigned char> buf(static_cast<size_t>(size));
is.read(reinterpret_cast<char*>(buf.data()), static_cast<std::streamsize>(size));
ctx->dis = ByteOrderDataInStream(buf.data(), buf.size()); // will default to machine endian
ctx->shape = readGeometry(ctx).release();
if (!ctx->shape) {
// Check if size is valid
if (size <= 0) {
ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
return ctx;
}
std::vector<unsigned char> buf(static_cast<size_t>(size));
if (!is.read(reinterpret_cast<char*>(buf.data()), static_cast<std::streamsize>(size))) {
ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
return ctx;
}
// Ensure we have at least one byte for byte order
if (buf.empty()) {
ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
return ctx;
}
// First read the byte order using machine endian
auto byteOrder = buf[0];
// Create ByteOrderDataInStream with the correct byte order
if (byteOrder == byteOrder::wkbNDR) {
ctx->dis = ByteOrderDataInStream(buf.data(), buf.size());
ctx->dis.setOrder(ByteOrderValues::ENDIAN_LITTLE);
} else if (byteOrder == byteOrder::wkbXDR) {
ctx->dis = ByteOrderDataInStream(buf.data(), buf.size());
ctx->dis.setOrder(ByteOrderValues::ENDIAN_BIG);
} else {
ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
return ctx;
}
std::unique_ptr<GeoShape> shape = readGeometry(ctx);
if (!shape) {
ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
return ctx;
}
ctx->shape = shape.release();
return ctx;
}
std::unique_ptr<GeoShape> WkbParse::readGeometry(WkbParseContext* ctx) {
// determine byte order
unsigned char byteOrder = ctx->dis.readByte();
try {
// Ensure we have enough data to read
if (ctx->dis.size() < 5) { // At least 1 byte for order and 4 bytes for type
return nullptr;
}
// default is machine endian
if (byteOrder == byteOrder::wkbNDR) {
ctx->dis.setOrder(ByteOrderValues::ENDIAN_LITTLE);
} else if (byteOrder == byteOrder::wkbXDR) {
ctx->dis.setOrder(ByteOrderValues::ENDIAN_BIG);
}
// Skip the byte order as we've already handled it
ctx->dis.readByte();
uint32_t typeInt = ctx->dis.readUnsigned();
uint32_t typeInt = ctx->dis.readUnsigned();
uint32_t geometryType = (typeInt & 0xffff) % 1000;
// Check if geometry has SRID
bool has_srid = (typeInt & WKB_SRID_FLAG) != 0;
std::unique_ptr<GeoShape> shape;
// Read SRID if present
if (has_srid) {
ctx->dis.readUnsigned(); // Read and store SRID if needed
}
switch (geometryType) {
case wkbType::wkbPoint:
shape.reset(readPoint(ctx).release());
break;
case wkbType::wkbLine:
shape.reset(readLine(ctx).release());
break;
case wkbType::wkbPolygon:
shape.reset(readPolygon(ctx).release());
break;
default:
// Get the base geometry type
uint32_t geometryType = typeInt & WKB_TYPE_MASK;
std::unique_ptr<GeoShape> shape;
switch (geometryType) {
case wkbType::wkbPoint:
shape = readPoint(ctx);
break;
case wkbType::wkbLine:
shape = readLine(ctx);
break;
case wkbType::wkbPolygon:
shape = readPolygon(ctx);
break;
default:
return nullptr;
}
return shape;
} catch (...) {
// Handle any exceptions from reading operations
return nullptr;
}
return shape;
}
std::unique_ptr<GeoPoint> WkbParse::readPoint(WkbParseContext* ctx) {
GeoCoordinateList coords = WkbParse::readCoordinateList(1, ctx);
std::unique_ptr<GeoPoint> point = GeoPoint::create_unique();
if (point->from_coord(coords.list[0]) == GEO_PARSE_OK) {
return point;
} else {
if (coords.list.empty()) {
return nullptr;
}
std::unique_ptr<GeoPoint> point = GeoPoint::create_unique();
if (!point || point->from_coord(coords.list[0]) != GEO_PARSE_OK) {
return nullptr;
}
return point;
}
std::unique_ptr<GeoLine> WkbParse::readLine(WkbParseContext* ctx) {
uint32_t size = ctx->dis.readUnsigned();
minMemSize(wkbLine, size, ctx);
GeoCoordinateList coords = WkbParse::readCoordinateList(size, ctx);
std::unique_ptr<GeoLine> line = GeoLine::create_unique();
if (line->from_coords(coords) == GEO_PARSE_OK) {
return line;
} else {
if (minMemSize(wkbLine, size, ctx) != GEO_PARSE_OK) {
return nullptr;
}
GeoCoordinateList coords = WkbParse::readCoordinateList(size, ctx);
if (coords.list.empty()) {
return nullptr;
}
std::unique_ptr<GeoLine> line = GeoLine::create_unique();
if (!line || line->from_coords(coords) != GEO_PARSE_OK) {
return nullptr;
}
return line;
}
std::unique_ptr<GeoPolygon> WkbParse::readPolygon(WkbParseContext* ctx) {
uint32_t num_loops = ctx->dis.readUnsigned();
minMemSize(wkbPolygon, num_loops, ctx);
if (minMemSize(wkbPolygon, num_loops, ctx) != GEO_PARSE_OK) {
return nullptr;
}
GeoCoordinateListList coordss;
for (int i = 0; i < num_loops; ++i) {
for (uint32_t i = 0; i < num_loops; ++i) {
uint32_t size = ctx->dis.readUnsigned();
GeoCoordinateList* coords = new GeoCoordinateList();
if (size < 3) { // A polygon loop must have at least 3 points
return nullptr;
}
auto coords = std::make_unique<GeoCoordinateList>();
*coords = WkbParse::readCoordinateList(size, ctx);
coordss.add(coords);
if (coords->list.empty()) {
return nullptr;
}
coordss.add(coords.release());
}
std::unique_ptr<GeoPolygon> polygon = GeoPolygon::create_unique();
if (polygon->from_coords(coordss) == GEO_PARSE_OK) {
return polygon;
} else {
if (!polygon || polygon->from_coords(coordss) != GEO_PARSE_OK) {
return nullptr;
}
return polygon;
}
GeoCoordinateList WkbParse::readCoordinateList(unsigned size, WkbParseContext* ctx) {
GeoCoordinateList coords;
for (uint32_t i = 0; i < size; i++) {
readCoordinate(ctx);
if (!readCoordinate(ctx)) {
return GeoCoordinateList();
}
unsigned int j = 0;
GeoCoordinate coord;
coord.x = ctx->ordValues[j++];
coord.y = ctx->ordValues[j++];
coords.add(coord);
}
return coords;
}

View File

@ -17,8 +17,7 @@
#pragma once
#include <stdint.h>
#include <cstdint>
#include <iosfwd>
#include <memory>
@ -34,6 +33,17 @@ class GeoLine;
class GeoPoint;
class GeoPolygon;
// WKB format constants
// According to OpenGIS Implementation Specification:
// The high bit of the type value is set to 1 if the WKB contains a SRID.
// Reference: OpenGIS Implementation Specification for Geographic information - Simple feature access - Part 1: Common architecture
// Bit mask to check if WKB contains SRID
constexpr uint32_t WKB_SRID_FLAG = 0x20000000;
// The geometry type is stored in the least significant byte of the type value
// Bit mask to extract the base geometry type
constexpr uint32_t WKB_TYPE_MASK = 0xFF;
class WkbParse {
public:
static GeoParseStatus parse_wkb(std::istream& is, GeoShape** shape);

View File

@ -3,7 +3,7 @@
100 test 2021-01-02
-- !select_geo1 --
POINT (123.123456789 89.123456789)
POINT (123.123456789012 89.123456789)
-- !select2 --
20230101

View File

@ -1335,32 +1335,32 @@ POINT (98.35620117 36.939093)
-- !sql_st_circle_Double_Double_Double --
\N
CIRCLE ((-46.35620117 39.939093), 3.32100009918)
CIRCLE ((-74.35620117 79.939093), 66.3209991455)
CIRCLE ((126.35620117 -39.939093), 5.30000019073)
CIRCLE ((16.35620117 19.939093), 7.32100009918)
CIRCLE ((43.35620117 35.939093), 2.32100009918)
CIRCLE ((47.35620117 26.939093), 33.3209991455)
CIRCLE ((5 5), 4.32100009918)
CIRCLE ((90.35620117 39.939093), 100.320999146)
CIRCLE ((90.35620117 47.939093), 88.3209991455)
CIRCLE ((90.35620117 49.939093), 76.3209991455)
CIRCLE ((90.35620117 59.939093), 75.3209991455)
CIRCLE ((98.35620117 36.939093), 45.3209991455)
CIRCLE ((-46.35620117 39.939093), 3.32100009918213)
CIRCLE ((-74.35620117 79.939093), 66.3209991455078)
CIRCLE ((126.35620117 -39.939093), 5.30000019073486)
CIRCLE ((16.35620117 19.939093), 7.32100009918213)
CIRCLE ((43.35620117 35.939093), 2.32100009918213)
CIRCLE ((47.35620117 26.939093), 33.3209991455078)
CIRCLE ((5 5), 4.32100009918213)
CIRCLE ((90.35620117 39.939093), 100.320999145508)
CIRCLE ((90.35620117 47.939093), 88.3209991455078)
CIRCLE ((90.35620117 49.939093), 76.3209991455078)
CIRCLE ((90.35620117 59.939093), 75.3209991455078)
CIRCLE ((98.35620117 36.939093), 45.3209991455078)
-- !sql_st_circle_Double_Double_Double_notnull --
CIRCLE ((-46.35620117 39.939093), 3.32100009918)
CIRCLE ((-74.35620117 79.939093), 66.3209991455)
CIRCLE ((126.35620117 -39.939093), 5.30000019073)
CIRCLE ((16.35620117 19.939093), 7.32100009918)
CIRCLE ((43.35620117 35.939093), 2.32100009918)
CIRCLE ((47.35620117 26.939093), 33.3209991455)
CIRCLE ((5 5), 4.32100009918)
CIRCLE ((90.35620117 39.939093), 100.320999146)
CIRCLE ((90.35620117 47.939093), 88.3209991455)
CIRCLE ((90.35620117 49.939093), 76.3209991455)
CIRCLE ((90.35620117 59.939093), 75.3209991455)
CIRCLE ((98.35620117 36.939093), 45.3209991455)
CIRCLE ((-46.35620117 39.939093), 3.32100009918213)
CIRCLE ((-74.35620117 79.939093), 66.3209991455078)
CIRCLE ((126.35620117 -39.939093), 5.30000019073486)
CIRCLE ((16.35620117 19.939093), 7.32100009918213)
CIRCLE ((43.35620117 35.939093), 2.32100009918213)
CIRCLE ((47.35620117 26.939093), 33.3209991455078)
CIRCLE ((5 5), 4.32100009918213)
CIRCLE ((90.35620117 39.939093), 100.320999145508)
CIRCLE ((90.35620117 47.939093), 88.3209991455078)
CIRCLE ((90.35620117 49.939093), 76.3209991455078)
CIRCLE ((90.35620117 59.939093), 75.3209991455078)
CIRCLE ((98.35620117 36.939093), 45.3209991455078)
-- !sql_st_contains_Varchar_Varchar --
\N

View File

@ -3,5 +3,5 @@
100 test 2021-01-02
-- !select_geo1 --
POINT (123.123456789 89.123456789)
POINT (123.123456789012 89.123456789)

View File

@ -113,3 +113,18 @@ LINESTRING (1 1, 2 2)
-- !sql --
POLYGON ((114.104486 22.547119, 114.093758 22.547753, 114.096504 22.532057, 114.104229 22.539826, 114.106203 22.54268, 114.104486 22.547119))
-- !sql --
POINT (117.194767000297 36.46326301008)
-- !sql --
1.0
-- !sql --
POINT (1.23456789 2.34567891)
-- !sql --
0.9999999999999
-- !sql --
1.0000000000001

View File

@ -3,5 +3,5 @@
100 test 2021-01-02
-- !select_geo1 --
POINT (123.123456789 89.123456789)
POINT (123.123456789012 89.123456789)

View File

@ -70,4 +70,10 @@ suite("test_gis_function", "arrow_flight_sql") {
qt_sql "SELECT ST_AsText(ST_GeomFromWKB(ST_AsBinary(ST_GeometryFromText(\"LINESTRING (1 1, 2 2)\"))));"
qt_sql "SELECT ST_AsText(ST_GeomFromWKB(ST_AsBinary(ST_Polygon(\"POLYGON ((114.104486 22.547119,114.093758 22.547753,114.096504 22.532057,114.104229 22.539826,114.106203 22.542680,114.104486 22.547119))\"))));"
qt_sql "SELECT ST_AsText(ST_GeometryFromWKB('01010000208A11000068270210774C5D40B8DECA334C3B4240'));"
qt_sql "SELECT ST_X(ST_GeometryFromWKB(ST_AsBinary(ST_Point(1, 1))));"
qt_sql "SELECT ST_AsText(ST_GeometryFromWKB(ST_AsBinary(ST_Point(1.23456789, 2.34567891))));"
qt_sql "SELECT ST_X(ST_Point(0.9999999999999, 1));"
qt_sql "SELECT ST_Y(ST_Point(1, 1.0000000000001));"
}