Fix integer-overflow corner cases in substring() functions

2024-03-06 15:48:25 +08:00
parent 0cd970a846
commit 4aec02ea72
6 changed files with 196 additions and 77 deletions
--- a/src/common/backend/utils/adt/varbit.cpp
+++ b/src/common/backend/utils/adt/varbit.cpp
@ -945,15 +945,19 @@ static VarBit* bitsubstring(VarBit* arg, int32 s, int32 l, bool length_not_speci
    /* If we do not have an upper bound, use end of string */
    if (length_not_specified) {
        e1 = bitlen + 1;
-    } else {
-        e = s + l;
-
+    } else if (l < 0) {
+        /* SQL99 says to throw an error for E < S, i.e., negative length */
+        ereport(ERROR,
+                (errcode(ERRCODE_SUBSTRING_ERROR),
+                 errmsg("negative substring length not allowed")));
+        e1 = -1;				/* silence stupider compilers */
+    } else if (pg_add_s32_overflow(s, l, &e)) {
        /*
-         * A negative value for L is the only way for the end position to be
-         * before the start. SQL99 says to throw an error.
+         * L could be large enough for S + L to overflow, in which case the
+         * substring must run to end of string.
         */
-        if (e < s)
-            ereport(ERROR, (errcode(ERRCODE_SUBSTRING_ERROR), errmsg("negative substring length not allowed")));
+        e1 = bitlen + 1;
+    } else {
        e1 = Min(e, bitlen + 1);
    }
    if (s1 > bitlen || e1 <= s1) {
--- a/src/common/backend/utils/adt/varlena.cpp
+++ b/src/common/backend/utils/adt/varlena.cpp
@ -1211,7 +1211,14 @@ text* text_substring(Datum str, int32 start, int32 length, bool length_not_speci
    int32 S = start; /* start position */
    int32 S1;        /* adjusted start position */
    int32 L1;        /* adjusted substring length */
+    int32 E;         /* end position */

+    /*
+     * SQL99 says S can be zero or negative, but we still must fetch from the
+     * start of the string.
+     */
+    S1 = Max(S, 1);
+    
    /* life is easy if the encoding max length is 1 */
    if (eml == 1) {
        S1 = Max(S, 1);
@ -1219,22 +1226,17 @@ text* text_substring(Datum str, int32 start, int32 length, bool length_not_speci
        if (length_not_specified) /* special case - get length to end of
                                   * string */
            L1 = -1;
-        else {
-            /* end position */
-            int E = S + length;
-
+        else if (length < 0) {
+            /* SQL99 says to throw an error for E < S, i.e., negative length */
+            ereport(ERROR, (errcode(ERRCODE_SUBSTRING_ERROR), errmsg("negative substring length not allowed")));
+            L1 = -1; /* silence stupider compilers */
+        } else if (pg_add_s32_overflow(S, length, &E)) {
            /*
-             * A negative value for L is the only way for the end position to
-             * be before the start. SQL99 says to throw an error.
-             */
-            if (E < S)
-                ereport(ERROR, (errcode(ERRCODE_SUBSTRING_ERROR), errmsg("negative substring length not allowed")));
-
-            /*
-             * A zero or negative value for the end position can happen if the
-             * start was negative or one. SQL99 says to return a zero-length
-             * string.
+             * L could be large enough for S + L to overflow, in which case
+             * the substring must run to end of string.
             */
+            L1 = -1;
+        } else {
            if (E < 1)
                return cstring_to_text("");

@ -1243,8 +1245,8 @@ text* text_substring(Datum str, int32 start, int32 length, bool length_not_speci

        /*
         * If the start position is past the end of the string, SQL99 says to
-         * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
-         * that for us. Convert to zero-based starting position
+         * return a zero-length string -- DatumGetTextPSlice() will do that
+         * for us.  We need only convert S1 to zero-based starting position.
         */
        return DatumGetTextPSlice(str, S1 - 1, L1);
    } else if (eml > 1) {
@ -1263,12 +1265,6 @@ text* text_substring(Datum str, int32 start, int32 length, bool length_not_speci
        char* s = NULL;
        text* ret = NULL;

-        /*
-         * if S is past the end of the string, the tuple toaster will return a
-         * zero-length string to us
-         */
-        S1 = Max(S, 1);
-
        /*
         * We need to start at position zero because there is no way to know
         * in advance which byte offset corresponds to the supplied start
@ -1279,16 +1275,17 @@ text* text_substring(Datum str, int32 start, int32 length, bool length_not_speci
        if (length_not_specified) /* special case - get length to end of
                                   * string */
            slice_size = L1 = -1;
-        else {
-            int E = S + length;
-
+        else if (length < 0) {
+            /* SQL99 says to throw an error for E < S, i.e., negative length */
+            ereport(ERROR, (errcode(ERRCODE_SUBSTRING_ERROR), errmsg("negative substring length not allowed")));
+            slice_size = L1 = -1; /* silence stupider compilers */
+        } else if (pg_add_s32_overflow(S, length, &E)) {
            /*
-             * A negative value for L is the only way for the end position to
-             * be before the start. SQL99 says to throw an error.
+             * L could be large enough for S + L to overflow, in which case
+             * the substring must run to end of string.
             */
-            if (E < S)
-                ereport(ERROR, (errcode(ERRCODE_SUBSTRING_ERROR), errmsg("negative substring length not allowed")));
-
+            slice_size = L1 = -1;
+        } else {
            /*
             * A zero or negative value for the end position can happen if the
             * start was negative or one. SQL99 says to return a zero-length
@ -1306,8 +1303,11 @@ text* text_substring(Datum str, int32 start, int32 length, bool length_not_speci
            /*
             * Total slice size in bytes can't be any longer than the start
             * position plus substring length times the encoding max length.
+             * If that overflows, we can just use -1.
             */
-            slice_size = (S1 + L1) * eml;
+            if (pg_mul_s32_overflow(E, eml, &slice_size)) {
+                slice_size = -1;
+            }
        }

        /*
@ -3278,8 +3278,9 @@ Datum bytea_substr_no_len(PG_FUNCTION_ARGS)

 bytea* bytea_substring(Datum str, int S, int L, bool length_not_specified)
 {
-    int S1; /* adjusted start position */
-    int L1; /* adjusted substring length */
+    int32 S1; /* adjusted start position */
+    int32 L1; /* adjusted substring length */
+    int32		E;				/* end position */

    S1 = Max(S, 1);

@ -3289,17 +3290,17 @@ bytea* bytea_substring(Datum str, int S, int L, bool length_not_specified)
         * end of the string if we pass it a negative value for length.
         */
        L1 = -1;
-    } else {
-        /* end position */
-        int E = S + L;
-
+    } else if (L < 0) {
+        /* SQL99 says to throw an error for E < S, i.e., negative length */
+        ereport(ERROR, (errcode(ERRCODE_SUBSTRING_ERROR), errmsg("negative substring length not allowed")));
+        L1 = -1; /* silence stupider compilers */
+    } else if (pg_add_s32_overflow(S, L, &E)) {
        /*
-         * A negative value for L is the only way for the end position to be
-         * before the start. SQL99 says to throw an error.
+         * L could be large enough for S + L to overflow, in which case the
+         * substring must run to end of string.
         */
-        if (E < S)
-            ereport(ERROR, (errcode(ERRCODE_SUBSTRING_ERROR), errmsg("negative substring length not allowed")));
-
+        L1 = -1;
+    } else {
        /*
         * A zero or negative value for the end position can happen if the
         * start was negative or one. SQL99 says to return a zero-length
@ -3314,7 +3315,7 @@ bytea* bytea_substring(Datum str, int S, int L, bool length_not_specified)
    /*
     * If the start position is past the end of the string, SQL99 says to
     * return a zero-length string -- DatumGetByteaPSlice() will do that for
-     * us. Convert to zero-based starting position
+     * us.  We need only convert S1 to zero-based starting position.
     */
    return DatumGetByteaPSlice(str, S1 - 1, L1);
 }
--- a/src/gausskernel/runtime/vecexecutor/vecprimitive/varchar.inl
+++ b/src/gausskernel/runtime/vecexecutor/vecprimitive/varchar.inl
@ -27,7 +27,7 @@
 #include "fmgr.h"
 #include "vecexecutor/vecfunc.h"
 #include "access/tuptoaster.h"
-
+#include "common/int.h"

 /* Comparison Functions used for bpchar type
 * vectorize function
@ -332,7 +332,7 @@ vec_text_substr(Datum str, int32 start, int32 length, bool *is_null, mblen_conve
 	int32	i;
 	char	*p = NULL;
 	char	*s = NULL;
-	int		E;					/* end position */
+	int32	E;					/* end position */
 	int32	slice_size;
 	int32	slice_strlen;
 	int32	E1;
@ -382,28 +382,25 @@ vec_text_substr(Datum str, int32 start, int32 length, bool *is_null, mblen_conve
 	}
 	else
 	{
-		E = S + length;
-		/*
-		 * A negative value for L is the only way for the end position to
-		 * be before the start. SQL99 says to throw an error.
-		 */
-		if (E < S)
-		{
+        /*
+         * A negative value for L is the only way for the end position to
+         * be before the start. SQL99 says to throw an error.
+         */
+        if (length < 0) {
+            /* SQL99 says to throw an error for E < S, i.e., negative length */
+            ereport(ERROR, (errcode(ERRCODE_SUBSTRING_ERROR), errmsg("negative substring length not allowed")));
+            L1 = -1; /* silence stupider compilers */
+        } else if (pg_add_s32_overflow(S, length, &E)) {
+            /*
+             * L could be large enough for S + L to overflow, in which case the
+             * substring must run to end of string.
+             */
+            L1 = -1;
+        } else {
+            /* do nothing */
+        }

-			if (length < 0)
-			{
-				ereport(ERROR,
-						(errcode(ERRCODE_SUBSTRING_ERROR),
-						 errmsg("negative substring length not allowed")));
-			}
-			else
-			{
-				ereport(ERROR,
-						(errcode(ERRCODE_SUBSTRING_ERROR),
-						errmsg("the giving length is too long, it lets the end postion integer out of range")));
-			}
-		}
-		/*
+        /*
 		 * A zero or negative value for the end position can happen if the
 		 * start was negative or one. SQL99 says to return a zero-length
 		 * string.
--- a/src/test/regress/expected/bit.out
+++ b/src/test/regress/expected/bit.out
@ -109,6 +109,94 @@ SELECT v,
 01010101010 | 1010    | 01010    | 101010
 (4 rows)

+-- test overflow cases
+SELECT SUBSTRING('01010101'::bit(8) FROM 2 FOR 2147483646) AS "1010101";
+ 1010101 
+---------
+ 1010101
+(1 row)
+
+SELECT SUBSTRING('01010101'::bit(8) FROM -10 FOR 2147483646) AS "01010101";
+ 01010101 
+----------
+ 01010101
+(1 row)
+
+SELECT SUBSTRING('01010101'::bit(8) FROM -10 FOR -2147483646) AS "error";
+ERROR:  negative substring length not allowed
+CONTEXT:  referenced column: error
+SELECT SUBSTRING('01010101'::varbit FROM 2 FOR 2147483646) AS "1010101";
+ 1010101 
+---------
+ 1010101
+(1 row)
+
+SELECT SUBSTRING('01010101'::varbit FROM -10 FOR 2147483646) AS "01010101";
+ 01010101 
+----------
+ 01010101
+(1 row)
+
+SELECT SUBSTRING('01010101'::varbit FROM -10 FOR -2147483646) AS "error";
+ERROR:  negative substring length not allowed
+CONTEXT:  referenced column: error
+-- test overflow cases
+set bytea_output to escape;
+SELECT SUBSTRING('string' FROM 2 FOR 2147483646) AS "tring";
+ tring 
+-------
+ tring
+(1 row)
+
+SELECT SUBSTRING('string' FROM -10 FOR 2147483646) AS "string";
+ string 
+--------
+ string
+(1 row)
+
+SELECT SUBSTRING('string' FROM -10 FOR -2147483646) AS "error";
+ERROR:  negative substring length not allowed
+CONTEXT:  referenced column: error
+SELECT SUBSTRING('1234567890'::bytea FROM 3) "34567890";
+ 34567890 
+----------
+ 34567890
+(1 row)
+
+SELECT SUBSTRING('1234567890'::bytea FROM 4 FOR 3) AS "456";
+ 456 
+-----
+ 456
+(1 row)
+
+SELECT SUBSTRING('string'::bytea FROM 2 FOR 2147483646) AS "tring";
+ tring 
+-------
+ tring
+(1 row)
+
+SELECT SUBSTRING('string'::bytea FROM -10 FOR 2147483646) AS "string";
+ string 
+--------
+ string
+(1 row)
+
+SELECT SUBSTRING('string'::bytea FROM -10 FOR -2147483646) AS "error";
+ERROR:  negative substring length not allowed
+CONTEXT:  referenced column: error
+SELECT left('abcde', 2147483646) as "abcde";
+ abcde 
+-------
+ abcde
+(1 row)
+
+SELECT left('abcde', 2147483647) as "abcde";
+ abcde 
+-------
+ abcde
+(1 row)
+
+reset bytea_output;
 --- Bit operations
 DROP TABLE varbit_table;
 CREATE TABLE varbit_table (a BIT VARYING(16), b BIT VARYING(16));
--- a/src/test/regress/expected/hw_functions.out
+++ b/src/test/regress/expected/hw_functions.out
@ -206,11 +206,17 @@ ERROR:  integer out of range
 CONTEXT:  SQL function "substr" during inlining
 referenced column: substr
 select substr('abc', 2, 2147483647);
-ERROR:  the giving length is too long, it lets the end postion integer out of range
-CONTEXT:  referenced column: substr
+ substr 
+--------
+ bc
+(1 row)
+
 select substr('jkeifkekls', -5, 2147483645);
-ERROR:  the giving length is too long, it lets the end postion integer out of range
-CONTEXT:  referenced column: substr
+ substr 
+--------
+ kekls
+(1 row)
+
 -- tests for numtodsinterval
 set intervalstyle=a;
 select numtodsinterval(1500,'HOUR');
--- a/src/test/regress/sql/bit.sql
+++ b/src/test/regress/sql/bit.sql
@ -52,6 +52,29 @@ SELECT v,
       SUBSTRING(v FROM 6) AS sub_6
       FROM VARBIT_TABLE ORDER BY v;

+-- test overflow cases
+SELECT SUBSTRING('01010101'::bit(8) FROM 2 FOR 2147483646) AS "1010101";
+SELECT SUBSTRING('01010101'::bit(8) FROM -10 FOR 2147483646) AS "01010101";
+SELECT SUBSTRING('01010101'::bit(8) FROM -10 FOR -2147483646) AS "error";
+SELECT SUBSTRING('01010101'::varbit FROM 2 FOR 2147483646) AS "1010101";
+SELECT SUBSTRING('01010101'::varbit FROM -10 FOR 2147483646) AS "01010101";
+SELECT SUBSTRING('01010101'::varbit FROM -10 FOR -2147483646) AS "error";
+
+-- test overflow cases
+set bytea_output to escape;
+SELECT SUBSTRING('string' FROM 2 FOR 2147483646) AS "tring";
+SELECT SUBSTRING('string' FROM -10 FOR 2147483646) AS "string";
+SELECT SUBSTRING('string' FROM -10 FOR -2147483646) AS "error";
+SELECT SUBSTRING('1234567890'::bytea FROM 3) "34567890";
+SELECT SUBSTRING('1234567890'::bytea FROM 4 FOR 3) AS "456";
+SELECT SUBSTRING('string'::bytea FROM 2 FOR 2147483646) AS "tring";
+SELECT SUBSTRING('string'::bytea FROM -10 FOR 2147483646) AS "string";
+SELECT SUBSTRING('string'::bytea FROM -10 FOR -2147483646) AS "error";
+SELECT left('abcde', 2147483646) as "abcde";
+SELECT left('abcde', 2147483647) as "abcde";
+
+reset bytea_output;
+
 --- Bit operations
 DROP TABLE varbit_table;
 CREATE TABLE varbit_table (a BIT VARYING(16), b BIT VARYING(16));