Files
postgresql/src/backend/utils/adt/numutils.c
David Rowley 4e2e75cd29 Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.

Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.

While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.

Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 12:06:08 +12:00

1316 lines
30 KiB
C

/*-------------------------------------------------------------------------
*
* numutils.c
* utility functions for I/O of built-in numeric types.
*
* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/utils/adt/numutils.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <math.h>
#include <limits.h>
#include <ctype.h>
#include "common/int.h"
#include "utils/builtins.h"
#include "port/pg_bitutils.h"
/*
* A table of all two-digit numbers. This is used to speed up decimal digit
* generation by copying pairs of digits into the final output.
*
* The two characters for a value n in the range 0..99 start at byte offset
* 2 * n. The array is sized to exactly 200 bytes, so the terminating NUL of
* the string literal is not stored; entries must be copied by length (two
* bytes at a time), never treated as C strings.
*/
static const char DIGIT_TABLE[200] =
"00" "01" "02" "03" "04" "05" "06" "07" "08" "09"
"10" "11" "12" "13" "14" "15" "16" "17" "18" "19"
"20" "21" "22" "23" "24" "25" "26" "27" "28" "29"
"30" "31" "32" "33" "34" "35" "36" "37" "38" "39"
"40" "41" "42" "43" "44" "45" "46" "47" "48" "49"
"50" "51" "52" "53" "54" "55" "56" "57" "58" "59"
"60" "61" "62" "63" "64" "65" "66" "67" "68" "69"
"70" "71" "72" "73" "74" "75" "76" "77" "78" "79"
"80" "81" "82" "83" "84" "85" "86" "87" "88" "89"
"90" "91" "92" "93" "94" "95" "96" "97" "98" "99";
/*
 * decimalLength32
 *		Return the number of decimal digits needed to print 'v'.
 *
 * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
 *
 * The callers in this file handle v == 0 themselves before calling; the
 * result for 0 depends on what pg_leftmost_one_pos32() does with 0.
 */
static inline int
decimalLength32(const uint32 v)
{
	static const uint32 pow10[] = {
		1, 10, 100, 1000, 10000,
		100000, 1000000, 10000000, 100000000, 1000000000
	};
	int			nbits;
	int			ndigits;

	/* number of significant bits in v */
	nbits = pg_leftmost_one_pos32(v) + 1;

	/*
	 * 1233 / 4096 is a good-enough approximation of log10(2), so this turns
	 * the bit count into an estimate of the base-10 logarithm.
	 */
	ndigits = nbits * 1233 / 4096;

	/* the estimate can be one too small; the power table settles it */
	if (v >= pow10[ndigits])
		ndigits++;

	return ndigits;
}
/*
 * decimalLength64
 *		Return the number of decimal digits needed to print 'v'.
 *
 * Same technique as the 32-bit variant: estimate the base-10 logarithm from
 * the position of the highest set bit, then correct the estimate with a
 * table of powers of ten.  The caller in this file handles v == 0 itself
 * before calling.
 */
static inline int
decimalLength64(const uint64 v)
{
	static const uint64 pow10[] = {
		UINT64CONST(1),
		UINT64CONST(10),
		UINT64CONST(100),
		UINT64CONST(1000),
		UINT64CONST(10000),
		UINT64CONST(100000),
		UINT64CONST(1000000),
		UINT64CONST(10000000),
		UINT64CONST(100000000),
		UINT64CONST(1000000000),
		UINT64CONST(10000000000),
		UINT64CONST(100000000000),
		UINT64CONST(1000000000000),
		UINT64CONST(10000000000000),
		UINT64CONST(100000000000000),
		UINT64CONST(1000000000000000),
		UINT64CONST(10000000000000000),
		UINT64CONST(100000000000000000),
		UINT64CONST(1000000000000000000),
		UINT64CONST(10000000000000000000)
	};
	int			nbits;
	int			ndigits;

	/* number of significant bits in v */
	nbits = pg_leftmost_one_pos64(v) + 1;

	/*
	 * 1233 / 4096 is a good-enough approximation of log10(2), so this turns
	 * the bit count into an estimate of the base-10 logarithm.
	 */
	ndigits = nbits * 1233 / 4096;

	/* the estimate can be one too small; the power table settles it */
	if (v >= pow10[ndigits])
		ndigits++;

	return ndigits;
}
/*
* Lookup table mapping a 7-bit ASCII character code to its value as a
* hexadecimal digit: '0'..'9' give 0..9, and both 'A'..'F' and 'a'..'f' give
* 10..15. Every other entry is -1.
*
* The table has only 128 entries, so it must not be indexed with character
* codes above 127. The users in this file only index it with characters
* that have already passed isxdigit().
*/
static const int8 hexlookup[128] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
/*
* Convert input string to a signed 16 bit integer. Input strings may be
* expressed in base-10, hexadecimal, octal, or binary format, all of which
* can be prefixed by an optional sign character, either '+' (the default) or
* '-' for negative numbers. Hex strings are recognized by the digits being
* prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
* prefix. The binary representation is recognized by the 0b or 0B prefix.
*
* Allows any number of leading or trailing whitespace characters. Digits may
* optionally be separated by a single underscore character. An underscore
* must always be followed by a digit, so it can never end the number, and a
* base-10 number may not begin with one. An underscore directly after a
* 0x/0o/0b prefix is accepted (e.g. "0x_ff"). Underscores have no effect on
* the return value and are supported only to assist in improving the human
* readability of the input strings.
*
* pg_strtoint16() will throw ereport() upon bad input format or overflow;
* while pg_strtoint16_safe() instead returns such complaints in *escontext,
* if it's an ErrorSaveContext.
*
* NB: Accumulate input as an unsigned number, to deal with two's complement
* representation of the most negative number, which can't be represented as a
* signed positive number.
*/
int16
pg_strtoint16(const char *s)
{
/* a NULL escontext selects the error-throwing behavior described above */
return pg_strtoint16_safe(s, NULL);
}
/*
 * pg_strtoint16_safe
 *		Parse the NUL-terminated string 's' as a signed 16 bit integer.
 *
 * Accepts optional leading and trailing whitespace, an optional sign, and
 * digits in base 10 or, after a 0x/0o/0b prefix (either case), in base 16, 8
 * or 2.  Single underscores may separate digits.
 *
 * On success the parsed value is returned.  Malformed input and values that
 * do not fit in int16 are reported through ereturn() using 'escontext', with
 * 0 as the dummy result.
 *
 * The magnitude is accumulated in an unsigned variable so that the absolute
 * value of PG_INT16_MIN, which has no positive int16 counterpart, can be
 * held.
 */
int16
pg_strtoint16_safe(const char *s, Node *escontext)
{
	const char *cur = s;
	const char *digits_start;
	uint16		acc = 0;
	bool		negative = false;
	unsigned char d;

	/*
	 * Fast path.  Most inputs are plain base-10 digits, possibly preceded by
	 * '-', so try that first.  Anything else (leading whitespace, '+', radix
	 * prefixes, underscores, trailing characters) restarts in the general
	 * parser at "slow".
	 */
	if (*cur == '-')
	{
		negative = true;
		cur++;
	}

	/*
	 * Storing the difference in an unsigned char turns characters below '0'
	 * into large values, so a single comparison checks both bounds.
	 */
	d = (*cur - '0');
	if (unlikely(d >= 10))
		goto slow;				/* need at least one digit */
	acc = d;
	cur++;

	/* remaining digits */
	while ((d = (*cur - '0')) < 10)
	{
		cur++;
		if (unlikely(acc > -(PG_INT16_MIN / 10)))
			goto out_of_range;
		acc = acc * 10 + d;
	}

	/* something other than end-of-string follows: let the slow path decide */
	if (unlikely(*cur != '\0'))
		goto slow;

	if (!negative)
	{
		if (unlikely(acc > PG_INT16_MAX))
			goto out_of_range;
		return (int16) acc;
	}

	/* check the negative equivalent will fit without overflowing */
	if (unlikely(acc > (uint16) (-(PG_INT16_MIN + 1)) + 1))
		goto out_of_range;
	return -((int16) acc);

slow:
	/*
	 * General parser: start over from the beginning of the string.
	 * 'negative' needs no reset: the fast path only sets it when the string
	 * starts with '-', which the sign handling below detects again.
	 */
	acc = 0;
	cur = s;

	/* skip leading spaces */
	while (isspace((unsigned char) *cur))
		cur++;

	/* optional sign */
	if (*cur == '+')
		cur++;
	else if (*cur == '-')
	{
		negative = true;
		cur++;
	}

	if (cur[0] == '0' && (cur[1] == 'x' || cur[1] == 'X'))
	{
		/* hexadecimal */
		cur += 2;
		digits_start = cur;

		for (;;)
		{
			if (*cur == '_')
			{
				/* underscore must be followed by more digits */
				cur++;
				if (*cur == '\0' || !isxdigit((unsigned char) *cur))
					goto invalid_syntax;
				continue;
			}
			if (!isxdigit((unsigned char) *cur))
				break;

			if (unlikely(acc > -(PG_INT16_MIN / 16)))
				goto out_of_range;
			acc = acc * 16 + hexlookup[(unsigned char) *cur];
			cur++;
		}
	}
	else if (cur[0] == '0' && (cur[1] == 'o' || cur[1] == 'O'))
	{
		/* octal */
		cur += 2;
		digits_start = cur;

		for (;;)
		{
			if (*cur == '_')
			{
				/* underscore must be followed by more digits */
				cur++;
				if (*cur == '\0' || *cur < '0' || *cur > '7')
					goto invalid_syntax;
				continue;
			}
			if (!(*cur >= '0' && *cur <= '7'))
				break;

			if (unlikely(acc > -(PG_INT16_MIN / 8)))
				goto out_of_range;
			acc = acc * 8 + (*cur - '0');
			cur++;
		}
	}
	else if (cur[0] == '0' && (cur[1] == 'b' || cur[1] == 'B'))
	{
		/* binary */
		cur += 2;
		digits_start = cur;

		for (;;)
		{
			if (*cur == '_')
			{
				/* underscore must be followed by more digits */
				cur++;
				if (*cur == '\0' || *cur < '0' || *cur > '1')
					goto invalid_syntax;
				continue;
			}
			if (!(*cur >= '0' && *cur <= '1'))
				break;

			if (unlikely(acc > -(PG_INT16_MIN / 2)))
				goto out_of_range;
			acc = acc * 2 + (*cur - '0');
			cur++;
		}
	}
	else
	{
		/* decimal */
		digits_start = cur;

		for (;;)
		{
			if (*cur == '_')
			{
				/* underscore may not be first */
				if (unlikely(cur == digits_start))
					goto invalid_syntax;
				/* and it must be followed by more digits */
				cur++;
				if (*cur == '\0' || !isdigit((unsigned char) *cur))
					goto invalid_syntax;
				continue;
			}
			if (!(*cur >= '0' && *cur <= '9'))
				break;

			if (unlikely(acc > -(PG_INT16_MIN / 10)))
				goto out_of_range;
			acc = acc * 10 + (*cur - '0');
			cur++;
		}
	}

	/* require at least one digit */
	if (unlikely(cur == digits_start))
		goto invalid_syntax;

	/* allow trailing whitespace, but not other trailing chars */
	while (isspace((unsigned char) *cur))
		cur++;
	if (unlikely(*cur != '\0'))
		goto invalid_syntax;

	if (!negative)
	{
		if (acc > PG_INT16_MAX)
			goto out_of_range;
		return (int16) acc;
	}

	/* check the negative equivalent will fit without overflowing */
	if (acc > (uint16) (-(PG_INT16_MIN + 1)) + 1)
		goto out_of_range;
	return -((int16) acc);

out_of_range:
	ereturn(escontext, 0,
			(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
			 errmsg("value \"%s\" is out of range for type %s",
					s, "smallint")));

invalid_syntax:
	ereturn(escontext, 0,
			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
			 errmsg("invalid input syntax for type %s: \"%s\"",
					"smallint", s)));
}
/*
* Convert input string to a signed 32 bit integer. Input strings may be
* expressed in base-10, hexadecimal, octal, or binary format, all of which
* can be prefixed by an optional sign character, either '+' (the default) or
* '-' for negative numbers. Hex strings are recognized by the digits being
* prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
* prefix. The binary representation is recognized by the 0b or 0B prefix.
*
* Allows any number of leading or trailing whitespace characters. Digits may
* optionally be separated by a single underscore character. An underscore
* must always be followed by a digit, so it can never end the number, and a
* base-10 number may not begin with one. An underscore directly after a
* 0x/0o/0b prefix is accepted (e.g. "0x_ff"). Underscores have no effect on
* the return value and are supported only to assist in improving the human
* readability of the input strings.
*
* pg_strtoint32() will throw ereport() upon bad input format or overflow;
* while pg_strtoint32_safe() instead returns such complaints in *escontext,
* if it's an ErrorSaveContext.
*
* NB: Accumulate input as an unsigned number, to deal with two's complement
* representation of the most negative number, which can't be represented as a
* signed positive number.
*/
int32
pg_strtoint32(const char *s)
{
/* a NULL escontext selects the error-throwing behavior described above */
return pg_strtoint32_safe(s, NULL);
}
/*
 * pg_strtoint32_safe
 *		Parse the NUL-terminated string 's' as a signed 32 bit integer.
 *
 * Accepts optional leading and trailing whitespace, an optional sign, and
 * digits in base 10 or, after a 0x/0o/0b prefix (either case), in base 16, 8
 * or 2.  Single underscores may separate digits.
 *
 * On success the parsed value is returned.  Malformed input and values that
 * do not fit in int32 are reported through ereturn() using 'escontext', with
 * 0 as the dummy result.
 *
 * The magnitude is accumulated in an unsigned variable so that the absolute
 * value of PG_INT32_MIN, which has no positive int32 counterpart, can be
 * held.  That one magnitude is returned as PG_INT32_MIN directly rather than
 * by negating a signed value, because negating PG_INT32_MIN is a signed
 * overflow, which is undefined behavior in C.
 */
int32
pg_strtoint32_safe(const char *s, Node *escontext)
{
	const char *cur = s;
	const char *digits_start;
	uint32		acc = 0;
	bool		negative = false;
	unsigned char d;

	/*
	 * Fast path.  Most inputs are plain base-10 digits, possibly preceded by
	 * '-', so try that first.  Anything else (leading whitespace, '+', radix
	 * prefixes, underscores, trailing characters) restarts in the general
	 * parser at "slow".
	 */
	if (*cur == '-')
	{
		negative = true;
		cur++;
	}

	/*
	 * Storing the difference in an unsigned char turns characters below '0'
	 * into large values, so a single comparison checks both bounds.
	 */
	d = (*cur - '0');
	if (unlikely(d >= 10))
		goto slow;				/* need at least one digit */
	acc = d;
	cur++;

	/* remaining digits */
	while ((d = (*cur - '0')) < 10)
	{
		cur++;
		if (unlikely(acc > -(PG_INT32_MIN / 10)))
			goto out_of_range;
		acc = acc * 10 + d;
	}

	/* something other than end-of-string follows: let the slow path decide */
	if (unlikely(*cur != '\0'))
		goto slow;

	if (!negative)
	{
		if (unlikely(acc > PG_INT32_MAX))
			goto out_of_range;
		return (int32) acc;
	}

	/* magnitudes up to PG_INT32_MAX can be negated safely as int32 */
	if (likely(acc <= (uint32) PG_INT32_MAX))
		return -((int32) acc);
	/* exactly |PG_INT32_MIN|: return it without negating a signed value */
	if (acc == (uint32) PG_INT32_MAX + 1)
		return PG_INT32_MIN;
	goto out_of_range;

slow:
	/*
	 * General parser: start over from the beginning of the string.
	 * 'negative' needs no reset: the fast path only sets it when the string
	 * starts with '-', which the sign handling below detects again.
	 */
	acc = 0;
	cur = s;

	/* skip leading spaces */
	while (isspace((unsigned char) *cur))
		cur++;

	/* optional sign */
	if (*cur == '+')
		cur++;
	else if (*cur == '-')
	{
		negative = true;
		cur++;
	}

	if (cur[0] == '0' && (cur[1] == 'x' || cur[1] == 'X'))
	{
		/* hexadecimal */
		cur += 2;
		digits_start = cur;

		for (;;)
		{
			if (*cur == '_')
			{
				/* underscore must be followed by more digits */
				cur++;
				if (*cur == '\0' || !isxdigit((unsigned char) *cur))
					goto invalid_syntax;
				continue;
			}
			if (!isxdigit((unsigned char) *cur))
				break;

			if (unlikely(acc > -(PG_INT32_MIN / 16)))
				goto out_of_range;
			acc = acc * 16 + hexlookup[(unsigned char) *cur];
			cur++;
		}
	}
	else if (cur[0] == '0' && (cur[1] == 'o' || cur[1] == 'O'))
	{
		/* octal */
		cur += 2;
		digits_start = cur;

		for (;;)
		{
			if (*cur == '_')
			{
				/* underscore must be followed by more digits */
				cur++;
				if (*cur == '\0' || *cur < '0' || *cur > '7')
					goto invalid_syntax;
				continue;
			}
			if (!(*cur >= '0' && *cur <= '7'))
				break;

			if (unlikely(acc > -(PG_INT32_MIN / 8)))
				goto out_of_range;
			acc = acc * 8 + (*cur - '0');
			cur++;
		}
	}
	else if (cur[0] == '0' && (cur[1] == 'b' || cur[1] == 'B'))
	{
		/* binary */
		cur += 2;
		digits_start = cur;

		for (;;)
		{
			if (*cur == '_')
			{
				/* underscore must be followed by more digits */
				cur++;
				if (*cur == '\0' || *cur < '0' || *cur > '1')
					goto invalid_syntax;
				continue;
			}
			if (!(*cur >= '0' && *cur <= '1'))
				break;

			if (unlikely(acc > -(PG_INT32_MIN / 2)))
				goto out_of_range;
			acc = acc * 2 + (*cur - '0');
			cur++;
		}
	}
	else
	{
		/* decimal */
		digits_start = cur;

		for (;;)
		{
			if (*cur == '_')
			{
				/* underscore may not be first */
				if (unlikely(cur == digits_start))
					goto invalid_syntax;
				/* and it must be followed by more digits */
				cur++;
				if (*cur == '\0' || !isdigit((unsigned char) *cur))
					goto invalid_syntax;
				continue;
			}
			if (!(*cur >= '0' && *cur <= '9'))
				break;

			if (unlikely(acc > -(PG_INT32_MIN / 10)))
				goto out_of_range;
			acc = acc * 10 + (*cur - '0');
			cur++;
		}
	}

	/* require at least one digit */
	if (unlikely(cur == digits_start))
		goto invalid_syntax;

	/* allow trailing whitespace, but not other trailing chars */
	while (isspace((unsigned char) *cur))
		cur++;
	if (unlikely(*cur != '\0'))
		goto invalid_syntax;

	if (!negative)
	{
		if (acc > PG_INT32_MAX)
			goto out_of_range;
		return (int32) acc;
	}

	/* magnitudes up to PG_INT32_MAX can be negated safely as int32 */
	if (acc <= (uint32) PG_INT32_MAX)
		return -((int32) acc);
	/* exactly |PG_INT32_MIN|: return it without negating a signed value */
	if (acc == (uint32) PG_INT32_MAX + 1)
		return PG_INT32_MIN;
	goto out_of_range;

out_of_range:
	ereturn(escontext, 0,
			(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
			 errmsg("value \"%s\" is out of range for type %s",
					s, "integer")));

invalid_syntax:
	ereturn(escontext, 0,
			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
			 errmsg("invalid input syntax for type %s: \"%s\"",
					"integer", s)));
}
/*
* Convert input string to a signed 64 bit integer. Input strings may be
* expressed in base-10, hexadecimal, octal, or binary format, all of which
* can be prefixed by an optional sign character, either '+' (the default) or
* '-' for negative numbers. Hex strings are recognized by the digits being
* prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
* prefix. The binary representation is recognized by the 0b or 0B prefix.
*
* Allows any number of leading or trailing whitespace characters. Digits may
* optionally be separated by a single underscore character. An underscore
* must always be followed by a digit, so it can never end the number, and a
* base-10 number may not begin with one. An underscore directly after a
* 0x/0o/0b prefix is accepted (e.g. "0x_ff"). Underscores have no effect on
* the return value and are supported only to assist in improving the human
* readability of the input strings.
*
* pg_strtoint64() will throw ereport() upon bad input format or overflow;
* while pg_strtoint64_safe() instead returns such complaints in *escontext,
* if it's an ErrorSaveContext.
*
* NB: Accumulate input as an unsigned number, to deal with two's complement
* representation of the most negative number, which can't be represented as a
* signed positive number.
*/
int64
pg_strtoint64(const char *s)
{
/* a NULL escontext selects the error-throwing behavior described above */
return pg_strtoint64_safe(s, NULL);
}
/*
 * pg_strtoint64_safe
 *		Parse the NUL-terminated string 's' as a signed 64 bit integer.
 *
 * Accepts optional leading and trailing whitespace, an optional sign, and
 * digits in base 10 or, after a 0x/0o/0b prefix (either case), in base 16, 8
 * or 2.  Single underscores may separate digits.
 *
 * On success the parsed value is returned.  Malformed input and values that
 * do not fit in int64 are reported through ereturn() using 'escontext', with
 * 0 as the dummy result.
 *
 * The magnitude is accumulated in an unsigned variable so that the absolute
 * value of PG_INT64_MIN, which has no positive int64 counterpart, can be
 * held.  That one magnitude is returned as PG_INT64_MIN directly rather than
 * by negating a signed value, because negating PG_INT64_MIN is a signed
 * overflow, which is undefined behavior in C.
 */
int64
pg_strtoint64_safe(const char *s, Node *escontext)
{
	const char *cur = s;
	const char *digits_start;
	uint64		acc = 0;
	bool		negative = false;
	unsigned char d;

	/*
	 * Fast path.  Most inputs are plain base-10 digits, possibly preceded by
	 * '-', so try that first.  Anything else (leading whitespace, '+', radix
	 * prefixes, underscores, trailing characters) restarts in the general
	 * parser at "slow".
	 */
	if (*cur == '-')
	{
		negative = true;
		cur++;
	}

	/*
	 * Storing the difference in an unsigned char turns characters below '0'
	 * into large values, so a single comparison checks both bounds.
	 */
	d = (*cur - '0');
	if (unlikely(d >= 10))
		goto slow;				/* need at least one digit */
	acc = d;
	cur++;

	/* remaining digits */
	while ((d = (*cur - '0')) < 10)
	{
		cur++;
		if (unlikely(acc > -(PG_INT64_MIN / 10)))
			goto out_of_range;
		acc = acc * 10 + d;
	}

	/* something other than end-of-string follows: let the slow path decide */
	if (unlikely(*cur != '\0'))
		goto slow;

	if (!negative)
	{
		if (unlikely(acc > PG_INT64_MAX))
			goto out_of_range;
		return (int64) acc;
	}

	/* magnitudes up to PG_INT64_MAX can be negated safely as int64 */
	if (likely(acc <= (uint64) PG_INT64_MAX))
		return -((int64) acc);
	/* exactly |PG_INT64_MIN|: return it without negating a signed value */
	if (acc == (uint64) PG_INT64_MAX + 1)
		return PG_INT64_MIN;
	goto out_of_range;

slow:
	/*
	 * General parser: start over from the beginning of the string.
	 * 'negative' needs no reset: the fast path only sets it when the string
	 * starts with '-', which the sign handling below detects again.
	 */
	acc = 0;
	cur = s;

	/* skip leading spaces */
	while (isspace((unsigned char) *cur))
		cur++;

	/* optional sign */
	if (*cur == '+')
		cur++;
	else if (*cur == '-')
	{
		negative = true;
		cur++;
	}

	if (cur[0] == '0' && (cur[1] == 'x' || cur[1] == 'X'))
	{
		/* hexadecimal */
		cur += 2;
		digits_start = cur;

		for (;;)
		{
			if (*cur == '_')
			{
				/* underscore must be followed by more digits */
				cur++;
				if (*cur == '\0' || !isxdigit((unsigned char) *cur))
					goto invalid_syntax;
				continue;
			}
			if (!isxdigit((unsigned char) *cur))
				break;

			if (unlikely(acc > -(PG_INT64_MIN / 16)))
				goto out_of_range;
			acc = acc * 16 + hexlookup[(unsigned char) *cur];
			cur++;
		}
	}
	else if (cur[0] == '0' && (cur[1] == 'o' || cur[1] == 'O'))
	{
		/* octal */
		cur += 2;
		digits_start = cur;

		for (;;)
		{
			if (*cur == '_')
			{
				/* underscore must be followed by more digits */
				cur++;
				if (*cur == '\0' || *cur < '0' || *cur > '7')
					goto invalid_syntax;
				continue;
			}
			if (!(*cur >= '0' && *cur <= '7'))
				break;

			if (unlikely(acc > -(PG_INT64_MIN / 8)))
				goto out_of_range;
			acc = acc * 8 + (*cur - '0');
			cur++;
		}
	}
	else if (cur[0] == '0' && (cur[1] == 'b' || cur[1] == 'B'))
	{
		/* binary */
		cur += 2;
		digits_start = cur;

		for (;;)
		{
			if (*cur == '_')
			{
				/* underscore must be followed by more digits */
				cur++;
				if (*cur == '\0' || *cur < '0' || *cur > '1')
					goto invalid_syntax;
				continue;
			}
			if (!(*cur >= '0' && *cur <= '1'))
				break;

			if (unlikely(acc > -(PG_INT64_MIN / 2)))
				goto out_of_range;
			acc = acc * 2 + (*cur - '0');
			cur++;
		}
	}
	else
	{
		/* decimal */
		digits_start = cur;

		for (;;)
		{
			if (*cur == '_')
			{
				/* underscore may not be first */
				if (unlikely(cur == digits_start))
					goto invalid_syntax;
				/* and it must be followed by more digits */
				cur++;
				if (*cur == '\0' || !isdigit((unsigned char) *cur))
					goto invalid_syntax;
				continue;
			}
			if (!(*cur >= '0' && *cur <= '9'))
				break;

			if (unlikely(acc > -(PG_INT64_MIN / 10)))
				goto out_of_range;
			acc = acc * 10 + (*cur - '0');
			cur++;
		}
	}

	/* require at least one digit */
	if (unlikely(cur == digits_start))
		goto invalid_syntax;

	/* allow trailing whitespace, but not other trailing chars */
	while (isspace((unsigned char) *cur))
		cur++;
	if (unlikely(*cur != '\0'))
		goto invalid_syntax;

	if (!negative)
	{
		if (acc > PG_INT64_MAX)
			goto out_of_range;
		return (int64) acc;
	}

	/* magnitudes up to PG_INT64_MAX can be negated safely as int64 */
	if (acc <= (uint64) PG_INT64_MAX)
		return -((int64) acc);
	/* exactly |PG_INT64_MIN|: return it without negating a signed value */
	if (acc == (uint64) PG_INT64_MAX + 1)
		return PG_INT64_MIN;
	goto out_of_range;

out_of_range:
	ereturn(escontext, 0,
			(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
			 errmsg("value \"%s\" is out of range for type %s",
					s, "bigint")));

invalid_syntax:
	ereturn(escontext, 0,
			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
			 errmsg("invalid input syntax for type %s: \"%s\"",
					"bigint", s)));
}
/*
 * uint32in_subr
 *		Convert input string to an unsigned 32 bit integer.
 *
 * The conversion is done by strtoul() with base 0.  Trailing whitespace is
 * tolerated when the whole string has to be consumed (see 'endloc').
 *
 * s:		  the string to parse.
 * endloc:	  if not NULL, receives a pointer to the first character after the
 *			  number so the caller can parse the rest; it is left untouched on
 *			  error.  If NULL, anything but whitespace after the number is a
 *			  syntax error.
 * typname:	  type name to report in error messages.
 * escontext: error context handed to ereturn(); per the original notes, if
 *			  it points to an ErrorSaveContext node that is filled instead of
 *			  throwing, and the caller must check SOFT_ERROR_OCCURRED().
 *
 * ereturn() is given 0 as the dummy result on every error path.
 */
uint32
uint32in_subr(const char *s, char **endloc,
			  const char *typname, Node *escontext)
{
	char	   *rest;
	unsigned long parsed;
	uint32		narrowed;

	errno = 0;
	parsed = strtoul(s, &rest, 0);

	/*
	 * strtoul() normally only sets ERANGE.  On some systems it may also set
	 * EINVAL, which simply means it couldn't parse the input string.  Treat
	 * that exactly like the standard "nothing consumed" indication.
	 */
	if (rest == s || (errno != 0 && errno != ERANGE))
		goto invalid_syntax;

	if (errno == ERANGE)
		goto out_of_range;

	if (endloc != NULL)
	{
		/* caller wants to deal with rest of string */
		*endloc = rest;
	}
	else
	{
		/* allow only whitespace after number */
		for (; *rest && isspace((unsigned char) *rest); rest++)
			;
		if (*rest)
			goto invalid_syntax;
	}

	narrowed = (uint32) parsed;

	/*
	 * Cope with possibility that unsigned long is wider than uint32, in which
	 * case strtoul will not raise an error for some values that are out of
	 * the range of uint32.
	 *
	 * For backwards compatibility, we want to accept inputs that are given
	 * with a minus sign, so allow the input value if it matches after either
	 * signed or unsigned extension to long.
	 *
	 * To ensure consistent results on 32-bit and 64-bit platforms, the error
	 * raised here is the same one used when strtoul() returns ERANGE.
	 */
#if PG_UINT32_MAX != ULONG_MAX
	if (parsed != (unsigned long) narrowed &&
		parsed != (unsigned long) ((int) narrowed))
		goto out_of_range;
#endif

	return narrowed;

out_of_range:
	ereturn(escontext, 0,
			(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
			 errmsg("value \"%s\" is out of range for type %s",
					s, typname)));

invalid_syntax:
	ereturn(escontext, 0,
			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
			 errmsg("invalid input syntax for type %s: \"%s\"",
					typname, s)));
}
/*
 * uint64in_subr
 *		Convert input string to an unsigned 64 bit integer.
 *
 * The conversion is done by strtou64() with base 0.  Trailing whitespace is
 * tolerated when the whole string has to be consumed (see 'endloc').
 *
 * s:		  the string to parse.
 * endloc:	  if not NULL, receives a pointer to the first character after the
 *			  number so the caller can parse the rest; it is left untouched on
 *			  error.  If NULL, anything but whitespace after the number is a
 *			  syntax error.
 * typname:	  type name to report in error messages.
 * escontext: error context handed to ereturn(); per the original notes, if
 *			  it points to an ErrorSaveContext node that is filled instead of
 *			  throwing, and the caller must check SOFT_ERROR_OCCURRED().
 *
 * ereturn() is given 0 as the dummy result on every error path.
 */
uint64
uint64in_subr(const char *s, char **endloc,
			  const char *typname, Node *escontext)
{
	char	   *rest;
	uint64		parsed;

	errno = 0;
	parsed = strtou64(s, &rest, 0);

	/*
	 * strtoul[l] normally only sets ERANGE.  On some systems it may also set
	 * EINVAL, which simply means it couldn't parse the input string.  Treat
	 * that exactly like the standard "nothing consumed" indication.
	 */
	if (rest == s || (errno != 0 && errno != ERANGE))
		goto invalid_syntax;

	if (errno == ERANGE)
		goto out_of_range;

	if (endloc != NULL)
	{
		/* caller wants to deal with rest of string */
		*endloc = rest;
	}
	else
	{
		/* allow only whitespace after number */
		for (; *rest && isspace((unsigned char) *rest); rest++)
			;
		if (*rest)
			goto invalid_syntax;
	}

	return parsed;

out_of_range:
	ereturn(escontext, 0,
			(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
			 errmsg("value \"%s\" is out of range for type %s",
					s, typname)));

invalid_syntax:
	ereturn(escontext, 0,
			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
			 errmsg("invalid input syntax for type %s: \"%s\"",
					typname, s)));
}
/*
* pg_itoa: converts a signed 16-bit integer to its string representation
* and returns strlen(a).
*
* Caller must ensure that 'a' points to enough memory to hold the result
* (at least 7 bytes, counting a leading sign and trailing NUL).
*
* It doesn't seem worth implementing this separately, so the value is simply
* widened to int32 and handed to pg_ltoa().
*/
int
pg_itoa(int16 i, char *a)
{
/* pg_ltoa() writes the digits and the NUL, and its length is our result */
return pg_ltoa((int32) i, a);
}
/*
 * pg_ultoa_n
 *		Write the decimal representation of an unsigned 32-bit integer to
 *		'a', without a terminating NUL, and return the number of characters
 *		written.
 *
 * Caller must ensure that 'a' points to enough memory to hold the result (at
 * least 10 bytes).
 *
 * The digits are produced from the least significant end: 'tail' starts just
 * past the last output position and moves toward 'a' as digit pairs are
 * copied from DIGIT_TABLE.
 */
int
pg_ultoa_n(uint32 value, char *a)
{
	int			ndigits;
	char	   *tail;

	/* Degenerate case */
	if (value == 0)
	{
		a[0] = '0';
		return 1;
	}

	ndigits = decimalLength32(value);
	tail = a + ndigits;

	/* Peel off four digits at a time while at least five remain. */
	while (value >= 10000)
	{
		const uint32 quot = value / 10000;
		const uint32 low4 = value - 10000 * quot;

		value = quot;
		tail -= 4;
		memcpy(tail + 2, DIGIT_TABLE + 2 * (low4 % 100), 2);
		memcpy(tail, DIGIT_TABLE + 2 * (low4 / 100), 2);
	}

	/* Three or four digits left: emit the low pair. */
	if (value >= 100)
	{
		tail -= 2;
		memcpy(tail, DIGIT_TABLE + 2 * (value % 100), 2);
		value /= 100;
	}

	/* One or two leading digits remain. */
	if (value >= 10)
		memcpy(tail - 2, DIGIT_TABLE + 2 * value, 2);
	else
		a[0] = (char) ('0' + value);

	return ndigits;
}
/*
 * pg_ltoa
 *		Write the NUL-terminated decimal representation of a signed 32-bit
 *		integer to 'a' and return strlen(a).
 *
 * It is the caller's responsibility to ensure that 'a' is at least 12 bytes
 * long, which is enough room to hold a minus sign, a maximally long int32,
 * and the terminating NUL.
 */
int
pg_ltoa(int32 value, char *a)
{
	uint32		magnitude = (uint32) value;
	char	   *out = a;
	int			ndigits;

	if (value < 0)
	{
		*out++ = '-';
		/* negate in unsigned arithmetic so the minimum value is handled too */
		magnitude = (uint32) 0 - magnitude;
	}

	ndigits = pg_ultoa_n(magnitude, out);
	out += ndigits;
	*out = '\0';

	return (int) (out - a);
}
/*
 * pg_ulltoa_n
 *		Write the decimal representation of an unsigned 64-bit integer to
 *		'a', without a terminating NUL, and return the number of characters
 *		written.
 *
 * Caller must ensure that 'a' points to at least MAXINT8LEN bytes.
 *
 * The digits are produced from the least significant end: 'tail' starts just
 * past the last output position and moves toward 'a' as digit pairs are
 * copied from DIGIT_TABLE.
 */
int
pg_ulltoa_n(uint64 value, char *a)
{
	int			ndigits;
	char	   *tail;
	uint32		rest;

	/* Degenerate case */
	if (value == 0)
	{
		a[0] = '0';
		return 1;
	}

	ndigits = decimalLength64(value);
	tail = a + ndigits;

	/* Peel off eight digits at a time while at least nine remain. */
	while (value >= 100000000)
	{
		const uint64 quot = value / 100000000;
		const uint32 low8 = (uint32) (value - 100000000 * quot);
		const uint32 lo4 = low8 % 10000;
		const uint32 hi4 = low8 / 10000;

		value = quot;
		tail -= 8;
		memcpy(tail + 6, DIGIT_TABLE + 2 * (lo4 % 100), 2);
		memcpy(tail + 4, DIGIT_TABLE + 2 * (lo4 / 100), 2);
		memcpy(tail + 2, DIGIT_TABLE + 2 * (hi4 % 100), 2);
		memcpy(tail, DIGIT_TABLE + 2 * (hi4 / 100), 2);
	}

	/* Switch to 32-bit for speed */
	rest = (uint32) value;

	/* Five to eight digits left: emit the low four. */
	if (rest >= 10000)
	{
		const uint32 quot = rest / 10000;
		const uint32 low4 = rest - 10000 * quot;

		rest = quot;
		tail -= 4;
		memcpy(tail + 2, DIGIT_TABLE + 2 * (low4 % 100), 2);
		memcpy(tail, DIGIT_TABLE + 2 * (low4 / 100), 2);
	}

	/* Three or four digits left: emit the low pair. */
	if (rest >= 100)
	{
		tail -= 2;
		memcpy(tail, DIGIT_TABLE + 2 * (rest % 100), 2);
		rest /= 100;
	}

	/* One or two leading digits remain. */
	if (rest >= 10)
		memcpy(tail - 2, DIGIT_TABLE + 2 * rest, 2);
	else
		a[0] = (char) ('0' + rest);

	return ndigits;
}
/*
 * pg_lltoa
 *		Write the NUL-terminated decimal representation of a signed 64-bit
 *		integer to 'a' and return strlen(a).
 *
 * Caller must ensure that 'a' points to enough memory to hold the result
 * (at least MAXINT8LEN + 1 bytes, counting a leading sign and trailing NUL).
 */
int
pg_lltoa(int64 value, char *a)
{
	uint64		magnitude = value;
	char	   *out = a;
	int			ndigits;

	if (value < 0)
	{
		*out++ = '-';
		/* negate in unsigned arithmetic so the minimum value is handled too */
		magnitude = (uint64) 0 - magnitude;
	}

	ndigits = pg_ulltoa_n(magnitude, out);
	out += ndigits;
	*out = '\0';

	return (int) (out - a);
}
/*
 * pg_ultostr_zeropad
 *		Converts 'value' into a decimal string representation stored at 'str'.
 *		'minwidth' specifies the minimum width of the result; a shorter number
 *		is left-padded with '0' characters up to that width.
 *
 * Returns the ending address of the string result (the last character written
 * plus 1).  Note that no NUL terminator is written.
 *
 * The intended use-case for this function is to build strings that contain
 * multiple individual numbers, for example:
 *
 *	str = pg_ultostr_zeropad(str, hours, 2);
 *	*str++ = ':';
 *	str = pg_ultostr_zeropad(str, mins, 2);
 *	*str++ = ':';
 *	str = pg_ultostr_zeropad(str, secs, 2);
 *	*str = '\0';
 *
 * Note: Caller must ensure that 'str' points to enough memory to hold the
 * result.
 */
char *
pg_ultostr_zeropad(char *str, uint32 value, int32 minwidth)
{
	int			ndigits;
	int			npad;

	Assert(minwidth > 0);

	/* Short cut for common case: a two-character field, one table lookup */
	if (minwidth == 2 && value < 100)
	{
		memcpy(str, DIGIT_TABLE + value * 2, 2);
		return str + 2;
	}

	/* Write the bare digits first, then see whether padding is needed. */
	ndigits = pg_ultoa_n(value, str);
	if (ndigits >= minwidth)
		return str + ndigits;

	/* Shift the digits right and fill the gap in front with zeros. */
	npad = minwidth - ndigits;
	memmove(str + npad, str, ndigits);
	memset(str, '0', npad);

	return str + minwidth;
}
/*
 * pg_ultostr
 *		Converts 'value' into a decimal string representation stored at 'str'.
 *
 * Returns the ending address of the string result (the last character written
 * plus 1).  Note that no NUL terminator is written.
 *
 * The intended use-case for this function is to build strings that contain
 * multiple individual numbers, for example:
 *
 *	str = pg_ultostr(str, a);
 *	*str++ = ' ';
 *	str = pg_ultostr(str, b);
 *	*str = '\0';
 *
 * Note: Caller must ensure that 'str' points to enough memory to hold the
 * result.
 */
char *
pg_ultostr(char *str, uint32 value)
{
	/* pg_ultoa_n() reports how many characters it wrote; step past them */
	return str + pg_ultoa_n(value, str);
}