Update bundled PCRE2 library to version 10.23

Some manual changes previously made to the bundled library were lost in this update. They will be re-applied in the next commit.
2017-05-29 15:31:42 +03:00
parent 7231563937
commit 36af74cb25
218 changed files with 49218 additions and 26130 deletions
--- a/pcre2/src/pcre2_internal.h
+++ b/pcre2/src/pcre2_internal.h
@ -2,12 +2,12 @@
 *      Perl-Compatible Regular Expressions       *
 *************************************************/

-/* PCRE is a library of functions to support regular expressions whose syntax
+/* PCRE2 is a library of functions to support regular expressions whose syntax
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
-         New API code Copyright (c) 2015 University of Cambridge
+         New API code Copyright (c) 2016 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -39,7 +39,10 @@ POSSIBILITY OF SUCH DAMAGE.
 */

 /* We do not support both EBCDIC and Unicode at the same time. The "configure"
-script prevents both being selected, but not everybody uses "configure". */
+script prevents both being selected, but not everybody uses "configure". EBCDIC
+is only supported for the 8-bit library, but the check for this has to be later
+in this file, because the first part is not width-dependent, and is included by
+pcre2test.c with CODE_UNIT_WIDTH == 0. */

 #if defined EBCDIC && defined SUPPORT_UNICODE
 #error The use of both EBCDIC and SUPPORT_UNICODE is not supported.
@ -70,6 +73,14 @@ typedef int BOOL;
 #include <valgrind/memcheck.h>
 #endif

+/* Older versions of MSVC lack snprintf(). This define allows for
+warning/error-free compilation and testing with MSVC compilers back to at least
+MSVC 10/2010, but not VC6 (which is missing some fundamentals and fails). */
+
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+#define snprintf _snprintf
+#endif
+
 /* When compiling a DLL for Windows, the exported symbols have to be declared
 using some MS magic. I found some useful information on this web page:
 http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the
@ -131,20 +142,6 @@ pcre2_match() because of the way it backtracks. */
 #define PCRE2_SPTR CUSTOM_SUBJECT_PTR
 #endif

-/* When compiling with the MSVC compiler, it is sometimes necessary to include
-a "calling convention" before exported function names. (This is secondhand
-information; I know nothing about MSVC myself). For example, something like
-
-  void __cdecl function(....)
-
-might be needed. In order so make this easy, all the exported functions have
-PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not
-set, we ensure here that it has no effect. */
-
-#ifndef PCRE2_CALL_CONVENTION
-#define PCRE2_CALL_CONVENTION
-#endif
-
 /* When checking for integer overflow in pcre2_compile(), we need to handle
 large integers. If a 64-bit integer type is available, we can use that.
 Otherwise we have to cast to double, which of course requires floating point
@ -166,7 +163,7 @@ by "configure". */
 #endif

 /* When compiling for use with the Virtual Pascal compiler, these functions
-need to have their names changed. PCRE must be compiled with the -DVPCOMPAT
+need to have their names changed. PCRE2 must be compiled with the -DVPCOMPAT
 option on the command line. */

 #ifdef VPCOMPAT
@ -189,7 +186,7 @@ neither (there some non-Unix environments where this is the case). */
 #define memmove(a, b, c) bcopy(b, a, c)
 #else  /* HAVE_BCOPY */
 static void *
-pcre_memmove(void *d, const void *s, size_t n)
+pcre2_memmove(void *d, const void *s, size_t n)
 {
 size_t i;
 unsigned char *dest = (unsigned char *)d;
@ -207,7 +204,7 @@ else
  return (void *)(dest - n);
  }
 }
-#define memmove(a, b, c) pcre_memmove(a, b, c)
+#define memmove(a, b, c) pcre2_memmove(a, b, c)
 #endif   /* not HAVE_BCOPY */
 #endif   /* not HAVE_MEMMOVE */
 #endif   /* not VPCOMPAT */
@ -231,8 +228,15 @@ Unicode doesn't go beyond 0x0010ffff. */

 #define MAX_UTF_CODE_POINT 0x10ffff

-/* Compile-time errors are added to this value. As they are documented, it
-should probably never be changed. */
+/* Compile-time positive error numbers (all except UTF errors, which are
+negative) start at this value. It should probably never be changed, in case
+some application is checking for specific numbers. There is a copy of this
+#define in pcre2posix.c (which now no longer includes this file). Ideally, a
+way of having a single definition should be found, but as the number is
+unlikely to change, this is not a pressing issue. The original reason for
+having a base other than 0 was to keep the absolute values of compile-time and
+run-time error numbers numerically different, but in the event the code does
+not rely on this. */

 #define COMPILE_ERROR_BASE 100

@ -266,21 +270,21 @@ advancing the pointer. */

 #define GETUTF8(c, eptr) \
    { \
-    if ((c & 0x20) == 0) \
-      c = ((c & 0x1f) << 6) | (eptr[1] & 0x3f); \
-    else if ((c & 0x10) == 0) \
-      c = ((c & 0x0f) << 12) | ((eptr[1] & 0x3f) << 6) | (eptr[2] & 0x3f); \
-    else if ((c & 0x08) == 0) \
-      c = ((c & 0x07) << 18) | ((eptr[1] & 0x3f) << 12) | \
-      ((eptr[2] & 0x3f) << 6) | (eptr[3] & 0x3f); \
-    else if ((c & 0x04) == 0) \
-      c = ((c & 0x03) << 24) | ((eptr[1] & 0x3f) << 18) | \
-          ((eptr[2] & 0x3f) << 12) | ((eptr[3] & 0x3f) << 6) | \
-          (eptr[4] & 0x3f); \
+    if ((c & 0x20u) == 0) \
+      c = ((c & 0x1fu) << 6) | (eptr[1] & 0x3fu); \
+    else if ((c & 0x10u) == 0) \
+      c = ((c & 0x0fu) << 12) | ((eptr[1] & 0x3fu) << 6) | (eptr[2] & 0x3fu); \
+    else if ((c & 0x08u) == 0) \
+      c = ((c & 0x07u) << 18) | ((eptr[1] & 0x3fu) << 12) | \
+      ((eptr[2] & 0x3fu) << 6) | (eptr[3] & 0x3fu); \
+    else if ((c & 0x04u) == 0) \
+      c = ((c & 0x03u) << 24) | ((eptr[1] & 0x3fu) << 18) | \
+          ((eptr[2] & 0x3fu) << 12) | ((eptr[3] & 0x3fu) << 6) | \
+          (eptr[4] & 0x3fu); \
    else \
-      c = ((c & 0x01) << 30) | ((eptr[1] & 0x3f) << 24) | \
-          ((eptr[2] & 0x3f) << 18) | ((eptr[3] & 0x3f) << 12) | \
-          ((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \
+      c = ((c & 0x01u) << 30) | ((eptr[1] & 0x3fu) << 24) | \
+          ((eptr[2] & 0x3fu) << 18) | ((eptr[3] & 0x3fu) << 12) | \
+          ((eptr[4] & 0x3fu) << 6) | (eptr[5] & 0x3fu); \
    }

 /* Base macro to pick up the remaining bytes of a UTF-8 character, advancing
@ -288,31 +292,31 @@ the pointer. */

 #define GETUTF8INC(c, eptr) \
    { \
-    if ((c & 0x20) == 0) \
-      c = ((c & 0x1f) << 6) | (*eptr++ & 0x3f); \
-    else if ((c & 0x10) == 0) \
+    if ((c & 0x20u) == 0) \
+      c = ((c & 0x1fu) << 6) | (*eptr++ & 0x3fu); \
+    else if ((c & 0x10u) == 0) \
      { \
-      c = ((c & 0x0f) << 12) | ((*eptr & 0x3f) << 6) | (eptr[1] & 0x3f); \
+      c = ((c & 0x0fu) << 12) | ((*eptr & 0x3fu) << 6) | (eptr[1] & 0x3fu); \
      eptr += 2; \
      } \
-    else if ((c & 0x08) == 0) \
+    else if ((c & 0x08u) == 0) \
      { \
-      c = ((c & 0x07) << 18) | ((*eptr & 0x3f) << 12) | \
-          ((eptr[1] & 0x3f) << 6) | (eptr[2] & 0x3f); \
+      c = ((c & 0x07u) << 18) | ((*eptr & 0x3fu) << 12) | \
+          ((eptr[1] & 0x3fu) << 6) | (eptr[2] & 0x3fu); \
      eptr += 3; \
      } \
-    else if ((c & 0x04) == 0) \
+    else if ((c & 0x04u) == 0) \
      { \
-      c = ((c & 0x03) << 24) | ((*eptr & 0x3f) << 18) | \
-          ((eptr[1] & 0x3f) << 12) | ((eptr[2] & 0x3f) << 6) | \
-          (eptr[3] & 0x3f); \
+      c = ((c & 0x03u) << 24) | ((*eptr & 0x3fu) << 18) | \
+          ((eptr[1] & 0x3fu) << 12) | ((eptr[2] & 0x3fu) << 6) | \
+          (eptr[3] & 0x3fu); \
      eptr += 4; \
      } \
    else \
      { \
-      c = ((c & 0x01) << 30) | ((*eptr & 0x3f) << 24) | \
-          ((eptr[1] & 0x3f) << 18) | ((eptr[2] & 0x3f) << 12) | \
-          ((eptr[3] & 0x3f) << 6) | (eptr[4] & 0x3f); \
+      c = ((c & 0x01u) << 30) | ((*eptr & 0x3fu) << 24) | \
+          ((eptr[1] & 0x3fu) << 18) | ((eptr[2] & 0x3fu) << 12) | \
+          ((eptr[3] & 0x3fu) << 6) | (eptr[4] & 0x3fu); \
      eptr += 5; \
      } \
    }
@ -322,34 +326,34 @@ advancing the pointer, incrementing the length. */

 #define GETUTF8LEN(c, eptr, len) \
    { \
-    if ((c & 0x20) == 0) \
+    if ((c & 0x20u) == 0) \
      { \
-      c = ((c & 0x1f) << 6) | (eptr[1] & 0x3f); \
+      c = ((c & 0x1fu) << 6) | (eptr[1] & 0x3fu); \
      len++; \
      } \
-    else if ((c & 0x10)  == 0) \
+    else if ((c & 0x10u)  == 0) \
      { \
-      c = ((c & 0x0f) << 12) | ((eptr[1] & 0x3f) << 6) | (eptr[2] & 0x3f); \
+      c = ((c & 0x0fu) << 12) | ((eptr[1] & 0x3fu) << 6) | (eptr[2] & 0x3fu); \
      len += 2; \
      } \
-    else if ((c & 0x08)  == 0) \
+    else if ((c & 0x08u)  == 0) \
      {\
-      c = ((c & 0x07) << 18) | ((eptr[1] & 0x3f) << 12) | \
-          ((eptr[2] & 0x3f) << 6) | (eptr[3] & 0x3f); \
+      c = ((c & 0x07u) << 18) | ((eptr[1] & 0x3fu) << 12) | \
+          ((eptr[2] & 0x3fu) << 6) | (eptr[3] & 0x3fu); \
      len += 3; \
      } \
-    else if ((c & 0x04)  == 0) \
+    else if ((c & 0x04u)  == 0) \
      { \
-      c = ((c & 0x03) << 24) | ((eptr[1] & 0x3f) << 18) | \
-          ((eptr[2] & 0x3f) << 12) | ((eptr[3] & 0x3f) << 6) | \
-          (eptr[4] & 0x3f); \
+      c = ((c & 0x03u) << 24) | ((eptr[1] & 0x3fu) << 18) | \
+          ((eptr[2] & 0x3fu) << 12) | ((eptr[3] & 0x3fu) << 6) | \
+          (eptr[4] & 0x3fu); \
      len += 4; \
      } \
    else \
      {\
-      c = ((c & 0x01) << 30) | ((eptr[1] & 0x3f) << 24) | \
-          ((eptr[2] & 0x3f) << 18) | ((eptr[3] & 0x3f) << 12) | \
-          ((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \
+      c = ((c & 0x01u) << 30) | ((eptr[1] & 0x3fu) << 24) | \
+          ((eptr[2] & 0x3fu) << 18) | ((eptr[3] & 0x3fu) << 12) | \
+          ((eptr[4] & 0x3fu) << 6) | (eptr[5] & 0x3fu); \
      len += 5; \
      } \
    }
@ -379,7 +383,7 @@ other. NOTE: The values also appear in pcre2_jit_compile.c. */
 /* Character U+180E (Mongolian Vowel Separator) is not included in the list of
 spaces in the Unicode file PropList.txt, and Perl does not recognize it as a
 space. However, in many other sources it is listed as a space and has been in
-PCRE for a long time. */
+PCRE (both APIs) for a long time. */

 #define HSPACE_LIST \
  CHAR_HT, CHAR_SPACE, CHAR_NBSP, \
@ -524,9 +528,11 @@ bytes in a code unit in that mode. */
 #define PCRE2_NL_SET        0x00008000  /* newline was set in the pattern */
 #define PCRE2_NOTEMPTY_SET  0x00010000  /* (*NOTEMPTY) used        ) keep */
 #define PCRE2_NE_ATST_SET   0x00020000  /* (*NOTEMPTY_ATSTART) used) together */
-#define PCRE2_DEREF_TABLES  0x00040000  /* Release character tables. */
+#define PCRE2_DEREF_TABLES  0x00040000  /* release character tables */
 #define PCRE2_NOJIT         0x00080000  /* (*NOJIT) used */
 #define PCRE2_HASBKPORX     0x00100000  /* contains \P, \p, or \X */
+#define PCRE2_DUPCAPUSED    0x00200000  /* contains (?| */
+#define PCRE2_HASBKC        0x00400000  /* contains \C */

 #define PCRE2_MODE_MASK     (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32)

@ -545,17 +551,9 @@ req_unit match. */

 #define REQ_CU_MAX 1000

-/* Bit definitions for entries in the pcre_ctypes table. */
-
-#define ctype_space   0x01
-#define ctype_letter  0x02
-#define ctype_digit   0x04
-#define ctype_xdigit  0x08
-#define ctype_word    0x10    /* alphanumeric or '_' */
-#define ctype_meta    0x80    /* regexp meta char or zero (end pattern) */
-
-/* Offsets for the bitmap tables in pcre_cbits. Each table contains a set
-of bits for a class map. Some classes are built by combining these tables. */
+/* Offsets for the bitmap tables in the cbits set of tables. Each table
+contains a set of bits for a class map. Some classes are built by combining
+these tables. */

 #define cbit_space     0      /* [:space:] or \s */
 #define cbit_xdigit   32      /* [:xdigit:] */
@ -569,19 +567,28 @@ of bits for a class map. Some classes are built by combining these tables. */
 #define cbit_cntrl   288      /* [:cntrl:] */
 #define cbit_length  320      /* Length of the cbits table */

-/* Offsets of the various tables from the base tables pointer, and
-total length. */
+/* Bit definitions for entries in the ctypes table. */

-#define lcc_offset      0
-#define fcc_offset    256
-#define cbits_offset  512
-#define ctypes_offset (cbits_offset + cbit_length)
+#define ctype_space   0x01
+#define ctype_letter  0x02
+#define ctype_digit   0x04
+#define ctype_xdigit  0x08
+#define ctype_word    0x10    /* alphanumeric or '_' */
+#define ctype_meta    0x80    /* regexp meta char or zero (end pattern) */
+
+/* Offsets of the various tables from the base tables pointer, and
+total length of the tables. */
+
+#define lcc_offset      0                           /* Lower case */
+#define fcc_offset    256                           /* Flip case */
+#define cbits_offset  512                           /* Character classes */
+#define ctypes_offset (cbits_offset + cbit_length)  /* Character types */
 #define tables_length (ctypes_offset + 256)


 /* -------------------- Character and string names ------------------------ */

-/* If PCRE is to support UTF-8 on EBCDIC platforms, we cannot use normal
+/* If PCRE2 is to support UTF-8 on EBCDIC platforms, we cannot use normal
 character constants like '*' because the compiler would emit their EBCDIC code,
 which is different from their ASCII/UTF-8 code. Instead we define macros for
 the characters so that they always use the ASCII/UTF-8 code when UTF-8 support
@ -589,7 +596,7 @@ is enabled. When UTF-8 support is not enabled, the definitions use character
 literals. Both character and string versions of each character are needed, and
 there are some longer strings as well.

-This means that, on EBCDIC platforms, the PCRE library can handle either
+This means that, on EBCDIC platforms, the PCRE2 library can handle either
 EBCDIC, or UTF-8, but not both. To support both in the same compiled library
 would need different lookups depending on whether PCRE2_UTF was set or not.
 This would make it impossible to use characters in switch/case statements,
@ -601,7 +608,7 @@ macros to give the functions distinct names. */
 #ifndef SUPPORT_UNICODE

 /* UTF-8 support is not enabled; use the platform-dependent character literals
-so that PCRE works in both ASCII and EBCDIC environments, but only in non-UTF
+so that PCRE2 works in both ASCII and EBCDIC environments, but only in non-UTF
 mode. Newline characters are problematic in EBCDIC. Though it has CR and LF
 characters, a common practice has been to use its NL (0x15) character as the
 line terminator in C-like processing environments. However, sometimes the LF
@ -609,7 +616,7 @@ line terminator in C-like processing environments. However, sometimes the LF

 http://unicode.org/standard/reports/tr13/tr13-5.html

-PCRE defaults EBCDIC NL to 0x15, but has a build-time option to select 0x25
+PCRE2 defaults EBCDIC NL to 0x15, but has a build-time option to select 0x25
 instead. Whichever is *not* chosen is defined as NEL.

 In both ASCII and EBCDIC environments, CHAR_NL and CHAR_LF are synonyms for the
@ -917,6 +924,7 @@ a positive value. */
 #define STRING_NOTEMPTY_ATSTART_RIGHTPAR  "NOTEMPTY_ATSTART)"
 #define STRING_LIMIT_MATCH_EQ             "LIMIT_MATCH="
 #define STRING_LIMIT_RECURSION_EQ         "LIMIT_RECURSION="
+#define STRING_MARK                       "MARK"

 #else  /* SUPPORT_UNICODE */

@ -1189,6 +1197,7 @@ only. */
 #define STRING_NOTEMPTY_ATSTART_RIGHTPAR  STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS
 #define STRING_LIMIT_MATCH_EQ             STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
 #define STRING_LIMIT_RECURSION_EQ         STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
+#define STRING_MARK                       STR_M STR_A STR_R STR_K

 #endif  /* SUPPORT_UNICODE */

@ -1212,7 +1221,7 @@ only. */
 #define PT_TABSIZE   11    /* Size of square table for autopossessify tests */

 /* The following special properties are used only in XCLASS items, when POSIX
-classes are specified and PCRE_UCP is set - in other words, for Unicode
+classes are specified and PCRE2_UCP is set - in other words, for Unicode
 handling of these classes. They are not available via the \p or \P escapes like
 those in the above list, and so they do not take part in the autopossessifying
 table. */
@ -1275,23 +1284,16 @@ mode rather than an escape sequence. It is also used for [^] in JavaScript
 compatibility mode, and for \C in non-utf mode. In non-DOTALL mode, "." behaves
 like \N.

-The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc.
-when PCRE_UCP is set and replacement of \d etc by \p sequences is required.
-They must be contiguous, and remain in order so that the replacements can be
-looked up from a table.
-
 Negative numbers are used to encode a backreference (\1, \2, \3, etc.) in
-check_escape(). There are two tests in the code for an escape
-greater than ESC_b and less than ESC_Z to detect the types that may be
-repeated. These are the types that consume characters. If any new escapes are
-put in between that don't consume a character, that code will have to change.
-*/
+check_escape(). There are tests in the code for an escape greater than ESC_b
+and less than ESC_Z to detect the types that may be repeated. These are the
+types that consume characters. If any new escapes are put in between that don't
+consume a character, that code will have to change. */

 enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
       ESC_W, ESC_w, ESC_N, ESC_dum, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H,
       ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z,
-       ESC_E, ESC_Q, ESC_g, ESC_k,
-       ESC_DU, ESC_du, ESC_SU, ESC_su, ESC_WU, ESC_wu };
+       ESC_E, ESC_Q, ESC_g, ESC_k };


 /********************** Opcode definitions ******************/
@ -1301,12 +1303,12 @@ enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
 Starting from 1 (i.e. after OP_END), the values up to OP_EOD must correspond in
 order to the list of escapes immediately above. Furthermore, values up to
 OP_DOLLM must not be changed without adjusting the table called autoposstab in
-pcre_compile.c
+pcre2_auto_possess.c.

 Whenever this list is updated, the two macro definitions that follow must be
 updated to match. The possessification table called "opcode_possessify" in
-pcre_compile.c must also be updated, and also the tables called "coptable"
-and "poptable" in pcre_dfa_exec.c.
+pcre2_compile.c must also be updated, and also the tables called "coptable"
+and "poptable" in pcre2_dfa_match.c.

 ****** NOTE NOTE NOTE ******/

@ -1357,7 +1359,8 @@ enum {
  OP_CIRC,           /* 27 Start of line - not multiline */
  OP_CIRCM,          /* 28 Start of line - multiline */

-  /* Single characters; caseful must precede the caseless ones */
+  /* Single characters; caseful must precede the caseless ones, and these
+  must remain in this order, and adjacent. */

  OP_CHAR,           /* 29 Match one character, casefully */
  OP_CHARI,          /* 30 Match one character, caselessly */
@ -1800,11 +1803,16 @@ typedef struct pcre2_serialized_data {

 #if defined PCRE2_CODE_UNIT_WIDTH && PCRE2_CODE_UNIT_WIDTH != 0

+/* EBCDIC is supported only for the 8-bit library. */
+
+#if defined EBCDIC && PCRE2_CODE_UNIT_WIDTH != 8
+#error EBCDIC is not supported for the 16-bit or 32-bit libraries
+#endif
+
 /* This is the largest non-UTF code point. */

 #define MAX_NON_UTF_CHAR (0xffffffffU >> (32 - PCRE2_CODE_UNIT_WIDTH))

-
 /* Internal shared data tables and variables. These are used by more than one
 of the exported public functions. They have to be "external" in the C sense,
 but are not part of the PCRE2 public API. Although the data for some of them is
@ -1878,11 +1886,12 @@ private structures. */

 /* Private "external" functions. These are internal functions that are called
 from modules other than the one in which they are defined. They have to be
-"external" in the C sense, but are not part of the PCRE public API. They are
+"external" in the C sense, but are not part of the PCRE2 public API. They are
 not referenced from pcre2test, and must not be defined when no code unit width
 is available. */

 #define _pcre2_auto_possessify       PCRE2_SUFFIX(_pcre2_auto_possessify_)
+#define _pcre2_check_escape          PCRE2_SUFFIX(_pcre2_check_escape_)
 #define _pcre2_find_bracket          PCRE2_SUFFIX(_pcre2_find_bracket_)
 #define _pcre2_is_newline            PCRE2_SUFFIX(_pcre2_is_newline_)
 #define _pcre2_jit_free_rodata       PCRE2_SUFFIX(_pcre2_jit_free_rodata_)
@ -1904,6 +1913,8 @@ is available. */

 extern int          _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL,
                      const compile_block *);
+extern int          _pcre2_check_escape(PCRE2_SPTR *, PCRE2_SPTR, uint32_t *,
+                      int *, uint32_t, BOOL, compile_block *);
 extern PCRE2_SPTR   _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
 extern BOOL         _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
                      uint32_t *, BOOL);