Update bundled PCRE2-library to version 10.23

Some manual changes done to the library were lost with this update. They will be added in the next commit.
2017-05-29 15:31:42 +03:00
parent 7231563937
commit 36af74cb25
218 changed files with 49218 additions and 26130 deletions
--- a/pcre2/testdata/testoutput22-8
+++ b/pcre2/testdata/testoutput22-8
@ -0,0 +1,171 @@
+# Tests of \C when Unicode support is available. Note that \C is not supported
+# for DFA matching in UTF mode, so this test is not run with -dfa. The output
+# of this test is different in 8-, 16-, and 32-bit modes. Some tests may match
+# in some widths and not in others.
+
+/ab\Cde/utf,info
+Capturing subpattern count = 0
+Contains \C
+Options: utf
+First code unit = 'a'
+Last code unit = 'e'
+Subject length lower bound = 0
+    abXde
+ 0: abXde
+
+# This should produce an error diagnostic (\C in UTF lookbehind) in 8-bit and
+# 16-bit modes, but not in 32-bit mode.
+
+/(?<=ab\Cde)X/utf
+Failed: error 136 at offset 0: \C is not allowed in a lookbehind assertion in UTF-8 mode
+    ab!deXYZ
+
+# Autopossessification tests
+
+/\C+\X \X+\C/Bx
+------------------------------------------------------------------
+        Bra
+        AllAny+
+        extuni
+        extuni+
+        AllAny
+        Ket
+        End
+------------------------------------------------------------------
+
+/\C+\X \X+\C/Bx,utf
+------------------------------------------------------------------
+        Bra
+        Anybyte+
+        extuni
+        extuni+
+        Anybyte
+        Ket
+        End
+------------------------------------------------------------------
+
+/\C\X*TӅ;
+{0,6}\v+
+F
+/utf
+\= Expect no match
+    Ӆ\x0a
+No match
+
+/\C(\W?ſ)'?{{/utf
+\= Expect no match
+    \\C(\\W?ſ)'?{{
+No match
+
+/X(\C{3})/utf
+    X\x{1234}
+ 0: X\x{1234}
+ 1: \x{1234}
+    X\x{11234}Y
+ 0: X\x{f0}\x{91}\x{88}
+ 1: \x{f0}\x{91}\x{88}
+    X\x{11234}YZ
+ 0: X\x{f0}\x{91}\x{88}
+ 1: \x{f0}\x{91}\x{88}
+
+/X(\C{4})/utf
+    X\x{1234}YZ
+ 0: X\x{1234}Y
+ 1: \x{1234}Y
+    X\x{11234}YZ
+ 0: X\x{11234}
+ 1: \x{11234}
+    X\x{11234}YZW
+ 0: X\x{11234}
+ 1: \x{11234}
+
+/X\C*/utf
+    XYZabcdce
+ 0: XYZabcdce
+
+/X\C*?/utf
+    XYZabcde
+ 0: X
+
+/X\C{3,5}/utf
+    Xabcdefg
+ 0: Xabcde
+    X\x{1234}
+ 0: X\x{1234}
+    X\x{1234}YZ
+ 0: X\x{1234}YZ
+    X\x{1234}\x{512}
+ 0: X\x{1234}\x{512}
+    X\x{1234}\x{512}YZ
+ 0: X\x{1234}\x{512}
+    X\x{11234}Y
+ 0: X\x{11234}Y
+    X\x{11234}YZ
+ 0: X\x{11234}Y
+    X\x{11234}\x{512}
+ 0: X\x{11234}\x{d4}
+    X\x{11234}\x{512}YZ
+ 0: X\x{11234}\x{d4}
+    X\x{11234}\x{512}\x{11234}Z
+ 0: X\x{11234}\x{d4}
+
+/X\C{3,5}?/utf
+    Xabcdefg
+ 0: Xabc
+    X\x{1234}
+ 0: X\x{1234}
+    X\x{1234}YZ
+ 0: X\x{1234}
+    X\x{1234}\x{512}
+ 0: X\x{1234}
+    X\x{11234}Y
+ 0: X\x{f0}\x{91}\x{88}
+    X\x{11234}YZ
+ 0: X\x{f0}\x{91}\x{88}
+    X\x{11234}\x{512}YZ
+ 0: X\x{f0}\x{91}\x{88}
+    X\x{11234}
+ 0: X\x{f0}\x{91}\x{88}
+
+/a\Cb/utf
+    aXb
+ 0: aXb
+    a\nb
+ 0: a\x{0a}b
+    a\x{100}b
+No match
+
+/a\C\Cb/utf
+    a\x{100}b
+ 0: a\x{100}b
+    a\x{12257}b
+No match
+    a\x{12257}\x{11234}b
+No match
+
+/ab\Cde/utf
+    abXde
+ 0: abXde
+
+# This one is here not because it's different to Perl, but because the way
+# the captured single code unit is displayed. (In Perl it becomes a character,
+# and you can't tell the difference.)
+
+/X(\C)(.*)/utf
+    X\x{1234}
+ 0: X\x{1234}
+ 1: \x{e1}
+ 2: \x{88}\x{b4}
+    X\nabc
+ 0: X\x{0a}abc
+ 1: \x{0a}
+ 2: abc
+
+# This one is here because Perl gives out a grumbly error message (quite
+# correctly, but that messes up comparisons).
+
+/a\Cb/utf
+\= Expect no match in 8-bit mode
+    a\x{100}b
+No match
+