Update bundled PCRE2-library to version 10.23

Some manual changes done to the library were lost with this update. They will be added in the next commit.
2017-05-29 15:31:42 +03:00
parent 7231563937
commit 36af74cb25
218 changed files with 49218 additions and 26130 deletions
--- a/pcre2/testdata/testoutput10
+++ b/pcre2/testdata/testoutput10
@ -1,70 +1,10 @@
 # This set of tests is for UTF-8 support and Unicode property support, with
 # relevance only for the 8-bit library.

-/X(\C{3})/utf
-    X\x{1234}
- 0: X\x{1234}
- 1: \x{1234}
-
-/X(\C{4})/utf
-    X\x{1234}YZ
- 0: X\x{1234}Y
- 1: \x{1234}Y
-
-/X\C*/utf
-    XYZabcdce
- 0: XYZabcdce
-
-/X\C*?/utf
-    XYZabcde
- 0: X
-
-/X\C{3,5}/utf
-    Xabcdefg
- 0: Xabcde
-    X\x{1234}
- 0: X\x{1234}
-    X\x{1234}YZ
- 0: X\x{1234}YZ
-    X\x{1234}\x{512}
- 0: X\x{1234}\x{512}
-    X\x{1234}\x{512}YZ
- 0: X\x{1234}\x{512}
-
-/X\C{3,5}?/utf
-    Xabcdefg
- 0: Xabc
-    X\x{1234}
- 0: X\x{1234}
-    X\x{1234}YZ
- 0: X\x{1234}
-    X\x{1234}\x{512}
- 0: X\x{1234}
-
-/a\Cb/utf
-    aXb
- 0: aXb
-    a\nb
- 0: a\x{0a}b
-
-/a\C\Cb/utf
-    a\x{100}b
- 0: a\x{100}b
-
-/ab\Cde/utf
-    abXde
- 0: abXde
-
-/a\C\Cb/utf
-    a\x{100}b
- 0: a\x{100}b
-    ** Failers
-No match
-    a\x{12257}b
-No match
+# The next 4 patterns have UTF-8 errors

 /[�]/utf
-Failed: error -8 at offset 0: UTF-8 error: byte 2 top bits not 0x80
+Failed: error -8 at offset 1: UTF-8 error: byte 2 top bits not 0x80

 /�/utf
 Failed: error -3 at offset 0: UTF-8 error: 1 byte missing at end
@ -72,7 +12,13 @@ Failed: error -3 at offset 0: UTF-8 error: 1 byte missing at end
 /���xxx/utf
 Failed: error -8 at offset 0: UTF-8 error: byte 2 top bits not 0x80

+/Â��������/utf
+Failed: error -22 at offset 2: UTF-8 error: isolated byte with 0x80 bit set
+
+# Now test subjects
+
 /badutf/utf
+\= Expect UTF-8 errors
    X\xdf
 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 1
    XX\xef
@ -146,13 +92,14 @@ Failed: error -20: UTF-8 error: overlong 5-byte sequence at offset 0
    \xfc\x80\x80\x80\x80\x8f
 Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0
    \x80
-Failed: error -22: UTF-8 error: isolated 0x80 byte at offset 0
+Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0
    \xfe
 Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
    \xff
 Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0

 /badutf/utf
+\= Expect UTF-8 errors
    XX\xfb\x80\x80\x80\x80
 Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 2
    XX\xfd\x80\x80\x80\x80\x80
@ -161,6 +108,7 @@ Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at of
 Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 2

 /shortutf/utf
+\= Expect UTF-8 errors
    XX\xdf\=ph
 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2
    XX\xef\=ph
@ -193,6 +141,7 @@ Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0

 /anything/utf
+\= Expect UTF-8 errors
    X\xc0\x80
 Failed: error -17: UTF-8 error: overlong 2-byte sequence at offset 1
    XX\xc1\x8f
@ -209,6 +158,15 @@ Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0
 Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
    \xff\x80\x80\x80\x80\x80
 Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
+    \xf8\x88\x80\x80\x80
+Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
+    \xf9\x87\x80\x80\x80
+Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
+    \xfc\x84\x80\x80\x80\x80
+Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
+    \xfd\x83\x80\x80\x80\x80
+Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
+\= Expect no match
    \xc3\x8f
 No match
    \xe0\xaf\x80
@ -219,14 +177,6 @@ No match
 No match
    \xf1\x8f\x80\x80
 No match
-    \xf8\x88\x80\x80\x80
-Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
-    \xf9\x87\x80\x80\x80
-Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
-    \xfc\x84\x80\x80\x80\x80
-Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
-    \xfd\x83\x80\x80\x80\x80
-Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
    \xf8\x88\x80\x80\x80\=no_utf_check
 No match
    \xf9\x87\x80\x80\x80\=no_utf_check
@ -235,7 +185,62 @@ No match
 No match
    \xfd\x83\x80\x80\x80\x80\=no_utf_check
 No match
+    
+# Similar tests with offsets

+/badutf/utf
+\= Expect UTF-8 errors
+    X\xdfabcd
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+    X\xdfabcd\=offset=1
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+\= Expect no match
+    X\xdfabcd\=offset=2
+No match
+
+/(?<=x)badutf/utf
+\= Expect UTF-8 errors
+    X\xdfabcd
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+    X\xdfabcd\=offset=1
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+    X\xdfabcd\=offset=2
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+    X\xdfabcd\xdf\=offset=3
+Failed: error -3: UTF-8 error: 1 byte missing at end at offset 6
+\= Expect no match
+    X\xdfabcd\=offset=3
+No match
+
+/(?<=xx)badutf/utf
+\= Expect UTF-8 errors
+    X\xdfabcd
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+    X\xdfabcd\=offset=1
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+    X\xdfabcd\=offset=2
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+    X\xdfabcd\=offset=3
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+
+/(?<=xxxx)badutf/utf
+\= Expect UTF-8 errors
+    X\xdfabcd
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+    X\xdfabcd\=offset=1
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+    X\xdfabcd\=offset=2
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+    X\xdfabcd\=offset=3
+Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
+    X\xdfabc\xdf\=offset=6
+Failed: error -3: UTF-8 error: 1 byte missing at end at offset 5
+    X\xdfabc\xdf\=offset=7
+Failed: error -33: bad offset value
+\= Expect no match
+    X\xdfabcd\=offset=6
+No match
+ 
 /\x{100}/IB,utf
 ------------------------------------------------------------------
        Bra
@ -448,29 +453,6 @@ First code unit = \xf0
 Last code unit = \xab
 Subject length lower bound = 1

-# This one is here not because it's different to Perl, but because the way
-# the captured single-byte is displayed. (In Perl it becomes a character, and you
-# can't tell the difference.)
-
-/X(\C)(.*)/utf
-    X\x{1234}
- 0: X\x{1234}
- 1: \x{e1}
- 2: \x{88}\x{b4}
-    X\nabc
- 0: X\x{0a}abc
- 1: \x{0a}
- 2: abc
-
-# This one is here because Perl gives out a grumbly error message (quite
-# correctly, but that messes up comparisons).
-
-/a\Cb/utf
-    *** Failers
-No match
-    a\x{100}b
-No match
-
 /[^ab\xC0-\xF0]/IB,utf
 ------------------------------------------------------------------
        Bra
@ -499,8 +481,7 @@ Subject length lower bound = 1
 0: \x{100}
    \x{1000}
 0: \x{1000}
-    *** Failers
- 0: *
+\= Expect no match
    \x{c0}
 No match
    \x{f0}
@ -659,8 +640,6 @@ Subject length lower bound = 1
 0: \x{100}
    \x{100}Z
 0: \x{100}
-    *** Failers
-No match

 /[\xff]/IB,utf
 ------------------------------------------------------------------
@ -750,33 +729,35 @@ Failed: error 106 at offset 15: missing terminating ] for character class
 # This tests the stricter UTF-8 check according to RFC 3629.

 /X/utf
+\= Expect UTF-8 errors
    \x{d800}
 Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
-    \x{d800}\=no_utf_check
-No match
    \x{da00}
 Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
-    \x{da00}\=no_utf_check
-No match
    \x{dfff}
 Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
-    \x{dfff}\=no_utf_check
-No match
    \x{110000}
 Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 0
-    \x{110000}\=no_utf_check
-No match
    \x{2000000}
 Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
-    \x{2000000}\=no_utf_check
-No match
    \x{7fffffff}
 Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
+\= Expect no match
+    \x{d800}\=no_utf_check
+No match
+    \x{da00}\=no_utf_check
+No match
+    \x{dfff}\=no_utf_check
+No match
+    \x{110000}\=no_utf_check
+No match
+    \x{2000000}\=no_utf_check
+No match
    \x{7fffffff}\=no_utf_check
 No match

 /(*UTF8)\x{1234}/
-  abcd\x{1234}pqr
+    abcd\x{1234}pqr
 0: \x{1234}

 /(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
@ -887,16 +868,19 @@ Subject length lower bound = 3
 /a+/utf
    a\x{123}aa\=offset=1
 0: aa
-    a\x{123}aa\=offset=2
-Error -36 (bad UTF-8 offset)
    a\x{123}aa\=offset=3
 0: aa
    a\x{123}aa\=offset=4
 0: a
-    a\x{123}aa\=offset=5
-No match
+\= Expect bad offset value
    a\x{123}aa\=offset=6
 Failed: error -33: bad offset value
+\= Expect bad UTF-8 offset     
+    a\x{123}aa\=offset=2
+Error -36 (bad UTF-8 offset)
+\= Expect no match
+    a\x{123}aa\=offset=5
+No match

 /\x{1234}+/Ii,utf
 Capturing subpattern count = 0
@ -1281,8 +1265,6 @@ Subject length lower bound = 1
 0: \x{100}
    \x{100}Z
 0: \x{100}
-    *** Failers 
-No match

 /[z-\x{100}]/IB,utf
 ------------------------------------------------------------------
@ -1467,8 +1449,7 @@ Subject length lower bound = 1
 0: \x{105}
    \x{109}  
 0: \x{109}
-    ** Failers
-No match
+\= Expect no match
    \x{100}
 No match
    \x{10a} 
@ -1507,8 +1488,7 @@ Subject length lower bound = 1
 0: \x{100}
    \x{101} 
 0: \x{101}
-    ** Failers
-No match
+\= Expect no match
    \x{102}
 No match
    Y
@ -1547,7 +1527,52 @@ Last code unit = 'B' (caseless)
 Subject length lower bound = 2

 /abc/utf,replace=�
-   abc
+    abc
 Failed: error -3: UTF-8 error: 1 byte missing at end

+/(?<=(a)(?-1))x/I,utf
+Capturing subpattern count = 1
+Max lookbehind = 2
+Options: utf
+First code unit = 'x'
+Subject length lower bound = 1
+    a\x80zx\=offset=3
+Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 1
+
+/[\W\p{Any}]/B
+------------------------------------------------------------------
+        Bra
+        [\x00-/:-@[-^`{-\xff\p{Any}]
+        Ket
+        End
+------------------------------------------------------------------
+    abc
+ 0: a
+    123 
+ 0: 1
+
+/[\W\pL]/B
+------------------------------------------------------------------
+        Bra
+        [\x00-/:-@[-^`{-\xff\p{L}]
+        Ket
+        End
+------------------------------------------------------------------
+    abc
+ 0: a
+\= Expect no match
+    123     
+No match
+
+/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/utf
+Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
+
+/[\s[:^ascii:]]/B,ucp
+------------------------------------------------------------------
+        Bra
+        [\x80-\xff\p{Xsp}]
+        Ket
+        End
+------------------------------------------------------------------
+
 # End of testinput10