Update bundled PCRE2-library to version 10.23

Some manual changes done to the library were lost with this update.
They will be added in the next commit.
This commit is contained in:
Esa Korhonen
2017-05-29 15:31:42 +03:00
parent 7231563937
commit 36af74cb25
218 changed files with 49218 additions and 26130 deletions

View File

@ -1,70 +1,10 @@
# This set of tests is for UTF-8 support and Unicode property support, with
# relevance only for the 8-bit library.
/X(\C{3})/utf
X\x{1234}
0: X\x{1234}
1: \x{1234}
/X(\C{4})/utf
X\x{1234}YZ
0: X\x{1234}Y
1: \x{1234}Y
/X\C*/utf
XYZabcdce
0: XYZabcdce
/X\C*?/utf
XYZabcde
0: X
/X\C{3,5}/utf
Xabcdefg
0: Xabcde
X\x{1234}
0: X\x{1234}
X\x{1234}YZ
0: X\x{1234}YZ
X\x{1234}\x{512}
0: X\x{1234}\x{512}
X\x{1234}\x{512}YZ
0: X\x{1234}\x{512}
/X\C{3,5}?/utf
Xabcdefg
0: Xabc
X\x{1234}
0: X\x{1234}
X\x{1234}YZ
0: X\x{1234}
X\x{1234}\x{512}
0: X\x{1234}
/a\Cb/utf
aXb
0: aXb
a\nb
0: a\x{0a}b
/a\C\Cb/utf
a\x{100}b
0: a\x{100}b
/ab\Cde/utf
abXde
0: abXde
/a\C\Cb/utf
a\x{100}b
0: a\x{100}b
** Failers
No match
a\x{12257}b
No match
# The next 4 patterns have UTF-8 errors
/[�]/utf
Failed: error -8 at offset 0: UTF-8 error: byte 2 top bits not 0x80
Failed: error -8 at offset 1: UTF-8 error: byte 2 top bits not 0x80
/�/utf
Failed: error -3 at offset 0: UTF-8 error: 1 byte missing at end
@ -72,7 +12,13 @@ Failed: error -3 at offset 0: UTF-8 error: 1 byte missing at end
/���xxx/utf
Failed: error -8 at offset 0: UTF-8 error: byte 2 top bits not 0x80
/��������/utf
Failed: error -22 at offset 2: UTF-8 error: isolated byte with 0x80 bit set
# Now test subjects
/badutf/utf
\= Expect UTF-8 errors
X\xdf
Failed: error -3: UTF-8 error: 1 byte missing at end at offset 1
XX\xef
@ -146,13 +92,14 @@ Failed: error -20: UTF-8 error: overlong 5-byte sequence at offset 0
\xfc\x80\x80\x80\x80\x8f
Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0
\x80
Failed: error -22: UTF-8 error: isolated 0x80 byte at offset 0
Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0
\xfe
Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
\xff
Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
/badutf/utf
\= Expect UTF-8 errors
XX\xfb\x80\x80\x80\x80
Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 2
XX\xfd\x80\x80\x80\x80\x80
@ -161,6 +108,7 @@ Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at of
Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 2
/shortutf/utf
\= Expect UTF-8 errors
XX\xdf\=ph
Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2
XX\xef\=ph
@ -193,6 +141,7 @@ Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0
/anything/utf
\= Expect UTF-8 errors
X\xc0\x80
Failed: error -17: UTF-8 error: overlong 2-byte sequence at offset 1
XX\xc1\x8f
@ -209,6 +158,15 @@ Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0
Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
\xff\x80\x80\x80\x80\x80
Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
\xf8\x88\x80\x80\x80
Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
\xf9\x87\x80\x80\x80
Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
\xfc\x84\x80\x80\x80\x80
Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
\xfd\x83\x80\x80\x80\x80
Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
\= Expect no match
\xc3\x8f
No match
\xe0\xaf\x80
@ -219,14 +177,6 @@ No match
No match
\xf1\x8f\x80\x80
No match
\xf8\x88\x80\x80\x80
Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
\xf9\x87\x80\x80\x80
Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
\xfc\x84\x80\x80\x80\x80
Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
\xfd\x83\x80\x80\x80\x80
Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
\xf8\x88\x80\x80\x80\=no_utf_check
No match
\xf9\x87\x80\x80\x80\=no_utf_check
@ -235,7 +185,62 @@ No match
No match
\xfd\x83\x80\x80\x80\x80\=no_utf_check
No match
# Similar tests with offsets
/badutf/utf
\= Expect UTF-8 errors
X\xdfabcd
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
X\xdfabcd\=offset=1
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
\= Expect no match
X\xdfabcd\=offset=2
No match
/(?<=x)badutf/utf
\= Expect UTF-8 errors
X\xdfabcd
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
X\xdfabcd\=offset=1
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
X\xdfabcd\=offset=2
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
X\xdfabcd\xdf\=offset=3
Failed: error -3: UTF-8 error: 1 byte missing at end at offset 6
\= Expect no match
X\xdfabcd\=offset=3
No match
/(?<=xx)badutf/utf
\= Expect UTF-8 errors
X\xdfabcd
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
X\xdfabcd\=offset=1
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
X\xdfabcd\=offset=2
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
X\xdfabcd\=offset=3
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
/(?<=xxxx)badutf/utf
\= Expect UTF-8 errors
X\xdfabcd
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
X\xdfabcd\=offset=1
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
X\xdfabcd\=offset=2
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
X\xdfabcd\=offset=3
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
X\xdfabc\xdf\=offset=6
Failed: error -3: UTF-8 error: 1 byte missing at end at offset 5
X\xdfabc\xdf\=offset=7
Failed: error -33: bad offset value
\= Expect no match
X\xdfabcd\=offset=6
No match
/\x{100}/IB,utf
------------------------------------------------------------------
Bra
@ -448,29 +453,6 @@ First code unit = \xf0
Last code unit = \xab
Subject length lower bound = 1
# This one is here not because it's different to Perl, but because the way
# the captured single-byte is displayed. (In Perl it becomes a character, and you
# can't tell the difference.)
/X(\C)(.*)/utf
X\x{1234}
0: X\x{1234}
1: \x{e1}
2: \x{88}\x{b4}
X\nabc
0: X\x{0a}abc
1: \x{0a}
2: abc
# This one is here because Perl gives out a grumbly error message (quite
# correctly, but that messes up comparisons).
/a\Cb/utf
*** Failers
No match
a\x{100}b
No match
/[^ab\xC0-\xF0]/IB,utf
------------------------------------------------------------------
Bra
@ -499,8 +481,7 @@ Subject length lower bound = 1
0: \x{100}
\x{1000}
0: \x{1000}
*** Failers
0: *
\= Expect no match
\x{c0}
No match
\x{f0}
@ -659,8 +640,6 @@ Subject length lower bound = 1
0: \x{100}
\x{100}Z
0: \x{100}
*** Failers
No match
/[\xff]/IB,utf
------------------------------------------------------------------
@ -750,33 +729,35 @@ Failed: error 106 at offset 15: missing terminating ] for character class
# This tests the stricter UTF-8 check according to RFC 3629.
/X/utf
\= Expect UTF-8 errors
\x{d800}
Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
\x{d800}\=no_utf_check
No match
\x{da00}
Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
\x{da00}\=no_utf_check
No match
\x{dfff}
Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
\x{dfff}\=no_utf_check
No match
\x{110000}
Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 0
\x{110000}\=no_utf_check
No match
\x{2000000}
Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
\x{2000000}\=no_utf_check
No match
\x{7fffffff}
Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
\= Expect no match
\x{d800}\=no_utf_check
No match
\x{da00}\=no_utf_check
No match
\x{dfff}\=no_utf_check
No match
\x{110000}\=no_utf_check
No match
\x{2000000}\=no_utf_check
No match
\x{7fffffff}\=no_utf_check
No match
/(*UTF8)\x{1234}/
abcd\x{1234}pqr
abcd\x{1234}pqr
0: \x{1234}
/(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
@ -887,16 +868,19 @@ Subject length lower bound = 3
/a+/utf
a\x{123}aa\=offset=1
0: aa
a\x{123}aa\=offset=2
Error -36 (bad UTF-8 offset)
a\x{123}aa\=offset=3
0: aa
a\x{123}aa\=offset=4
0: a
a\x{123}aa\=offset=5
No match
\= Expect bad offset value
a\x{123}aa\=offset=6
Failed: error -33: bad offset value
\= Expect bad UTF-8 offset
a\x{123}aa\=offset=2
Error -36 (bad UTF-8 offset)
\= Expect no match
a\x{123}aa\=offset=5
No match
/\x{1234}+/Ii,utf
Capturing subpattern count = 0
@ -1281,8 +1265,6 @@ Subject length lower bound = 1
0: \x{100}
\x{100}Z
0: \x{100}
*** Failers
No match
/[z-\x{100}]/IB,utf
------------------------------------------------------------------
@ -1467,8 +1449,7 @@ Subject length lower bound = 1
0: \x{105}
\x{109}
0: \x{109}
** Failers
No match
\= Expect no match
\x{100}
No match
\x{10a}
@ -1507,8 +1488,7 @@ Subject length lower bound = 1
0: \x{100}
\x{101}
0: \x{101}
** Failers
No match
\= Expect no match
\x{102}
No match
Y
@ -1547,7 +1527,52 @@ Last code unit = 'B' (caseless)
Subject length lower bound = 2
/abc/utf,replace=�
abc
abc
Failed: error -3: UTF-8 error: 1 byte missing at end
/(?<=(a)(?-1))x/I,utf
Capturing subpattern count = 1
Max lookbehind = 2
Options: utf
First code unit = 'x'
Subject length lower bound = 1
a\x80zx\=offset=3
Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 1
/[\W\p{Any}]/B
------------------------------------------------------------------
Bra
[\x00-/:-@[-^`{-\xff\p{Any}]
Ket
End
------------------------------------------------------------------
abc
0: a
123
0: 1
/[\W\pL]/B
------------------------------------------------------------------
Bra
[\x00-/:-@[-^`{-\xff\p{L}]
Ket
End
------------------------------------------------------------------
abc
0: a
\= Expect no match
123
No match
/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/utf
Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
/[\s[:^ascii:]]/B,ucp
------------------------------------------------------------------
Bra
[\x80-\xff\p{Xsp}]
Ket
End
------------------------------------------------------------------
# End of testinput10