Update bundled PCRE2-library to version 10.23
Some manual changes done to the library were lost with this update. They will be added in the next commit.
This commit is contained in:
265
pcre2/testdata/testoutput10
vendored
265
pcre2/testdata/testoutput10
vendored
@ -1,70 +1,10 @@
|
||||
# This set of tests is for UTF-8 support and Unicode property support, with
|
||||
# relevance only for the 8-bit library.
|
||||
|
||||
/X(\C{3})/utf
|
||||
X\x{1234}
|
||||
0: X\x{1234}
|
||||
1: \x{1234}
|
||||
|
||||
/X(\C{4})/utf
|
||||
X\x{1234}YZ
|
||||
0: X\x{1234}Y
|
||||
1: \x{1234}Y
|
||||
|
||||
/X\C*/utf
|
||||
XYZabcdce
|
||||
0: XYZabcdce
|
||||
|
||||
/X\C*?/utf
|
||||
XYZabcde
|
||||
0: X
|
||||
|
||||
/X\C{3,5}/utf
|
||||
Xabcdefg
|
||||
0: Xabcde
|
||||
X\x{1234}
|
||||
0: X\x{1234}
|
||||
X\x{1234}YZ
|
||||
0: X\x{1234}YZ
|
||||
X\x{1234}\x{512}
|
||||
0: X\x{1234}\x{512}
|
||||
X\x{1234}\x{512}YZ
|
||||
0: X\x{1234}\x{512}
|
||||
|
||||
/X\C{3,5}?/utf
|
||||
Xabcdefg
|
||||
0: Xabc
|
||||
X\x{1234}
|
||||
0: X\x{1234}
|
||||
X\x{1234}YZ
|
||||
0: X\x{1234}
|
||||
X\x{1234}\x{512}
|
||||
0: X\x{1234}
|
||||
|
||||
/a\Cb/utf
|
||||
aXb
|
||||
0: aXb
|
||||
a\nb
|
||||
0: a\x{0a}b
|
||||
|
||||
/a\C\Cb/utf
|
||||
a\x{100}b
|
||||
0: a\x{100}b
|
||||
|
||||
/ab\Cde/utf
|
||||
abXde
|
||||
0: abXde
|
||||
|
||||
/a\C\Cb/utf
|
||||
a\x{100}b
|
||||
0: a\x{100}b
|
||||
** Failers
|
||||
No match
|
||||
a\x{12257}b
|
||||
No match
|
||||
# The next 4 patterns have UTF-8 errors
|
||||
|
||||
/[�]/utf
|
||||
Failed: error -8 at offset 0: UTF-8 error: byte 2 top bits not 0x80
|
||||
Failed: error -8 at offset 1: UTF-8 error: byte 2 top bits not 0x80
|
||||
|
||||
/�/utf
|
||||
Failed: error -3 at offset 0: UTF-8 error: 1 byte missing at end
|
||||
@ -72,7 +12,13 @@ Failed: error -3 at offset 0: UTF-8 error: 1 byte missing at end
|
||||
/���xxx/utf
|
||||
Failed: error -8 at offset 0: UTF-8 error: byte 2 top bits not 0x80
|
||||
|
||||
/��������/utf
|
||||
Failed: error -22 at offset 2: UTF-8 error: isolated byte with 0x80 bit set
|
||||
|
||||
# Now test subjects
|
||||
|
||||
/badutf/utf
|
||||
\= Expect UTF-8 errors
|
||||
X\xdf
|
||||
Failed: error -3: UTF-8 error: 1 byte missing at end at offset 1
|
||||
XX\xef
|
||||
@ -146,13 +92,14 @@ Failed: error -20: UTF-8 error: overlong 5-byte sequence at offset 0
|
||||
\xfc\x80\x80\x80\x80\x8f
|
||||
Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0
|
||||
\x80
|
||||
Failed: error -22: UTF-8 error: isolated 0x80 byte at offset 0
|
||||
Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0
|
||||
\xfe
|
||||
Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
|
||||
\xff
|
||||
Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
|
||||
|
||||
/badutf/utf
|
||||
\= Expect UTF-8 errors
|
||||
XX\xfb\x80\x80\x80\x80
|
||||
Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 2
|
||||
XX\xfd\x80\x80\x80\x80\x80
|
||||
@ -161,6 +108,7 @@ Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at of
|
||||
Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 2
|
||||
|
||||
/shortutf/utf
|
||||
\= Expect UTF-8 errors
|
||||
XX\xdf\=ph
|
||||
Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2
|
||||
XX\xef\=ph
|
||||
@ -193,6 +141,7 @@ Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
|
||||
Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0
|
||||
|
||||
/anything/utf
|
||||
\= Expect UTF-8 errors
|
||||
X\xc0\x80
|
||||
Failed: error -17: UTF-8 error: overlong 2-byte sequence at offset 1
|
||||
XX\xc1\x8f
|
||||
@ -209,6 +158,15 @@ Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0
|
||||
Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
|
||||
\xff\x80\x80\x80\x80\x80
|
||||
Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
|
||||
\xf8\x88\x80\x80\x80
|
||||
Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
|
||||
\xf9\x87\x80\x80\x80
|
||||
Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
|
||||
\xfc\x84\x80\x80\x80\x80
|
||||
Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
|
||||
\xfd\x83\x80\x80\x80\x80
|
||||
Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
|
||||
\= Expect no match
|
||||
\xc3\x8f
|
||||
No match
|
||||
\xe0\xaf\x80
|
||||
@ -219,14 +177,6 @@ No match
|
||||
No match
|
||||
\xf1\x8f\x80\x80
|
||||
No match
|
||||
\xf8\x88\x80\x80\x80
|
||||
Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
|
||||
\xf9\x87\x80\x80\x80
|
||||
Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
|
||||
\xfc\x84\x80\x80\x80\x80
|
||||
Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
|
||||
\xfd\x83\x80\x80\x80\x80
|
||||
Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
|
||||
\xf8\x88\x80\x80\x80\=no_utf_check
|
||||
No match
|
||||
\xf9\x87\x80\x80\x80\=no_utf_check
|
||||
@ -235,7 +185,62 @@ No match
|
||||
No match
|
||||
\xfd\x83\x80\x80\x80\x80\=no_utf_check
|
||||
No match
|
||||
|
||||
# Similar tests with offsets
|
||||
|
||||
/badutf/utf
|
||||
\= Expect UTF-8 errors
|
||||
X\xdfabcd
|
||||
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
|
||||
X\xdfabcd\=offset=1
|
||||
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
|
||||
\= Expect no match
|
||||
X\xdfabcd\=offset=2
|
||||
No match
|
||||
|
||||
/(?<=x)badutf/utf
|
||||
\= Expect UTF-8 errors
|
||||
X\xdfabcd
|
||||
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
|
||||
X\xdfabcd\=offset=1
|
||||
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
|
||||
X\xdfabcd\=offset=2
|
||||
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
|
||||
X\xdfabcd\xdf\=offset=3
|
||||
Failed: error -3: UTF-8 error: 1 byte missing at end at offset 6
|
||||
\= Expect no match
|
||||
X\xdfabcd\=offset=3
|
||||
No match
|
||||
|
||||
/(?<=xx)badutf/utf
|
||||
\= Expect UTF-8 errors
|
||||
X\xdfabcd
|
||||
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
|
||||
X\xdfabcd\=offset=1
|
||||
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
|
||||
X\xdfabcd\=offset=2
|
||||
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
|
||||
X\xdfabcd\=offset=3
|
||||
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
|
||||
|
||||
/(?<=xxxx)badutf/utf
|
||||
\= Expect UTF-8 errors
|
||||
X\xdfabcd
|
||||
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
|
||||
X\xdfabcd\=offset=1
|
||||
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
|
||||
X\xdfabcd\=offset=2
|
||||
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
|
||||
X\xdfabcd\=offset=3
|
||||
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
|
||||
X\xdfabc\xdf\=offset=6
|
||||
Failed: error -3: UTF-8 error: 1 byte missing at end at offset 5
|
||||
X\xdfabc\xdf\=offset=7
|
||||
Failed: error -33: bad offset value
|
||||
\= Expect no match
|
||||
X\xdfabcd\=offset=6
|
||||
No match
|
||||
|
||||
/\x{100}/IB,utf
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
@ -448,29 +453,6 @@ First code unit = \xf0
|
||||
Last code unit = \xab
|
||||
Subject length lower bound = 1
|
||||
|
||||
# This one is here not because it's different to Perl, but because the way
|
||||
# the captured single-byte is displayed. (In Perl it becomes a character, and you
|
||||
# can't tell the difference.)
|
||||
|
||||
/X(\C)(.*)/utf
|
||||
X\x{1234}
|
||||
0: X\x{1234}
|
||||
1: \x{e1}
|
||||
2: \x{88}\x{b4}
|
||||
X\nabc
|
||||
0: X\x{0a}abc
|
||||
1: \x{0a}
|
||||
2: abc
|
||||
|
||||
# This one is here because Perl gives out a grumbly error message (quite
|
||||
# correctly, but that messes up comparisons).
|
||||
|
||||
/a\Cb/utf
|
||||
*** Failers
|
||||
No match
|
||||
a\x{100}b
|
||||
No match
|
||||
|
||||
/[^ab\xC0-\xF0]/IB,utf
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
@ -499,8 +481,7 @@ Subject length lower bound = 1
|
||||
0: \x{100}
|
||||
\x{1000}
|
||||
0: \x{1000}
|
||||
*** Failers
|
||||
0: *
|
||||
\= Expect no match
|
||||
\x{c0}
|
||||
No match
|
||||
\x{f0}
|
||||
@ -659,8 +640,6 @@ Subject length lower bound = 1
|
||||
0: \x{100}
|
||||
\x{100}Z
|
||||
0: \x{100}
|
||||
*** Failers
|
||||
No match
|
||||
|
||||
/[\xff]/IB,utf
|
||||
------------------------------------------------------------------
|
||||
@ -750,33 +729,35 @@ Failed: error 106 at offset 15: missing terminating ] for character class
|
||||
# This tests the stricter UTF-8 check according to RFC 3629.
|
||||
|
||||
/X/utf
|
||||
\= Expect UTF-8 errors
|
||||
\x{d800}
|
||||
Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
|
||||
\x{d800}\=no_utf_check
|
||||
No match
|
||||
\x{da00}
|
||||
Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
|
||||
\x{da00}\=no_utf_check
|
||||
No match
|
||||
\x{dfff}
|
||||
Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
|
||||
\x{dfff}\=no_utf_check
|
||||
No match
|
||||
\x{110000}
|
||||
Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 0
|
||||
\x{110000}\=no_utf_check
|
||||
No match
|
||||
\x{2000000}
|
||||
Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
|
||||
\x{2000000}\=no_utf_check
|
||||
No match
|
||||
\x{7fffffff}
|
||||
Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
|
||||
\= Expect no match
|
||||
\x{d800}\=no_utf_check
|
||||
No match
|
||||
\x{da00}\=no_utf_check
|
||||
No match
|
||||
\x{dfff}\=no_utf_check
|
||||
No match
|
||||
\x{110000}\=no_utf_check
|
||||
No match
|
||||
\x{2000000}\=no_utf_check
|
||||
No match
|
||||
\x{7fffffff}\=no_utf_check
|
||||
No match
|
||||
|
||||
/(*UTF8)\x{1234}/
|
||||
abcd\x{1234}pqr
|
||||
abcd\x{1234}pqr
|
||||
0: \x{1234}
|
||||
|
||||
/(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
|
||||
@ -887,16 +868,19 @@ Subject length lower bound = 3
|
||||
/a+/utf
|
||||
a\x{123}aa\=offset=1
|
||||
0: aa
|
||||
a\x{123}aa\=offset=2
|
||||
Error -36 (bad UTF-8 offset)
|
||||
a\x{123}aa\=offset=3
|
||||
0: aa
|
||||
a\x{123}aa\=offset=4
|
||||
0: a
|
||||
a\x{123}aa\=offset=5
|
||||
No match
|
||||
\= Expect bad offset value
|
||||
a\x{123}aa\=offset=6
|
||||
Failed: error -33: bad offset value
|
||||
\= Expect bad UTF-8 offset
|
||||
a\x{123}aa\=offset=2
|
||||
Error -36 (bad UTF-8 offset)
|
||||
\= Expect no match
|
||||
a\x{123}aa\=offset=5
|
||||
No match
|
||||
|
||||
/\x{1234}+/Ii,utf
|
||||
Capturing subpattern count = 0
|
||||
@ -1281,8 +1265,6 @@ Subject length lower bound = 1
|
||||
0: \x{100}
|
||||
\x{100}Z
|
||||
0: \x{100}
|
||||
*** Failers
|
||||
No match
|
||||
|
||||
/[z-\x{100}]/IB,utf
|
||||
------------------------------------------------------------------
|
||||
@ -1467,8 +1449,7 @@ Subject length lower bound = 1
|
||||
0: \x{105}
|
||||
\x{109}
|
||||
0: \x{109}
|
||||
** Failers
|
||||
No match
|
||||
\= Expect no match
|
||||
\x{100}
|
||||
No match
|
||||
\x{10a}
|
||||
@ -1507,8 +1488,7 @@ Subject length lower bound = 1
|
||||
0: \x{100}
|
||||
\x{101}
|
||||
0: \x{101}
|
||||
** Failers
|
||||
No match
|
||||
\= Expect no match
|
||||
\x{102}
|
||||
No match
|
||||
Y
|
||||
@ -1547,7 +1527,52 @@ Last code unit = 'B' (caseless)
|
||||
Subject length lower bound = 2
|
||||
|
||||
/abc/utf,replace=�
|
||||
abc
|
||||
abc
|
||||
Failed: error -3: UTF-8 error: 1 byte missing at end
|
||||
|
||||
/(?<=(a)(?-1))x/I,utf
|
||||
Capturing subpattern count = 1
|
||||
Max lookbehind = 2
|
||||
Options: utf
|
||||
First code unit = 'x'
|
||||
Subject length lower bound = 1
|
||||
a\x80zx\=offset=3
|
||||
Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 1
|
||||
|
||||
/[\W\p{Any}]/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x00-/:-@[-^`{-\xff\p{Any}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
abc
|
||||
0: a
|
||||
123
|
||||
0: 1
|
||||
|
||||
/[\W\pL]/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x00-/:-@[-^`{-\xff\p{L}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
abc
|
||||
0: a
|
||||
\= Expect no match
|
||||
123
|
||||
No match
|
||||
|
||||
/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/utf
|
||||
Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
|
||||
|
||||
/[\s[:^ascii:]]/B,ucp
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x80-\xff\p{Xsp}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
# End of testinput10
|
||||
|
Reference in New Issue
Block a user