Update bundled PCRE2-library to version 10.23

Some manual changes done to the library were lost with this update.
They will be added in the next commit.
This commit is contained in:
Esa Korhonen
2017-05-29 15:31:42 +03:00
parent 7231563937
commit 36af74cb25
218 changed files with 49218 additions and 26130 deletions

View File

@ -3,6 +3,8 @@
# results in 8-bit, 16-bit, and 32-bit modes are excluded (see tests 10 and
# 12).
#newline_default lf any anycrlf
# PCRE2 and Perl disagree about the characteristics of certain Unicode
# characters. For example, 061C is considered by Perl to be Arabic, though
# is it not listed as such in the Unicode Scripts.txt file, and 2066-2069 are
@ -11,11 +13,11 @@
# test 4.
/^[\p{Arabic}]/utf
** Failers
\= Expect no match
\x{061c}
/^[[:graph:]]+$/utf,ucp
** Failers
\= Expect no match
\x{61c}
\x{2066}
\x{2067}
@ -23,7 +25,7 @@
\x{2069}
/^[[:print:]]+$/utf,ucp
** Failers
\= Expect no match
\x{61c}
\x{2066}
\x{2067}
@ -54,6 +56,7 @@
A\x{85}\x{2005}Z
/^[[:graph:]]+$/utf,ucp
\= Expect no match
\x{180e}
/^[[:print:]]+$/utf,ucp
@ -63,6 +66,7 @@
\x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}\x{180e}
/^[[:^print:]]+$/utf,ucp
\= Expect no match
\x{180e}
# End of U+180E tests.
@ -109,12 +113,9 @@
/.{3,5}?/IB,utf
\x{212ab}\x{212ab}\x{212ab}\x{861}
/(?<=\C)X/utf
Should produce an error diagnostic
/^[ab]/IB,utf
bar
*** Failers
\= Expect no match
c
\x{ff}
\x{100}
@ -123,7 +124,7 @@
c
\x{ff}
\x{100}
*** Failers
\= Expect no match
aaa
/\x{100}*(\d+|"(?1)")/utf
@ -133,7 +134,7 @@
"\x{100}1234"
\x{100}\x{100}12ab
\x{100}\x{100}"12"
*** Failers
\= Expect no match
\x{100}\x{100}abcd
/\x{100}*/IB,utf
@ -147,7 +148,7 @@
/[Ā-Ą]/utf
\x{100}
\x{104}
*** Failers
\= Expect no match
\x{105}
\x{ff}
@ -217,7 +218,7 @@
a\x{85}b
a\x{2028}b
a\x{2029}b
** Failers
\= Expect no match
a\n\rb
/^a\R*b/bsr=unicode,utf
@ -240,7 +241,7 @@
a\x{85}b
a\n\rb
a\n\r\x{85}\x0cb
** Failers
\= Expect no match
ab
/^a\R{1,3}b/bsr=unicode,utf
@ -251,34 +252,34 @@
a\r\n\r\n\r\nb
a\n\r\n\rb
a\n\n\r\nb
** Failers
\= Expect no match
a\n\n\n\rb
a\r
/\H\h\V\v/utf
X X\x0a
X\x09X\x0b
** Failers
\= Expect no match
\x{a0} X\x0a
/\H*\h+\V?\v{3,4}/utf
\x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a
\x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a
\x09\x20\x{a0}\x0a\x0b\x0c
** Failers
\= Expect no match
\x09\x20\x{a0}\x0a\x0b
/\H\h\V\v/utf
\x{3001}\x{3000}\x{2030}\x{2028}
X\x{180e}X\x{85}
** Failers
\= Expect no match
\x{2009} X\x0a
/\H*\h+\V?\v{3,4}/utf
\x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a
\x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a
\x09\x20\x{202f}\x0a\x0b\x0c
** Failers
\= Expect no match
\x09\x{200a}\x{a0}\x{2028}\x0b
/[\h]/B,utf
@ -300,7 +301,7 @@
a\rb
a\nb
a\r\nb
** Failers
\= Expect no match
a\x{85}b
a\x0bb
@ -315,7 +316,7 @@
a\rb
a\nb
a\r\nb
** Failers
\= Expect no match
a\x{85}b
a\x0bb
@ -325,11 +326,10 @@
a\r\nb
a\x{85}b
a\x0bb
** Failers
/.*a.*=.b.*/utf,newline=any
QQQ\x{2029}ABCaXYZ=!bPQR
** Failers
\= Expect no match
a\x{2029}b
\x61\xe2\x80\xa9\x62
@ -338,13 +338,13 @@
/a[^]b/utf,alt_bsux,allow_empty_class,match_unset_backref
a\x{1234}b
a\nb
** Failers
\= Expect no match
ab
/a[^]+b/utf,alt_bsux,allow_empty_class,match_unset_backref
aXb
a\nX\nX\x{1234}b
** Failers
\= Expect no match
ab
/(\x{de})\1/
@ -396,6 +396,7 @@
X\x{123}\x{123}\x{123}\x{123}\=ps
/X\x{123}{2,4}b/utf
\= Expect no match
Xx\=ps
X\x{123}x\=ps
X\x{123}\x{123}x\=ps
@ -403,6 +404,7 @@
X\x{123}\x{123}\x{123}\x{123}x\=ps
/X\x{123}{2,4}?b/utf
\= Expect no match
Xx\=ps
X\x{123}x\=ps
X\x{123}\x{123}x\=ps
@ -410,6 +412,7 @@
X\x{123}\x{123}\x{123}\x{123}x\=ps
/X\x{123}{2,4}+b/utf
\= Expect no match
Xx\=ps
X\x{123}x\=ps
X\x{123}\x{123}x\=ps
@ -804,6 +807,7 @@
/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/Bi,utf
/(?<=\x{1234}\x{1234})\bxy/I,utf
/(?<!^)ETA/utf
\= Expect no match
ETA
@ -834,7 +838,7 @@
/[\p{Nd}+-]+/IB,utf
1234
12-34
12-34
12+\x{661}-34
\= Expect no match
abcd
@ -901,7 +905,7 @@
\x{2068}
\x{2069}
/^\p{Cs}/utf
/^\p{Cs}/utf
\x{dfff}\=no_utf_check
\= Expect no match
\x{09f}
@ -918,7 +922,7 @@
\x{230a}
/^\p{Sc}+/utf
$\x{a2}\x{a3}\x{a4}\x{a5}\x{a6}
$\x{a2}\x{a3}\x{a4}\x{a5}\x{a6}
\x{9f2}
\= Expect no match
X
@ -928,7 +932,7 @@
\ \
\x{a0}
\x{1680}
\x{2000}
\x{2000}
\x{2001}
\= Expect no match
\x{2028}
@ -937,31 +941,31 @@
# These are here because Perl has problems with the negative versions of the
# properties and has changed how it behaves for caseless matching.
/\p{^Lu}/i,utf
/\p{^Lu}/i,utf
1234
\= Expect no match
ABC
/\P{Lu}/i,utf
/\P{Lu}/i,utf
1234
\= Expect no match
ABC
/\p{Ll}/i,utf
a
a
Az
\= Expect no match
ABC
/\p{Lu}/i,utf
A
A
a\x{10a0}B
\= Expect no match
a
\x{1d00}
/\p{Lu}/i,utf
A
A
aZ
\= Expect no match
abc
@ -1018,12 +1022,12 @@
ABCD
1234
\x{6ca}
\x{a6c}
\x{a6c}
\x{10a7}
\= Expect no match
_ABC
/^\p{Xan}+/utf
/^\p{Xan}+/utf
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
\= Expect no match
_ABC
@ -1044,18 +1048,18 @@
ABCD1234_
1234abcd_
\x{6ca}
\x{a6c}
\x{a6c}
\x{10a7}
\= Expect no match
_ABC
/^[\p{Xan}]+/utf
/^[\p{Xan}]+/utf
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
\= Expect no match
_ABC
/^>\p{Xsp}/utf
>\x{1680}\x{2028}\x{0b}
>\x{1680}\x{2028}\x{0b}
>\x{a0}
\= Expect no match
\x{0b}
@ -1082,7 +1086,7 @@
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^>\p{Xps}/utf
>\x{1680}\x{2028}\x{0b}
>\x{1680}\x{2028}\x{0b}
>\x{a0}
\= Expect no match
\x{0b}
@ -1113,7 +1117,7 @@
1234
\x{6ca}
\x{a6c}
\x{10a7}
\x{10a7}
_ABC
\= Expect no match
[]
@ -1138,7 +1142,7 @@
1234abcd_
\x{6ca}
\x{a6c}
\x{10a7}
\x{10a7}
_ABC
\= Expect no match
[]
@ -1232,7 +1236,7 @@
# Without PCRE_UCP, non-ASCII always fail, even if < 256
/\b...\B/utf
/\b...\B/utf
abc_
\= Expect no match
\x{37e}abc\x{376}
@ -1288,9 +1292,11 @@
/A+\p{N}A+\dB+\p{N}*B+\d*/B,ucp
# These behaved oddly in Perl, so they are kept in this test
/(\x{23a}\x{23a}\x{23a})?\1/i,utf
\= Expect no match
\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
/(ȺȺȺ)?\1/i,utf
\= Expect no match
ȺȺȺⱥⱥ
@ -1300,9 +1306,11 @@
/(ȺȺȺ)?\1/i,utf
ȺȺȺⱥⱥⱥ
/(\x{23a}\x{23a}\x{23a})\1/i,utf
\= Expect no match
\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
/(ȺȺȺ)\1/i,utf
\= Expect no match
ȺȺȺⱥⱥ
@ -1328,19 +1336,19 @@
# These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE
/^[\p{Batak}]/utf
\x{1bc0}
\x{1bc0}
\x{1bff}
\= Expect no match
\x{1bf4}
/^[\p{Brahmi}]/utf
\x{11000}
\x{11000}
\x{1106f}
\= Expect no match
\x{1104e}
/^[\p{Mandaic}]/utf
\x{840}
\x{840}
\x{85e}
\= Expect no match
\x{85c}
@ -1355,11 +1363,9 @@
/^\X/utf
́réo
/^a\X41z/alt_bsux,allow_empty_class,match_unset_backref,dupnames
/^a\X41z/alt_bsux,allow_empty_class,match_unset_backref,dupnames
aX41z
\= Expect no match
aAz
aAz
/\X/
@ -1453,7 +1459,7 @@
/\x{3a3}+./i,utf,aftertext
\x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
/\x{3a3}++./i,utf,aftertext
\= Expect no match
\x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
@ -1463,19 +1469,24 @@
/[^\x{3a3}]*\x{3c2}/Bi,utf
/[^a]*\x{3c2}/Bi,utf
/ist/Bi,utf
\= Expect no match
ikt
/is+t/i,utf
iSs\x{17f}t
\= Expect no match
ikt
/is+?t/i,utf
\= Expect no match
ikt
/is?t/i,utf
\= Expect no match
ikt
/is{2}t/i,utf
\= Expect no match
iskt
@ -1485,52 +1496,52 @@
/^\p{Xuc}/utf
$abc
@abc
`abc
`abc
\x{1234}abc
\= Expect no match
abc
/^\p{Xuc}+/utf
/^\p{Xuc}+/utf
$@`\x{a0}\x{1234}\x{e000}**
\= Expect no match
\x{9f}
/^\p{Xuc}+?/utf
/^\p{Xuc}+?/utf
$@`\x{a0}\x{1234}\x{e000}**
\= Expect no match
\x{9f}
/^\p{Xuc}+?\*/utf
/^\p{Xuc}+?\*/utf
$@`\x{a0}\x{1234}\x{e000}**
\= Expect no match
\x{9f}
/^\p{Xuc}++/utf
/^\p{Xuc}++/utf
$@`\x{a0}\x{1234}\x{e000}**
\= Expect no match
\x{9f}
/^\p{Xuc}{3,5}/utf
/^\p{Xuc}{3,5}/utf
$@`\x{a0}\x{1234}\x{e000}**
\= Expect no match
\x{9f}
/^\p{Xuc}{3,5}?/utf
/^\p{Xuc}{3,5}?/utf
$@`\x{a0}\x{1234}\x{e000}**
\= Expect no match
\x{9f}
/^[\p{Xuc}]/utf
/^[\p{Xuc}]/utf
$@`\x{a0}\x{1234}\x{e000}**
\= Expect no match
\x{9f}
/^[\p{Xuc}]+/utf
/^[\p{Xuc}]+/utf
$@`\x{a0}\x{1234}\x{e000}**
\= Expect no match
\x{9f}
/^\P{Xuc}/utf
/^\P{Xuc}/utf
abc
\= Expect no match
$abc
@ -1538,7 +1549,7 @@
`abc
\x{1234}abc
/^[\P{Xuc}]/utf
/^[\P{Xuc}]/utf
abc
\= Expect no match
$abc
@ -1603,13 +1614,13 @@
/[\p{N}]?+/B,no_auto_possess
/[\p{L}ab]{2,3}+/B,no_auto_possess
/[\p{L}ab]{2,3}+/B,no_auto_possess
/\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx
/.+\X/Bsx
/\X+$/Bmx
/\X+$/Bmx
/\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx
@ -1634,9 +1645,7 @@
/ábc/utf,replace=XሴZ
123ábc123
/(?<=abc)(|def)/g,utf,replace=<$0>
123abcáyzabcdef789abcሴqr
/(?<=abc)(|def)/g,utf,replace=<$0>
123abcáyzabcdef789abcሴqr
@ -1651,4 +1660,107 @@
"\xa\xf<(.\pZ*\P{Xwd}+^\xa8\3'3yq.::?(?J:()\xd1+!~:3'(8?:)':(?'d'(?'d'^u]!.+.+\\A\Ah(n+?9){7}+\K;(?'X'u'(?'c'(?'z'(?<y>\xb::\xf0'|\xd3(\xae?'w(z\x8?P>l)\x8?P>a)'\H\R\xd1+!!~:3'(?:h$N{26875}\W+?\\=D{2}\x89(?i:Uy0\N({2\xa(\v\x85*){y*\A(()\p{L}+?\P{^Xan}'+?\xff\+pS\?|).{;y*\A(()\p{L}+?\8}\d?1(|)(/1){7}.+[Lp{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(\xbf(R))\x8a\X*?\x8a\xb\xd1^9\3*+(\xc1,\k'R'\xb4)\xcc(z\z(?J)(?'X'\x1b(\xb\xd1^9\?'3*+P{^Xan}+?\xff\+(\xc1.]k+\xb'Pm'\xb4)\xcc4f\xa7'\xd1V(?i:U,{2,2})'(?'X'))?-%--\x95$9*\4'|\xd1(\x9c''%\x94$9)#(?'R')3\x7?('P\xed7'\xa8\xb1^u\xeaw\1\0\0\(|(?1){7}.+[\p{Me}].\s\xdcC*^\x14?(?(<y>))(?<!^)$C((;*?(R*?))+(?(R)\x8a\X*?\x8a\xb\xd1^9\3*+|(\xc1,\k'R'\xb4)\xcc! z)\z(?JJ)(?'X';(\xb\xd1^9\?'3*+(\xc1.]k+\xb'Pm'\xb4))':(?'d')(?'RD'(d')|)|$)'|(?<x>\g{d});\g{x}\x11\g{d}\x81\|$((?'X'\'X'(?'W''\x92()'9'\x83*))\xba*\!?^ <){)':;\xcc4'\xd1'(?'X'28))?-%--\x95$9*\4'|\xd1((''e\x94*$9:)*#(?'R')3)\x7?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+0!~:(?)'d'E:yD!\s(?'R'\x1e;\x10:U))|'\x9g!\xb0*){)\\x16:;()\x1e\x10\x87*:(?<y>)\xd1+!~:(?)'}'\d'E:yD!\s(?'R'\x1e;\x10:U))|'))|)g!\xb0*R+9{29+)#(?'P'})*?pS\{3,}\x85,{0,}l{*UTF)(\xe{7}){3722,{9,}d{2,?|))|{)\(A?&d}}{\xa,}2}){3,}7,l{)22}(,}l:7{2,4}}29\x19+)#?'P'})*v?))\x5"
/$(&.+[\p{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(?(R)){0,6}?|){12\x8a\X*?\x8a\x0b\xd1^9\3*+(\xc1,\k'P'\xb4)\xcc(z\z(?JJ)(?'X'8};(\x0b\xd1^9\?'3*+(\xc1.]k+\x0b'Pm'\xb4\xcc4'\xd1'(?'X'))?-%--\x95$9*\4'|\xd1(''%\x95*$9)#(?'R')3\x07?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+!~:(?)''(d'E:yD!\s(?'R'\x1e;\x10:U))|')g!\xb0*){29+))#(?'P'})*?/
"(*UTF)(*UCP)(.UTF).+X(\V+;\^(\D|)!999}(?(?C{7(?C')\H*\S*/^\x5\xa\\xd3\x85n?(;\D*(?m).[^mH+((*UCP)(*U:F)})(?!^)(?'"
/[\pS#moq]/
=
/(*:a\x{12345}b\t(d\)c)xxx/utf,alt_verbnames,mark
cxxxz
/abcd/utf,replace=x\x{824}y\o{3333}z(\Q12\$34$$\x34\E5$$),substitute_extended
abcd
/a(\x{e0}\x{101})(\x{c0}\x{102})/utf,replace=a\u$1\U$1\E$1\l$2\L$2\Eab\U\x{e0}\x{101}\L\x{d0}\x{160}\EDone,substitute_extended
a\x{e0}\x{101}\x{c0}\x{102}
/((?<digit>\d)|(?<letter>\p{L}))/g,substitute_extended,replace=<${digit:+digit; :not digit; }${letter:+letter:not a letter}>
ab12cde
/(*UCP)(*UTF)[[:>:]]X/B
/abc/utf,replace=xyz
abc\=zero_terminate
/a[[:punct:]b]/ucp,bincode
/a[[:punct:]b]/utf,ucp,bincode
/a[b[:punct:]]/utf,ucp,bincode
/[[:^ascii:]]/utf,ucp,bincode
/[[:^ascii:]\w]/utf,ucp,bincode
/[\w[:^ascii:]]/utf,ucp,bincode
/[^[:ascii:]\W]/utf,ucp,bincode
\x{de}
\x{200}
\= Expect no match
\x{300}
\x{37e}
/[[:^ascii:]a]/utf,ucp,bincode
/L(?#(|++<!(2)?/B,utf,no_auto_possess,auto_callout
/L(?#(|++<!(2)?/B,utf,ucp,auto_callout
/(*UTF)C\x09((?<!'(?x)!*H? #\xcc\x9a[^$]/
/[\D]/utf
\x{1d7cf}
/[\D\P{Nd}]/utf
\x{1d7cf}
/[^\D]/utf
a9b
\= Expect no match
\x{1d7cf}
/[^\D\P{Nd}]/utf
a9b
\x{1d7cf}
\= Expect no match
\x{10000}
# Hex uses pattern length, not zero-terminated. This tests for overrunning
# the given length of a pattern.
/'(*UTF)'/hex
/'#('/hex,extended,utf
/a(?<=A\XB)/utf
/ab(?<=A\RB)/utf
/../utf,auto_callout
\n\x{123}\x{123}\x{123}\x{123}
# This tests processing wide characters in extended mode.
/XȀ/x,utf
# These three test a bug fix that was not clearing up after a locale setting
# when the test or a subsequent one matched a wide character.
//locale=C
/[\P{Yi}]/utf
\x{2f000}
/[\P{Yi}]/utf,locale=C
\x{2f000}
/^(?<!(?=􃡜))/B,utf
# Horizontal and vertical space lists ignore caseless
/[\HH]/Bi,utf
/[^\HH]/Bi,utf