 36af74cb25
			
		
	
	36af74cb25
	
	
	
		
			
			Some manual changes done to the library were lost with this update. They will be added in the next commit.
		
			
				
	
	
		
			1767 lines
		
	
	
		
			32 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			1767 lines
		
	
	
		
			32 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| # This set of tests checks the API, internals, and non-Perl stuff for UTF
 | |
| # support, including Unicode properties. However, tests that give different
 | |
| # results in 8-bit, 16-bit, and 32-bit modes are excluded (see tests 10 and
 | |
| # 12).
 | |
| 
 | |
| #newline_default lf any anycrlf
 | |
| 
 | |
| # PCRE2 and Perl disagree about the characteristics of certain Unicode
 | |
| # characters. For example, 061C is considered by Perl to be Arabic, though
 | |
| # it is not listed as such in the Unicode Scripts.txt file, and 2066-2069 are
 | |
| # graphic and printable according to Perl, though they are actually "isolate"
 | |
| # control characters. That is why the following tests are here rather than in
 | |
| # test 4.
 | |
| 
 | |
| /^[\p{Arabic}]/utf
 | |
| \= Expect no match
 | |
|     \x{061c}
 | |
|     
 | |
| /^[[:graph:]]+$/utf,ucp
 | |
| \= Expect no match
 | |
|     \x{61c}
 | |
|     \x{2066}
 | |
|     \x{2067}
 | |
|     \x{2068}
 | |
|     \x{2069}
 | |
| 
 | |
| /^[[:print:]]+$/utf,ucp
 | |
| \= Expect no match
 | |
|     \x{61c}
 | |
|     \x{2066}
 | |
|     \x{2067}
 | |
|     \x{2068}
 | |
|     \x{2069}
 | |
| 
 | |
| /^[[:^graph:]]+$/utf,ucp
 | |
|     \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}
 | |
|     \x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069}
 | |
| 
 | |
| /^[[:^print:]]+$/utf,ucp
 | |
|     \x{09}\x{1D}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067}
 | |
|     \x{2068}\x{2069}
 | |
|      
 | |
| # Perl does not consider U+180e to be a space character. It is true that it
 | |
| # does not appear in the Unicode PropList.txt file as such, but in many other
 | |
| # sources it is listed as a space, and has been treated as such in PCRE for
 | |
| # a long time. 
 | |
| 
 | |
| /^>[[:blank:]]*/utf,ucp
 | |
|     >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028} 
 | |
| 
 | |
| /^A\s+Z/utf,ucp
 | |
|     A\x{85}\x{180e}\x{2005}Z
 | |
| 
 | |
| /^A[\s]+Z/utf,ucp
 | |
|     A\x{2005}Z
 | |
|     A\x{85}\x{2005}Z
 | |
|     
 | |
| /^[[:graph:]]+$/utf,ucp
 | |
| \= Expect no match
 | |
|     \x{180e}
 | |
| 
 | |
| /^[[:print:]]+$/utf,ucp
 | |
|     \x{180e}
 | |
| 
 | |
| /^[[:^graph:]]+$/utf,ucp
 | |
|     \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}\x{180e}
 | |
| 
 | |
| /^[[:^print:]]+$/utf,ucp
 | |
| \= Expect no match
 | |
|     \x{180e}
 | |
| 
 | |
| # End of U+180E tests.
 | |
| 
 | |
| # ---------------------------------------------------------------------
 | |
| 
 | |
| /\x{110000}/IB,utf
 | |
| 
 | |
| /\o{4200000}/IB,utf
 | |
| 
 | |
| /\x{ffffffff}/utf
 | |
| 
 | |
| /\o{37777777777}/utf
 | |
| 
 | |
| /\x{100000000}/utf
 | |
| 
 | |
| /\o{77777777777}/utf
 | |
| 
 | |
| /\x{d800}/utf
 | |
| 
 | |
| /\o{154000}/utf
 | |
| 
 | |
| /\x{dfff}/utf
 | |
| 
 | |
| /\o{157777}/utf
 | |
| 
 | |
| /\x{d7ff}/utf
 | |
| 
 | |
| /\o{153777}/utf
 | |
| 
 | |
| /\x{e000}/utf
 | |
| 
 | |
| /\o{170000}/utf
 | |
| 
 | |
| /^\x{100}a\x{1234}/utf
 | |
|     \x{100}a\x{1234}bcd
 | |
| 
 | |
| /\x{0041}\x{2262}\x{0391}\x{002e}/IB,utf
 | |
|     \x{0041}\x{2262}\x{0391}\x{002e}
 | |
|     
 | |
| /.{3,5}X/IB,utf
 | |
|     \x{212ab}\x{212ab}\x{212ab}\x{861}X
 | |
| 
 | |
| /.{3,5}?/IB,utf
 | |
|     \x{212ab}\x{212ab}\x{212ab}\x{861}
 | |
| 
 | |
| /^[ab]/IB,utf
 | |
|     bar
 | |
| \= Expect no match
 | |
|     c
 | |
|     \x{ff}
 | |
|     \x{100}  
 | |
| 
 | |
| /^[^ab]/IB,utf
 | |
|     c
 | |
|     \x{ff}
 | |
|     \x{100}  
 | |
| \= Expect no match 
 | |
|     aaa
 | |
|   
 | |
| /\x{100}*(\d+|"(?1)")/utf
 | |
|     1234
 | |
|     "1234" 
 | |
|     \x{100}1234
 | |
|     "\x{100}1234"  
 | |
|     \x{100}\x{100}12ab 
 | |
|     \x{100}\x{100}"12" 
 | |
| \= Expect no match 
 | |
|     \x{100}\x{100}abcd
 | |
| 
 | |
| /\x{100}*/IB,utf
 | |
| 
 | |
| /a\x{100}*/IB,utf
 | |
| 
 | |
| /ab\x{100}*/IB,utf
 | |
| 
 | |
| /[\x{200}-\x{100}]/utf
 | |
| 
 | |
| /[Ā-Ą]/utf
 | |
|     \x{100}
 | |
|     \x{104}
 | |
| \= Expect no match
 | |
|     \x{105}
 | |
|     \x{ff}    
 | |
| 
 | |
| /[\xFF]/IB
 | |
|     >\xff<
 | |
| 
 | |
| /[^\xFF]/IB
 | |
| 
 | |
| /[Ä-Ü]/utf
 | |
|     Ö # Matches without Study
 | |
|     \x{d6}
 | |
|     
 | |
| /[Ä-Ü]/utf
 | |
|     Ö <-- Same with Study
 | |
|     \x{d6}
 | |
|     
 | |
| /[\x{c4}-\x{dc}]/utf
 | |
|     Ö # Matches without Study
 | |
|     \x{d6} 
 | |
| 
 | |
| /[\x{c4}-\x{dc}]/utf
 | |
|     Ö <-- Same with Study
 | |
|     \x{d6} 
 | |
| 
 | |
| /[^\x{100}]abc(xyz(?1))/IB,utf
 | |
| 
 | |
| /(\x{100}(b(?2)c))?/IB,utf
 | |
| 
 | |
| /(\x{100}(b(?2)c)){0,2}/IB,utf
 | |
| 
 | |
| /(\x{100}(b(?1)c))?/IB,utf
 | |
| 
 | |
| /(\x{100}(b(?1)c)){0,2}/IB,utf
 | |
| 
 | |
| /\W/utf
 | |
|     A.B
 | |
|     A\x{100}B 
 | |
|   
 | |
| /\w/utf
 | |
|     \x{100}X   
 | |
| 
 | |
| /^\ሴ/IB,utf
 | |
| 
 | |
| /()()()()()()()()()()
 | |
|  ()()()()()()()()()()
 | |
|  ()()()()()()()()()()
 | |
|  ()()()()()()()()()()
 | |
|  A (x) (?41) B/x,utf
 | |
|     AxxB     
 | |
| 
 | |
| /^[\x{100}\E-\Q\E\x{150}]/B,utf
 | |
| 
 | |
| /^[\QĀ\E-\QŐ\E]/B,utf
 | |
| 
 | |
| /^abc./gmx,newline=any,utf
 | |
|     abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x{0085}abc7 \x{2028}abc8 \x{2029}abc9 JUNK
 | |
| 
 | |
| /abc.$/gmx,newline=any,utf
 | |
|     abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x{0085} abc7\x{2028} abc8\x{2029} abc9
 | |
| 
 | |
| /^a\Rb/bsr=unicode,utf
 | |
|     a\nb
 | |
|     a\rb
 | |
|     a\r\nb
 | |
|     a\x0bb
 | |
|     a\x0cb
 | |
|     a\x{85}b   
 | |
|     a\x{2028}b 
 | |
|     a\x{2029}b 
 | |
| \= Expect no match
 | |
|     a\n\rb    
 | |
| 
 | |
| /^a\R*b/bsr=unicode,utf
 | |
|     ab
 | |
|     a\nb
 | |
|     a\rb
 | |
|     a\r\nb
 | |
|     a\x0bb
 | |
|     a\x0c\x{2028}\x{2029}b
 | |
|     a\x{85}b   
 | |
|     a\n\rb    
 | |
|     a\n\r\x{85}\x0cb 
 | |
| 
 | |
| /^a\R+b/bsr=unicode,utf
 | |
|     a\nb
 | |
|     a\rb
 | |
|     a\r\nb
 | |
|     a\x0bb
 | |
|     a\x0c\x{2028}\x{2029}b
 | |
|     a\x{85}b   
 | |
|     a\n\rb    
 | |
|     a\n\r\x{85}\x0cb 
 | |
| \= Expect no match
 | |
|     ab  
 | |
| 
 | |
| /^a\R{1,3}b/bsr=unicode,utf
 | |
|     a\nb
 | |
|     a\n\rb
 | |
|     a\n\r\x{85}b
 | |
|     a\r\n\r\nb 
 | |
|     a\r\n\r\n\r\nb 
 | |
|     a\n\r\n\rb
 | |
|     a\n\n\r\nb 
 | |
| \= Expect no match
 | |
|     a\n\n\n\rb
 | |
|     a\r
 | |
| 
 | |
| /\H\h\V\v/utf
 | |
|     X X\x0a
 | |
|     X\x09X\x0b
 | |
| \= Expect no match
 | |
|     \x{a0} X\x0a   
 | |
|     
 | |
| /\H*\h+\V?\v{3,4}/utf
 | |
|     \x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a
 | |
|     \x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a
 | |
|     \x09\x20\x{a0}\x0a\x0b\x0c
 | |
| \= Expect no match 
 | |
|     \x09\x20\x{a0}\x0a\x0b
 | |
|      
 | |
| /\H\h\V\v/utf
 | |
|     \x{3001}\x{3000}\x{2030}\x{2028}
 | |
|     X\x{180e}X\x{85}
 | |
| \= Expect no match
 | |
|     \x{2009} X\x0a   
 | |
|     
 | |
| /\H*\h+\V?\v{3,4}/utf
 | |
|     \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a
 | |
|     \x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a
 | |
|     \x09\x20\x{202f}\x0a\x0b\x0c
 | |
| \= Expect no match 
 | |
|     \x09\x{200a}\x{a0}\x{2028}\x0b
 | |
|      
 | |
| /[\h]/B,utf
 | |
|     >\x{1680}
 | |
| 
 | |
| /[\h]{3,}/B,utf
 | |
|     >\x{1680}\x{180e}\x{2000}\x{2003}\x{200a}\x{202f}\x{205f}\x{3000}<
 | |
| 
 | |
| /[\v]/B,utf
 | |
| 
 | |
| /[\H]/B,utf
 | |
| 
 | |
| /[\V]/B,utf
 | |
| 
 | |
| /.*$/newline=any,utf
 | |
|     \x{1ec5} 
 | |
|     
 | |
| /a\Rb/I,bsr=anycrlf,utf
 | |
|     a\rb
 | |
|     a\nb
 | |
|     a\r\nb
 | |
| \= Expect no match
 | |
|     a\x{85}b
 | |
|     a\x0bb     
 | |
| 
 | |
| /a\Rb/I,bsr=unicode,utf
 | |
|     a\rb
 | |
|     a\nb
 | |
|     a\r\nb
 | |
|     a\x{85}b
 | |
|     a\x0bb     
 | |
|     
 | |
| /a\R?b/I,bsr=anycrlf,utf
 | |
|     a\rb
 | |
|     a\nb
 | |
|     a\r\nb
 | |
| \= Expect no match
 | |
|     a\x{85}b
 | |
|     a\x0bb     
 | |
| 
 | |
| /a\R?b/I,bsr=unicode,utf
 | |
|     a\rb
 | |
|     a\nb
 | |
|     a\r\nb
 | |
|     a\x{85}b
 | |
|     a\x0bb     
 | |
|  
 | |
| /.*a.*=.b.*/utf,newline=any
 | |
|     QQQ\x{2029}ABCaXYZ=!bPQR
 | |
| \= Expect no match
 | |
|     a\x{2029}b
 | |
|     \x61\xe2\x80\xa9\x62 
 | |
| 
 | |
| /[[:a\x{100}b:]]/utf
 | |
| 
 | |
| /a[^]b/utf,alt_bsux,allow_empty_class,match_unset_backref
 | |
|     a\x{1234}b
 | |
|     a\nb 
 | |
| \= Expect no match
 | |
|     ab  
 | |
|     
 | |
| /a[^]+b/utf,alt_bsux,allow_empty_class,match_unset_backref
 | |
|     aXb
 | |
|     a\nX\nX\x{1234}b 
 | |
| \= Expect no match
 | |
|     ab  
 | |
| 
 | |
| /(\x{de})\1/
 | |
|     \x{de}\x{de}
 | |
| 
 | |
| /X/newline=any,utf,firstline
 | |
|     A\x{1ec5}ABCXYZ
 | |
| 
 | |
| /Xa{2,4}b/utf
 | |
|     X\=ps
 | |
|     Xa\=ps
 | |
|     Xaa\=ps
 | |
|     Xaaa\=ps
 | |
|     Xaaaa\=ps
 | |
|     
 | |
| /Xa{2,4}?b/utf
 | |
|     X\=ps
 | |
|     Xa\=ps
 | |
|     Xaa\=ps
 | |
|     Xaaa\=ps
 | |
|     Xaaaa\=ps
 | |
|     
 | |
| /Xa{2,4}+b/utf
 | |
|     X\=ps
 | |
|     Xa\=ps
 | |
|     Xaa\=ps
 | |
|     Xaaa\=ps
 | |
|     Xaaaa\=ps
 | |
|     
 | |
| /X\x{123}{2,4}b/utf
 | |
|     X\=ps
 | |
|     X\x{123}\=ps
 | |
|     X\x{123}\x{123}\=ps
 | |
|     X\x{123}\x{123}\x{123}\=ps
 | |
|     X\x{123}\x{123}\x{123}\x{123}\=ps
 | |
|     
 | |
| /X\x{123}{2,4}?b/utf
 | |
|     X\=ps
 | |
|     X\x{123}\=ps
 | |
|     X\x{123}\x{123}\=ps
 | |
|     X\x{123}\x{123}\x{123}\=ps
 | |
|     X\x{123}\x{123}\x{123}\x{123}\=ps
 | |
|     
 | |
| /X\x{123}{2,4}+b/utf
 | |
|     X\=ps
 | |
|     X\x{123}\=ps
 | |
|     X\x{123}\x{123}\=ps
 | |
|     X\x{123}\x{123}\x{123}\=ps
 | |
|     X\x{123}\x{123}\x{123}\x{123}\=ps
 | |
|     
 | |
| /X\x{123}{2,4}b/utf
 | |
| \= Expect no match
 | |
|     Xx\=ps
 | |
|     X\x{123}x\=ps
 | |
|     X\x{123}\x{123}x\=ps
 | |
|     X\x{123}\x{123}\x{123}x\=ps
 | |
|     X\x{123}\x{123}\x{123}\x{123}x\=ps
 | |
|     
 | |
| /X\x{123}{2,4}?b/utf
 | |
| \= Expect no match
 | |
|     Xx\=ps
 | |
|     X\x{123}x\=ps
 | |
|     X\x{123}\x{123}x\=ps
 | |
|     X\x{123}\x{123}\x{123}x\=ps
 | |
|     X\x{123}\x{123}\x{123}\x{123}x\=ps
 | |
|     
 | |
| /X\x{123}{2,4}+b/utf
 | |
| \= Expect no match
 | |
|     Xx\=ps
 | |
|     X\x{123}x\=ps
 | |
|     X\x{123}\x{123}x\=ps
 | |
|     X\x{123}\x{123}\x{123}x\=ps
 | |
|     X\x{123}\x{123}\x{123}\x{123}x\=ps
 | |
|     
 | |
| /X\d{2,4}b/utf
 | |
|     X\=ps
 | |
|     X3\=ps
 | |
|     X33\=ps
 | |
|     X333\=ps
 | |
|     X3333\=ps
 | |
|     
 | |
| /X\d{2,4}?b/utf
 | |
|     X\=ps
 | |
|     X3\=ps
 | |
|     X33\=ps
 | |
|     X333\=ps
 | |
|     X3333\=ps
 | |
|     
 | |
| /X\d{2,4}+b/utf
 | |
|     X\=ps
 | |
|     X3\=ps
 | |
|     X33\=ps
 | |
|     X333\=ps
 | |
|     X3333\=ps
 | |
| 
 | |
| /X\D{2,4}b/utf
 | |
|     X\=ps
 | |
|     Xa\=ps
 | |
|     Xaa\=ps
 | |
|     Xaaa\=ps
 | |
|     Xaaaa\=ps
 | |
|     
 | |
| /X\D{2,4}?b/utf
 | |
|     X\=ps
 | |
|     Xa\=ps
 | |
|     Xaa\=ps
 | |
|     Xaaa\=ps
 | |
|     Xaaaa\=ps
 | |
|     
 | |
| /X\D{2,4}+b/utf
 | |
|     X\=ps
 | |
|     Xa\=ps
 | |
|     Xaa\=ps
 | |
|     Xaaa\=ps
 | |
|     Xaaaa\=ps
 | |
| 
 | |
| /X\D{2,4}b/utf
 | |
|     X\=ps
 | |
|     X\x{123}\=ps
 | |
|     X\x{123}\x{123}\=ps
 | |
|     X\x{123}\x{123}\x{123}\=ps
 | |
|     X\x{123}\x{123}\x{123}\x{123}\=ps
 | |
|     
 | |
| /X\D{2,4}?b/utf
 | |
|     X\=ps
 | |
|     X\x{123}\=ps
 | |
|     X\x{123}\x{123}\=ps
 | |
|     X\x{123}\x{123}\x{123}\=ps
 | |
|     X\x{123}\x{123}\x{123}\x{123}\=ps
 | |
|     
 | |
| /X\D{2,4}+b/utf
 | |
|     X\=ps
 | |
|     X\x{123}\=ps
 | |
|     X\x{123}\x{123}\=ps
 | |
|     X\x{123}\x{123}\x{123}\=ps
 | |
|     X\x{123}\x{123}\x{123}\x{123}\=ps
 | |
| 
 | |
| /X[abc]{2,4}b/utf
 | |
|     X\=ps
 | |
|     Xa\=ps
 | |
|     Xaa\=ps
 | |
|     Xaaa\=ps
 | |
|     Xaaaa\=ps
 | |
|     
 | |
| /X[abc]{2,4}?b/utf
 | |
|     X\=ps
 | |
|     Xa\=ps
 | |
|     Xaa\=ps
 | |
|     Xaaa\=ps
 | |
|     Xaaaa\=ps
 | |
|     
 | |
| /X[abc]{2,4}+b/utf
 | |
|     X\=ps
 | |
|     Xa\=ps
 | |
|     Xaa\=ps
 | |
|     Xaaa\=ps
 | |
|     Xaaaa\=ps
 | |
| 
 | |
| /X[abc\x{123}]{2,4}b/utf
 | |
|     X\=ps
 | |
|     X\x{123}\=ps
 | |
|     X\x{123}\x{123}\=ps
 | |
|     X\x{123}\x{123}\x{123}\=ps
 | |
|     X\x{123}\x{123}\x{123}\x{123}\=ps
 | |
|     
 | |
| /X[abc\x{123}]{2,4}?b/utf
 | |
|     X\=ps
 | |
|     X\x{123}\=ps
 | |
|     X\x{123}\x{123}\=ps
 | |
|     X\x{123}\x{123}\x{123}\=ps
 | |
|     X\x{123}\x{123}\x{123}\x{123}\=ps
 | |
|     
 | |
| /X[abc\x{123}]{2,4}+b/utf
 | |
|     X\=ps
 | |
|     X\x{123}\=ps
 | |
|     X\x{123}\x{123}\=ps
 | |
|     X\x{123}\x{123}\x{123}\=ps
 | |
|     X\x{123}\x{123}\x{123}\x{123}\=ps
 | |
| 
 | |
| /X[^a]{2,4}b/utf
 | |
|     X\=ps
 | |
|     Xz\=ps
 | |
|     Xzz\=ps
 | |
|     Xzzz\=ps
 | |
|     Xzzzz\=ps
 | |
|     
 | |
| /X[^a]{2,4}?b/utf
 | |
|     X\=ps
 | |
|     Xz\=ps
 | |
|     Xzz\=ps
 | |
|     Xzzz\=ps
 | |
|     Xzzzz\=ps
 | |
|     
 | |
| /X[^a]{2,4}+b/utf
 | |
|     X\=ps
 | |
|     Xz\=ps
 | |
|     Xzz\=ps
 | |
|     Xzzz\=ps
 | |
|     Xzzzz\=ps
 | |
| 
 | |
| /X[^a]{2,4}b/utf
 | |
|     X\=ps
 | |
|     X\x{123}\=ps
 | |
|     X\x{123}\x{123}\=ps
 | |
|     X\x{123}\x{123}\x{123}\=ps
 | |
|     X\x{123}\x{123}\x{123}\x{123}\=ps
 | |
|     
 | |
| /X[^a]{2,4}?b/utf
 | |
|     X\=ps
 | |
|     X\x{123}\=ps
 | |
|     X\x{123}\x{123}\=ps
 | |
|     X\x{123}\x{123}\x{123}\=ps
 | |
|     X\x{123}\x{123}\x{123}\x{123}\=ps
 | |
|     
 | |
| /X[^a]{2,4}+b/utf
 | |
|     X\=ps
 | |
|     X\x{123}\=ps
 | |
|     X\x{123}\x{123}\=ps
 | |
|     X\x{123}\x{123}\x{123}\=ps
 | |
|     X\x{123}\x{123}\x{123}\x{123}\=ps
 | |
| 
 | |
| /(Y)X\1{2,4}b/utf
 | |
|     YX\=ps
 | |
|     YXY\=ps
 | |
|     YXYY\=ps
 | |
|     YXYYY\=ps
 | |
|     YXYYYY\=ps
 | |
|     
 | |
| /(Y)X\1{2,4}?b/utf
 | |
|     YX\=ps
 | |
|     YXY\=ps
 | |
|     YXYY\=ps
 | |
|     YXYYY\=ps
 | |
|     YXYYYY\=ps
 | |
|     
 | |
| /(Y)X\1{2,4}+b/utf
 | |
|     YX\=ps
 | |
|     YXY\=ps
 | |
|     YXYY\=ps
 | |
|     YXYYY\=ps
 | |
|     YXYYYY\=ps
 | |
| 
 | |
| /(\x{123})X\1{2,4}b/utf
 | |
|     \x{123}X\=ps
 | |
|     \x{123}X\x{123}\=ps
 | |
|     \x{123}X\x{123}\x{123}\=ps
 | |
|     \x{123}X\x{123}\x{123}\x{123}\=ps
 | |
|     \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps
 | |
|     
 | |
| /(\x{123})X\1{2,4}?b/utf
 | |
|     \x{123}X\=ps
 | |
|     \x{123}X\x{123}\=ps
 | |
|     \x{123}X\x{123}\x{123}\=ps
 | |
|     \x{123}X\x{123}\x{123}\x{123}\=ps
 | |
|     \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps
 | |
|     
 | |
| /(\x{123})X\1{2,4}+b/utf
 | |
|     \x{123}X\=ps
 | |
|     \x{123}X\x{123}\=ps
 | |
|     \x{123}X\x{123}\x{123}\=ps
 | |
|     \x{123}X\x{123}\x{123}\x{123}\=ps
 | |
|     \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps
 | |
| 
 | |
| /\bthe cat\b/utf
 | |
|     the cat\=ps
 | |
|     the cat\=ph
 | |
| 
 | |
| /abcd*/utf
 | |
|     xxxxabcd\=ps
 | |
|     xxxxabcd\=ph
 | |
| 
 | |
| /abcd*/i,utf
 | |
|     xxxxabcd\=ps
 | |
|     xxxxabcd\=ph
 | |
|     XXXXABCD\=ps
 | |
|     XXXXABCD\=ph
 | |
| 
 | |
| /abc\d*/utf
 | |
|     xxxxabc1\=ps
 | |
|     xxxxabc1\=ph
 | |
| 
 | |
| /(a)bc\1*/utf
 | |
|     xxxxabca\=ps
 | |
|     xxxxabca\=ph
 | |
| 
 | |
| /abc[de]*/utf
 | |
|     xxxxabcde\=ps
 | |
|     xxxxabcde\=ph
 | |
| 
 | |
| /X\W{3}X/utf
 | |
|     X\=ps
 | |
| 
 | |
| /\sxxx\s/utf,tables=2
 | |
|     AB\x{85}xxx\x{a0}XYZ
 | |
|     AB\x{a0}xxx\x{85}XYZ
 | |
| 
 | |
| /\S \S/utf,tables=2
 | |
|     \x{a2} \x{84} 
 | |
| 
 | |
| 'A#хц'Bx,newline=any,utf
 | |
| 
 | |
| 'A#хц
 | |
|   PQ'Bx,newline=any,utf
 | |
|   
 | |
| /a+#хaa
 | |
|   z#XX?/Bx,newline=any,utf
 | |
| 
 | |
| /a+#хaa
 | |
|   z#х?/Bx,newline=any,utf
 | |
| 
 | |
| /\g{A}xxx#bXX(?'A'123)
 | |
| (?'A'456)/Bx,newline=any,utf
 | |
| 
 | |
| /\g{A}xxx#bх(?'A'123)
 | |
| (?'A'456)/Bx,newline=any,utf
 | |
| 
 | |
| /^\cģ/utf
 | |
| 
 | |
| /(\R*)(.)/s,utf
 | |
|     \r\n
 | |
|     \r\r\n\n\r 
 | |
|     \r\r\n\n\r\n 
 | |
| 
 | |
| /(\R)*(.)/s,utf
 | |
|     \r\n
 | |
|     \r\r\n\n\r 
 | |
|     \r\r\n\n\r\n 
 | |
| 
 | |
| /[^\x{1234}]+/Ii,utf
 | |
| 
 | |
| /[^\x{1234}]+?/Ii,utf
 | |
| 
 | |
| /[^\x{1234}]++/Ii,utf
 | |
| 
 | |
| /[^\x{1234}]{2}/Ii,utf
 | |
| 
 | |
| /f.*/
 | |
|     for\=ph
 | |
| 
 | |
| /f.*/s
 | |
|     for\=ph
 | |
| 
 | |
| /f.*/utf
 | |
|     for\=ph
 | |
| 
 | |
| /f.*/s,utf
 | |
|     for\=ph
 | |
|     
 | |
| /\x{d7ff}\x{e000}/utf
 | |
| 
 | |
| /\x{d800}/utf
 | |
| 
 | |
| /\x{dfff}/utf
 | |
| 
 | |
| /\h+/utf
 | |
|     \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
 | |
|     \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000}
 | |
| 
 | |
| /[\h\x{e000}]+/B,utf
 | |
|     \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
 | |
|     \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000}
 | |
| 
 | |
| /\H+/utf
 | |
|     \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
 | |
|     \x{2000}\x{200a}\x{1fff}\x{200b}
 | |
|     \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
 | |
|     \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001}
 | |
| 
 | |
| /[\H\x{d7ff}]+/B,utf
 | |
|     \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
 | |
|     \x{2000}\x{200a}\x{1fff}\x{200b}
 | |
|     \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
 | |
|     \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001}
 | |
| 
 | |
| /\v+/utf
 | |
|     \x{2027}\x{2030}\x{2028}\x{2029}
 | |
|     \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
 | |
| 
 | |
| /[\v\x{e000}]+/B,utf
 | |
|     \x{2027}\x{2030}\x{2028}\x{2029}
 | |
|     \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
 | |
| 
 | |
| /\V+/utf
 | |
|     \x{2028}\x{2029}\x{2027}\x{2030}
 | |
|     \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86}
 | |
| 
 | |
| /[\V\x{d7ff}]+/B,utf
 | |
|     \x{2028}\x{2029}\x{2027}\x{2030}
 | |
|     \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86}
 | |
| 
 | |
| /\R+/bsr=unicode,utf
 | |
|     \x{2027}\x{2030}\x{2028}\x{2029}
 | |
|     \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
 | |
| 
 | |
| /(..)\1/utf
 | |
|     ab\=ps
 | |
|     aba\=ps
 | |
|     abab\=ps
 | |
| 
 | |
| /(..)\1/i,utf
 | |
|     ab\=ps
 | |
|     abA\=ps
 | |
|     aBAb\=ps
 | |
| 
 | |
| /(..)\1{2,}/utf
 | |
|     ab\=ps
 | |
|     aba\=ps
 | |
|     abab\=ps
 | |
|     ababa\=ps
 | |
|     ababab\=ps
 | |
|     ababab\=ph
 | |
|     abababa\=ps
 | |
|     abababa\=ph
 | |
| 
 | |
| /(..)\1{2,}/i,utf
 | |
|     ab\=ps
 | |
|     aBa\=ps
 | |
|     aBAb\=ps
 | |
|     AbaBA\=ps
 | |
|     abABAb\=ps
 | |
|     aBAbaB\=ph
 | |
|     abABabA\=ps
 | |
|     abaBABa\=ph
 | |
| 
 | |
| /(..)\1{2,}?x/i,utf
 | |
|     ab\=ps
 | |
|     abA\=ps
 | |
|     aBAb\=ps
 | |
|     abaBA\=ps
 | |
|     abAbaB\=ps
 | |
|     abaBabA\=ps
 | |
|     abAbABaBx\=ps
 | |
| 
 | |
| /./utf,newline=crlf
 | |
|     \r\=ps
 | |
|     \r\=ph
 | |
|   
 | |
| /.{2,3}/utf,newline=crlf
 | |
|     \r\=ps
 | |
|     \r\=ph
 | |
|     \r\r\=ps
 | |
|     \r\r\=ph
 | |
|     \r\r\r\=ps
 | |
|     \r\r\r\=ph
 | |
| 
 | |
| /.{2,3}?/utf,newline=crlf
 | |
|     \r\=ps
 | |
|     \r\=ph
 | |
|     \r\r\=ps
 | |
|     \r\r\=ph
 | |
|     \r\r\r\=ps
 | |
|     \r\r\r\=ph
 | |
| 
 | |
| /[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/B,utf
 | |
| 
 | |
| /[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/Bi,utf
 | |
| 
 | |
| /[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/B,utf
 | |
| 
 | |
| /[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/Bi,utf
 | |
| 
 | |
| /(?<=\x{1234}\x{1234})\bxy/I,utf
 | |
| 
 | |
| /(?<!^)ETA/utf
 | |
| \= Expect no match
 | |
|     ETA
 | |
| 
 | |
| /\u0100/B,utf,alt_bsux,allow_empty_class,match_unset_backref
 | |
| 
 | |
| /[\u0100-\u0200]/B,utf,alt_bsux,allow_empty_class,match_unset_backref
 | |
| 
 | |
| /\ud800/utf,alt_bsux,allow_empty_class,match_unset_backref
 | |
| 
 | |
| /^a+[a\x{200}]/B,utf
 | |
|     aa
 | |
| 
 | |
| /[b-d\x{200}-\x{250}]*[ae-h]?#[\x{200}-\x{250}]{0,8}[\x00-\xff]*#[\x{200}-\x{250}]+[a-z]/B,utf
 | |
| 
 | |
| /[\p{L}]/IB
 | |
| 
 | |
| /[\p{^L}]/IB
 | |
| 
 | |
| /[\P{L}]/IB
 | |
| 
 | |
| /[\P{^L}]/IB
 | |
| 
 | |
| /[abc\p{L}\x{0660}]/IB,utf
 | |
| 
 | |
| /[\p{Nd}]/IB,utf
 | |
|     1234
 | |
| 
 | |
| /[\p{Nd}+-]+/IB,utf
 | |
|     1234
 | |
|     12-34
 | |
|     12+\x{661}-34  
 | |
| \= Expect no match
 | |
|     abcd  
 | |
| 
 | |
| /(?:[\PPa*]*){8,}/
 | |
| 
 | |
| /[\P{Any}]/B
 | |
| 
 | |
| /[\P{Any}\E]/B
 | |
| 
 | |
| /(\P{Yi}+\277)/
 | |
| 
 | |
| /(\P{Yi}+\277)?/
 | |
| 
 | |
| /(?<=\P{Yi}{3}A)X/
 | |
| 
 | |
| /\p{Yi}+(\P{Yi}+)(?1)/
 | |
| 
 | |
| /(\P{Yi}{2}\277)?/
 | |
| 
 | |
| /[\P{Yi}A]/
 | |
| 
 | |
| /[\P{Yi}\P{Yi}\P{Yi}A]/
 | |
| 
 | |
| /[^\P{Yi}A]/
 | |
| 
 | |
| /[^\P{Yi}\P{Yi}\P{Yi}A]/
 | |
| 
 | |
| /(\P{Yi}*\277)*/
 | |
| 
 | |
| /(\P{Yi}*?\277)*/
 | |
| 
 | |
| /(\p{Yi}*+\277)*/
 | |
| 
 | |
| /(\P{Yi}?\277)*/
 | |
| 
 | |
| /(\P{Yi}??\277)*/
 | |
| 
 | |
| /(\p{Yi}?+\277)*/
 | |
| 
 | |
| /(\P{Yi}{0,3}\277)*/
 | |
| 
 | |
| /(\P{Yi}{0,3}?\277)*/
 | |
| 
 | |
| /(\p{Yi}{0,3}+\277)*/
 | |
| 
 | |
| /\p{Zl}{2,3}+/B,utf
 | |
|     
 | |
|     \x{2028}\x{2028}\x{2028}
 | |
|     
 | |
| /\p{Zl}/B,utf
 | |
| 
 | |
| /\p{Lu}{3}+/B,utf
 | |
| 
 | |
| /\pL{2}+/B,utf
 | |
| 
 | |
| /\p{Cc}{2}+/B,utf
 | |
| 
 | |
| /^\p{Cf}/utf
 | |
|     \x{180e}
 | |
|     \x{061c}
 | |
|     \x{2066}
 | |
|     \x{2067}
 | |
|     \x{2068}
 | |
|     \x{2069}
 | |
| 
 | |
| /^\p{Cs}/utf
 | |
|     \x{dfff}\=no_utf_check
 | |
| \= Expect no match
 | |
|     \x{09f} 
 | |
|   
 | |
| /^\p{Mn}/utf
 | |
|     \x{1a1b}
 | |
| 
 | |
| /^\p{Pe}/utf
 | |
|     \x{2309}
 | |
|     \x{230b}
 | |
| 
 | |
| /^\p{Ps}/utf
 | |
|     \x{2308}
 | |
|     \x{230a}
 | |
| 
 | |
| /^\p{Sc}+/utf
 | |
|     $\x{a2}\x{a3}\x{a4}\x{a5}\x{a6}
 | |
|     \x{9f2}
 | |
| \= Expect no match
 | |
|     X
 | |
|     \x{2c2}
 | |
|   
 | |
| /^\p{Zs}/utf
 | |
|     \ \
 | |
|     \x{a0}
 | |
|     \x{1680}
 | |
|     \x{2000}
 | |
|     \x{2001}     
 | |
| \= Expect no match
 | |
|     \x{2028}
 | |
|     \x{200d} 
 | |
|   
 | |
| # These are here because Perl has problems with the negative versions of the
 | |
| # properties and has changed how it behaves for caseless matching.
 | |
|       
 | |
| /\p{^Lu}/i,utf
 | |
|     1234
 | |
| \= Expect no match
 | |
|     ABC 
 | |
| 
 | |
| /\P{Lu}/i,utf
 | |
|     1234
 | |
| \= Expect no match
 | |
|     ABC 
 | |
| 
 | |
| /\p{Ll}/i,utf
 | |
|     a
 | |
|     Az
 | |
| \= Expect no match
 | |
|     ABC   
 | |
| 
 | |
| /\p{Lu}/i,utf
 | |
|     A
 | |
|     a\x{10a0}B 
 | |
| \= Expect no match 
 | |
|     a
 | |
|     \x{1d00}  
 | |
| 
 | |
| /\p{Lu}/i,utf
 | |
|     A
 | |
|     aZ
 | |
| \= Expect no match
 | |
|     abc   
 | |
| 
 | |
| /[\x{c0}\x{391}]/i,utf
 | |
|     \x{c0}
 | |
|     \x{e0} 
 | |
| 
 | |
| # The next two are special cases where the lengths of the different cases of
 | |
| # the same character differ. The first went wrong with heap frame storage; the
 | |
| # second was broken in all cases. 
 | |
| 
 | |
| /^\x{023a}+?(\x{0130}+)/i,utf
 | |
|   \x{023a}\x{2c65}\x{0130}
 | |
|   
 | |
| /^\x{023a}+([^X])/i,utf
 | |
|   \x{023a}\x{2c65}X
 | |
| 
 | |
| /\x{c0}+\x{116}+/i,utf
 | |
|     \x{c0}\x{e0}\x{116}\x{117}
 | |
| 
 | |
| /[\x{c0}\x{116}]+/i,utf
 | |
|     \x{c0}\x{e0}\x{116}\x{117}
 | |
| 
 | |
| /(\x{de})\1/i,utf
 | |
|     \x{de}\x{de}
 | |
|     \x{de}\x{fe}
 | |
|     \x{fe}\x{fe}
 | |
|     \x{fe}\x{de}
 | |
| 
 | |
| /^\x{c0}$/i,utf
 | |
|     \x{c0}
 | |
|     \x{e0} 
 | |
| 
 | |
| /^\x{e0}$/i,utf
 | |
|     \x{c0}
 | |
|     \x{e0} 
 | |
| 
 | |
| # The next two should be Perl-compatible, but it fails to match \x{e0}. PCRE
 | |
| # will match it only with UCP support, because without that it has no notion
 | |
| # of case for anything other than the ASCII letters.  
 | |
| 
 | |
| /((?i)[\x{c0}])/utf
 | |
|     \x{c0}
 | |
|     \x{e0} 
 | |
| 
 | |
| /(?i:[\x{c0}])/utf
 | |
|     \x{c0}
 | |
|     \x{e0} 
 | |
| 
 | |
| # These are PCRE's extra properties to help with Unicodizing \d etc. 
 | |
| 
 | |
| /^\p{Xan}/utf
 | |
|     ABCD
 | |
|     1234
 | |
|     \x{6ca}
 | |
|     \x{a6c}
 | |
|     \x{10a7}   
 | |
| \= Expect no match
 | |
|     _ABC   
 | |
| 
 | |
| /^\p{Xan}+/utf
 | |
|     ABCD1234\x{6ca}\x{a6c}\x{10a7}_
 | |
| \= Expect no match
 | |
|     _ABC   
 | |
| 
 | |
| /^\p{Xan}+?/utf
 | |
|     \x{6ca}\x{a6c}\x{10a7}_
 | |
| 
 | |
| /^\p{Xan}*/utf
 | |
|     ABCD1234\x{6ca}\x{a6c}\x{10a7}_
 | |
|     
 | |
| /^\p{Xan}{2,9}/utf
 | |
|     ABCD1234\x{6ca}\x{a6c}\x{10a7}_
 | |
|     
 | |
| /^\p{Xan}{2,9}?/utf
 | |
|     \x{6ca}\x{a6c}\x{10a7}_
 | |
|     
 | |
| /^[\p{Xan}]/utf
 | |
|     ABCD1234_
 | |
|     1234abcd_
 | |
|     \x{6ca}
 | |
|     \x{a6c}
 | |
|     \x{10a7}   
 | |
| \= Expect no match
 | |
|     _ABC   
 | |
|  
 | |
| /^[\p{Xan}]+/utf
 | |
|     ABCD1234\x{6ca}\x{a6c}\x{10a7}_
 | |
| \= Expect no match
 | |
|     _ABC   
 | |
| 
 | |
| /^>\p{Xsp}/utf
 | |
|     >\x{1680}\x{2028}\x{0b}
 | |
|     >\x{a0} 
 | |
| \= Expect no match
 | |
|     \x{0b} 
 | |
| 
 | |
| /^>\p{Xsp}+/utf
 | |
|     > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
 | |
| 
 | |
| /^>\p{Xsp}+?/utf
 | |
|     >\x{1680}\x{2028}\x{0b}
 | |
| 
 | |
| /^>\p{Xsp}*/utf
 | |
|     > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
 | |
|     
 | |
| /^>\p{Xsp}{2,9}/utf
 | |
|     > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
 | |
|     
 | |
| /^>\p{Xsp}{2,9}?/utf
 | |
|     > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
 | |
|     
 | |
| /^>[\p{Xsp}]/utf
 | |
|     >\x{2028}\x{0b}
 | |
|  
 | |
| /^>[\p{Xsp}]+/utf
 | |
|     > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
 | |
| 
 | |
| /^>\p{Xps}/utf
 | |
|     >\x{1680}\x{2028}\x{0b}
 | |
|     >\x{a0} 
 | |
| \= Expect no match
 | |
|     \x{0b} 
 | |
| 
 | |
| /^>\p{Xps}+/utf
 | |
|     > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
 | |
| 
 | |
| /^>\p{Xps}+?/utf
 | |
|     >\x{1680}\x{2028}\x{0b}
 | |
| 
 | |
| /^>\p{Xps}*/utf
 | |
|     > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
 | |
|     
 | |
| /^>\p{Xps}{2,9}/utf
 | |
|     > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
 | |
|     
 | |
| /^>\p{Xps}{2,9}?/utf
 | |
|     > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
 | |
|     
 | |
| /^>[\p{Xps}]/utf
 | |
|     >\x{2028}\x{0b}
 | |
|  
 | |
| /^>[\p{Xps}]+/utf
 | |
|     > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
 | |
| 
 | |
| /^\p{Xwd}/utf
 | |
|     ABCD
 | |
|     1234
 | |
|     \x{6ca}
 | |
|     \x{a6c}
 | |
|     \x{10a7}
 | |
|     _ABC    
 | |
| \= Expect no match
 | |
|     [] 
 | |
| 
 | |
| /^\p{Xwd}+/utf
 | |
|     ABCD1234\x{6ca}\x{a6c}\x{10a7}_
 | |
| 
 | |
| /^\p{Xwd}+?/utf
 | |
|     \x{6ca}\x{a6c}\x{10a7}_
 | |
| 
 | |
| /^\p{Xwd}*/utf
 | |
|     ABCD1234\x{6ca}\x{a6c}\x{10a7}_
 | |
|     
 | |
| /^\p{Xwd}{2,9}/utf
 | |
|     A_B12\x{6ca}\x{a6c}\x{10a7}
 | |
|     
 | |
| /^\p{Xwd}{2,9}?/utf
 | |
|     \x{6ca}\x{a6c}\x{10a7}_
 | |
|     
 | |
| /^[\p{Xwd}]/utf
 | |
|     ABCD1234_
 | |
|     1234abcd_
 | |
|     \x{6ca}
 | |
|     \x{a6c}
 | |
|     \x{10a7}   
 | |
|     _ABC 
 | |
| \= Expect no match
 | |
|     []   
 | |
|  
 | |
| /^[\p{Xwd}]+/utf
 | |
|     ABCD1234\x{6ca}\x{a6c}\x{10a7}_
 | |
| 
 | |
| # A check not in UTF-8 mode 
 | |
| 
 | |
| /^[\p{Xwd}]+/
 | |
|     ABCD1234_
 | |
|     
 | |
| # Some negative checks 
 | |
| 
 | |
| /^[\P{Xwd}]+/utf
 | |
|     !.+\x{019}\x{35a}AB
 | |
| 
 | |
| /^[\p{^Xwd}]+/utf
 | |
|     !.+\x{019}\x{35a}AB
 | |
| 
 | |
| /[\D]/B,utf,ucp
 | |
|     1\x{3c8}2
 | |
| 
 | |
| /[\d]/B,utf,ucp
 | |
|     >\x{6f4}<
 | |
| 
 | |
| /[\S]/B,utf,ucp
 | |
|     \x{1680}\x{6f4}\x{1680}
 | |
| 
 | |
| /[\s]/B,utf,ucp
 | |
|     >\x{1680}<
 | |
| 
 | |
| /[\W]/B,utf,ucp
 | |
|     A\x{1712}B
 | |
| 
 | |
| /[\w]/B,utf,ucp
 | |
|     >\x{1723}<
 | |
| 
 | |
| /\D/B,utf,ucp
 | |
|     1\x{3c8}2
 | |
| 
 | |
| /\d/B,utf,ucp
 | |
|     >\x{6f4}<
 | |
| 
 | |
| /\S/B,utf,ucp
 | |
|     \x{1680}\x{6f4}\x{1680}
 | |
| 
 | |
| /\s/B,utf,ucp
 | |
|     >\x{1680}>
 | |
| 
 | |
| /\W/B,utf,ucp
 | |
|     A\x{1712}B
 | |
| 
 | |
| /\w/B,utf,ucp
 | |
|     >\x{1723}<
 | |
| 
 | |
| /[[:alpha:]]/B,ucp
 | |
| 
 | |
| /[[:lower:]]/B,ucp
 | |
| 
 | |
| /[[:upper:]]/B,ucp
 | |
| 
 | |
| /[[:alnum:]]/B,ucp
 | |
| 
 | |
| /[[:ascii:]]/B,ucp
 | |
| 
 | |
| /[[:cntrl:]]/B,ucp
 | |
| 
 | |
| /[[:digit:]]/B,ucp
 | |
| 
 | |
| /[[:graph:]]/B,ucp
 | |
| 
 | |
| /[[:print:]]/B,ucp
 | |
| 
 | |
| /[[:punct:]]/B,ucp
 | |
| 
 | |
| /[[:space:]]/B,ucp
 | |
| 
 | |
| /[[:word:]]/B,ucp
 | |
| 
 | |
| /[[:xdigit:]]/B,ucp
 | |
| 
 | |
| # Unicode properties for \b and \B 
 | |
| 
 | |
| /\b...\B/utf,ucp
 | |
|     abc_
 | |
|     \x{37e}abc\x{376} 
 | |
|     \x{37e}\x{376}\x{371}\x{393}\x{394} 
 | |
|     !\x{c0}++\x{c1}\x{c2} 
 | |
|     !\x{c0}+++++ 
 | |
| 
 | |
| # Without PCRE_UCP, non-ASCII always fail, even if < 256  
 | |
| 
 | |
| /\b...\B/utf
 | |
|     abc_
 | |
| \= Expect no match 
 | |
|     \x{37e}abc\x{376} 
 | |
|     \x{37e}\x{376}\x{371}\x{393}\x{394} 
 | |
|     !\x{c0}++\x{c1}\x{c2} 
 | |
|     !\x{c0}+++++ 
 | |
| 
 | |
| # With PCRE_UCP, non-UTF8 chars that are < 256 still check properties  
 | |
| 
 | |
| /\b...\B/ucp
 | |
|     abc_
 | |
|     !\x{c0}++\x{c1}\x{c2} 
 | |
|     !\x{c0}+++++ 
 | |
| 
 | |
| # Some of these are silly, but they check various combinations 
 | |
| 
 | |
| /[[:^alpha:][:^cntrl:]]+/B,utf,ucp
 | |
|     123
 | |
|     abc 
 | |
| 
 | |
| /[[:^cntrl:][:^alpha:]]+/B,utf,ucp
 | |
|     123
 | |
|     abc 
 | |
| 
 | |
| /[[:alpha:]]+/B,utf,ucp
 | |
|     abc
 | |
| 
 | |
| /[[:^alpha:]\S]+/B,utf,ucp
 | |
|     123
 | |
|     abc 
 | |
| 
 | |
| /[^\d]+/B,utf,ucp
 | |
|     abc123
 | |
|     abc\x{123}
 | |
|     \x{660}abc   
 | |
| 
 | |
| /\p{Lu}+9\p{Lu}+B\p{Lu}+b/B
 | |
| 
 | |
| /\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/B
 | |
| 
 | |
| /\P{Lu}+9\P{Lu}+B\P{Lu}+b/B
 | |
| 
 | |
| /\p{Han}+X\p{Greek}+\x{370}/B,utf
 | |
| 
 | |
| /\p{Xan}+!\p{Xan}+A/B
 | |
| 
 | |
| /\p{Xsp}+!\p{Xsp}\t/B
 | |
| 
 | |
| /\p{Xps}+!\p{Xps}\t/B
 | |
| 
 | |
| /\p{Xwd}+!\p{Xwd}_/B
 | |
| 
 | |
| /A+\p{N}A+\dB+\p{N}*B+\d*/B,ucp
 | |
| 
 | |
| # These behaved oddly in Perl, so they are kept in this test 
 | |
| 
 | |
| /(\x{23a}\x{23a}\x{23a})?\1/i,utf
 | |
| \= Expect no match
 | |
|     \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
 | |
| 
 | |
| /(ȺȺȺ)?\1/i,utf
 | |
| \= Expect no match
 | |
|     ȺȺȺⱥⱥ
 | |
| 
 | |
| /(\x{23a}\x{23a}\x{23a})?\1/i,utf
 | |
|     \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
 | |
| 
 | |
| /(ȺȺȺ)?\1/i,utf
 | |
|     ȺȺȺⱥⱥⱥ
 | |
| 
 | |
| /(\x{23a}\x{23a}\x{23a})\1/i,utf
 | |
| \= Expect no match
 | |
|     \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
 | |
| 
 | |
| /(ȺȺȺ)\1/i,utf
 | |
| \= Expect no match
 | |
|     ȺȺȺⱥⱥ
 | |
| 
 | |
| /(\x{23a}\x{23a}\x{23a})\1/i,utf
 | |
|     \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
 | |
| 
 | |
| /(ȺȺȺ)\1/i,utf
 | |
|     ȺȺȺⱥⱥⱥ
 | |
| 
 | |
| /(\x{2c65}\x{2c65})\1/i,utf
 | |
|     \x{2c65}\x{2c65}\x{23a}\x{23a}
 | |
|     
 | |
| /(ⱥⱥ)\1/i,utf
 | |
|     ⱥⱥȺȺ 
 | |
|     
 | |
| /(\x{23a}\x{23a}\x{23a})\1Y/i,utf
 | |
|     X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ
 | |
| 
 | |
| /(\x{2c65}\x{2c65})\1Y/i,utf
 | |
|     X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ
 | |
| 
 | |
| # These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE 
 | |
| 
 | |
| /^[\p{Batak}]/utf
 | |
|     \x{1bc0}
 | |
|     \x{1bff}
 | |
| \= Expect no match
 | |
|     \x{1bf4}
 | |
|     
 | |
| /^[\p{Brahmi}]/utf
 | |
|     \x{11000}
 | |
|     \x{1106f}
 | |
| \= Expect no match
 | |
|     \x{1104e}
 | |
|     
 | |
| /^[\p{Mandaic}]/utf
 | |
|     \x{840}
 | |
|     \x{85e}
 | |
| \= Expect no match
 | |
|     \x{85c}
 | |
|     \x{85d}    
 | |
| 
 | |
| /(\X*)(.)/s,utf
 | |
|     A\x{300}
 | |
| 
 | |
| /^S(\X*)e(\X*)$/utf
 | |
|     Stéréo
 | |
|     
 | |
| /^\X/utf
 | |
|     ́réo
 | |
| 
 | |
| /^a\X41z/alt_bsux,allow_empty_class,match_unset_backref,dupnames
 | |
|     aX41z
 | |
| \= Expect no match
 | |
|     aAz
 | |
| 
 | |
| /\X/
 | |
|     a\=ps
 | |
|     a\=ph
 | |
| 
 | |
| /\Xa/
 | |
|     aa\=ps
 | |
|     aa\=ph
 | |
| 
 | |
| /\X{2}/
 | |
|     aa\=ps
 | |
|     aa\=ph
 | |
| 
 | |
| /\X+a/
 | |
|     a\=ps
 | |
|     aa\=ps
 | |
|     aa\=ph
 | |
| 
 | |
| /\X+?a/
 | |
|     a\=ps
 | |
|     ab\=ps
 | |
|     aa\=ps
 | |
|     aa\=ph
 | |
|     aba\=ps
 | |
|     
 | |
| # These Unicode 6.1.0 scripts are not known to Perl.  
 | |
| 
 | |
| /\p{Chakma}\d/utf,ucp
 | |
|     \x{11100}\x{1113c}
 | |
| 
 | |
| /\p{Takri}\d/utf,ucp
 | |
|     \x{11680}\x{116c0}
 | |
| 
 | |
| /^\X/utf
 | |
|     A\=ps
 | |
|     A\=ph
 | |
|     A\x{300}\x{301}\=ps
 | |
|     A\x{300}\x{301}\=ph
 | |
|     A\x{301}\=ps
 | |
|     A\x{301}\=ph
 | |
|     
 | |
| /^\X{2,3}/utf
 | |
|     A\=ps
 | |
|     A\=ph
 | |
|     AA\=ps
 | |
|     AA\=ph
 | |
|     A\x{300}\x{301}\=ps
 | |
|     A\x{300}\x{301}\=ph
 | |
|     A\x{300}\x{301}A\x{300}\x{301}\=ps
 | |
|     A\x{300}\x{301}A\x{300}\x{301}\=ph
 | |
| 
 | |
| /^\X{2}/utf
 | |
|     AA\=ps
 | |
|     AA\=ph
 | |
|     A\x{300}\x{301}A\x{300}\x{301}\=ps
 | |
|     A\x{300}\x{301}A\x{300}\x{301}\=ph
 | |
|     
 | |
| /^\X+/utf
 | |
|     AA\=ps
 | |
|     AA\=ph
 | |
| 
 | |
| /^\X+?Z/utf
 | |
|     AA\=ps
 | |
|     AA\=ph
 | |
| 
 | |
| /A\x{3a3}B/IBi,utf
 | |
| 
 | |
| /[\x{3a3}]/Bi,utf
 | |
| 
 | |
| /[^\x{3a3}]/Bi,utf
 | |
| 
 | |
| /[\x{3a3}]+/Bi,utf
 | |
| 
 | |
| /[^\x{3a3}]+/Bi,utf
 | |
| 
 | |
| /a*\x{3a3}/Bi,utf
 | |
| 
 | |
| /\x{3a3}+a/Bi,utf
 | |
| 
 | |
| /\x{3a3}*\x{3c2}/Bi,utf
 | |
| 
 | |
| /\x{3a3}{3}/i,utf,aftertext
 | |
|     \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
 | |
| 
 | |
| /\x{3a3}{2,4}/i,utf,aftertext
 | |
|     \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
 | |
| 
 | |
| /\x{3a3}{2,4}?/i,utf,aftertext
 | |
|     \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
 | |
| 
 | |
| /\x{3a3}+./i,utf,aftertext
 | |
|     \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
 | |
| 
 | |
| /\x{3a3}++./i,utf,aftertext
 | |
| \= Expect no match
 | |
|     \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
 | |
| 
 | |
| /\x{3a3}*\x{3c2}/Bi,utf
 | |
| 
 | |
| /[^\x{3a3}]*\x{3c2}/Bi,utf
 | |
| 
 | |
| /[^a]*\x{3c2}/Bi,utf
 | |
| 
 | |
| /ist/Bi,utf
 | |
| \= Expect no match
 | |
|     ikt
 | |
| 
 | |
| /is+t/i,utf
 | |
|     iSs\x{17f}t
 | |
| \= Expect no match
 | |
|     ikt
 | |
| 
 | |
| /is+?t/i,utf
 | |
| \= Expect no match
 | |
|     ikt
 | |
| 
 | |
| /is?t/i,utf
 | |
| \= Expect no match
 | |
|     ikt
 | |
| 
 | |
| /is{2}t/i,utf
 | |
| \= Expect no match
 | |
|     iskt
 | |
|     
 | |
| # This property is a PCRE special 
 | |
| 
 | |
| /^\p{Xuc}/utf
 | |
|     $abc
 | |
|     @abc
 | |
|     `abc
 | |
|     \x{1234}abc
 | |
| \= Expect no match
 | |
|     abc     
 | |
| 
 | |
| /^\p{Xuc}+/utf
 | |
|     $@`\x{a0}\x{1234}\x{e000}**
 | |
| \= Expect no match
 | |
|     \x{9f}
 | |
| 
 | |
| /^\p{Xuc}+?/utf
 | |
|     $@`\x{a0}\x{1234}\x{e000}**
 | |
| \= Expect no match
 | |
|     \x{9f}
 | |
| 
 | |
| /^\p{Xuc}+?\*/utf
 | |
|     $@`\x{a0}\x{1234}\x{e000}**
 | |
| \= Expect no match
 | |
|     \x{9f}
 | |
| 
 | |
| /^\p{Xuc}++/utf
 | |
|     $@`\x{a0}\x{1234}\x{e000}**
 | |
| \= Expect no match
 | |
|     \x{9f}
 | |
| 
 | |
| /^\p{Xuc}{3,5}/utf
 | |
|     $@`\x{a0}\x{1234}\x{e000}**
 | |
| \= Expect no match
 | |
|     \x{9f}
 | |
| 
 | |
| /^\p{Xuc}{3,5}?/utf
 | |
|     $@`\x{a0}\x{1234}\x{e000}**
 | |
| \= Expect no match
 | |
|     \x{9f}
 | |
| 
 | |
| /^[\p{Xuc}]/utf
 | |
|     $@`\x{a0}\x{1234}\x{e000}**
 | |
| \= Expect no match
 | |
|     \x{9f}
 | |
| 
 | |
| /^[\p{Xuc}]+/utf
 | |
|     $@`\x{a0}\x{1234}\x{e000}**
 | |
| \= Expect no match
 | |
|     \x{9f}
 | |
| 
 | |
| /^\P{Xuc}/utf
 | |
|     abc
 | |
| \= Expect no match
 | |
|     $abc
 | |
|     @abc
 | |
|     `abc
 | |
|     \x{1234}abc
 | |
| 
 | |
| /^[\P{Xuc}]/utf
 | |
|     abc
 | |
| \= Expect no match
 | |
|     $abc
 | |
|     @abc
 | |
|     `abc
 | |
|     \x{1234}abc
 | |
|     
 | |
| # Some auto-possessification tests 
 | |
| 
 | |
| /\pN+\z/B
 | |
| 
 | |
| /\PN+\z/B
 | |
| 
 | |
| /\pN+/B
 | |
| 
 | |
| /\PN+/B
 | |
| 
 | |
| /\p{Any}+\p{Any} \p{Any}+\P{Any} \p{Any}+\p{L&} \p{Any}+\p{L} \p{Any}+\p{Lu} \p{Any}+\p{Han} \p{Any}+\p{Xan} \p{Any}+\p{Xsp} \p{Any}+\p{Xps} \p{Xwd}+\p{Any} \p{Any}+\p{Xuc}/Bx,ucp
 | |
| 
 | |
| /\p{L&}+\p{Any} \p{L&}+\p{L&} \P{L&}+\p{L&} \p{L&}+\p{L} \p{L&}+\p{Lu} \p{L&}+\p{Han} \p{L&}+\p{Xan} \p{L&}+\P{Xan} \p{L&}+\p{Xsp} \p{L&}+\p{Xps} \p{Xwd}+\p{L&} \p{L&}+\p{Xuc}/Bx,ucp
 | |
| 
 | |
| /\p{N}+\p{Any} \p{N}+\p{L&} \p{N}+\p{L} \p{N}+\P{L} \p{N}+\P{N} \p{N}+\p{Lu} \p{N}+\p{Han} \p{N}+\p{Xan} \p{N}+\p{Xsp} \p{N}+\p{Xps} \p{Xwd}+\p{N} \p{N}+\p{Xuc}/Bx,ucp
 | |
| 
 | |
| /\p{Lu}+\p{Any} \p{Lu}+\p{L&} \p{Lu}+\p{L} \p{Lu}+\p{Lu} \P{Lu}+\p{Lu} \p{Lu}+\p{Nd} \p{Lu}+\P{Nd} \p{Lu}+\p{Han} \p{Lu}+\p{Xan} \p{Lu}+\p{Xsp} \p{Lu}+\p{Xps} \p{Xwd}+\p{Lu} \p{Lu}+\p{Xuc}/Bx,ucp
 | |
| 
 | |
| /\p{Han}+\p{Lu} \p{Han}+\p{L&} \p{Han}+\p{L} \p{Han}+\p{Lu} \p{Han}+\p{Arabic} \p{Arabic}+\p{Arabic} \p{Han}+\p{Xan} \p{Han}+\p{Xsp} \p{Han}+\p{Xps} \p{Xwd}+\p{Han} \p{Han}+\p{Xuc}/Bx,ucp
 | |
| 
 | |
| /\p{Xan}+\p{Any} \p{Xan}+\p{L&} \P{Xan}+\p{L&} \p{Xan}+\p{L} \p{Xan}+\p{Lu} \p{Xan}+\p{Han} \p{Xan}+\p{Xan} \p{Xan}+\P{Xan} \p{Xan}+\p{Xsp} \p{Xan}+\p{Xps} \p{Xwd}+\p{Xan} \p{Xan}+\p{Xuc}/Bx,ucp
 | |
| 
 | |
| /\p{Xsp}+\p{Any} \p{Xsp}+\p{L&} \p{Xsp}+\p{L} \p{Xsp}+\p{Lu} \p{Xsp}+\p{Han} \p{Xsp}+\p{Xan} \p{Xsp}+\p{Xsp} \P{Xsp}+\p{Xsp} \p{Xsp}+\p{Xps} \p{Xwd}+\p{Xsp} \p{Xsp}+\p{Xuc}/Bx,ucp
 | |
| 
 | |
| /\p{Xwd}+\p{Any} \p{Xwd}+\p{L&} \p{Xwd}+\p{L} \p{Xwd}+\p{Lu} \p{Xwd}+\p{Han} \p{Xwd}+\p{Xan} \p{Xwd}+\p{Xsp} \p{Xwd}+\p{Xps} \p{Xwd}+\p{Xwd} \p{Xwd}+\P{Xwd} \p{Xwd}+\p{Xuc}/Bx,ucp
 | |
| 
 | |
| /\p{Xuc}+\p{Any} \p{Xuc}+\p{L&} \p{Xuc}+\p{L} \p{Xuc}+\p{Lu} \p{Xuc}+\p{Han} \p{Xuc}+\p{Xan} \p{Xuc}+\p{Xsp} \p{Xuc}+\p{Xps} \p{Xwd}+\p{Xuc} \p{Xuc}+\p{Xuc} \p{Xuc}+\P{Xuc}/Bx,ucp
 | |
| 
 | |
| /\p{N}+\p{Ll} \p{N}+\p{Nd} \p{N}+\P{Nd}/Bx,ucp
 | |
| 
 | |
| /\p{Xan}+\p{L} \p{Xan}+\p{N} \p{Xan}+\p{C} \p{Xan}+\P{L} \P{Xan}+\p{N} \p{Xan}+\P{C}/Bx,ucp
 | |
| 
 | |
| /\p{L}+\p{Xan} \p{N}+\p{Xan} \p{C}+\p{Xan} \P{L}+\p{Xan} \p{N}+\p{Xan} \P{C}+\p{Xan} \p{L}+\P{Xan}/Bx,ucp
 | |
| 
 | |
| /\p{Xan}+\p{Lu} \p{Xan}+\p{Nd} \p{Xan}+\p{Cc} \p{Xan}+\P{Ll} \P{Xan}+\p{No} \p{Xan}+\P{Cf}/Bx,ucp
 | |
| 
 | |
| /\p{Lu}+\p{Xan} \p{Nd}+\p{Xan} \p{Cs}+\p{Xan} \P{Lt}+\p{Xan} \p{Nl}+\p{Xan} \P{Cc}+\p{Xan} \p{Lt}+\P{Xan}/Bx,ucp
 | |
| 
 | |
| /\w+\p{P} \w+\p{Po} \w+\s \p{Xan}+\s \s+\p{Xan} \s+\w/Bx,ucp
 | |
| 
 | |
| /\w+\P{P} \W+\p{Po} \w+\S \P{Xan}+\s \s+\P{Xan} \s+\W/Bx,ucp
 | |
| 
 | |
| /\w+\p{Po} \w+\p{Pc} \W+\p{Po} \W+\p{Pc} \w+\P{Po} \w+\P{Pc}/Bx,ucp
 | |
| 
 | |
| /\p{Nl}+\p{Xan} \P{Nl}+\p{Xan} \p{Nl}+\P{Xan} \P{Nl}+\P{Xan}/Bx,ucp
 | |
| 
 | |
| /\p{Xan}+\p{Nl} \P{Xan}+\p{Nl} \p{Xan}+\P{Nl} \P{Xan}+\P{Nl}/Bx,ucp
 | |
| 
 | |
| /\p{Xan}+\p{Nd} \P{Xan}+\p{Nd} \p{Xan}+\P{Nd} \P{Xan}+\P{Nd}/Bx,ucp
 | |
| 
 | |
| # End auto-possessification tests  
 | |
| 
 | |
| /\w+/B,utf,ucp,auto_callout
 | |
|     abcd
 | |
| 
 | |
| /[\p{N}]?+/B,no_auto_possess
 | |
| 
 | |
| /[\p{L}ab]{2,3}+/B,no_auto_possess
 | |
| 
 | |
| /\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx
 | |
| 
 | |
| /.+\X/Bsx
 | |
| 
 | |
| /\X+$/Bmx
 | |
| 
 | |
| /\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx
 | |
| 
 | |
| /\d+\s{0,5}=\s*\S?=\w{0,4}\W*/B,utf,ucp
 | |
| 
 | |
| /[RST]+/Bi,utf,ucp
 | |
|     
 | |
| /[R-T]+/Bi,utf,ucp
 | |
| 
 | |
| /[Q-U]+/Bi,utf,ucp
 | |
| 
 | |
| /^s?c/Iim,utf
 | |
|     scat
 | |
| 
 | |
| /\X?abc/utf,no_start_optimize
 | |
|     \xff\x7f\x00\x00\x03\x00\x41\xcc\x80\x41\x{300}\x61\x62\x63\x00\=no_utf_check,offset=06
 | |
| 
 | |
| /\x{100}\x{200}\K\x{300}/utf,startchar
 | |
|     \x{100}\x{200}\x{300}
 | |
|     
 | |
| # Test UTF characters in a substitution
 | |
| 
 | |
| /ábc/utf,replace=XሴZ
 | |
|     123ábc123
 | |
| 
 | |
| /(?<=abc)(|def)/g,utf,replace=<$0>
 | |
|     123abcáyzabcdef789abcሴqr
 | |
| 
 | |
| /[A-`]/iB,utf
 | |
|     abcdefghijklmno
 | |
| 
 | |
| /(?<=\K\x{17f})/g,utf,aftertext
 | |
|     \x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
 | |
| 
 | |
| /(?<=\K\x{17f})/altglobal,utf,aftertext
 | |
|     \x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
 | |
| 
 | |
| "\xa\xf<(.\pZ*\P{Xwd}+^\xa8\3'3yq.::?(?J:()\xd1+!~:3'(8?:)':(?'d'(?'d'^u]!.+.+\\A\Ah(n+?9){7}+\K;(?'X'u'(?'c'(?'z'(?<y>\xb::\xf0'|\xd3(\xae?'w(z\x8?P>l)\x8?P>a)'\H\R\xd1+!!~:3'(?:h$N{26875}\W+?\\=D{2}\x89(?i:Uy0\N({2\xa(\v\x85*){y*\A(()\p{L}+?\P{^Xan}'+?\xff\+pS\?|).{;y*\A(()\p{L}+?\8}\d?1(|)(/1){7}.+[Lp{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(\xbf(R))\x8a\X*?\x8a\xb\xd1^9\3*+(\xc1,\k'R'\xb4)\xcc(z\z(?J)(?'X'\x1b(\xb\xd1^9\?'3*+P{^Xan}+?\xff\+(\xc1.]k+\xb'Pm'\xb4)\xcc4f\xa7'\xd1V(?i:U,{2,2})'(?'X'))?-%--\x95$9*\4'|\xd1(\x9c''%\x94$9)#(?'R')3\x7?('P\xed7'\xa8\xb1^u\xeaw\1\0\0\(|(?1){7}.+[\p{Me}].\s\xdcC*^\x14?(?(<y>))(?<!^)$C((;*?(R*?))+(?(R)\x8a\X*?\x8a\xb\xd1^9\3*+|(\xc1,\k'R'\xb4)\xcc! z)\z(?JJ)(?'X';(\xb\xd1^9\?'3*+(\xc1.]k+\xb'Pm'\xb4))':(?'d')(?'RD'(d')|)|$)'|(?<x>\g{d});\g{x}\x11\g{d}\x81\|$((?'X'\'X'(?'W''\x92()'9'\x83*))\xba*\!?^ <){)':;\xcc4'\xd1'(?'X'28))?-%--\x95$9*\4'|\xd1((''e\x94*$9:)*#(?'R')3)\x7?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+0!~:(?)'d'E:yD!\s(?'R'\x1e;\x10:U))|'\x9g!\xb0*){)\\x16:;()\x1e\x10\x87*:(?<y>)\xd1+!~:(?)'}'\d'E:yD!\s(?'R'\x1e;\x10:U))|'))|)g!\xb0*R+9{29+)#(?'P'})*?pS\{3,}\x85,{0,}l{*UTF)(\xe{7}){3722,{9,}d{2,?|))|{)\(A?&d}}{\xa,}2}){3,}7,l{)22}(,}l:7{2,4}}29\x19+)#?'P'})*v?))\x5"
 | |
| 
 | |
| /$(&.+[\p{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(?(R)){0,6}?|){12\x8a\X*?\x8a\x0b\xd1^9\3*+(\xc1,\k'P'\xb4)\xcc(z\z(?JJ)(?'X'8};(\x0b\xd1^9\?'3*+(\xc1.]k+\x0b'Pm'\xb4\xcc4'\xd1'(?'X'))?-%--\x95$9*\4'|\xd1(''%\x95*$9)#(?'R')3\x07?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+!~:(?)''(d'E:yD!\s(?'R'\x1e;\x10:U))|')g!\xb0*){29+))#(?'P'})*?/
 | |
| 
 | |
| "(*UTF)(*UCP)(.UTF).+X(\V+;\^(\D|)!999}(?(?C{7(?C')\H*\S*/^\x5\xa\\xd3\x85n?(;\D*(?m).[^mH+((*UCP)(*U:F)})(?!^)(?'"
 | |
| 
 | |
| /[\pS#moq]/
 | |
|     =
 | |
| 
 | |
| /(*:a\x{12345}b\t(d\)c)xxx/utf,alt_verbnames,mark
 | |
|     cxxxz
 | |
| 
 | |
| /abcd/utf,replace=x\x{824}y\o{3333}z(\Q12\$34$$\x34\E5$$),substitute_extended
 | |
|     abcd
 | |
| 
 | |
| /a(\x{e0}\x{101})(\x{c0}\x{102})/utf,replace=a\u$1\U$1\E$1\l$2\L$2\Eab\U\x{e0}\x{101}\L\x{d0}\x{160}\EDone,substitute_extended
 | |
|     a\x{e0}\x{101}\x{c0}\x{102}
 | |
| 
 | |
| /((?<digit>\d)|(?<letter>\p{L}))/g,substitute_extended,replace=<${digit:+digit; :not digit; }${letter:+letter:not a letter}>
 | |
|     ab12cde
 | |
| 
 | |
| /(*UCP)(*UTF)[[:>:]]X/B
 | |
| 
 | |
| /abc/utf,replace=xyz
 | |
|     abc\=zero_terminate
 | |
| 
 | |
| /a[[:punct:]b]/ucp,bincode
 | |
| 
 | |
| /a[[:punct:]b]/utf,ucp,bincode
 | |
| 
 | |
| /a[b[:punct:]]/utf,ucp,bincode
 | |
| 
 | |
| /[[:^ascii:]]/utf,ucp,bincode
 | |
| 
 | |
| /[[:^ascii:]\w]/utf,ucp,bincode
 | |
| 
 | |
| /[\w[:^ascii:]]/utf,ucp,bincode
 | |
| 
 | |
| /[^[:ascii:]\W]/utf,ucp,bincode
 | |
|     \x{de}
 | |
|     \x{200}
 | |
| \= Expect no match     
 | |
|     \x{300}
 | |
|     \x{37e}
 | |
| 
 | |
| /[[:^ascii:]a]/utf,ucp,bincode
 | |
| 
 | |
| /L(?#(|++<!(2)?/B,utf,no_auto_possess,auto_callout
 | |
| 
 | |
| /L(?#(|++<!(2)?/B,utf,ucp,auto_callout
 | |
| 
 | |
| /(*UTF)C\x09((?<!'(?x)!*H? #\xcc\x9a[^$]/
 | |
| 
 | |
| /[\D]/utf
 | |
|     \x{1d7cf}
 | |
| 
 | |
| /[\D\P{Nd}]/utf
 | |
|     \x{1d7cf}
 | |
| 
 | |
| /[^\D]/utf
 | |
|     a9b
 | |
| \= Expect no match
 | |
|     \x{1d7cf}
 | |
| 
 | |
| /[^\D\P{Nd}]/utf
 | |
|     a9b
 | |
|     \x{1d7cf}
 | |
| \= Expect no match
 | |
|     \x{10000}
 | |
|     
 | |
| # Hex uses pattern length, not zero-terminated. This tests for overrunning
 | |
| # the given length of a pattern.
 | |
| 
 | |
| /'(*UTF)'/hex 
 | |
| 
 | |
| /'#('/hex,extended,utf
 | |
| 
 | |
| /a(?<=A\XB)/utf
 | |
| 
 | |
| /ab(?<=A\RB)/utf
 | |
| 
 | |
| /../utf,auto_callout
 | |
|     \n\x{123}\x{123}\x{123}\x{123}
 | |
| 
 | |
| # This tests processing wide characters in extended mode.
 | |
| 
 | |
| /XȀ/x,utf
 | |
| 
 | |
| # These three test a bug fix that was not clearing up after a locale setting
 | |
| # when the test or a subsequent one matched a wide character.
 | |
| 
 | |
| //locale=C
 | |
| 
 | |
| /[\P{Yi}]/utf
 | |
| \x{2f000}
 | |
| 
 | |
| /[\P{Yi}]/utf,locale=C
 | |
| \x{2f000}
 | |
| 
 | |
| /^(?<!(?=))/B,utf
 | |
| 
 | |
| # Horizontal and vertical space lists ignore caseless
 | |
| 
 | |
| /[\HH]/Bi,utf
 | |
| 
 | |
| /[^\HH]/Bi,utf
 | |
| 
 | |
| # End of testinput5 
 |