 36af74cb25
			
		
	
	36af74cb25
	
	
	
		
			
			Some manual changes done to the library were lost with this update. They will be added in the next commit.
		
			
				
	
	
		
			364 lines
		
	
	
		
			5.1 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			364 lines
		
	
	
		
			5.1 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| # This set of tests is for UTF-16 and UTF-32 support, including Unicode
 | |
| # properties. It is relevant only to the 16-bit and 32-bit libraries. The
 | |
| # output is different for each library, so there are separate output files.
 | |
| 
 | |
| /ÃÃÃxxx/IB,utf,no_utf_check
 | |
| 
 | |
| /abc/utf
 | |
|     Ã]
 | |
| 
 | |
| # Check maximum character size 
 | |
| 
 | |
| /\x{ffff}/IB,utf
 | |
| 
 | |
| /\x{10000}/IB,utf
 | |
| 
 | |
| /\x{100}/IB,utf
 | |
| 
 | |
| /\x{1000}/IB,utf
 | |
| 
 | |
| /\x{10000}/IB,utf
 | |
| 
 | |
| /\x{100000}/IB,utf
 | |
| 
 | |
| /\x{10ffff}/IB,utf
 | |
| 
 | |
| /[\x{ff}]/IB,utf
 | |
| 
 | |
| /[\x{100}]/IB,utf
 | |
| 
 | |
| /\x80/IB,utf
 | |
| 
 | |
| /\xff/IB,utf
 | |
| 
 | |
| /\x{D55c}\x{ad6d}\x{C5B4}/IB,utf
 | |
|     \x{D55c}\x{ad6d}\x{C5B4}
 | |
| 
 | |
| /\x{65e5}\x{672c}\x{8a9e}/IB,utf
 | |
|     \x{65e5}\x{672c}\x{8a9e}
 | |
| 
 | |
| /\x{80}/IB,utf
 | |
| 
 | |
| /\x{084}/IB,utf
 | |
| 
 | |
| /\x{104}/IB,utf
 | |
| 
 | |
| /\x{861}/IB,utf
 | |
| 
 | |
| /\x{212ab}/IB,utf
 | |
| 
 | |
| /[^ab\xC0-\xF0]/IB,utf
 | |
|     \x{f1}
 | |
|     \x{bf}
 | |
|     \x{100}
 | |
|     \x{1000}
 | |
| \= Expect no match
 | |
|     \x{c0}
 | |
|     \x{f0}
 | |
| 
 | |
| /Ā{3,4}/IB,utf
 | |
|   \x{100}\x{100}\x{100}\x{100\x{100}
 | |
| 
 | |
| /(\x{100}+|x)/IB,utf
 | |
| 
 | |
| /(\x{100}*a|x)/IB,utf
 | |
| 
 | |
| /(\x{100}{0,2}a|x)/IB,utf
 | |
| 
 | |
| /(\x{100}{1,2}a|x)/IB,utf
 | |
| 
 | |
| /\x{100}/IB,utf
 | |
| 
 | |
| /a\x{100}\x{101}*/IB,utf
 | |
| 
 | |
| /a\x{100}\x{101}+/IB,utf
 | |
| 
 | |
| /[^\x{c4}]/IB
 | |
| 
 | |
| /[\x{100}]/IB,utf
 | |
|     \x{100}
 | |
|     Z\x{100}
 | |
|     \x{100}Z
 | |
| 
 | |
| /[\xff]/IB,utf
 | |
|     >\x{ff}<
 | |
| 
 | |
| /[^\xff]/IB,utf
 | |
| 
 | |
| /\x{100}abc(xyz(?1))/IB,utf
 | |
| 
 | |
| /\777/I,utf
 | |
|   \x{1ff}
 | |
|   \777
 | |
| 
 | |
| /\x{100}+\x{200}/IB,utf
 | |
| 
 | |
| /\x{100}+X/IB,utf
 | |
| 
 | |
| /^[\QĀ\E-\Q�\E/B,utf
 | |
| 
 | |
| /X/utf
 | |
|     XX\x{d800}\=no_utf_check
 | |
|     XX\x{da00}\=no_utf_check
 | |
|     XX\x{dc00}\=no_utf_check
 | |
|     XX\x{de00}\=no_utf_check
 | |
|     XX\x{dfff}\=no_utf_check
 | |
| \= Expect UTF error
 | |
|     XX\x{d800}
 | |
|     XX\x{da00}
 | |
|     XX\x{dc00}
 | |
|     XX\x{de00}
 | |
|     XX\x{dfff}
 | |
|     XX\x{110000}
 | |
|     XX\x{d800}\x{1234}
 | |
| \= Expect no match
 | |
|     XX\x{d800}\=offset=3
 | |
|     
 | |
| /(?<=.)X/utf
 | |
|     XX\x{d800}\=offset=3
 | |
| 
 | |
| /(*UTF16)\x{11234}/
 | |
|   abcd\x{11234}pqr
 | |
| 
 | |
| /(*UTF)\x{11234}/I
 | |
|   abcd\x{11234}pqr
 | |
| 
 | |
| /(*UTF-32)\x{11234}/
 | |
|   abcd\x{11234}pqr
 | |
| 
 | |
| /(*UTF-32)\x{112}/
 | |
|   abcd\x{11234}pqr
 | |
| 
 | |
| /(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I
 | |
| 
 | |
| /(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I
 | |
| 
 | |
| /\h/I,utf
 | |
|     ABC\x{09}
 | |
|     ABC\x{20}
 | |
|     ABC\x{a0}
 | |
|     ABC\x{1680}
 | |
|     ABC\x{180e}
 | |
|     ABC\x{2000}
 | |
|     ABC\x{202f}
 | |
|     ABC\x{205f}
 | |
|     ABC\x{3000}
 | |
| 
 | |
| /\v/I,utf
 | |
|     ABC\x{0a}
 | |
|     ABC\x{0b}
 | |
|     ABC\x{0c}
 | |
|     ABC\x{0d}
 | |
|     ABC\x{85}
 | |
|     ABC\x{2028}
 | |
| 
 | |
| /\h*A/I,utf
 | |
|     CDBABC
 | |
|     \x{2000}ABC
 | |
| 
 | |
| /\R*A/I,bsr=unicode,utf
 | |
|     CDBABC
 | |
|     \x{2028}A
 | |
| 
 | |
| /\v+A/I,utf
 | |
| 
 | |
| /\s?xxx\s/I,utf
 | |
| 
 | |
| /\sxxx\s/I,utf,tables=2
 | |
|     AB\x{85}xxx\x{a0}XYZ
 | |
|     AB\x{a0}xxx\x{85}XYZ
 | |
| 
 | |
| /\S \S/I,utf,tables=2
 | |
|     \x{a2} \x{84}
 | |
|     A Z
 | |
| 
 | |
| /a+/utf
 | |
|     a\x{123}aa\=offset=1
 | |
|     a\x{123}aa\=offset=2
 | |
|     a\x{123}aa\=offset=3
 | |
| \= Expect no match
 | |
|     a\x{123}aa\=offset=4
 | |
| \= Expect bad offset error     
 | |
|     a\x{123}aa\=offset=5
 | |
|     a\x{123}aa\=offset=6
 | |
| 
 | |
| /\x{1234}+/Ii,utf
 | |
| 
 | |
| /\x{1234}+?/Ii,utf
 | |
| 
 | |
| /\x{1234}++/Ii,utf
 | |
| 
 | |
| /\x{1234}{2}/Ii,utf
 | |
| 
 | |
| /[^\x{c4}]/IB,utf
 | |
| 
 | |
| /X+\x{200}/IB,utf
 | |
| 
 | |
| /\R/I,utf
 | |
| 
 | |
| # Check bad offset 
 | |
| 
 | |
| /a/utf
 | |
| \= Expect bad UTF-16 offset, or no match in 32-bit
 | |
|     \x{10000}\=offset=1
 | |
|     \x{10000}ab\=offset=1
 | |
| \= Expect 16-bit match, 32-bit no match
 | |
|     \x{10000}ab\=offset=2
 | |
| \= Expect no match     
 | |
|     \x{10000}ab\=offset=3
 | |
| \= Expect no match in 16-bit, bad offset in 32-bit    
 | |
|     \x{10000}ab\=offset=4
 | |
| \= Expect bad offset     
 | |
|     \x{10000}ab\=offset=5
 | |
| 
 | |
| /í¼€/utf
 | |
| 
 | |
| /\w+\x{C4}/B,utf
 | |
|     a\x{C4}\x{C4}
 | |
| 
 | |
| /\w+\x{C4}/B,utf,tables=2
 | |
|     a\x{C4}\x{C4}
 | |
|     
 | |
| /\W+\x{C4}/B,utf
 | |
|     !\x{C4}
 | |
|  
 | |
| /\W+\x{C4}/B,utf,tables=2
 | |
|     !\x{C4}
 | |
| 
 | |
| /\W+\x{A1}/B,utf
 | |
|     !\x{A1}
 | |
|  
 | |
| /\W+\x{A1}/B,utf,tables=2
 | |
|     !\x{A1}
 | |
| 
 | |
| /X\s+\x{A0}/B,utf
 | |
|     X\x20\x{A0}\x{A0}
 | |
| 
 | |
| /X\s+\x{A0}/B,utf,tables=2
 | |
|     X\x20\x{A0}\x{A0}
 | |
| 
 | |
| /\S+\x{A0}/B,utf
 | |
|     X\x{A0}\x{A0}
 | |
| 
 | |
| /\S+\x{A0}/B,utf,tables=2
 | |
|     X\x{A0}\x{A0}
 | |
| 
 | |
| /\x{a0}+\s!/B,utf
 | |
|     \x{a0}\x20!
 | |
| 
 | |
| /\x{a0}+\s!/B,utf,tables=2
 | |
|     \x{a0}\x20!
 | |
| 
 | |
| /(*UTF)abc/never_utf
 | |
| 
 | |
| /abc/utf,never_utf
 | |
| 
 | |
| /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf
 | |
| 
 | |
| /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf
 | |
| 
 | |
| /AB\x{1fb0}/IB,utf
 | |
| 
 | |
| /AB\x{1fb0}/IBi,utf
 | |
| 
 | |
| /\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
 | |
|     \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
 | |
|     \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
 | |
| 
 | |
| /[ⱥ]/Bi,utf
 | |
| 
 | |
| /[^ⱥ]/Bi,utf
 | |
| 
 | |
| /[[:blank:]]/B,ucp
 | |
| 
 | |
| /\x{212a}+/Ii,utf
 | |
|     KKkk\x{212a}
 | |
| 
 | |
| /s+/Ii,utf
 | |
|     SSss\x{17f}
 | |
| 
 | |
| # Non-UTF characters should give errors in both 16-bit and 32-bit modes.
 | |
| 
 | |
| /\x{110000}/utf
 | |
| 
 | |
| /\o{4200000}/utf
 | |
| 
 | |
| /\x{100}*A/IB,utf
 | |
|     A
 | |
| 
 | |
| /\x{100}*\d(?R)/IB,utf
 | |
| 
 | |
| /[Z\x{100}]/IB,utf
 | |
|     Z\x{100}
 | |
|     \x{100}
 | |
|     \x{100}Z
 | |
| 
 | |
| /[z-\x{100}]/IB,utf
 | |
| 
 | |
| /[z\Qa-d]Ā\E]/IB,utf
 | |
|     \x{100}
 | |
|     Ā 
 | |
| 
 | |
| /[ab\x{100}]abc(xyz(?1))/IB,utf
 | |
| 
 | |
| /\x{100}*\s/IB,utf
 | |
| 
 | |
| /\x{100}*\d/IB,utf
 | |
| 
 | |
| /\x{100}*\w/IB,utf
 | |
| 
 | |
| /\x{100}*\D/IB,utf
 | |
| 
 | |
| /\x{100}*\S/IB,utf
 | |
| 
 | |
| /\x{100}*\W/IB,utf
 | |
| 
 | |
| /[\x{105}-\x{109}]/IBi,utf
 | |
|     \x{104}
 | |
|     \x{105}
 | |
|     \x{109}  
 | |
| \= Expect no match
 | |
|     \x{100}
 | |
|     \x{10a} 
 | |
|     
 | |
| /[z-\x{100}]/IBi,utf
 | |
|     Z
 | |
|     z
 | |
|     \x{39c}
 | |
|     \x{178}
 | |
|     |
 | |
|     \x{80}
 | |
|     \x{ff}
 | |
|     \x{100}
 | |
|     \x{101} 
 | |
| \= Expect no match
 | |
|     \x{102}
 | |
|     Y
 | |
|     y           
 | |
| 
 | |
| /[z-\x{100}]/IBi,utf
 | |
| 
 | |
| /\x{3a3}B/IBi,utf
 | |
| 
 | |
| /./utf
 | |
|     \x{110000}
 | |
| 
 | |
| /(*UTF)abý¿¿¿¿¿z/B
 | |
| 
 | |
| /abý¿¿¿¿¿z/utf
 | |
| 
 | |
| /[\W\p{Any}]/B
 | |
|     abc
 | |
|     123 
 | |
| 
 | |
| /[\W\pL]/B
 | |
|     abc
 | |
|     \x{100}
 | |
|     \x{308}  
 | |
| \= Expect no match
 | |
|     123     
 | |
| 
 | |
| /[\s[:^ascii:]]/B,ucp
 | |
| 
 | |
| # End of testinput12
 |