358 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			358 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| # This set of tests is for the 16-bit and 32-bit libraries' basic (non-UTF)
 | |
| # features that are not compatible with the 8-bit library, or which give
 | |
| # different output in 16-bit or 32-bit mode. The output for the two widths is
 | |
| # different, so they have separate output files.
 | |
|     
 | |
| #forbid_utf
 | |
| 
 | |
| /a\Cb/
 | |
|     aXb
 | |
|     a\nb
 | |
|   
 | |
| /[^\x{c4}]/IB
 | |
|   
 | |
| /\x{100}/I
 | |
| 
 | |
| /  (?: [\040\t] |  \(
 | |
| (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
 | |
| \)  )*                          # optional leading comment
 | |
| (?:    (?:
 | |
| [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
 | |
| (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
 | |
| |
 | |
| " (?:                      # opening quote...
 | |
| [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
 | |
| |                     #    or
 | |
| \\ [^\x80-\xff]           #   Escaped something (something != CR)
 | |
| )* "  # closing quote
 | |
| )                    # initial word
 | |
| (?:  (?: [\040\t] |  \(
 | |
| (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
 | |
| \)  )*  \.  (?: [\040\t] |  \(
 | |
| (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
 | |
| \)  )*   (?:
 | |
| [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
 | |
| (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
 | |
| |
 | |
| " (?:                      # opening quote...
 | |
| [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
 | |
| |                     #    or
 | |
| \\ [^\x80-\xff]           #   Escaped something (something != CR)
 | |
| )* "  # closing quote
 | |
| )  )* # further okay, if led by a period
 | |
| (?: [\040\t] |  \(
 | |
| (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
 | |
| \)  )*  @  (?: [\040\t] |  \(
 | |
| (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
 | |
| \)  )*    (?:
 | |
| [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
 | |
| (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
 | |
| |   \[                         # [
 | |
| (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
 | |
| \]                        #           ]
 | |
| )                           # initial subdomain
 | |
| (?:                                  #
 | |
| (?: [\040\t] |  \(
 | |
| (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
 | |
| \)  )*  \.                        # if led by a period...
 | |
| (?: [\040\t] |  \(
 | |
| (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
 | |
| \)  )*   (?:
 | |
| [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
 | |
| (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
 | |
| |   \[                         # [
 | |
| (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
 | |
| \]                        #           ]
 | |
| )                     #   ...further okay
 | |
| )*
 | |
| # address
 | |
| |                     #  or
 | |
| (?:
 | |
| [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
 | |
| (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
 | |
| |
 | |
| " (?:                      # opening quote...
 | |
| [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
 | |
| |                     #    or
 | |
| \\ [^\x80-\xff]           #   Escaped something (something != CR)
 | |
| )* "  # closing quote
 | |
| )             # one word, optionally followed by....
 | |
| (?:
 | |
| [^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037]  |  # atom and space parts, or...
 | |
| \(
 | |
| (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
 | |
| \)       |  # comments, or...
 | |
| 
 | |
| " (?:                      # opening quote...
 | |
| [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
 | |
| |                     #    or
 | |
| \\ [^\x80-\xff]           #   Escaped something (something != CR)
 | |
| )* "  # closing quote
 | |
| # quoted strings
 | |
| )*
 | |
| <  (?: [\040\t] |  \(
 | |
| (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
 | |
| \)  )*                     # leading <
 | |
| (?:  @  (?: [\040\t] |  \(
 | |
| (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
 | |
| \)  )*    (?:
 | |
| [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
 | |
| (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
 | |
| |   \[                         # [
 | |
| (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
 | |
| \]                        #           ]
 | |
| )                           # initial subdomain
 | |
| (?:                                  #
 | |
| (?: [\040\t] |  \(
 | |
| (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
 | |
| \)  )*  \.                        # if led by a period...
 | |
| (?: [\040\t] |  \(
 | |
| (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
 | |
| \)  )*   (?:
 | |
| [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
 | |
| (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
 | |
| |   \[                         # [
 | |
| (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
 | |
| \]                        #           ]
 | |
| )                     #   ...further okay
 | |
| )*
 | |
| 
 | |
| (?:  (?: [\040\t] |  \(
 | |
| (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
 | |
| \)  )*  ,  (?: [\040\t] |  \(
 | |
| (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
 | |
| \)  )*  @  (?: [\040\t] |  \(
 | |
| (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
 | |
| \)  )*    (?:
 | |
| [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
 | |
| (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
 | |
| |   \[                         # [
 | |
| (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
 | |
| \]                        #           ]
 | |
| )                           # initial subdomain
 | |
| (?:                                  #
 | |
| (?: [\040\t] |  \(
 | |
| (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
 | |
| \)  )*  \.                        # if led by a period...
 | |
| (?: [\040\t] |  \(
 | |
| (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
 | |
| \)  )*   (?:
 | |
| [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
 | |
| (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
 | |
| |   \[                         # [
 | |
| (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
 | |
| \]                        #           ]
 | |
| )                     #   ...further okay
 | |
| )*
 | |
| )* # further okay, if led by comma
 | |
| :                                # closing colon
 | |
| (?: [\040\t] |  \(
 | |
| (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
 | |
| \)  )*  )? #       optional route
 | |
| (?:
 | |
| [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
 | |
| (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
 | |
| |
 | |
| " (?:                      # opening quote...
 | |
| [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
 | |
| |                     #    or
 | |
| \\ [^\x80-\xff]           #   Escaped something (something != CR)
 | |
| )* "  # closing quote
 | |
| )                    # initial word
 | |
| (?:  (?: [\040\t] |  \(
 | |
| (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
 | |
| \)  )*  \.  (?: [\040\t] |  \(
 | |
| (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
 | |
| \)  )*   (?:
 | |
| [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
 | |
| (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
 | |
| |
 | |
| " (?:                      # opening quote...
 | |
| [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
 | |
| |                     #    or
 | |
| \\ [^\x80-\xff]           #   Escaped something (something != CR)
 | |
| )* "  # closing quote
 | |
| )  )* # further okay, if led by a period
 | |
| (?: [\040\t] |  \(
 | |
| (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
 | |
| \)  )*  @  (?: [\040\t] |  \(
 | |
| (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
 | |
| \)  )*    (?:
 | |
| [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
 | |
| (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
 | |
| |   \[                         # [
 | |
| (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
 | |
| \]                        #           ]
 | |
| )                           # initial subdomain
 | |
| (?:                                  #
 | |
| (?: [\040\t] |  \(
 | |
| (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
 | |
| \)  )*  \.                        # if led by a period...
 | |
| (?: [\040\t] |  \(
 | |
| (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
 | |
| \)  )*   (?:
 | |
| [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
 | |
| (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
 | |
| |   \[                         # [
 | |
| (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
 | |
| \]                        #           ]
 | |
| )                     #   ...further okay
 | |
| )*
 | |
| #       address spec
 | |
| (?: [\040\t] |  \(
 | |
| (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
 | |
| \)  )*  > #                  trailing >
 | |
| # name and address
 | |
| )  (?: [\040\t] |  \(
 | |
| (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
 | |
| \)  )*                       # optional trailing comment
 | |
| /Ix
 | |
| 
 | |
| /[\h]/B
 | |
|     >\x09<
 | |
| 
 | |
| /[\h]+/B
 | |
|     >\x09\x20\xa0<
 | |
| 
 | |
| /[\v]/B
 | |
| 
 | |
| /[^\h]/B
 | |
| 
 | |
| /\h+/I
 | |
|     \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
 | |
|     \x{3001}\x{2fff}\x{200a}\xa0\x{2000}
 | |
| 
 | |
| /[\h\x{dc00}]+/IB
 | |
|     \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
 | |
|     \x{3001}\x{2fff}\x{200a}\xa0\x{2000}
 | |
| 
 | |
| /\H+/I
 | |
|     \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
 | |
|     \x{2000}\x{200a}\x{1fff}\x{200b}
 | |
|     \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
 | |
|     \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
 | |
| 
 | |
| /[\H\x{d800}]+/
 | |
|     \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
 | |
|     \x{2000}\x{200a}\x{1fff}\x{200b}
 | |
|     \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
 | |
|     \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
 | |
| 
 | |
| /\v+/I
 | |
|     \x{2027}\x{2030}\x{2028}\x{2029}
 | |
|     \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
 | |
| 
 | |
| /[\v\x{dc00}]+/IB
 | |
|     \x{2027}\x{2030}\x{2028}\x{2029}
 | |
|     \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
 | |
| 
 | |
| /\V+/I
 | |
|     \x{2028}\x{2029}\x{2027}\x{2030}
 | |
|     \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
 | |
| 
 | |
| /[\V\x{d800}]+/
 | |
|     \x{2028}\x{2029}\x{2027}\x{2030}
 | |
|     \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
 | |
| 
 | |
| /\R+/I,bsr=unicode
 | |
|     \x{2027}\x{2030}\x{2028}\x{2029}
 | |
|     \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
 | |
| 
 | |
| /\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I
 | |
|     \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
 | |
| 
 | |
| /[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/B
 | |
| 
 | |
| /[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/Bi
 | |
| 
 | |
| /[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/B
 | |
| 
 | |
| /[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/Bi
 | |
| 
 | |
| /(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark
 | |
|     XX
 | |
|      
 | |
| /(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark
 | |
|     XX
 | |
| 
 | |
| /\u0100/B,alt_bsux,allow_empty_class,match_unset_backref
 | |
| 
 | |
| /[\u0100-\u0200]/B,alt_bsux,allow_empty_class,match_unset_backref
 | |
| 
 | |
| /\ud800/B,alt_bsux,allow_empty_class,match_unset_backref
 | |
| 
 | |
| /^\x{ffff}+/i
 | |
|     \x{ffff}
 | |
| 
 | |
| /^\x{ffff}?/i
 | |
|     \x{ffff}
 | |
| 
 | |
| /^\x{ffff}*/i
 | |
|     \x{ffff}
 | |
| 
 | |
| /^\x{ffff}{3}/i
 | |
|     \x{ffff}\x{ffff}\x{ffff}
 | |
| 
 | |
| /^\x{ffff}{0,3}/i
 | |
|     \x{ffff}
 | |
| 
 | |
| /[^\x00-a]{12,}[^b-\xff]*/B
 | |
| 
 | |
| /[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
 | |
| 
 | |
| /a*[b-\x{200}]?a#a*[b-\x{200}]?b#[a-f]*[g-\x{200}]*#[g-\x{200}]*[a-c]*#[g-\x{200}]*[a-h]*/B
 | |
| 
 | |
| /^[\x{1234}\x{4321}]{2,4}?/
 | |
|     \x{1234}\x{1234}\x{1234}
 | |
| 
 | |
| # Check maximum non-UTF character size for the 16-bit library.
 | |
| 
 | |
| /\x{ffff}/
 | |
|     A\x{ffff}B
 | |
| 
 | |
| /\x{10000}/
 | |
| 
 | |
| /\o{20000}/
 | |
| 
 | |
| # Check maximum character size for the 32-bit library. These will all give
 | |
| # errors in the 16-bit library.
 | |
| 
 | |
| /\x{110000}/
 | |
| 
 | |
| /\x{7fffffff}/
 | |
| 
 | |
| /\x{80000000}/
 | |
| 
 | |
| /\x{ffffffff}/
 | |
| 
 | |
| /\x{100000000}/
 | |
| 
 | |
| /\o{17777777777}/
 | |
| 
 | |
| /\o{20000000000}/
 | |
| 
 | |
| /\o{37777777777}/
 | |
| 
 | |
| /\o{40000000000}/
 | |
| 
 | |
| /\x{7fffffff}\x{7fffffff}/I
 | |
| 
 | |
| /\x{80000000}\x{80000000}/I
 | |
| 
 | |
| /\x{ffffffff}\x{ffffffff}/I
 | |
| 
 | |
| # Non-UTF characters 
 | |
| 
 | |
| /\C{2,3}/
 | |
|     \x{400000}\x{400001}\x{400002}\x{400003}
 | |
| 
 | |
| /\x{400000}\x{800000}/IBi
 | |
| 
 | |
| # Check character ranges 
 | |
| 
 | |
| /[\H]/IB
 | |
| 
 | |
| /[\V]/IB
 | |
| 
 | |
| # End of testinput11
 | 
