262 lines
		
	
	
		
			7.0 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			262 lines
		
	
	
		
			7.0 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*************************************************
 | |
| *      Perl-Compatible Regular Expressions       *
 | |
| *************************************************/
 | |
| 
 | |
| /* PCRE is a library of functions to support regular expressions whose syntax
 | |
| and semantics are as close as possible to those of the Perl 5 language.
 | |
| 
 | |
|                        Written by Philip Hazel
 | |
|      Original API code Copyright (c) 1997-2012 University of Cambridge
 | |
|          New API code Copyright (c) 2014 University of Cambridge
 | |
| 
 | |
| -----------------------------------------------------------------------------
 | |
| Redistribution and use in source and binary forms, with or without
 | |
| modification, are permitted provided that the following conditions are met:
 | |
| 
 | |
|     * Redistributions of source code must retain the above copyright notice,
 | |
|       this list of conditions and the following disclaimer.
 | |
| 
 | |
|     * Redistributions in binary form must reproduce the above copyright
 | |
|       notice, this list of conditions and the following disclaimer in the
 | |
|       documentation and/or other materials provided with the distribution.
 | |
| 
 | |
|     * Neither the name of the University of Cambridge nor the names of its
 | |
|       contributors may be used to endorse or promote products derived from
 | |
|       this software without specific prior written permission.
 | |
| 
 | |
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 | |
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | |
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 | |
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 | |
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 | |
| CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 | |
| SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 | |
| INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 | |
| CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 | |
| ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 | |
| POSSIBILITY OF SUCH DAMAGE.
 | |
| -----------------------------------------------------------------------------
 | |
| */
 | |
| 
 | |
| 
 | |
| #ifndef _PCRE2_UCP_H
 | |
| #define _PCRE2_UCP_H
 | |
| 
 | |
/* This file contains definitions of the property values that are returned by
the UCD access macros. New values that are added for new releases of Unicode
should always be at the end of each enum, for backwards compatibility.

IMPORTANT: Note also that the specific numeric values of the enums have to be
the same as the values that are generated by the maint/MultiStage2.py script,
where the equivalent property descriptive names are listed in vectors.

ALSO: The specific values of the first two enums are assumed for the table
called catposstab in pcre2_compile.c. */
 | |
| 
 | |
/* These are the general character categories. The enumerators are numbered
implicitly from 0 in the order listed; because the numeric values are relied
upon elsewhere (see the note at the top of this file), the order must not be
changed. */

enum {
  ucp_C,     /* Other */
  ucp_L,     /* Letter */
  ucp_M,     /* Mark */
  ucp_N,     /* Number */
  ucp_P,     /* Punctuation */
  ucp_S,     /* Symbol */
  ucp_Z      /* Separator */
};
 | |
| 
 | |
/* These are the particular character categories. Each name is the two-letter
category code; the first letter is the general category it belongs to. The
enumerators are numbered implicitly from 0 in the order listed; because the
numeric values are relied upon elsewhere (see the note at the top of this
file), the order must not be changed. */

enum {
  ucp_Cc,    /* Control */
  ucp_Cf,    /* Format */
  ucp_Cn,    /* Unassigned */
  ucp_Co,    /* Private use */
  ucp_Cs,    /* Surrogate */
  ucp_Ll,    /* Lower case letter */
  ucp_Lm,    /* Modifier letter */
  ucp_Lo,    /* Other letter */
  ucp_Lt,    /* Title case letter */
  ucp_Lu,    /* Upper case letter */
  ucp_Mc,    /* Spacing mark */
  ucp_Me,    /* Enclosing mark */
  ucp_Mn,    /* Non-spacing mark */
  ucp_Nd,    /* Decimal number */
  ucp_Nl,    /* Letter number */
  ucp_No,    /* Other number */
  ucp_Pc,    /* Connector punctuation */
  ucp_Pd,    /* Dash punctuation */
  ucp_Pe,    /* Close punctuation */
  ucp_Pf,    /* Final punctuation */
  ucp_Pi,    /* Initial punctuation */
  ucp_Po,    /* Other punctuation */
  ucp_Ps,    /* Open punctuation */
  ucp_Sc,    /* Currency symbol */
  ucp_Sk,    /* Modifier symbol */
  ucp_Sm,    /* Mathematical symbol */
  ucp_So,    /* Other symbol */
  ucp_Zl,    /* Line separator */
  ucp_Zp,    /* Paragraph separator */
  ucp_Zs     /* Space separator */
};
 | |
| 
 | |
/* These are grapheme break properties. Note that the code for processing them
assumes that the values are less than 16. If more values are added that take
the number to 16 or more, the code will have to be rewritten. The trailing
comments give each enumerator's implicit numeric value; the highest one in use
is currently 12. */

enum {
  ucp_gbCR,                /*  0 */
  ucp_gbLF,                /*  1 */
  ucp_gbControl,           /*  2 */
  ucp_gbExtend,            /*  3 */
  ucp_gbPrepend,           /*  4 */
  ucp_gbSpacingMark,       /*  5 */
  ucp_gbL,                 /*  6 Hangul syllable type L */
  ucp_gbV,                 /*  7 Hangul syllable type V */
  ucp_gbT,                 /*  8 Hangul syllable type T */
  ucp_gbLV,                /*  9 Hangul syllable type LV */
  ucp_gbLVT,               /* 10 Hangul syllable type LVT */
  ucp_gbRegionalIndicator, /* 11 */
  ucp_gbOther              /* 12 */
};
 | |
| 
 | |
/* These are the script identifications. The enumerators are numbered
implicitly from 0 in the order listed. The initial group is in alphabetical
order; scripts introduced by later Unicode releases are appended after it,
grouped by release, so that existing values never change (see the note at the
top of this file). */

enum {
  ucp_Arabic,
  ucp_Armenian,
  ucp_Bengali,
  ucp_Bopomofo,
  ucp_Braille,
  ucp_Buginese,
  ucp_Buhid,
  ucp_Canadian_Aboriginal,
  ucp_Cherokee,
  ucp_Common,
  ucp_Coptic,
  ucp_Cypriot,
  ucp_Cyrillic,
  ucp_Deseret,
  ucp_Devanagari,
  ucp_Ethiopic,
  ucp_Georgian,
  ucp_Glagolitic,
  ucp_Gothic,
  ucp_Greek,
  ucp_Gujarati,
  ucp_Gurmukhi,
  ucp_Han,
  ucp_Hangul,
  ucp_Hanunoo,
  ucp_Hebrew,
  ucp_Hiragana,
  ucp_Inherited,
  ucp_Kannada,
  ucp_Katakana,
  ucp_Kharoshthi,
  ucp_Khmer,
  ucp_Lao,
  ucp_Latin,
  ucp_Limbu,
  ucp_Linear_B,
  ucp_Malayalam,
  ucp_Mongolian,
  ucp_Myanmar,
  ucp_New_Tai_Lue,
  ucp_Ogham,
  ucp_Old_Italic,
  ucp_Old_Persian,
  ucp_Oriya,
  ucp_Osmanya,
  ucp_Runic,
  ucp_Shavian,
  ucp_Sinhala,
  ucp_Syloti_Nagri,
  ucp_Syriac,
  ucp_Tagalog,
  ucp_Tagbanwa,
  ucp_Tai_Le,
  ucp_Tamil,
  ucp_Telugu,
  ucp_Thaana,
  ucp_Thai,
  ucp_Tibetan,
  ucp_Tifinagh,
  ucp_Ugaritic,
  ucp_Yi,
  /* New for Unicode 5.0: */
  ucp_Balinese,
  ucp_Cuneiform,
  ucp_Nko,
  ucp_Phags_Pa,
  ucp_Phoenician,
  /* New for Unicode 5.1: */
  ucp_Carian,
  ucp_Cham,
  ucp_Kayah_Li,
  ucp_Lepcha,
  ucp_Lycian,
  ucp_Lydian,
  ucp_Ol_Chiki,
  ucp_Rejang,
  ucp_Saurashtra,
  ucp_Sundanese,
  ucp_Vai,
  /* New for Unicode 5.2: */
  ucp_Avestan,
  ucp_Bamum,
  ucp_Egyptian_Hieroglyphs,
  ucp_Imperial_Aramaic,
  ucp_Inscriptional_Pahlavi,
  ucp_Inscriptional_Parthian,
  ucp_Javanese,
  ucp_Kaithi,
  ucp_Lisu,
  ucp_Meetei_Mayek,
  ucp_Old_South_Arabian,
  ucp_Old_Turkic,
  ucp_Samaritan,
  ucp_Tai_Tham,
  ucp_Tai_Viet,
  /* New for Unicode 6.0.0: */
  ucp_Batak,
  ucp_Brahmi,
  ucp_Mandaic,
  /* New for Unicode 6.1.0: */
  ucp_Chakma,
  ucp_Meroitic_Cursive,
  ucp_Meroitic_Hieroglyphs,
  ucp_Miao,
  ucp_Sharada,
  ucp_Sora_Sompeng,
  ucp_Takri,
  /* New for Unicode 7.0.0: */
  ucp_Bassa_Vah,
  ucp_Caucasian_Albanian,
  ucp_Duployan,
  ucp_Elbasan,
  ucp_Grantha,
  ucp_Khojki,
  ucp_Khudawadi,
  ucp_Linear_A,
  ucp_Mahajani,
  ucp_Manichaean,
  ucp_Mende_Kikakui,
  ucp_Modi,
  ucp_Mro,
  ucp_Nabataean,
  ucp_Old_North_Arabian,
  ucp_Old_Permic,
  ucp_Pahawh_Hmong,
  ucp_Palmyrene,
  ucp_Psalter_Pahlavi,
  ucp_Pau_Cin_Hau,
  ucp_Siddham,
  ucp_Tirhuta,
  ucp_Warang_Citi
};
 | |
| 
 | |
| #endif
 | |
| 
 | |
| /* End of pcre2_ucp.h */
 | 
