 36af74cb25
			
		
	
	36af74cb25
	
	
	
		
			
			Some manual changes done to the library were lost with this update. They will be added in the next commit.
		
			
				
	
	
		
			219 lines
		
	
	
		
			6.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			219 lines
		
	
	
		
			6.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*************************************************
 | |
| *      Perl-Compatible Regular Expressions       *
 | |
| *************************************************/
 | |
| 
 | |
| /* PCRE is a library of functions to support regular expressions whose syntax
 | |
| and semantics are as close as possible to those of the Perl 5 language.
 | |
| 
 | |
|                        Written by Philip Hazel
 | |
|      Original API code Copyright (c) 1997-2012 University of Cambridge
 | |
|          New API code Copyright (c) 2016 University of Cambridge
 | |
| 
 | |
| -----------------------------------------------------------------------------
 | |
| Redistribution and use in source and binary forms, with or without
 | |
| modification, are permitted provided that the following conditions are met:
 | |
| 
 | |
|     * Redistributions of source code must retain the above copyright notice,
 | |
|       this list of conditions and the following disclaimer.
 | |
| 
 | |
|     * Redistributions in binary form must reproduce the above copyright
 | |
|       notice, this list of conditions and the following disclaimer in the
 | |
|       documentation and/or other materials provided with the distribution.
 | |
| 
 | |
|     * Neither the name of the University of Cambridge nor the names of its
 | |
|       contributors may be used to endorse or promote products derived from
 | |
|       this software without specific prior written permission.
 | |
| 
 | |
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 | |
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | |
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 | |
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 | |
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 | |
| CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 | |
| SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 | |
| INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 | |
| CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 | |
| ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 | |
| POSSIBILITY OF SUCH DAMAGE.
 | |
| -----------------------------------------------------------------------------
 | |
| */
 | |
| 
 | |
| 
 | |
| /* This module contains a single function that scans through a compiled pattern
 | |
| until it finds a capturing bracket with the given number, or, if the number is
 | |
| negative, an instance of OP_REVERSE for a lookbehind. The function is called
 | |
| from pcre2_compile.c and also from pcre2_study.c when finding the minimum
 | |
| matching length. */
 | |
| 
 | |
| 
 | |
| #ifdef HAVE_CONFIG_H
 | |
| #include "config.h"
 | |
| #endif
 | |
| 
 | |
| #include "pcre2_internal.h"
 | |
| 
 | |
| 
 | |
| /*************************************************
 | |
| *    Scan compiled regex for specific bracket    *
 | |
| *************************************************/
 | |
| 
 | |
| /*
 | |
| Arguments:
 | |
|   code        points to start of expression
 | |
|   utf         TRUE in UTF mode
 | |
|   number      the required bracket number or negative to find a lookbehind
 | |
| 
 | |
| Returns:      pointer to the opcode for the bracket, or NULL if not found
 | |
| */
 | |
| 
 | |
| PCRE2_SPTR
 | |
| PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
 | |
| {
 | |
| for (;;)
 | |
|   {
 | |
|   PCRE2_UCHAR c = *code;
 | |
| 
 | |
|   if (c == OP_END) return NULL;
 | |
| 
 | |
|   /* XCLASS is used for classes that cannot be represented just by a bit map.
 | |
|   This includes negated single high-valued characters. CALLOUT_STR is used for
 | |
|   callouts with string arguments. In both cases the length in the table is
 | |
|   zero; the actual length is stored in the compiled code. */
 | |
| 
 | |
|   if (c == OP_XCLASS) code += GET(code, 1);
 | |
|     else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
 | |
| 
 | |
|   /* Handle lookbehind */
 | |
| 
 | |
|   else if (c == OP_REVERSE)
 | |
|     {
 | |
|     if (number < 0) return (PCRE2_UCHAR *)code;
 | |
|     code += PRIV(OP_lengths)[c];
 | |
|     }
 | |
| 
 | |
|   /* Handle capturing bracket */
 | |
| 
 | |
|   else if (c == OP_CBRA || c == OP_SCBRA ||
 | |
|            c == OP_CBRAPOS || c == OP_SCBRAPOS)
 | |
|     {
 | |
|     int n = (int)GET2(code, 1+LINK_SIZE);
 | |
|     if (n == number) return (PCRE2_UCHAR *)code;
 | |
|     code += PRIV(OP_lengths)[c];
 | |
|     }
 | |
| 
 | |
|   /* Otherwise, we can get the item's length from the table, except that for
 | |
|   repeated character types, we have to test for \p and \P, which have an extra
 | |
|   two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
 | |
|   must add in its length. */
 | |
| 
 | |
|   else
 | |
|     {
 | |
|     switch(c)
 | |
|       {
 | |
|       case OP_TYPESTAR:
 | |
|       case OP_TYPEMINSTAR:
 | |
|       case OP_TYPEPLUS:
 | |
|       case OP_TYPEMINPLUS:
 | |
|       case OP_TYPEQUERY:
 | |
|       case OP_TYPEMINQUERY:
 | |
|       case OP_TYPEPOSSTAR:
 | |
|       case OP_TYPEPOSPLUS:
 | |
|       case OP_TYPEPOSQUERY:
 | |
|       if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
 | |
|       break;
 | |
| 
 | |
|       case OP_TYPEUPTO:
 | |
|       case OP_TYPEMINUPTO:
 | |
|       case OP_TYPEEXACT:
 | |
|       case OP_TYPEPOSUPTO:
 | |
|       if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
 | |
|         code += 2;
 | |
|       break;
 | |
| 
 | |
|       case OP_MARK:
 | |
|       case OP_PRUNE_ARG:
 | |
|       case OP_SKIP_ARG:
 | |
|       case OP_THEN_ARG:
 | |
|       code += code[1];
 | |
|       break;
 | |
|       }
 | |
| 
 | |
|     /* Add in the fixed length from the table */
 | |
| 
 | |
|     code += PRIV(OP_lengths)[c];
 | |
| 
 | |
|   /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
 | |
|   followed by a multi-byte character. The length in the table is a minimum, so
 | |
|   we have to arrange to skip the extra bytes. */
 | |
| 
 | |
| #ifdef MAYBE_UTF_MULTI
 | |
|     if (utf) switch(c)
 | |
|       {
 | |
|       case OP_CHAR:
 | |
|       case OP_CHARI:
 | |
|       case OP_NOT:
 | |
|       case OP_NOTI:
 | |
|       case OP_EXACT:
 | |
|       case OP_EXACTI:
 | |
|       case OP_NOTEXACT:
 | |
|       case OP_NOTEXACTI:
 | |
|       case OP_UPTO:
 | |
|       case OP_UPTOI:
 | |
|       case OP_NOTUPTO:
 | |
|       case OP_NOTUPTOI:
 | |
|       case OP_MINUPTO:
 | |
|       case OP_MINUPTOI:
 | |
|       case OP_NOTMINUPTO:
 | |
|       case OP_NOTMINUPTOI:
 | |
|       case OP_POSUPTO:
 | |
|       case OP_POSUPTOI:
 | |
|       case OP_NOTPOSUPTO:
 | |
|       case OP_NOTPOSUPTOI:
 | |
|       case OP_STAR:
 | |
|       case OP_STARI:
 | |
|       case OP_NOTSTAR:
 | |
|       case OP_NOTSTARI:
 | |
|       case OP_MINSTAR:
 | |
|       case OP_MINSTARI:
 | |
|       case OP_NOTMINSTAR:
 | |
|       case OP_NOTMINSTARI:
 | |
|       case OP_POSSTAR:
 | |
|       case OP_POSSTARI:
 | |
|       case OP_NOTPOSSTAR:
 | |
|       case OP_NOTPOSSTARI:
 | |
|       case OP_PLUS:
 | |
|       case OP_PLUSI:
 | |
|       case OP_NOTPLUS:
 | |
|       case OP_NOTPLUSI:
 | |
|       case OP_MINPLUS:
 | |
|       case OP_MINPLUSI:
 | |
|       case OP_NOTMINPLUS:
 | |
|       case OP_NOTMINPLUSI:
 | |
|       case OP_POSPLUS:
 | |
|       case OP_POSPLUSI:
 | |
|       case OP_NOTPOSPLUS:
 | |
|       case OP_NOTPOSPLUSI:
 | |
|       case OP_QUERY:
 | |
|       case OP_QUERYI:
 | |
|       case OP_NOTQUERY:
 | |
|       case OP_NOTQUERYI:
 | |
|       case OP_MINQUERY:
 | |
|       case OP_MINQUERYI:
 | |
|       case OP_NOTMINQUERY:
 | |
|       case OP_NOTMINQUERYI:
 | |
|       case OP_POSQUERY:
 | |
|       case OP_POSQUERYI:
 | |
|       case OP_NOTPOSQUERY:
 | |
|       case OP_NOTPOSQUERYI:
 | |
|       if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
 | |
|       break;
 | |
|       }
 | |
| #else
 | |
|     (void)(utf);  /* Keep compiler happy by referencing function argument */
 | |
| #endif  /* MAYBE_UTF_MULTI */
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| /* End of pcre2_find_bracket.c */
 |