Added bundled PCRE2 library.
This commit is contained in:
@ -40,6 +40,10 @@ find_package(TCMalloc)
|
||||
find_package(Jemalloc)
|
||||
find_package(Git)
|
||||
find_package(CURL)
|
||||
|
||||
# Build PCRE2
|
||||
include(cmake/BuildPCRE2.cmake)
|
||||
|
||||
# You can find the variables set by this in the FindCURL.cmake file
|
||||
# which is a default module in CMake.
|
||||
|
||||
|
16
cmake/BuildPCRE2.cmake
Normal file
16
cmake/BuildPCRE2.cmake
Normal file
@ -0,0 +1,16 @@
|
||||
# Build the PCRE2 library from source
#
# The bundled sources in <source dir>/pcre2 are copied into <build dir>/pcre2,
# configured there with CMake as a shared library (pcre2grep and the tests are
# disabled) and built with make. All of this happens at configure time, so
# every step is checked and a failure aborts the configuration immediately
# instead of surfacing later as a missing libpcre2-8.so.

set(PCRE_ROOT_DIR ${CMAKE_SOURCE_DIR}/pcre2/)
set(PCRE_BUILD_DIR ${CMAKE_BINARY_DIR}/pcre2/)

execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory ${PCRE_BUILD_DIR}
  RESULT_VARIABLE PCRE_STEP_RESULT)
if(NOT PCRE_STEP_RESULT EQUAL 0)
  message(FATAL_ERROR "Failed to create PCRE2 build directory ${PCRE_BUILD_DIR}: ${PCRE_STEP_RESULT}")
endif()

execute_process(COMMAND ${CMAKE_COMMAND} -E copy_directory ${PCRE_ROOT_DIR} ${PCRE_BUILD_DIR}
  RESULT_VARIABLE PCRE_STEP_RESULT)
if(NOT PCRE_STEP_RESULT EQUAL 0)
  message(FATAL_ERROR "Failed to copy PCRE2 sources to ${PCRE_BUILD_DIR}: ${PCRE_STEP_RESULT}")
endif()

execute_process(COMMAND ${CMAKE_COMMAND} ${PCRE_BUILD_DIR}
  -DBUILD_SHARED_LIBS=Y
  -DPCRE2_BUILD_PCRE2GREP=N
  -DPCRE2_BUILD_TESTS=N
  WORKING_DIRECTORY ${PCRE_BUILD_DIR}
  RESULT_VARIABLE PCRE_STEP_RESULT)
if(NOT PCRE_STEP_RESULT EQUAL 0)
  message(FATAL_ERROR "Failed to configure the bundled PCRE2 library: ${PCRE_STEP_RESULT}")
endif()

execute_process(COMMAND make WORKING_DIRECTORY ${PCRE_BUILD_DIR}
  RESULT_VARIABLE PCRE_STEP_RESULT)
if(NOT PCRE_STEP_RESULT EQUAL 0)
  message(FATAL_ERROR "Failed to build the bundled PCRE2 library: ${PCRE_STEP_RESULT}")
endif()

# Export the library path and headers to the rest of the build and install
# the shared object alongside the other libraries.
set(PCRE2_LIBRARIES ${CMAKE_BINARY_DIR}/pcre2/libpcre2-8.so CACHE STRING "PCRE2 dynamic libraries" FORCE)
include_directories(${CMAKE_BINARY_DIR}/pcre2/)
install(PROGRAMS ${PCRE2_LIBRARIES} DESTINATION ${MAXSCALE_LIBDIR})
|
313
pcre2/132html
Executable file
313
pcre2/132html
Executable file
@ -0,0 +1,313 @@
|
||||
#! /usr/bin/perl -w
|
||||
|
||||
# Script to turn PCRE2 man pages into HTML
|
||||
|
||||
|
||||
# Subroutine to handle font changes and other escapes
|
||||
|
||||
sub do_line {
  # Convert one line of man-page text to HTML and return the result.
  #   $_[0]  the raw input line
  # The argument itself is not modified; a converted copy is returned.
  my($s) = $_[0];

  # Escape the HTML metacharacters first, so that the tags inserted by the
  # font substitutions below are not themselves escaped.
  $s =~ s/</&lt;/g;                   # Deal with < and >
  $s =~ s/>/&gt;/g;

  # \fI...\fR (or \fP) is italic, \fB...\fR (or \fP) is bold.
  $s =~ s"\\fI(.*?)\\f[RP]"<i>$1</i>"g;
  $s =~ s"\\fB(.*?)\\f[RP]"<b>$1</b>"g;

  # \e is the roff escape for a literal backslash.
  $s =~ s"\\e"\\"g;

  # Turn "(c)" into a copyright sign, but only directly after "Copyright ".
  $s =~ s/(?<=Copyright )\(c\)/&copy;/g;
  $s;
}
|
||||
|
||||
# Subroutine to ensure not in a paragraph
|
||||
|
||||
sub end_para {
  # Close whatever is currently open on the TEMP output: a literal block (if
  # $inpre is set) and then the paragraph itself. Nothing is written when no
  # paragraph is open. The paragraph, literal and "text written" flags are
  # always cleared afterwards.
  if ($inpara) {
    if ($inpre) {
      print TEMP "</PRE>\n";
    }
    print TEMP "</P>\n";
  }

  $inpre = 0;
  $inpara = 0;
  $wrotetext = 0;
}
|
||||
|
||||
# Subroutine to start a new paragraph
|
||||
|
||||
sub new_para {
  # Finish any paragraph that is still open, then write the opening tag of a
  # fresh one to TEMP and record that we are now inside a paragraph.
  end_para();
  print TEMP "<P>\n";
  $inpara = 1;
}
|
||||
|
||||
|
||||
# Main program
|
||||
|
||||
use File::Temp qw(tempfile);

# State shared with the do_line/end_para/new_para subroutines:
#   $innf       inside a .nf/.fi (no-fill) section
#   $inpara     an HTML paragraph is currently open on TEMP
#   $inpre      a <pre> literal block is currently open on TEMP
#   $wrotetext  some text has been written since the last paragraph end
#   $toc        -toc was given: emit a table of contents
#   $ref        number of the next TOC/SEC anchor

$innf = 0;
$inpara = 0;
$inpre = 0;
$wrotetext = 0;
$toc = 0;
$ref = 1;

# Consume leading options; only -toc is recognized, others are ignored.

while ($#ARGV >= 0 && $ARGV[0] =~ /^-/)
  {
  $toc = 1 if $ARGV[0] eq "-toc";
  shift;
  }

# Initial output to STDOUT

print <<End ;
<html>
<head>
<title>$ARGV[0] specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>$ARGV[0] man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
End

print "<ul>\n" if ($toc);

# The body is written to a temporary file, because TOC entries go straight to
# STDOUT as the headings are encountered and must precede the body. The file
# is created safely by File::Temp (unpredictable name, exclusive creation)
# rather than as a guessable /tmp/<pid>, and is removed at exit even if
# processing is abandoned.

my($tempfh, $tempname) = tempfile("132html-XXXXXX", TMPDIR => 1, UNLINK => 1);
close($tempfh);
open(TEMP, ">", $tempname) || die "Can't open $tempname for output\n";

while (<STDIN>)
  {
  # Handle lines beginning with a dot

  if (/^\./)
    {
    # Some of the PCRE2 man pages used to contain instances of .br. However,
    # they should have all been removed because they cause trouble in some
    # (other) automated systems that translate man pages to HTML. Complain if
    # we find .br or .in (another macro that is deprecated).

    if (/^\.br/ || /^\.in/)
      {
      print STDERR "\n*** Deprecated macro encountered - rewrite needed\n";
      print STDERR "*** $_\n";
      die "*** Processing abandoned\n";
      }

    # Instead of .br, relevant "literal" sections are enclosed in .nf/.fi.

    elsif (/^\.nf/)
      {
      $innf = 1;
      }

    elsif (/^\.fi/)
      {
      $innf = 0;
      }

    # Handling .sp is subtle. If it is inside a literal section, do nothing if
    # the next line is a non literal text line; similarly, if not inside a
    # literal section, do nothing if a literal follows, unless we are inside
    # a .nf/.fi section. The point being that the <pre> and </pre> that delimit
    # literal sections will do the spacing. Always skip if no previous output.

    elsif (/^\.sp/)
      {
      if ($wrotetext)
        {
        $_ = <STDIN>;
        if ($inpre)
          {
          print TEMP "\n" if (/^[\s.]/);
          }
        else
          {
          print TEMP "<br>\n<br>\n" if ($innf || !/^[\s.]/);
          }
        redo;    # Now process the lookahead line we just read
        }
      }
    elsif (/^\.TP/ || /^\.PP/ || /^\.P/)
      {
      &new_para();
      }
    elsif (/^\.SH\s*("?)(.*)\1/)
      {
      # Ignore the NAME section (the heading and the line that follows it)
      if ($2 =~ /^NAME\b/)
        {
        <STDIN>;
        next;
        }

      &end_para();
      my($title) = &do_line($2);
      if ($toc)
        {
        # The title is passed as a %s argument rather than interpolated into
        # the format, so a "%" in a heading cannot be taken as a conversion.
        printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">%s</a>\n",
          $ref, $ref, $title);
        printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">%s</a><br>\n",
          $ref, $title);
        $ref++;
        }
      else
        {
        print TEMP "<br><b>\n$title\n</b><br>\n";
        }
      }
    elsif (/^\.SS\s*("?)(.*)\1/)
      {
      &end_para();
      my($title) = &do_line($2);
      print TEMP "<br><b>\n$title\n</b><br>\n";
      }
    elsif (/^\.B\s*(.*)/)
      {
      &new_para() if (!$inpara);
      $_ = &do_line($1);
      s/"(.*?)"/$1/g;
      print TEMP "<b>$_</b>\n";
      $wrotetext = 1;
      }
    elsif (/^\.I\s*(.*)/)
      {
      &new_para() if (!$inpara);
      $_ = &do_line($1);
      s/"(.*?)"/$1/g;
      print TEMP "<i>$_</i>\n";
      $wrotetext = 1;
      }

    # A comment that starts "HREF" takes the next line as a name that
    # is turned into a hyperlink, using the text given, which might be
    # in a special font. If it ends in () or (digits) or punctuation, they
    # aren't part of the link.

    elsif (/^\.\\"\s*HREF/)
      {
      $_=<STDIN>;
      chomp;
      $_ = &do_line($_);
      $_ =~ s/\s+$//;
      $_ =~ /^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?(?:\(\d+\))?[.,;:]?$/;
      print TEMP "<a href=\"$1.html\">$_</a>\n";
      }

    # A comment that starts "HTML" inserts literal HTML

    elsif (/^\.\\"\s*HTML\s*(.*)/)
      {
      print TEMP $1;
      }

    # A comment that starts < inserts that HTML at the end of the
    # *next* input line - so as not to get a newline between them.

    elsif (/^\.\\"\s*(<.*>)/)
      {
      my($markup) = $1;
      $_=<STDIN>;
      chomp;
      $_ = &do_line($_);
      $_ =~ s/\s+$//;
      print TEMP "$_$markup\n";
      }

    # A comment that starts JOIN joins the next two lines together, with one
    # space between them. Then that line is processed. This is used in some
    # displays where two lines are needed for the "man" version. JOINSH works
    # the same, except that it assumes this is a shell command, so removes
    # continuation backslashes.

    elsif (/^\.\\"\s*JOIN(SH)?/)
      {
      my($one,$two);
      $one = <STDIN>;
      $two = <STDIN>;
      $one =~ s/\s*\\e\s*$// if (defined($1));
      chomp($one);
      $two =~ s/^\s+//;
      $_ = "$one $two";
      redo;    # Process the joined lines
      }

    # .EX/.EE are used in the pcre2demo page to bracket the entire program,
    # which is unmodified except for turning backslash into "\e". The HTML
    # metacharacters are escaped; & must be done first so that the entities
    # produced for < and > are not escaped a second time.

    elsif (/^\.EX\s*$/)
      {
      print TEMP "<PRE>\n";
      while (<STDIN>)
        {
        last if /^\.EE\s*$/;
        s/\\e/\\/g;
        s/&/&amp;/g;
        s/</&lt;/g;
        s/>/&gt;/g;
        print TEMP;
        }
      }

    # Ignore anything not recognized

    next;
    }

  # Line does not begin with a dot. Replace blank lines with new paragraphs

  if (/^\s*$/)
    {
    &end_para() if ($wrotetext);
    next;
    }

  # Convert fonts changes and output an ordinary line. Ensure that indented
  # lines are marked as literal.

  $_ = &do_line($_);
  &new_para() if (!$inpara);

  if (/^\s/)
    {
    if (!$inpre)
      {
      print TEMP "<pre>\n";
      $inpre = 1;
      }
    }
  elsif ($inpre)
    {
    print TEMP "</pre>\n";
    $inpre = 0;
    }

  # Add <br> to the end of a non-literal line if we are within .nf/.fi

  $_ .= "<br>\n" if (!$inpre && $innf);

  print TEMP;
  $wrotetext = 1;
  }

# The TOC, if present, will have been written - terminate it

print "</ul>\n" if ($toc);

# Copy the remainder to the standard output. Buffered write errors only show
# up when the file is closed, so check the close before reading it back.

close(TEMP) || die "Can't finish writing $tempname\n";
open(TEMP, "<", $tempname) || die "Can't open $tempname for input\n";

print while (<TEMP>);

print <<End ;
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
End

close(TEMP);
unlink($tempname);
|
||||
|
||||
# End
|
36
pcre2/AUTHORS
Normal file
36
pcre2/AUTHORS
Normal file
@ -0,0 +1,36 @@
|
||||
THE MAIN PCRE2 LIBRARY CODE
|
||||
---------------------------
|
||||
|
||||
Written by: Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2015 University of Cambridge
|
||||
All rights reserved
|
||||
|
||||
|
||||
PCRE2 JUST-IN-TIME COMPILATION SUPPORT
|
||||
--------------------------------------
|
||||
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2010-2015 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
STACK-LESS JUST-IN-TIME COMPILER
|
||||
--------------------------------
|
||||
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2009-2015 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
####
|
768
pcre2/CMakeLists.txt
Normal file
768
pcre2/CMakeLists.txt
Normal file
@ -0,0 +1,768 @@
|
||||
# CMakeLists.txt
|
||||
#
|
||||
#
|
||||
# This file enables PCRE2 to be built with the CMake configuration and build
|
||||
# tool. Download CMake in source or binary form from http://www.cmake.org/
|
||||
# Converted to support PCRE2 from the original PCRE file, August 2014.
|
||||
#
|
||||
# Original listfile by Christian Ehrlicher <Ch.Ehrlicher@gmx.de>
|
||||
# Refined and expanded by Daniel Richard G. <skunk@iSKUNK.ORG>
|
||||
# 2007-09-14 mod by Sheri so 7.4 supported configuration options can be entered
|
||||
# 2007-09-19 Adjusted by PH to retain previous default settings
|
||||
# 2007-12-26 (a) On UNIX, use names libpcre instead of just pcre
|
||||
# (b) Ensure pcretest and pcregrep link with the local library,
|
||||
# not a previously-installed one.
|
||||
# (c) Add PCRE_SUPPORT_LIBREADLINE, PCRE_SUPPORT_LIBZ, and
|
||||
# PCRE_SUPPORT_LIBBZ2.
|
||||
# 2008-01-20 Brought up to date to include several new features by Christian
|
||||
# Ehrlicher.
|
||||
# 2008-01-22 Sheri added options for backward compatibility of library names
|
||||
# when building with minGW:
|
||||
# if "ON", NON_STANDARD_LIB_PREFIX causes shared libraries to
|
||||
# be built without "lib" as prefix. (The libraries will be named
|
||||
# pcre.dll, pcreposix.dll and pcrecpp.dll).
|
||||
# if "ON", NON_STANDARD_LIB_SUFFIX causes shared libraries to
|
||||
# be built with suffix of "-0.dll". (The libraries will be named
|
||||
# libpcre-0.dll, libpcreposix-0.dll and libpcrecpp-0.dll - same names
|
||||
# built by default with Configure and Make.
|
||||
# 2008-01-23 PH removed the automatic build of pcredemo.
|
||||
# 2008-04-22 PH modified READLINE support so it finds NCURSES when needed.
|
||||
# 2008-07-03 PH updated for revised UCP property support (change of files)
|
||||
# 2009-03-23 PH applied Steven Van Ingelgem's patch to change the name
|
||||
# CMAKE_BINARY_DIR to PROJECT_BINARY_DIR so that it works when PCRE
|
||||
# is included within another project.
|
||||
# 2009-03-23 PH applied a modified version of Steven Van Ingelgem's patches to
|
||||
# add options to stop the building of pcregrep and the tests, and
|
||||
# to disable the final configuration report.
|
||||
# 2009-04-11 PH applied Christian Ehrlicher's patch to show compiler flags that
|
||||
# are set by specifying a release type.
|
||||
# 2010-01-02 PH added test for stdint.h
|
||||
# 2010-03-02 PH added test for inttypes.h
|
||||
# 2011-08-01 PH added PCREGREP_BUFSIZE
|
||||
# 2011-08-22 PH added PCRE_SUPPORT_JIT
|
||||
# 2011-09-06 PH modified WIN32 ADD_TEST line as suggested by Sergey Cherepanov
|
||||
# 2011-09-06 PH added PCRE_SUPPORT_PCREGREP_JIT
|
||||
# 2011-10-04 Sheri added support for including coff data in windows shared libraries
|
||||
# compiled with MINGW if pcre.rc and/or pcreposix.rc are placed in
|
||||
# the source dir by the user prior to building
|
||||
# 2011-10-04 Sheri changed various add_test's to use exes' location built instead
|
||||
# of DEBUG location only (likely only matters in MSVC)
|
||||
# 2011-10-04 Sheri added scripts to provide needed variables to RunTest and
|
||||
# RunGrepTest (used for UNIX and Msys)
|
||||
# 2011-10-04 Sheri added scripts to provide needed variables and to execute
|
||||
# RunTest.bat in Win32 (for effortless testing with "make test")
|
||||
# 2011-10-04 Sheri Increased minimum required cmake version
|
||||
# 2012-01-06 PH removed pcre_info.c and added pcre_string_utils.c
|
||||
# 2012-01-10 Zoltan Herczeg added libpcre16 support
|
||||
# 2012-01-13 Stephen Kelly added out of source build support
|
||||
# 2012-01-17 PH applied Stephen Kelly's patch to parse the version data out
|
||||
# of the configure.ac file
|
||||
# 2012-02-26 PH added support for libedit
|
||||
# 2012-09-06 PH added support for PCRE_EBCDIC_NL25
|
||||
# 2012-09-08 ChPe added PCRE32 support
|
||||
# 2012-10-23 PH added support for VALGRIND and GCOV
|
||||
# 2012-12-08 PH added patch from Daniel Richard G to quash some MSVC warnings
|
||||
# 2013-07-01 PH realized that the "support" for GCOV was a total nonsense and
|
||||
# so it has been removed.
|
||||
# 2013-10-08 PH got rid of the "source" command, which is a bash-ism (use ".")
|
||||
# 2013-11-05 PH added support for PARENS_NEST_LIMIT
|
||||
# 2014-08-29 PH converted the file for PCRE2 (which has no C++).
|
||||
# 2015-04-24 PH added support for PCRE2_DEBUG
|
||||
|
||||
PROJECT(PCRE2 C)
|
||||
|
||||
# Increased minimum to 2.8.0 to support newer add_test features. Set policy
|
||||
# CMP0026 to avoid warnings for the use of LOCATION in GET_TARGET_PROPERTY.
|
||||
|
||||
CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0)
|
||||
CMAKE_POLICY(SET CMP0026 OLD)
|
||||
|
||||
SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) # for FindReadline.cmake
|
||||
|
||||
# Make the headers in src/ visible to every target (the generated
# pcre2_chartables.c lives in the binary directory but includes headers from
# src/). Use INCLUDE_DIRECTORIES rather than assigning CMAKE_C_FLAGS, which
# would discard any C flags supplied by the user or the environment.
INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/src)
|
||||
|
||||
# external packages
|
||||
FIND_PACKAGE( BZip2 )
|
||||
FIND_PACKAGE( ZLIB )
|
||||
FIND_PACKAGE( Readline )
|
||||
FIND_PACKAGE( Editline )
|
||||
|
||||
# Configuration checks
|
||||
|
||||
INCLUDE(CheckIncludeFile)
|
||||
INCLUDE(CheckFunctionExists)
|
||||
INCLUDE(CheckTypeSize)
|
||||
|
||||
CHECK_INCLUDE_FILE(dirent.h HAVE_DIRENT_H)
|
||||
CHECK_INCLUDE_FILE(stdint.h HAVE_STDINT_H)
|
||||
CHECK_INCLUDE_FILE(inttypes.h HAVE_INTTYPES_H)
|
||||
CHECK_INCLUDE_FILE(sys/stat.h HAVE_SYS_STAT_H)
|
||||
CHECK_INCLUDE_FILE(sys/types.h HAVE_SYS_TYPES_H)
|
||||
CHECK_INCLUDE_FILE(unistd.h HAVE_UNISTD_H)
|
||||
CHECK_INCLUDE_FILE(windows.h HAVE_WINDOWS_H)
|
||||
|
||||
CHECK_FUNCTION_EXISTS(bcopy HAVE_BCOPY)
|
||||
CHECK_FUNCTION_EXISTS(memmove HAVE_MEMMOVE)
|
||||
CHECK_FUNCTION_EXISTS(strerror HAVE_STRERROR)
|
||||
|
||||
# User-configurable options
|
||||
#
|
||||
# Note: CMakeSetup displays these in alphabetical order, regardless of
|
||||
# the order we use here.
|
||||
|
||||
SET(BUILD_SHARED_LIBS OFF CACHE BOOL
|
||||
"Build shared libraries instead of static ones.")
|
||||
|
||||
OPTION(PCRE2_BUILD_PCRE2_8 "Build 8 bit PCRE2 library" ON)
|
||||
|
||||
OPTION(PCRE2_BUILD_PCRE2_16 "Build 16 bit PCRE2 library" OFF)
|
||||
|
||||
OPTION(PCRE2_BUILD_PCRE2_32 "Build 32 bit PCRE2 library" OFF)
|
||||
|
||||
OPTION(PCRE2_DEBUG "Include debugging code" OFF)
|
||||
|
||||
SET(PCRE2_EBCDIC OFF CACHE BOOL
|
||||
"Use EBCDIC coding instead of ASCII. (This is rarely used outside of mainframe systems.)")
|
||||
|
||||
SET(PCRE2_EBCDIC_NL25 OFF CACHE BOOL
|
||||
"Use 0x25 as EBCDIC NL character instead of 0x15; implies EBCDIC.")
|
||||
|
||||
SET(PCRE2_LINK_SIZE "2" CACHE STRING
|
||||
"Internal link size (2, 3 or 4 allowed). See LINK_SIZE in config.h.in for details.")
|
||||
|
||||
SET(PCRE2_PARENS_NEST_LIMIT "250" CACHE STRING
|
||||
"Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details.")
|
||||
|
||||
SET(PCRE2_MATCH_LIMIT "10000000" CACHE STRING
|
||||
"Default limit on internal looping. See MATCH_LIMIT in config.h.in for details.")
|
||||
|
||||
SET(PCRE2_MATCH_LIMIT_RECURSION "MATCH_LIMIT" CACHE STRING
|
||||
"Default limit on internal recursion. See MATCH_LIMIT_RECURSION in config.h.in for details.")
|
||||
|
||||
SET(PCRE2GREP_BUFSIZE "20480" CACHE STRING
|
||||
"Buffer size parameter for pcre2grep. See PCRE2GREP_BUFSIZE in config.h.in for details.")
|
||||
|
||||
SET(PCRE2_NEWLINE "LF" CACHE STRING
|
||||
"What to recognize as a newline (one of CR, LF, CRLF, ANY, ANYCRLF).")
|
||||
|
||||
SET(PCRE2_HEAP_MATCH_RECURSE OFF CACHE BOOL
|
||||
"If ON, then don't use stack recursion when matching. See HEAP_MATCH_RECURSE in config.h.in for details.")
|
||||
|
||||
SET(PCRE2_SUPPORT_JIT OFF CACHE BOOL
|
||||
"Enable support for Just-in-time compiling.")
|
||||
|
||||
SET(PCRE2_SUPPORT_PCRE2GREP_JIT ON CACHE BOOL
|
||||
"Enable use of Just-in-time compiling in pcre2grep.")
|
||||
|
||||
SET(PCRE2_SUPPORT_UNICODE ON CACHE BOOL
|
||||
"Enable support for Unicode and UTF-8/UTF-16/UTF-32 encoding.")
|
||||
|
||||
SET(PCRE2_SUPPORT_BSR_ANYCRLF OFF CACHE BOOL
|
||||
"ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks")
|
||||
|
||||
SET(PCRE2_SUPPORT_VALGRIND OFF CACHE BOOL
|
||||
"Enable Valgrind support.")
|
||||
|
||||
OPTION(PCRE2_SHOW_REPORT "Show the final configuration report" ON)
|
||||
OPTION(PCRE2_BUILD_PCRE2GREP "Build pcre2grep" ON)
|
||||
OPTION(PCRE2_BUILD_TESTS "Build the tests" ON)
|
||||
|
||||
IF (MINGW)
|
||||
OPTION(NON_STANDARD_LIB_PREFIX
|
||||
"ON=Shared libraries built in mingw will be named pcre2.dll, etc., instead of libpcre2.dll, etc."
|
||||
OFF)
|
||||
|
||||
OPTION(NON_STANDARD_LIB_SUFFIX
|
||||
"ON=Shared libraries built in mingw will be named libpcre2-0.dll, etc., instead of libpcre2.dll, etc."
|
||||
OFF)
|
||||
ENDIF(MINGW)
|
||||
|
||||
IF(MSVC)
|
||||
OPTION(INSTALL_MSVC_PDB
|
||||
"ON=Install .pdb files built by MSVC, if generated"
|
||||
OFF)
|
||||
ENDIF(MSVC)
|
||||
|
||||
# bzip2 lib
|
||||
IF(BZIP2_FOUND)
|
||||
OPTION (PCRE2_SUPPORT_LIBBZ2 "Enable support for linking pcre2grep with libbz2." ON)
|
||||
ENDIF(BZIP2_FOUND)
|
||||
IF(PCRE2_SUPPORT_LIBBZ2)
|
||||
INCLUDE_DIRECTORIES(${BZIP2_INCLUDE_DIR})
|
||||
ENDIF(PCRE2_SUPPORT_LIBBZ2)
|
||||
|
||||
# zlib
|
||||
IF(ZLIB_FOUND)
|
||||
OPTION (PCRE2_SUPPORT_LIBZ "Enable support for linking pcre2grep with libz." ON)
|
||||
ENDIF(ZLIB_FOUND)
|
||||
IF(PCRE2_SUPPORT_LIBZ)
|
||||
INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR})
|
||||
ENDIF(PCRE2_SUPPORT_LIBZ)
|
||||
|
||||
# editline lib
|
||||
IF(EDITLINE_FOUND)
|
||||
OPTION (PCRE2_SUPPORT_LIBEDIT "Enable support for linking pcre2test with libedit." OFF)
|
||||
ENDIF(EDITLINE_FOUND)
|
||||
IF(PCRE2_SUPPORT_LIBEDIT)
|
||||
INCLUDE_DIRECTORIES(${EDITLINE_INCLUDE_DIR})
|
||||
ENDIF(PCRE2_SUPPORT_LIBEDIT)
|
||||
|
||||
# readline lib
|
||||
IF(READLINE_FOUND)
|
||||
OPTION (PCRE2_SUPPORT_LIBREADLINE "Enable support for linking pcre2test with libreadline." ON)
|
||||
ENDIF(READLINE_FOUND)
|
||||
IF(PCRE2_SUPPORT_LIBREADLINE)
|
||||
INCLUDE_DIRECTORIES(${READLINE_INCLUDE_DIR})
|
||||
ENDIF(PCRE2_SUPPORT_LIBREADLINE)
|
||||
|
||||
# Prepare build configuration
|
||||
|
||||
IF(NOT BUILD_SHARED_LIBS)
|
||||
SET(PCRE2_STATIC 1)
|
||||
ENDIF(NOT BUILD_SHARED_LIBS)
|
||||
|
||||
IF(NOT PCRE2_BUILD_PCRE2_8 AND NOT PCRE2_BUILD_PCRE2_16 AND NOT PCRE2_BUILD_PCRE2_32)
|
||||
MESSAGE(FATAL_ERROR "At least one of PCRE2_BUILD_PCRE2_8, PCRE2_BUILD_PCRE2_16 or PCRE2_BUILD_PCRE2_32 must be enabled")
|
||||
ENDIF(NOT PCRE2_BUILD_PCRE2_8 AND NOT PCRE2_BUILD_PCRE2_16 AND NOT PCRE2_BUILD_PCRE2_32)
|
||||
|
||||
IF(PCRE2_BUILD_PCRE2_8)
|
||||
SET(SUPPORT_PCRE2_8 1)
|
||||
ENDIF(PCRE2_BUILD_PCRE2_8)
|
||||
|
||||
IF(PCRE2_BUILD_PCRE2_16)
|
||||
SET(SUPPORT_PCRE2_16 1)
|
||||
ENDIF(PCRE2_BUILD_PCRE2_16)
|
||||
|
||||
IF(PCRE2_BUILD_PCRE2_32)
|
||||
SET(SUPPORT_PCRE2_32 1)
|
||||
ENDIF(PCRE2_BUILD_PCRE2_32)
|
||||
|
||||
IF(PCRE2_BUILD_PCRE2GREP AND NOT PCRE2_BUILD_PCRE2_8)
|
||||
MESSAGE(STATUS "** PCRE2_BUILD_PCRE2_8 must be enabled for the pcre2grep program")
|
||||
SET(PCRE2_BUILD_PCRE2GREP OFF)
|
||||
ENDIF(PCRE2_BUILD_PCRE2GREP AND NOT PCRE2_BUILD_PCRE2_8)
|
||||
|
||||
IF(PCRE2_SUPPORT_LIBREADLINE AND PCRE2_SUPPORT_LIBEDIT)
|
||||
MESSAGE(FATAL_ERROR "Only one of libreadline or libeditline can be specified")
|
||||
ENDIF(PCRE2_SUPPORT_LIBREADLINE AND PCRE2_SUPPORT_LIBEDIT)
|
||||
|
||||
IF(PCRE2_SUPPORT_BSR_ANYCRLF)
|
||||
SET(BSR_ANYCRLF 1)
|
||||
ENDIF(PCRE2_SUPPORT_BSR_ANYCRLF)
|
||||
|
||||
IF(PCRE2_SUPPORT_UNICODE)
|
||||
SET(SUPPORT_UNICODE 1)
|
||||
ENDIF(PCRE2_SUPPORT_UNICODE)
|
||||
|
||||
IF(PCRE2_SUPPORT_JIT)
|
||||
SET(SUPPORT_JIT 1)
|
||||
ENDIF(PCRE2_SUPPORT_JIT)
|
||||
|
||||
IF(PCRE2_SUPPORT_PCRE2GREP_JIT)
|
||||
SET(SUPPORT_PCRE2GREP_JIT 1)
|
||||
ENDIF(PCRE2_SUPPORT_PCRE2GREP_JIT)
|
||||
|
||||
IF(PCRE2_SUPPORT_VALGRIND)
|
||||
SET(SUPPORT_VALGRIND 1)
|
||||
ENDIF(PCRE2_SUPPORT_VALGRIND)
|
||||
|
||||
# This next one used to reference ${READLINE_LIBRARY})
|
||||
# but I was advised to add the NCURSES test as well, along with
|
||||
# some modifications to cmake/FindReadline.cmake which should
|
||||
# make it possible to override the default if necessary. PH
|
||||
|
||||
IF(PCRE2_SUPPORT_LIBREADLINE)
|
||||
SET(SUPPORT_LIBREADLINE 1)
|
||||
SET(PCRE2TEST_LIBS ${READLINE_LIBRARY} ${NCURSES_LIBRARY})
|
||||
ENDIF(PCRE2_SUPPORT_LIBREADLINE)
|
||||
|
||||
# libedit is a plug-compatible alternative to libreadline
|
||||
|
||||
IF(PCRE2_SUPPORT_LIBEDIT)
|
||||
SET(SUPPORT_LIBEDIT 1)
|
||||
SET(PCRE2TEST_LIBS ${EDITLINE_LIBRARY} ${NCURSES_LIBRARY})
|
||||
ENDIF(PCRE2_SUPPORT_LIBEDIT)
|
||||
|
||||
IF(PCRE2_SUPPORT_LIBZ)
|
||||
SET(SUPPORT_LIBZ 1)
|
||||
SET(PCRE2GREP_LIBS ${PCRE2GREP_LIBS} ${ZLIB_LIBRARIES})
|
||||
ENDIF(PCRE2_SUPPORT_LIBZ)
|
||||
|
||||
IF(PCRE2_SUPPORT_LIBBZ2)
|
||||
SET(SUPPORT_LIBBZ2 1)
|
||||
SET(PCRE2GREP_LIBS ${PCRE2GREP_LIBS} ${BZIP2_LIBRARIES})
|
||||
ENDIF(PCRE2_SUPPORT_LIBBZ2)
|
||||
|
||||
SET(NEWLINE_DEFAULT "")
|
||||
|
||||
IF(PCRE2_NEWLINE STREQUAL "CR")
|
||||
SET(NEWLINE_DEFAULT "1")
|
||||
ENDIF(PCRE2_NEWLINE STREQUAL "CR")
|
||||
IF(PCRE2_NEWLINE STREQUAL "LF")
|
||||
SET(NEWLINE_DEFAULT "2")
|
||||
ENDIF(PCRE2_NEWLINE STREQUAL "LF")
|
||||
IF(PCRE2_NEWLINE STREQUAL "CRLF")
|
||||
SET(NEWLINE_DEFAULT "3")
|
||||
ENDIF(PCRE2_NEWLINE STREQUAL "CRLF")
|
||||
IF(PCRE2_NEWLINE STREQUAL "ANY")
|
||||
SET(NEWLINE_DEFAULT "4")
|
||||
ENDIF(PCRE2_NEWLINE STREQUAL "ANY")
|
||||
IF(PCRE2_NEWLINE STREQUAL "ANYCRLF")
|
||||
SET(NEWLINE_DEFAULT "5")
|
||||
ENDIF(PCRE2_NEWLINE STREQUAL "ANYCRLF")
|
||||
|
||||
IF(NEWLINE_DEFAULT STREQUAL "")
|
||||
MESSAGE(FATAL_ERROR "The PCRE2_NEWLINE variable must be set to one of the following values: \"LF\", \"CR\", \"CRLF\", \"ANY\", \"ANYCRLF\".")
|
||||
ENDIF(NEWLINE_DEFAULT STREQUAL "")
|
||||
|
||||
IF(PCRE2_EBCDIC)
|
||||
SET(EBCDIC 1)
|
||||
ENDIF(PCRE2_EBCDIC)
|
||||
|
||||
IF(PCRE2_EBCDIC_NL25)
|
||||
SET(EBCDIC 1)
|
||||
SET(EBCDIC_NL25 1)
|
||||
ENDIF(PCRE2_EBCDIC_NL25)
|
||||
|
||||
IF(PCRE2_HEAP_MATCH_RECURSE)
|
||||
SET(HEAP_MATCH_RECURSE 1)
|
||||
ENDIF(PCRE2_HEAP_MATCH_RECURSE)
|
||||
|
||||
# Output files
|
||||
|
||||
CONFIGURE_FILE(config-cmake.h.in
|
||||
${PROJECT_BINARY_DIR}/config.h
|
||||
@ONLY)
|
||||
|
||||
# Parse version numbers and date out of configure.ac
|
||||
|
||||
file(STRINGS ${PROJECT_SOURCE_DIR}/configure.ac
|
||||
configure_lines
|
||||
LIMIT_COUNT 50 # Read only the first 50 lines of the file
|
||||
)
|
||||
|
||||
set(SEARCHED_VARIABLES "pcre2_major" "pcre2_minor" "pcre2_prerelease" "pcre2_date")
|
||||
foreach(configure_line ${configure_lines})
|
||||
foreach(_substitution_variable ${SEARCHED_VARIABLES})
|
||||
string(TOUPPER ${_substitution_variable} _substitution_variable_upper)
|
||||
if (NOT ${_substitution_variable_upper})
|
||||
string(REGEX MATCH "m4_define\\(${_substitution_variable}, \\[(.*)\\]" MACTHED_STRING ${configure_line})
|
||||
if (CMAKE_MATCH_1)
|
||||
set(${_substitution_variable_upper} ${CMAKE_MATCH_1})
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
endforeach()
|
||||
|
||||
CONFIGURE_FILE(src/pcre2.h.in
|
||||
${PROJECT_BINARY_DIR}/pcre2.h
|
||||
@ONLY)
|
||||
|
||||
# What about pcre2-config and libpcre2.pc?
|
||||
|
||||
# Character table generation
|
||||
|
||||
OPTION(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF)
|
||||
IF(PCRE2_REBUILD_CHARTABLES)
|
||||
ADD_EXECUTABLE(dftables src/dftables.c)
|
||||
ADD_CUSTOM_COMMAND(
|
||||
COMMENT "Generating character tables (pcre2_chartables.c) for current locale"
|
||||
DEPENDS dftables
|
||||
COMMAND dftables
|
||||
ARGS ${PROJECT_BINARY_DIR}/pcre2_chartables.c
|
||||
OUTPUT ${PROJECT_BINARY_DIR}/pcre2_chartables.c
|
||||
)
|
||||
ELSE(PCRE2_REBUILD_CHARTABLES)
|
||||
CONFIGURE_FILE(${PROJECT_SOURCE_DIR}/src/pcre2_chartables.c.dist
|
||||
${PROJECT_BINARY_DIR}/pcre2_chartables.c
|
||||
COPYONLY)
|
||||
ENDIF(PCRE2_REBUILD_CHARTABLES)
|
||||
|
||||
# Source code
|
||||
|
||||
SET(PCRE2_HEADERS ${PROJECT_BINARY_DIR}/pcre2.h)
|
||||
|
||||
SET(PCRE2_SOURCES
|
||||
src/pcre2_auto_possess.c
|
||||
${PROJECT_BINARY_DIR}/pcre2_chartables.c
|
||||
src/pcre2_compile.c
|
||||
src/pcre2_config.c
|
||||
src/pcre2_context.c
|
||||
src/pcre2_dfa_match.c
|
||||
src/pcre2_error.c
|
||||
src/pcre2_jit_compile.c
|
||||
src/pcre2_maketables.c
|
||||
src/pcre2_match.c
|
||||
src/pcre2_match_data.c
|
||||
src/pcre2_newline.c
|
||||
src/pcre2_ord2utf.c
|
||||
src/pcre2_pattern_info.c
|
||||
src/pcre2_serialize.c
|
||||
src/pcre2_string_utils.c
|
||||
src/pcre2_study.c
|
||||
src/pcre2_substitute.c
|
||||
src/pcre2_substring.c
|
||||
src/pcre2_tables.c
|
||||
src/pcre2_ucd.c
|
||||
src/pcre2_valid_utf.c
|
||||
src/pcre2_xclass.c
|
||||
)
|
||||
|
||||
SET(PCRE2POSIX_HEADERS src/pcre2posix.h)
|
||||
SET(PCRE2POSIX_SOURCES src/pcre2posix.c)
|
||||
|
||||
IF(MINGW AND NOT PCRE2_STATIC)
|
||||
IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
|
||||
ADD_CUSTOM_COMMAND(OUTPUT ${PROJECT_SOURCE_DIR}/pcre2.o
|
||||
PRE-LINK
|
||||
COMMAND windres ARGS pcre2.rc pcre2.o
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
COMMENT Using pcre2 coff info in mingw build)
|
||||
SET(PCRE2_SOURCES
|
||||
${PCRE2_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2.o
|
||||
)
|
||||
ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
|
||||
IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
|
||||
ADD_CUSTOM_COMMAND(OUTPUT ${PROJECT_SOURCE_DIR}/pcre2posix.o
|
||||
PRE-LINK
|
||||
COMMAND windres ARGS pcre2posix.rc pcre2posix.o
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
COMMENT Using pcre2posix coff info in mingw build)
|
||||
SET(PCRE2POSIX_SOURCES
|
||||
${PCRE2POSIX_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2posix.o
|
||||
)
|
||||
ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
|
||||
ENDIF(MINGW AND NOT PCRE2_STATIC)
|
||||
|
||||
IF(MSVC AND NOT PCRE2_STATIC)
|
||||
IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
|
||||
SET(PCRE2_SOURCES
|
||||
${PCRE2_SOURCES} pcre2.rc)
|
||||
ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
|
||||
IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
|
||||
SET(PCRE2POSIX_SOURCES
|
||||
${PCRE2POSIX_SOURCES} pcre2posix.rc)
|
||||
ENDIF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
|
||||
ENDIF(MSVC AND NOT PCRE2_STATIC)
|
||||
|
||||
# Build setup
|
||||
|
||||
ADD_DEFINITIONS(-DHAVE_CONFIG_H)
|
||||
|
||||
IF(MSVC)
|
||||
ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE -D_CRT_SECURE_NO_WARNINGS)
|
||||
ENDIF(MSVC)
|
||||
|
||||
SET(CMAKE_INCLUDE_CURRENT_DIR 1)
|
||||
# needed to make sure to not link debug libs
|
||||
# against release libs and vice versa
|
||||
IF(WIN32)
|
||||
SET(CMAKE_DEBUG_POSTFIX "d")
|
||||
ENDIF(WIN32)
|
||||
|
||||
SET(targets)
|
||||
|
||||
# 8-bit library
|
||||
|
||||
IF(PCRE2_BUILD_PCRE2_8)
|
||||
ADD_LIBRARY(pcre2-8 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
|
||||
SET_PROPERTY(TARGET pcre2-8
|
||||
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
|
||||
SET(targets ${targets} pcre2-8)
|
||||
ADD_LIBRARY(pcre2posix ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES})
|
||||
SET_PROPERTY(TARGET pcre2posix
|
||||
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
|
||||
SET(targets ${targets} pcre2posix)
|
||||
TARGET_LINK_LIBRARIES(pcre2posix pcre2-8)
|
||||
|
||||
IF(MINGW AND NOT PCRE2_STATIC)
|
||||
IF(NON_STANDARD_LIB_PREFIX)
|
||||
SET_TARGET_PROPERTIES(pcre2-8 pcre2posix PROPERTIES PREFIX "")
|
||||
ENDIF(NON_STANDARD_LIB_PREFIX)
|
||||
IF(NON_STANDARD_LIB_SUFFIX)
|
||||
SET_TARGET_PROPERTIES(pcre2-8 pcre2posix PROPERTIES SUFFIX "-0.dll")
|
||||
ENDIF(NON_STANDARD_LIB_SUFFIX)
|
||||
ENDIF(MINGW AND NOT PCRE2_STATIC)
|
||||
ENDIF(PCRE2_BUILD_PCRE2_8)
|
||||
|
||||
# 16-bit library
|
||||
|
||||
IF(PCRE2_BUILD_PCRE2_16)
|
||||
ADD_LIBRARY(pcre2-16 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
|
||||
SET_PROPERTY(TARGET pcre2-16
|
||||
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16)
|
||||
SET(targets ${targets} pcre2-16)
|
||||
|
||||
IF(MINGW AND NOT PCRE2_STATIC)
|
||||
IF(NON_STANDARD_LIB_PREFIX)
|
||||
SET_TARGET_PROPERTIES(pcre2-16 PROPERTIES PREFIX "")
|
||||
ENDIF(NON_STANDARD_LIB_PREFIX)
|
||||
IF(NON_STANDARD_LIB_SUFFIX)
|
||||
SET_TARGET_PROPERTIES(pcre2-16 PROPERTIES SUFFIX "-0.dll")
|
||||
ENDIF(NON_STANDARD_LIB_SUFFIX)
|
||||
ENDIF(MINGW AND NOT PCRE2_STATIC)
|
||||
ENDIF(PCRE2_BUILD_PCRE2_16)
|
||||
|
||||
# 32-bit library
|
||||
|
||||
IF(PCRE2_BUILD_PCRE2_32)
|
||||
ADD_LIBRARY(pcre2-32 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
|
||||
SET_PROPERTY(TARGET pcre2-32
|
||||
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32)
|
||||
SET(targets ${targets} pcre2-32)
|
||||
|
||||
IF(MINGW AND NOT PCRE2_STATIC)
|
||||
IF(NON_STANDARD_LIB_PREFIX)
|
||||
SET_TARGET_PROPERTIES(pcre2-32 PROPERTIES PREFIX "")
|
||||
ENDIF(NON_STANDARD_LIB_PREFIX)
|
||||
IF(NON_STANDARD_LIB_SUFFIX)
|
||||
SET_TARGET_PROPERTIES(pcre2-32 PROPERTIES SUFFIX "-0.dll")
|
||||
ENDIF(NON_STANDARD_LIB_SUFFIX)
|
||||
ENDIF(MINGW AND NOT PCRE2_STATIC)
|
||||
ENDIF(PCRE2_BUILD_PCRE2_32)
|
||||
|
||||
# Executables
|
||||
|
||||
IF(PCRE2_BUILD_PCRE2GREP)
|
||||
ADD_EXECUTABLE(pcre2grep src/pcre2grep.c)
|
||||
SET_PROPERTY(TARGET pcre2grep
|
||||
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
|
||||
SET(targets ${targets} pcre2grep)
|
||||
TARGET_LINK_LIBRARIES(pcre2grep pcre2posix ${PCRE2GREP_LIBS})
|
||||
ENDIF(PCRE2_BUILD_PCRE2GREP)
|
||||
|
||||
# Testing
|
||||
|
||||
# Test programs and CTest registration; everything here is skipped unless
# PCRE2_BUILD_TESTS is enabled.
if(PCRE2_BUILD_TESTS)
  enable_testing()

  set(PCRE2TEST_SOURCES src/pcre2test.c)

  # pcre2test links against every library width that is being built. The
  # 8-bit build additionally pulls in the POSIX wrapper.
  add_executable(pcre2test ${PCRE2TEST_SOURCES})
  set(targets ${targets} pcre2test)
  if(PCRE2_BUILD_PCRE2_8)
    list(APPEND PCRE2TEST_LIBS pcre2posix pcre2-8)
  endif()
  foreach(width 16 32)
    if(PCRE2_BUILD_PCRE2_${width})
      list(APPEND PCRE2TEST_LIBS pcre2-${width})
    endif()
  endforeach()
  target_link_libraries(pcre2test ${PCRE2TEST_LIBS})

  # The JIT test program exists only when JIT support is compiled in; it
  # starts from an empty library list and adds each enabled width.
  if(PCRE2_SUPPORT_JIT)
    add_executable(pcre2_jit_test src/pcre2_jit_test.c)
    set(targets ${targets} pcre2_jit_test)
    set(PCRE2_JIT_TEST_LIBS)
    foreach(width 8 16 32)
      if(PCRE2_BUILD_PCRE2_${width})
        list(APPEND PCRE2_JIT_TEST_LIBS pcre2-${width})
      endif()
    endforeach()
    target_link_libraries(pcre2_jit_test ${PCRE2_JIT_TEST_LIBS})
  endif()

  # Executables in their Debug location are what the RunTest shell script
  # exercises via "make test".
  if(PCRE2_BUILD_PCRE2GREP)
    get_target_property(PCRE2GREP_EXE pcre2grep DEBUG_LOCATION)
  endif()

  get_target_property(PCRE2TEST_EXE pcre2test DEBUG_LOCATION)

  # =================================================
  # Generate the CTest customisation file.
  #
  file(WRITE ${PROJECT_BINARY_DIR}/CTestCustom.ctest
  "# This is a generated file.
MESSAGE(\"When testing is complete, review test output in the
\\\"${PROJECT_BINARY_DIR}/Testing/Temporary\\\" folder.\")
MESSAGE(\" \")
")

  # Shell wrapper that sources RunTest and turns any failure into exit 1.
  file(WRITE ${PROJECT_BINARY_DIR}/pcre2_test.sh
  "#! /bin/sh
# This is a generated file.
. ${PROJECT_SOURCE_DIR}/RunTest
if test \"$?\" != \"0\"; then exit 1; fi
# End
")

  if(UNIX)
    add_test(pcre2_test sh ${PROJECT_BINARY_DIR}/pcre2_test.sh)
  endif()

  # Same wrapper pattern for RunGrepTest, only when pcre2grep is built.
  if(PCRE2_BUILD_PCRE2GREP)
    file(WRITE ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh
    "#! /bin/sh
# This is a generated file.
. ${PROJECT_SOURCE_DIR}/RunGrepTest
if test \"$?\" != \"0\"; then exit 1; fi
# End
")

    if(UNIX)
      add_test(pcre2_grep_test sh ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh)
    endif()
  endif()

  if(WIN32)
    # Native-style paths for the batch-file version of RunTest.
    file(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} winsrc)
    file(TO_NATIVE_PATH ${PROJECT_BINARY_DIR} winbin)
    file(TO_NATIVE_PATH ${PCRE2TEST_EXE} winexe)

    file(WRITE ${PROJECT_BINARY_DIR}/pcre2_test.bat
    "\@REM This is a generated file.
\@echo off
setlocal
SET srcdir=\"${winsrc}\"
SET pcre2test=\"${winexe}\"
if not [%CMAKE_CONFIG_TYPE%]==[] SET pcre2test=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcre2test.exe\"
call %srcdir%\\RunTest.Bat
if errorlevel 1 exit /b 1
echo RunTest.bat tests successfully completed
")

    # The batch test passes only when the success line echoed by the
    # generated script appears in its output.
    add_test(NAME pcre2_test_bat
             COMMAND pcre2_test.bat)
    set_tests_properties(pcre2_test_bat PROPERTIES
      PASS_REGULAR_EXPRESSION "RunTest\\.bat tests successfully completed")

    if("$ENV{OSTYPE}" STREQUAL "msys")
      # Under msys, "make test" runs the sh versions of the tests as well as
      # the bat version.
      add_test(pcre2_test_sh sh.exe ${PROJECT_BINARY_DIR}/pcre2_test.sh)
      if(PCRE2_BUILD_PCRE2GREP)
        add_test(pcre2_grep_test sh.exe ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh)
      endif()
    endif()
  endif()

  # Changed to accommodate testing whichever location was just built.
  if(PCRE2_SUPPORT_JIT)
    add_test(pcre2_jit_test pcre2_jit_test)
  endif()
endif()
|
||||
|
||||
# Installation
|
||||
|
||||
# NOTE(review): presumably forces files to be re-installed even when an
# up-to-date copy is already in place - confirm against the CMake version
# in use.
set(CMAKE_INSTALL_ALWAYS 1)

# Install every target collected in ${targets}: executables and DLLs go to
# bin, shared and static libraries go to lib.
install(TARGETS ${targets}
        RUNTIME DESTINATION bin
        LIBRARY DESTINATION lib
        ARCHIVE DESTINATION lib)

install(FILES ${PCRE2_HEADERS} ${PCRE2POSIX_HEADERS} DESTINATION include)

# Documentation shipped in the source tree: HTML pages plus section 1 and
# section 3 man pages.
file(GLOB html ${PROJECT_SOURCE_DIR}/doc/html/*.html)
file(GLOB man1 ${PROJECT_SOURCE_DIR}/doc/*.1)
file(GLOB man3 ${PROJECT_SOURCE_DIR}/doc/*.3)

# The globbed man3 list is installed as-is. An earlier loop rebuilt it via
# man3_new, but it filtered nothing and left the last page listed twice, so
# it was removed.
install(FILES ${man1} DESTINATION man/man1)
install(FILES ${man3} DESTINATION man/man3)
install(FILES ${html} DESTINATION share/doc/pcre2/html)
|
||||
|
||||
# With MSVC, optionally install the debugger symbol files next to the
# binaries. The un-suffixed pair is installed for RelWithDebInfo and the
# "d"-suffixed pair for Debug.
if(MSVC AND INSTALL_MSVC_PDB)
  install(FILES
            ${PROJECT_BINARY_DIR}/pcre2.pdb
            ${PROJECT_BINARY_DIR}/pcre2posix.pdb
          DESTINATION bin
          CONFIGURATIONS RelWithDebInfo)
  install(FILES
            ${PROJECT_BINARY_DIR}/pcre2d.pdb
            ${PROJECT_BINARY_DIR}/pcre2posixd.pdb
          DESTINATION bin
          CONFIGURATIONS Debug)
endif()
|
||||
|
||||
# BUILD_STATIC_LIBS is simply the opposite of BUILD_SHARED_LIBS. It is set
# here so that the configuration summary can print both values.
if(NOT BUILD_SHARED_LIBS)
  set(BUILD_STATIC_LIBS ON)
else()
  set(BUILD_STATIC_LIBS OFF)
endif()
|
||||
|
||||
# Print a human-readable summary of the chosen configuration when
# PCRE2_SHOW_REPORT is enabled.
if(PCRE2_SHOW_REPORT)
  # Upper-cased build type selects the matching CMAKE_C_FLAGS_<TYPE> variable.
  string(TOUPPER "${CMAKE_BUILD_TYPE}" buildtype)
  # A separating blank is inserted between the generic and the per-build-type
  # flags only when the generic flags are non-empty.
  if(CMAKE_C_FLAGS)
    set(cfsp " ")
  endif()

  message(STATUS "")
  message(STATUS "")
  message(STATUS "PCRE2 configuration summary:")
  message(STATUS "")
  message(STATUS " Install prefix .................. : ${CMAKE_INSTALL_PREFIX}")
  message(STATUS " C compiler ...................... : ${CMAKE_C_COMPILER}")
  message(STATUS " C compiler flags ................ : ${CMAKE_C_FLAGS}${cfsp}${CMAKE_C_FLAGS_${buildtype}}")
  message(STATUS "")
  message(STATUS " Build 8 bit PCRE2 library ....... : ${PCRE2_BUILD_PCRE2_8}")
  message(STATUS " Build 16 bit PCRE2 library ...... : ${PCRE2_BUILD_PCRE2_16}")
  message(STATUS " Build 32 bit PCRE2 library ...... : ${PCRE2_BUILD_PCRE2_32}")
  message(STATUS " Enable JIT compiling support .... : ${PCRE2_SUPPORT_JIT}")
  message(STATUS " Enable Unicode support .......... : ${PCRE2_SUPPORT_UNICODE}")
  message(STATUS " Newline char/sequence ........... : ${PCRE2_NEWLINE}")
  message(STATUS " \\R matches only ANYCRLF ......... : ${PCRE2_SUPPORT_BSR_ANYCRLF}")
  message(STATUS " EBCDIC coding ................... : ${PCRE2_EBCDIC}")
  message(STATUS " EBCDIC coding with NL=0x25 ...... : ${PCRE2_EBCDIC_NL25}")
  message(STATUS " Rebuild char tables ............. : ${PCRE2_REBUILD_CHARTABLES}")
  message(STATUS " Use heap recursion .............. : ${PCRE2_HEAP_MATCH_RECURSE}")
  message(STATUS " Internal link size .............. : ${PCRE2_LINK_SIZE}")
  message(STATUS " Parentheses nest limit .......... : ${PCRE2_PARENS_NEST_LIMIT}")
  message(STATUS " Match limit ..................... : ${PCRE2_MATCH_LIMIT}")
  message(STATUS " Match limit recursion ........... : ${PCRE2_MATCH_LIMIT_RECURSION}")
  message(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}")
  message(STATUS " Build static libs ............... : ${BUILD_STATIC_LIBS}")
  message(STATUS " Build pcre2grep ................. : ${PCRE2_BUILD_PCRE2GREP}")
  message(STATUS " Enable JIT in pcre2grep ......... : ${PCRE2_SUPPORT_PCRE2GREP_JIT}")
  message(STATUS " Buffer size for pcre2grep ....... : ${PCRE2GREP_BUFSIZE}")
  message(STATUS " Build tests (implies pcre2test . : ${PCRE2_BUILD_TESTS}")
  message(STATUS " and pcre2grep)")

  # For each optional helper library, report the support switch when the
  # library was found and a fixed "not found" note otherwise.
  if(ZLIB_FOUND)
    message(STATUS " Link pcre2grep with libz ........ : ${PCRE2_SUPPORT_LIBZ}")
  else()
    message(STATUS " Link pcre2grep with libz ........ : Library not found")
  endif()
  if(BZIP2_FOUND)
    message(STATUS " Link pcre2grep with libbz2 ...... : ${PCRE2_SUPPORT_LIBBZ2}")
  else()
    message(STATUS " Link pcre2grep with libbz2 ...... : Library not found")
  endif()
  if(EDITLINE_FOUND)
    message(STATUS " Link pcre2test with libeditline . : ${PCRE2_SUPPORT_LIBEDIT}")
  else()
    message(STATUS " Link pcre2test with libeditline . : Library not found")
  endif()
  if(READLINE_FOUND)
    message(STATUS " Link pcre2test with libreadline . : ${PCRE2_SUPPORT_LIBREADLINE}")
  else()
    message(STATUS " Link pcre2test with libreadline . : Library not found")
  endif()
  message(STATUS " Support Valgrind .................: ${PCRE2_SUPPORT_VALGRIND}")

  # Platform-specific lines: DLL naming for MinGW shared builds, PDB
  # installation for MSVC.
  if(MINGW AND NOT PCRE2_STATIC)
    message(STATUS " Non-standard dll names (prefix) . : ${NON_STANDARD_LIB_PREFIX}")
    message(STATUS " Non-standard dll names (suffix) . : ${NON_STANDARD_LIB_SUFFIX}")
  endif()

  if(MSVC)
    message(STATUS " Install MSVC .pdb files ..........: ${INSTALL_MSVC_PDB}")
  endif()

  message(STATUS "")
endif()
|
||||
|
||||
# end CMakeLists.txt
|
674
pcre2/COPYING
Normal file
674
pcre2/COPYING
Normal file
@ -0,0 +1,674 @@
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
Version 3, 29 June 2007
|
||||
|
||||
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The GNU General Public License is a free, copyleft license for
|
||||
software and other kinds of works.
|
||||
|
||||
The licenses for most software and other practical works are designed
|
||||
to take away your freedom to share and change the works. By contrast,
|
||||
the GNU General Public License is intended to guarantee your freedom to
|
||||
share and change all versions of a program--to make sure it remains free
|
||||
software for all its users. We, the Free Software Foundation, use the
|
||||
GNU General Public License for most of our software; it applies also to
|
||||
any other work released this way by its authors. You can apply it to
|
||||
your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
them if you wish), that you receive source code or can get it if you
|
||||
want it, that you can change the software or use pieces of it in new
|
||||
free programs, and that you know you can do these things.
|
||||
|
||||
To protect your rights, we need to prevent others from denying you
|
||||
these rights or asking you to surrender the rights. Therefore, you have
|
||||
certain responsibilities if you distribute copies of the software, or if
|
||||
you modify it: responsibilities to respect the freedom of others.
|
||||
|
||||
For example, if you distribute copies of such a program, whether
|
||||
gratis or for a fee, you must pass on to the recipients the same
|
||||
freedoms that you received. You must make sure that they, too, receive
|
||||
or can get the source code. And you must show them these terms so they
|
||||
know their rights.
|
||||
|
||||
Developers that use the GNU GPL protect your rights with two steps:
|
||||
(1) assert copyright on the software, and (2) offer you this License
|
||||
giving you legal permission to copy, distribute and/or modify it.
|
||||
|
||||
For the developers' and authors' protection, the GPL clearly explains
|
||||
that there is no warranty for this free software. For both users' and
|
||||
authors' sake, the GPL requires that modified versions be marked as
|
||||
changed, so that their problems will not be attributed erroneously to
|
||||
authors of previous versions.
|
||||
|
||||
Some devices are designed to deny users access to install or run
|
||||
modified versions of the software inside them, although the manufacturer
|
||||
can do so. This is fundamentally incompatible with the aim of
|
||||
protecting users' freedom to change the software. The systematic
|
||||
pattern of such abuse occurs in the area of products for individuals to
|
||||
use, which is precisely where it is most unacceptable. Therefore, we
|
||||
have designed this version of the GPL to prohibit the practice for those
|
||||
products. If such problems arise substantially in other domains, we
|
||||
stand ready to extend this provision to those domains in future versions
|
||||
of the GPL, as needed to protect the freedom of users.
|
||||
|
||||
Finally, every program is threatened constantly by software patents.
|
||||
States should not allow patents to restrict development and use of
|
||||
software on general-purpose computers, but in those that do, we wish to
|
||||
avoid the special danger that patents applied to a free program could
|
||||
make it effectively proprietary. To prevent this, the GPL assures that
|
||||
patents cannot be used to render the program non-free.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
TERMS AND CONDITIONS
|
||||
|
||||
0. Definitions.
|
||||
|
||||
"This License" refers to version 3 of the GNU General Public License.
|
||||
|
||||
"Copyright" also means copyright-like laws that apply to other kinds of
|
||||
works, such as semiconductor masks.
|
||||
|
||||
"The Program" refers to any copyrightable work licensed under this
|
||||
License. Each licensee is addressed as "you". "Licensees" and
|
||||
"recipients" may be individuals or organizations.
|
||||
|
||||
To "modify" a work means to copy from or adapt all or part of the work
|
||||
in a fashion requiring copyright permission, other than the making of an
|
||||
exact copy. The resulting work is called a "modified version" of the
|
||||
earlier work or a work "based on" the earlier work.
|
||||
|
||||
A "covered work" means either the unmodified Program or a work based
|
||||
on the Program.
|
||||
|
||||
To "propagate" a work means to do anything with it that, without
|
||||
permission, would make you directly or secondarily liable for
|
||||
infringement under applicable copyright law, except executing it on a
|
||||
computer or modifying a private copy. Propagation includes copying,
|
||||
distribution (with or without modification), making available to the
|
||||
public, and in some countries other activities as well.
|
||||
|
||||
To "convey" a work means any kind of propagation that enables other
|
||||
parties to make or receive copies. Mere interaction with a user through
|
||||
a computer network, with no transfer of a copy, is not conveying.
|
||||
|
||||
An interactive user interface displays "Appropriate Legal Notices"
|
||||
to the extent that it includes a convenient and prominently visible
|
||||
feature that (1) displays an appropriate copyright notice, and (2)
|
||||
tells the user that there is no warranty for the work (except to the
|
||||
extent that warranties are provided), that licensees may convey the
|
||||
work under this License, and how to view a copy of this License. If
|
||||
the interface presents a list of user commands or options, such as a
|
||||
menu, a prominent item in the list meets this criterion.
|
||||
|
||||
1. Source Code.
|
||||
|
||||
The "source code" for a work means the preferred form of the work
|
||||
for making modifications to it. "Object code" means any non-source
|
||||
form of a work.
|
||||
|
||||
A "Standard Interface" means an interface that either is an official
|
||||
standard defined by a recognized standards body, or, in the case of
|
||||
interfaces specified for a particular programming language, one that
|
||||
is widely used among developers working in that language.
|
||||
|
||||
The "System Libraries" of an executable work include anything, other
|
||||
than the work as a whole, that (a) is included in the normal form of
|
||||
packaging a Major Component, but which is not part of that Major
|
||||
Component, and (b) serves only to enable use of the work with that
|
||||
Major Component, or to implement a Standard Interface for which an
|
||||
implementation is available to the public in source code form. A
|
||||
"Major Component", in this context, means a major essential component
|
||||
(kernel, window system, and so on) of the specific operating system
|
||||
(if any) on which the executable work runs, or a compiler used to
|
||||
produce the work, or an object code interpreter used to run it.
|
||||
|
||||
The "Corresponding Source" for a work in object code form means all
|
||||
the source code needed to generate, install, and (for an executable
|
||||
work) run the object code and to modify the work, including scripts to
|
||||
control those activities. However, it does not include the work's
|
||||
System Libraries, or general-purpose tools or generally available free
|
||||
programs which are used unmodified in performing those activities but
|
||||
which are not part of the work. For example, Corresponding Source
|
||||
includes interface definition files associated with source files for
|
||||
the work, and the source code for shared libraries and dynamically
|
||||
linked subprograms that the work is specifically designed to require,
|
||||
such as by intimate data communication or control flow between those
|
||||
subprograms and other parts of the work.
|
||||
|
||||
The Corresponding Source need not include anything that users
|
||||
can regenerate automatically from other parts of the Corresponding
|
||||
Source.
|
||||
|
||||
The Corresponding Source for a work in source code form is that
|
||||
same work.
|
||||
|
||||
2. Basic Permissions.
|
||||
|
||||
All rights granted under this License are granted for the term of
|
||||
copyright on the Program, and are irrevocable provided the stated
|
||||
conditions are met. This License explicitly affirms your unlimited
|
||||
permission to run the unmodified Program. The output from running a
|
||||
covered work is covered by this License only if the output, given its
|
||||
content, constitutes a covered work. This License acknowledges your
|
||||
rights of fair use or other equivalent, as provided by copyright law.
|
||||
|
||||
You may make, run and propagate covered works that you do not
|
||||
convey, without conditions so long as your license otherwise remains
|
||||
in force. You may convey covered works to others for the sole purpose
|
||||
of having them make modifications exclusively for you, or provide you
|
||||
with facilities for running those works, provided that you comply with
|
||||
the terms of this License in conveying all material for which you do
|
||||
not control copyright. Those thus making or running the covered works
|
||||
for you must do so exclusively on your behalf, under your direction
|
||||
and control, on terms that prohibit them from making any copies of
|
||||
your copyrighted material outside their relationship with you.
|
||||
|
||||
Conveying under any other circumstances is permitted solely under
|
||||
the conditions stated below. Sublicensing is not allowed; section 10
|
||||
makes it unnecessary.
|
||||
|
||||
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
|
||||
|
||||
No covered work shall be deemed part of an effective technological
|
||||
measure under any applicable law fulfilling obligations under article
|
||||
11 of the WIPO copyright treaty adopted on 20 December 1996, or
|
||||
similar laws prohibiting or restricting circumvention of such
|
||||
measures.
|
||||
|
||||
When you convey a covered work, you waive any legal power to forbid
|
||||
circumvention of technological measures to the extent such circumvention
|
||||
is effected by exercising rights under this License with respect to
|
||||
the covered work, and you disclaim any intention to limit operation or
|
||||
modification of the work as a means of enforcing, against the work's
|
||||
users, your or third parties' legal rights to forbid circumvention of
|
||||
technological measures.
|
||||
|
||||
4. Conveying Verbatim Copies.
|
||||
|
||||
You may convey verbatim copies of the Program's source code as you
|
||||
receive it, in any medium, provided that you conspicuously and
|
||||
appropriately publish on each copy an appropriate copyright notice;
|
||||
keep intact all notices stating that this License and any
|
||||
non-permissive terms added in accord with section 7 apply to the code;
|
||||
keep intact all notices of the absence of any warranty; and give all
|
||||
recipients a copy of this License along with the Program.
|
||||
|
||||
You may charge any price or no price for each copy that you convey,
|
||||
and you may offer support or warranty protection for a fee.
|
||||
|
||||
5. Conveying Modified Source Versions.
|
||||
|
||||
You may convey a work based on the Program, or the modifications to
|
||||
produce it from the Program, in the form of source code under the
|
||||
terms of section 4, provided that you also meet all of these conditions:
|
||||
|
||||
a) The work must carry prominent notices stating that you modified
|
||||
it, and giving a relevant date.
|
||||
|
||||
b) The work must carry prominent notices stating that it is
|
||||
released under this License and any conditions added under section
|
||||
7. This requirement modifies the requirement in section 4 to
|
||||
"keep intact all notices".
|
||||
|
||||
c) You must license the entire work, as a whole, under this
|
||||
License to anyone who comes into possession of a copy. This
|
||||
License will therefore apply, along with any applicable section 7
|
||||
additional terms, to the whole of the work, and all its parts,
|
||||
regardless of how they are packaged. This License gives no
|
||||
permission to license the work in any other way, but it does not
|
||||
invalidate such permission if you have separately received it.
|
||||
|
||||
d) If the work has interactive user interfaces, each must display
|
||||
Appropriate Legal Notices; however, if the Program has interactive
|
||||
interfaces that do not display Appropriate Legal Notices, your
|
||||
work need not make them do so.
|
||||
|
||||
A compilation of a covered work with other separate and independent
|
||||
works, which are not by their nature extensions of the covered work,
|
||||
and which are not combined with it such as to form a larger program,
|
||||
in or on a volume of a storage or distribution medium, is called an
|
||||
"aggregate" if the compilation and its resulting copyright are not
|
||||
used to limit the access or legal rights of the compilation's users
|
||||
beyond what the individual works permit. Inclusion of a covered work
|
||||
in an aggregate does not cause this License to apply to the other
|
||||
parts of the aggregate.
|
||||
|
||||
6. Conveying Non-Source Forms.
|
||||
|
||||
You may convey a covered work in object code form under the terms
|
||||
of sections 4 and 5, provided that you also convey the
|
||||
machine-readable Corresponding Source under the terms of this License,
|
||||
in one of these ways:
|
||||
|
||||
a) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by the
|
||||
Corresponding Source fixed on a durable physical medium
|
||||
customarily used for software interchange.
|
||||
|
||||
b) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by a
|
||||
written offer, valid for at least three years and valid for as
|
||||
long as you offer spare parts or customer support for that product
|
||||
model, to give anyone who possesses the object code either (1) a
|
||||
copy of the Corresponding Source for all the software in the
|
||||
product that is covered by this License, on a durable physical
|
||||
medium customarily used for software interchange, for a price no
|
||||
more than your reasonable cost of physically performing this
|
||||
conveying of source, or (2) access to copy the
|
||||
Corresponding Source from a network server at no charge.
|
||||
|
||||
c) Convey individual copies of the object code with a copy of the
|
||||
written offer to provide the Corresponding Source. This
|
||||
alternative is allowed only occasionally and noncommercially, and
|
||||
only if you received the object code with such an offer, in accord
|
||||
with subsection 6b.
|
||||
|
||||
d) Convey the object code by offering access from a designated
|
||||
place (gratis or for a charge), and offer equivalent access to the
|
||||
Corresponding Source in the same way through the same place at no
|
||||
further charge. You need not require recipients to copy the
|
||||
Corresponding Source along with the object code. If the place to
|
||||
copy the object code is a network server, the Corresponding Source
|
||||
may be on a different server (operated by you or a third party)
|
||||
that supports equivalent copying facilities, provided you maintain
|
||||
clear directions next to the object code saying where to find the
|
||||
Corresponding Source. Regardless of what server hosts the
|
||||
Corresponding Source, you remain obligated to ensure that it is
|
||||
available for as long as needed to satisfy these requirements.
|
||||
|
||||
e) Convey the object code using peer-to-peer transmission, provided
|
||||
you inform other peers where the object code and Corresponding
|
||||
Source of the work are being offered to the general public at no
|
||||
charge under subsection 6d.
|
||||
|
||||
A separable portion of the object code, whose source code is excluded
|
||||
from the Corresponding Source as a System Library, need not be
|
||||
included in conveying the object code work.
|
||||
|
||||
A "User Product" is either (1) a "consumer product", which means any
|
||||
tangible personal property which is normally used for personal, family,
|
||||
or household purposes, or (2) anything designed or sold for incorporation
|
||||
into a dwelling. In determining whether a product is a consumer product,
|
||||
doubtful cases shall be resolved in favor of coverage. For a particular
|
||||
product received by a particular user, "normally used" refers to a
|
||||
typical or common use of that class of product, regardless of the status
|
||||
of the particular user or of the way in which the particular user
|
||||
actually uses, or expects or is expected to use, the product. A product
|
||||
is a consumer product regardless of whether the product has substantial
|
||||
commercial, industrial or non-consumer uses, unless such uses represent
|
||||
the only significant mode of use of the product.
|
||||
|
||||
"Installation Information" for a User Product means any methods,
|
||||
procedures, authorization keys, or other information required to install
|
||||
and execute modified versions of a covered work in that User Product from
|
||||
a modified version of its Corresponding Source. The information must
|
||||
suffice to ensure that the continued functioning of the modified object
|
||||
code is in no case prevented or interfered with solely because
|
||||
modification has been made.
|
||||
|
||||
If you convey an object code work under this section in, or with, or
|
||||
specifically for use in, a User Product, and the conveying occurs as
|
||||
part of a transaction in which the right of possession and use of the
|
||||
User Product is transferred to the recipient in perpetuity or for a
|
||||
fixed term (regardless of how the transaction is characterized), the
|
||||
Corresponding Source conveyed under this section must be accompanied
|
||||
by the Installation Information. But this requirement does not apply
|
||||
if neither you nor any third party retains the ability to install
|
||||
modified object code on the User Product (for example, the work has
|
||||
been installed in ROM).
|
||||
|
||||
The requirement to provide Installation Information does not include a
|
||||
requirement to continue to provide support service, warranty, or updates
|
||||
for a work that has been modified or installed by the recipient, or for
|
||||
the User Product in which it has been modified or installed. Access to a
|
||||
network may be denied when the modification itself materially and
|
||||
adversely affects the operation of the network or violates the rules and
|
||||
protocols for communication across the network.
|
||||
|
||||
Corresponding Source conveyed, and Installation Information provided,
|
||||
in accord with this section must be in a format that is publicly
|
||||
documented (and with an implementation available to the public in
|
||||
source code form), and must require no special password or key for
|
||||
unpacking, reading or copying.
|
||||
|
||||
7. Additional Terms.
|
||||
|
||||
"Additional permissions" are terms that supplement the terms of this
|
||||
License by making exceptions from one or more of its conditions.
|
||||
Additional permissions that are applicable to the entire Program shall
|
||||
be treated as though they were included in this License, to the extent
|
||||
that they are valid under applicable law. If additional permissions
|
||||
apply only to part of the Program, that part may be used separately
|
||||
under those permissions, but the entire Program remains governed by
|
||||
this License without regard to the additional permissions.
|
||||
|
||||
When you convey a copy of a covered work, you may at your option
|
||||
remove any additional permissions from that copy, or from any part of
|
||||
it. (Additional permissions may be written to require their own
|
||||
removal in certain cases when you modify the work.) You may place
|
||||
additional permissions on material, added by you to a covered work,
|
||||
for which you have or can give appropriate copyright permission.
|
||||
|
||||
Notwithstanding any other provision of this License, for material you
|
||||
add to a covered work, you may (if authorized by the copyright holders of
|
||||
that material) supplement the terms of this License with terms:
|
||||
|
||||
a) Disclaiming warranty or limiting liability differently from the
|
||||
terms of sections 15 and 16 of this License; or
|
||||
|
||||
b) Requiring preservation of specified reasonable legal notices or
|
||||
author attributions in that material or in the Appropriate Legal
|
||||
Notices displayed by works containing it; or
|
||||
|
||||
c) Prohibiting misrepresentation of the origin of that material, or
|
||||
requiring that modified versions of such material be marked in
|
||||
reasonable ways as different from the original version; or
|
||||
|
||||
d) Limiting the use for publicity purposes of names of licensors or
|
||||
authors of the material; or
|
||||
|
||||
e) Declining to grant rights under trademark law for use of some
|
||||
trade names, trademarks, or service marks; or
|
||||
|
||||
f) Requiring indemnification of licensors and authors of that
|
||||
material by anyone who conveys the material (or modified versions of
|
||||
it) with contractual assumptions of liability to the recipient, for
|
||||
any liability that these contractual assumptions directly impose on
|
||||
those licensors and authors.
|
||||
|
||||
All other non-permissive additional terms are considered "further
|
||||
restrictions" within the meaning of section 10. If the Program as you
|
||||
received it, or any part of it, contains a notice stating that it is
|
||||
governed by this License along with a term that is a further
|
||||
restriction, you may remove that term. If a license document contains
|
||||
a further restriction but permits relicensing or conveying under this
|
||||
License, you may add to a covered work material governed by the terms
|
||||
of that license document, provided that the further restriction does
|
||||
not survive such relicensing or conveying.
|
||||
|
||||
If you add terms to a covered work in accord with this section, you
|
||||
must place, in the relevant source files, a statement of the
|
||||
additional terms that apply to those files, or a notice indicating
|
||||
where to find the applicable terms.
|
||||
|
||||
Additional terms, permissive or non-permissive, may be stated in the
|
||||
form of a separately written license, or stated as exceptions;
|
||||
the above requirements apply either way.
|
||||
|
||||
8. Termination.
|
||||
|
||||
You may not propagate or modify a covered work except as expressly
|
||||
provided under this License. Any attempt otherwise to propagate or
|
||||
modify it is void, and will automatically terminate your rights under
|
||||
this License (including any patent licenses granted under the third
|
||||
paragraph of section 11).
|
||||
|
||||
However, if you cease all violation of this License, then your
|
||||
license from a particular copyright holder is reinstated (a)
|
||||
provisionally, unless and until the copyright holder explicitly and
|
||||
finally terminates your license, and (b) permanently, if the copyright
|
||||
holder fails to notify you of the violation by some reasonable means
|
||||
prior to 60 days after the cessation.
|
||||
|
||||
Moreover, your license from a particular copyright holder is
|
||||
reinstated permanently if the copyright holder notifies you of the
|
||||
violation by some reasonable means, this is the first time you have
|
||||
received notice of violation of this License (for any work) from that
|
||||
copyright holder, and you cure the violation prior to 30 days after
|
||||
your receipt of the notice.
|
||||
|
||||
Termination of your rights under this section does not terminate the
|
||||
licenses of parties who have received copies or rights from you under
|
||||
this License. If your rights have been terminated and not permanently
|
||||
reinstated, you do not qualify to receive new licenses for the same
|
||||
material under section 10.
|
||||
|
||||
9. Acceptance Not Required for Having Copies.
|
||||
|
||||
You are not required to accept this License in order to receive or
|
||||
run a copy of the Program. Ancillary propagation of a covered work
|
||||
occurring solely as a consequence of using peer-to-peer transmission
|
||||
to receive a copy likewise does not require acceptance. However,
|
||||
nothing other than this License grants you permission to propagate or
|
||||
modify any covered work. These actions infringe copyright if you do
|
||||
not accept this License. Therefore, by modifying or propagating a
|
||||
covered work, you indicate your acceptance of this License to do so.
|
||||
|
||||
10. Automatic Licensing of Downstream Recipients.
|
||||
|
||||
Each time you convey a covered work, the recipient automatically
|
||||
receives a license from the original licensors, to run, modify and
|
||||
propagate that work, subject to this License. You are not responsible
|
||||
for enforcing compliance by third parties with this License.
|
||||
|
||||
An "entity transaction" is a transaction transferring control of an
|
||||
organization, or substantially all assets of one, or subdividing an
|
||||
organization, or merging organizations. If propagation of a covered
|
||||
work results from an entity transaction, each party to that
|
||||
transaction who receives a copy of the work also receives whatever
|
||||
licenses to the work the party's predecessor in interest had or could
|
||||
give under the previous paragraph, plus a right to possession of the
|
||||
Corresponding Source of the work from the predecessor in interest, if
|
||||
the predecessor has it or can get it with reasonable efforts.
|
||||
|
||||
You may not impose any further restrictions on the exercise of the
|
||||
rights granted or affirmed under this License. For example, you may
|
||||
not impose a license fee, royalty, or other charge for exercise of
|
||||
rights granted under this License, and you may not initiate litigation
|
||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
||||
any patent claim is infringed by making, using, selling, offering for
|
||||
sale, or importing the Program or any portion of it.
|
||||
|
||||
11. Patents.
|
||||
|
||||
A "contributor" is a copyright holder who authorizes use under this
|
||||
License of the Program or a work on which the Program is based. The
|
||||
work thus licensed is called the contributor's "contributor version".
|
||||
|
||||
A contributor's "essential patent claims" are all patent claims
|
||||
owned or controlled by the contributor, whether already acquired or
|
||||
hereafter acquired, that would be infringed by some manner, permitted
|
||||
by this License, of making, using, or selling its contributor version,
|
||||
but do not include claims that would be infringed only as a
|
||||
consequence of further modification of the contributor version. For
|
||||
purposes of this definition, "control" includes the right to grant
|
||||
patent sublicenses in a manner consistent with the requirements of
|
||||
this License.
|
||||
|
||||
Each contributor grants you a non-exclusive, worldwide, royalty-free
|
||||
patent license under the contributor's essential patent claims, to
|
||||
make, use, sell, offer for sale, import and otherwise run, modify and
|
||||
propagate the contents of its contributor version.
|
||||
|
||||
In the following three paragraphs, a "patent license" is any express
|
||||
agreement or commitment, however denominated, not to enforce a patent
|
||||
(such as an express permission to practice a patent or covenant not to
|
||||
sue for patent infringement). To "grant" such a patent license to a
|
||||
party means to make such an agreement or commitment not to enforce a
|
||||
patent against the party.
|
||||
|
||||
If you convey a covered work, knowingly relying on a patent license,
|
||||
and the Corresponding Source of the work is not available for anyone
|
||||
to copy, free of charge and under the terms of this License, through a
|
||||
publicly available network server or other readily accessible means,
|
||||
then you must either (1) cause the Corresponding Source to be so
|
||||
available, or (2) arrange to deprive yourself of the benefit of the
|
||||
patent license for this particular work, or (3) arrange, in a manner
|
||||
consistent with the requirements of this License, to extend the patent
|
||||
license to downstream recipients. "Knowingly relying" means you have
|
||||
actual knowledge that, but for the patent license, your conveying the
|
||||
covered work in a country, or your recipient's use of the covered work
|
||||
in a country, would infringe one or more identifiable patents in that
|
||||
country that you have reason to believe are valid.
|
||||
|
||||
If, pursuant to or in connection with a single transaction or
|
||||
arrangement, you convey, or propagate by procuring conveyance of, a
|
||||
covered work, and grant a patent license to some of the parties
|
||||
receiving the covered work authorizing them to use, propagate, modify
|
||||
or convey a specific copy of the covered work, then the patent license
|
||||
you grant is automatically extended to all recipients of the covered
|
||||
work and works based on it.
|
||||
|
||||
A patent license is "discriminatory" if it does not include within
|
||||
the scope of its coverage, prohibits the exercise of, or is
|
||||
conditioned on the non-exercise of one or more of the rights that are
|
||||
specifically granted under this License. You may not convey a covered
|
||||
work if you are a party to an arrangement with a third party that is
|
||||
in the business of distributing software, under which you make payment
|
||||
to the third party based on the extent of your activity of conveying
|
||||
the work, and under which the third party grants, to any of the
|
||||
parties who would receive the covered work from you, a discriminatory
|
||||
patent license (a) in connection with copies of the covered work
|
||||
conveyed by you (or copies made from those copies), or (b) primarily
|
||||
for and in connection with specific products or compilations that
|
||||
contain the covered work, unless you entered into that arrangement,
|
||||
or that patent license was granted, prior to 28 March 2007.
|
||||
|
||||
Nothing in this License shall be construed as excluding or limiting
|
||||
any implied license or other defenses to infringement that may
|
||||
otherwise be available to you under applicable patent law.
|
||||
|
||||
12. No Surrender of Others' Freedom.
|
||||
|
||||
If conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot convey a
|
||||
covered work so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you may
|
||||
not convey it at all. For example, if you agree to terms that obligate you
|
||||
to collect a royalty for further conveying from those to whom you convey
|
||||
the Program, the only way you could satisfy both those terms and this
|
||||
License would be to refrain entirely from conveying the Program.
|
||||
|
||||
13. Use with the GNU Affero General Public License.
|
||||
|
||||
Notwithstanding any other provision of this License, you have
|
||||
permission to link or combine any covered work with a work licensed
|
||||
under version 3 of the GNU Affero General Public License into a single
|
||||
combined work, and to convey the resulting work. The terms of this
|
||||
License will continue to apply to the part which is the covered work,
|
||||
but the special requirements of the GNU Affero General Public License,
|
||||
section 13, concerning interaction through a network will apply to the
|
||||
combination as such.
|
||||
|
||||
14. Revised Versions of this License.
|
||||
|
||||
The Free Software Foundation may publish revised and/or new versions of
|
||||
the GNU General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the
|
||||
Program specifies that a certain numbered version of the GNU General
|
||||
Public License "or any later version" applies to it, you have the
|
||||
option of following the terms and conditions either of that numbered
|
||||
version or of any later version published by the Free Software
|
||||
Foundation. If the Program does not specify a version number of the
|
||||
GNU General Public License, you may choose any version ever published
|
||||
by the Free Software Foundation.
|
||||
|
||||
If the Program specifies that a proxy can decide which future
|
||||
versions of the GNU General Public License can be used, that proxy's
|
||||
public statement of acceptance of a version permanently authorizes you
|
||||
to choose that version for the Program.
|
||||
|
||||
Later license versions may give you additional or different
|
||||
permissions. However, no additional obligations are imposed on any
|
||||
author or copyright holder as a result of your choosing to follow a
|
||||
later version.
|
||||
|
||||
15. Disclaimer of Warranty.
|
||||
|
||||
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
|
||||
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
|
||||
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
|
||||
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
|
||||
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
|
||||
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
||||
|
||||
16. Limitation of Liability.
|
||||
|
||||
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
|
||||
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
|
||||
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
|
||||
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
|
||||
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
|
||||
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
|
||||
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGES.
|
||||
|
||||
17. Interpretation of Sections 15 and 16.
|
||||
|
||||
If the disclaimer of warranty and limitation of liability provided
|
||||
above cannot be given local legal effect according to their terms,
|
||||
reviewing courts shall apply local law that most closely approximates
|
||||
an absolute waiver of all civil liability in connection with the
|
||||
Program, unless a warranty or assumption of liability accompanies a
|
||||
copy of the Program in return for a fee.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
state the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the program's name and a brief idea of what it does.>
|
||||
Copyright (C) <year> <name of author>
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program does terminal interaction, make it output a short
|
||||
notice like this when it starts in an interactive mode:
|
||||
|
||||
<program> Copyright (C) <year> <name of author>
|
||||
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||
parts of the General Public License. Of course, your program's commands
|
||||
might be different; for a GUI interface, you would use an "about box".
|
||||
|
||||
You should also get your employer (if you work as a programmer) or school,
|
||||
if any, to sign a "copyright disclaimer" for the program, if necessary.
|
||||
For more information on this, and how to apply and follow the GNU GPL, see
|
||||
<http://www.gnu.org/licenses/>.
|
||||
|
||||
The GNU General Public License does not permit incorporating your program
|
||||
into proprietary programs. If your program is a subroutine library, you
|
||||
may consider it more useful to permit linking proprietary applications with
|
||||
the library. If this is what you want to do, use the GNU Lesser General
|
||||
Public License instead of this License. But first, please read
|
||||
<http://www.gnu.org/philosophy/why-not-lgpl.html>.
|
367
pcre2/ChangeLog
Normal file
367
pcre2/ChangeLog
Normal file
@ -0,0 +1,367 @@
|
||||
Change Log for PCRE2
|
||||
--------------------
|
||||
|
||||
Version 10.20 30-June-2015
|
||||
--------------------------
|
||||
|
||||
1. Callouts with string arguments have been added.
|
||||
|
||||
2. Assertion code generator in JIT has been optimized.
|
||||
|
||||
3. The invalid pattern (?(?C) has a missing assertion condition at the end. The
|
||||
pcre2_compile() function read past the end of the input before diagnosing an
|
||||
error. This bug was discovered by the LLVM fuzzer.
|
||||
|
||||
4. Implemented pcre2_callout_enumerate().
|
||||
|
||||
5. Fix JIT compilation of conditional blocks whose assertion is converted to
|
||||
(*FAIL). E.g: /(?(?!))/.
|
||||
|
||||
6. The pattern /(?(?!)^)/ caused references to random memory. This bug was
|
||||
discovered by the LLVM fuzzer.
|
||||
|
||||
7. The assertion (?!) is optimized to (*FAIL). This was not handled correctly
|
||||
when this assertion was used as a condition, for example (?(?!)a|b). In
|
||||
pcre2_match() it worked by luck; in pcre2_dfa_match() it gave an incorrect
|
||||
error about an unsupported item.
|
||||
|
||||
8. For some types of pattern, for example /Z*(|d*){216}/, the auto-
|
||||
possessification code could take exponential time to complete. A recursion
|
||||
depth limit of 1000 has been imposed to limit the resources used by this
|
||||
optimization. This infelicity was discovered by the LLVM fuzzer.
|
||||
|
||||
9. In a pattern such as /(*UTF)[\S\V\H]/, which contains a negated special class
|
||||
such as \S in non-UCP mode, explicit wide characters (> 255) can be ignored
|
||||
because \S ensures they are all in the class. The code for doing this was
|
||||
interacting badly with the code for computing the amount of space needed to
|
||||
compile the pattern, leading to a buffer overflow. This bug was discovered by
|
||||
the LLVM fuzzer.
|
||||
|
||||
10. A pattern such as /((?2)+)((?1))/ which has mutual recursion nested inside
|
||||
other kinds of group caused stack overflow at compile time. This bug was
|
||||
discovered by the LLVM fuzzer.
|
||||
|
||||
11. A pattern such as /(?1)(?#?'){8}(a)/ which had a parenthesized comment
|
||||
between a subroutine call and its quantifier was incorrectly compiled, leading
|
||||
to buffer overflow or other errors. This bug was discovered by the LLVM fuzzer.
|
||||
|
||||
12. The illegal pattern /(?(?<E>.*!.*)?)/ was not being diagnosed as missing an
|
||||
assertion after (?(. The code was failing to check the character after (?(?<
|
||||
for the ! or = that would indicate a lookbehind assertion. This bug was
|
||||
discovered by the LLVM fuzzer.
|
||||
|
||||
13. A pattern such as /X((?2)()*+){2}+/ which has a possessive quantifier with
|
||||
a fixed maximum following a group that contains a subroutine reference was
|
||||
incorrectly compiled and could trigger buffer overflow. This bug was discovered
|
||||
by the LLVM fuzzer.
|
||||
|
||||
14. Negative relative recursive references such as (?-7) to non-existent
|
||||
subpatterns were not being diagnosed and could lead to unpredictable behaviour.
|
||||
This bug was discovered by the LLVM fuzzer.
|
||||
|
||||
15. The bug fixed in 14 was due to an integer variable that was unsigned when
|
||||
it should have been signed. Some other "int" variables, having been checked,
|
||||
have either been changed to uint32_t or commented as "must be signed".
|
||||
|
||||
16. A mutual recursion within a lookbehind assertion such as (?<=((?2))((?1)))
|
||||
caused a stack overflow instead of the diagnosis of a non-fixed length
|
||||
lookbehind assertion. This bug was discovered by the LLVM fuzzer.
|
||||
|
||||
17. The use of \K in a positive lookbehind assertion in a non-anchored pattern
|
||||
(e.g. /(?<=\Ka)/) could make pcre2grep loop.
|
||||
|
||||
18. There was a similar problem to 17 in pcre2test for global matches, though
|
||||
the code there did catch the loop.
|
||||
|
||||
19. If a greedy quantified \X was preceded by \C in UTF mode (e.g. \C\X*),
|
||||
and a subsequent item in the pattern caused a non-match, backtracking over the
|
||||
repeated \X did not stop, but carried on past the start of the subject, causing
|
||||
reference to random memory and/or a segfault. There were also some other cases
|
||||
where backtracking after \C could crash. This set of bugs was discovered by the
|
||||
LLVM fuzzer.
|
||||
|
||||
20. The function for finding the minimum length of a matching string could take
|
||||
a very long time if mutual recursion was present many times in a pattern, for
|
||||
example, /((?2){73}(?2))((?1))/. A better mutual recursion detection method has
|
||||
been implemented. This infelicity was discovered by the LLVM fuzzer.
|
||||
|
||||
21. Implemented PCRE2_NEVER_BACKSLASH_C.
|
||||
|
||||
22. The feature for string replication in pcre2test could read from freed
|
||||
memory if the replication required a buffer to be extended, and it was not
|
||||
working properly in 16-bit and 32-bit modes. This issue was discovered by a
|
||||
fuzzer: see http://lcamtuf.coredump.cx/afl/.
|
||||
|
||||
23. Added the PCRE2_ALT_CIRCUMFLEX option.
|
||||
|
||||
24. Adjust the treatment of \8 and \9 to be the same as the current Perl
|
||||
behaviour.
|
||||
|
||||
25. Static linking against the PCRE2 library using the pkg-config module was
|
||||
failing on missing pthread symbols.
|
||||
|
||||
26. If a group that contained a recursive back reference also contained a
|
||||
forward reference subroutine call followed by a non-forward-reference
|
||||
subroutine call, for example /.((?2)(?R)\1)()/, pcre2_compile() failed to
|
||||
compile correct code, leading to undefined behaviour or an internally detected
|
||||
error. This bug was discovered by the LLVM fuzzer.
|
||||
|
||||
27. Quantification of certain items (e.g. atomic back references) could cause
|
||||
incorrect code to be compiled when recursive forward references were involved.
|
||||
For example, in this pattern: /(?1)()((((((\1++))\x85)+)|))/. This bug was
|
||||
discovered by the LLVM fuzzer.
|
||||
|
||||
28. A repeated conditional group whose condition was a reference by name caused
|
||||
a buffer overflow if there was more than one group with the given name. This
|
||||
bug was discovered by the LLVM fuzzer.
|
||||
|
||||
29. A recursive back reference by name within a group that had the same name as
|
||||
another group caused a buffer overflow. For example: /(?J)(?'d'(?'d'\g{d}))/.
|
||||
This bug was discovered by the LLVM fuzzer.
|
||||
|
||||
30. A forward reference by name to a group whose number is the same as the
|
||||
current group, for example in this pattern: /(?|(\k'Pm')|(?'Pm'))/, caused a
|
||||
buffer overflow at compile time. This bug was discovered by the LLVM fuzzer.
|
||||
|
||||
31. Fix -fsanitize=undefined warnings for left shifts of 1 by 31 (it treats 1
|
||||
as an int; fixed by writing it as 1u).
|
||||
|
||||
32. Fix pcre2grep compile when -std=c99 is used with gcc, though it still gives
|
||||
a warning for "fileno" unless -std=gnu99 is used.
|
||||
|
||||
33. A lookbehind assertion within a set of mutually recursive subpatterns could
|
||||
provoke a buffer overflow. This bug was discovered by the LLVM fuzzer.
|
||||
|
||||
34. Give an error for an empty subpattern name such as (?'').
|
||||
|
||||
35. Make pcre2test give an error if a pattern that follows #forbid_utf contains
|
||||
\P, \p, or \X.
|
||||
|
||||
36. The way named subpatterns are handled has been refactored. There is now a
|
||||
pre-pass over the regex which does nothing other than identify named
|
||||
subpatterns and count the total captures. This means that information about
|
||||
named patterns is known before the rest of the compile. In particular, it means
|
||||
that forward references can be checked as they are encountered. Previously, the
|
||||
code for handling forward references was contorted and led to several errors in
|
||||
computing the memory requirements for some patterns, leading to buffer
|
||||
overflows.
|
||||
|
||||
37. There was no check for integer overflow in subroutine calls such as (?123).
|
||||
|
||||
38. The table entry for \l in EBCDIC environments was incorrect, leading to its
|
||||
being treated as a literal 'l' instead of causing an error.
|
||||
|
||||
39. If a non-capturing group containing a conditional group that could match
|
||||
an empty string was repeated, it was not identified as matching an empty string
|
||||
itself. For example: /^(?:(?(1)x|)+)+$()/.
|
||||
|
||||
40. In an EBCDIC environment, pcretest was mishandling the escape sequences
|
||||
\a and \e in test subject lines.
|
||||
|
||||
41. In an EBCDIC environment, \a in a pattern was converted to the ASCII
|
||||
instead of the EBCDIC value.
|
||||
|
||||
42. The handling of \c in an EBCDIC environment has been revised so that it is
|
||||
now compatible with the specification in Perl's perlebcdic page.
|
||||
|
||||
43. Single character repetition in JIT has been improved. 20-30% speedup
|
||||
was achieved on certain patterns.
|
||||
|
||||
44. The EBCDIC character 0x41 is a non-breaking space, equivalent to 0xa0 in
|
||||
ASCII/Unicode. This has now been added to the list of characters that are
|
||||
recognized as white space in EBCDIC.
|
||||
|
||||
45. When PCRE2 was compiled without Unicode support, the use of \p and \P gave
|
||||
an error (correctly) when used outside a class, but did not give an error
|
||||
within a class.
|
||||
|
||||
46. \h within a class was incorrectly compiled in EBCDIC environments.
|
||||
|
||||
47. JIT should return with error when the compiled pattern requires
|
||||
more stack space than the maximum.
|
||||
|
||||
48. Fixed a memory leak in pcre2grep when a locale is set.
|
||||
|
||||
|
||||
Version 10.10 06-March-2015
|
||||
---------------------------
|
||||
|
||||
1. When a pattern is compiled, it remembers the highest back reference so that
|
||||
when matching, if the ovector is too small, extra memory can be obtained to
|
||||
use instead. A conditional subpattern whose condition is a check on a capture
|
||||
having happened, such as, for example in the pattern /^(?:(a)|b)(?(1)A|B)/, is
|
||||
another kind of back reference, but it was not setting the highest
|
||||
backreference number. This mattered only if pcre2_match() was called with an
|
||||
ovector that was too small to hold the capture, and there was no other kind of
|
||||
back reference (a situation which is probably quite rare). The effect of the
|
||||
bug was that the condition was always treated as FALSE when the capture could
|
||||
not be consulted, leading to incorrect behaviour by pcre2_match(). This bug
|
||||
has been fixed.
|
||||
|
||||
2. Functions for serialization and deserialization of sets of compiled patterns
|
||||
have been added.
|
||||
|
||||
3. The value that is returned by PCRE2_INFO_SIZE has been corrected to remove
|
||||
excess code units at the end of the data block that may occasionally occur if
|
||||
the code for calculating the size over-estimates. This change stops the
|
||||
serialization code copying uninitialized data, to which valgrind objects. The
|
||||
documentation of PCRE2_INFO_SIZE was incorrect in stating that the size did not
|
||||
include the general overhead. This has been corrected.
|
||||
|
||||
4. All code units in every slot in the table of group names are now set, again
|
||||
in order to avoid accessing uninitialized data when serializing.
|
||||
|
||||
5. The (*NO_JIT) feature is implemented.
|
||||
|
||||
6. If a bug that caused pcre2_compile() to use more memory than allocated was
|
||||
triggered when using valgrind, the code in (3) above passed a stupidly large
|
||||
value to valgrind. This caused a crash instead of an "internal error" return.
|
||||
|
||||
7. A reference to a duplicated named group (either a back reference or a test
|
||||
for being set in a conditional) that occurred in a part of the pattern where
|
||||
PCRE2_DUPNAMES was not set caused the amount of memory needed for the pattern
|
||||
to be incorrectly calculated, leading to overwriting.
|
||||
|
||||
8. A mutually recursive set of back references such as (\2)(\1) caused a
|
||||
segfault at compile time (while trying to find the minimum matching length).
|
||||
The infinite loop is now broken (with the minimum length unset, that is, zero).
|
||||
|
||||
9. If an assertion that was used as a condition was quantified with a minimum
|
||||
of zero, matching went wrong. In particular, if the whole group had unlimited
|
||||
repetition and could match an empty string, a segfault was likely. The pattern
|
||||
(?(?=0)?)+ is an example that caused this. Perl allows assertions to be
|
||||
quantified, but not if they are being used as conditions, so the above pattern
|
||||
is faulted by Perl. PCRE2 has now been changed so that it also rejects such
|
||||
patterns.
|
||||
|
||||
10. The error message for an invalid quantifier has been changed from "nothing
|
||||
to repeat" to "quantifier does not follow a repeatable item".
|
||||
|
||||
11. If a bad UTF string is compiled with NO_UTF_CHECK, it may succeed, but
|
||||
scanning the compiled pattern in subsequent auto-possessification can get out
|
||||
of step and lead to an unknown opcode. Previously this could have caused an
|
||||
infinite loop. Now it generates an "internal error" error. This is a tidyup,
|
||||
not a bug fix; passing bad UTF with NO_UTF_CHECK is documented as having an
|
||||
undefined outcome.
|
||||
|
||||
12. A UTF pattern containing a "not" match of a non-ASCII character and a
|
||||
subroutine reference could loop at compile time. Example: /[^\xff]((?1))/.
|
||||
|
||||
13. The locale test (RunTest 3) has been upgraded. It now checks that a locale
|
||||
that is found in the output of "locale -a" can actually be set by pcre2test
|
||||
before it is accepted. Previously, in an environment where a locale was listed
|
||||
but would not set (an example does exist), the test would "pass" without
|
||||
actually doing anything. Also the fr_CA locale has been added to the list of
|
||||
locales that can be used.
|
||||
|
||||
14. Fixed a bug in pcre2_substitute(). If a replacement string ended in a
|
||||
capturing group number without parentheses, the last character was incorrectly
|
||||
literally included at the end of the replacement string.
|
||||
|
||||
15. A possessive capturing group such as (a)*+ with a minimum repeat of zero
|
||||
failed to allow the zero-repeat case if pcre2_match() was called with an
|
||||
ovector too small to capture the group.
|
||||
|
||||
16. Improved error message in pcre2test when setting the stack size (-S) fails.
|
||||
|
||||
17. Fixed two bugs in CMakeLists.txt: (1) Some lines had got lost in the
|
||||
transfer from PCRE1, meaning that CMake configuration failed if "build tests"
|
||||
was selected. (2) The file src/pcre2_serialize.c had not been added to the list
|
||||
of PCRE2 sources, which caused a failure to build pcre2test.
|
||||
|
||||
18. Fixed typo in pcre2_serialize.c (DECL instead of DEFN) that causes problems
|
||||
only on Windows.
|
||||
|
||||
19. Use binary input when reading back saved serialized patterns in pcre2test.
|
||||
|
||||
20. Added RunTest.bat for running the tests under Windows.
|
||||
|
||||
21. "make distclean" was not removing config.h, a file that may be created for
|
||||
use with CMake.
|
||||
|
||||
22. A pattern such as "((?2){0,1999}())?", which has a group containing a
|
||||
forward reference repeated a large (but limited) number of times within a
|
||||
repeated outer group that has a zero minimum quantifier, caused incorrect code
|
||||
to be compiled, leading to the error "internal error: previously-checked
|
||||
referenced subpattern not found" when an incorrect memory address was read.
|
||||
This bug was reported as "heap overflow", discovered by Kai Lu of Fortinet's
|
||||
FortiGuard Labs. (Added 24-March-2015: CVE-2015-2325 was given to this.)
|
||||
|
||||
23. A pattern such as "((?+1)(\1))/" containing a forward reference subroutine
|
||||
call within a group that also contained a recursive back reference caused
|
||||
incorrect code to be compiled. This bug was reported as "heap overflow",
|
||||
discovered by Kai Lu of Fortinet's FortiGuard Labs. (Added 24-March-2015:
|
||||
CVE-2015-2326 was given to this.)
|
||||
|
||||
24. Computing the size of the JIT read-only data in advance has been a source
|
||||
of various issues, and new ones still appear, unfortunately. To fix
|
||||
existing and future issues, size computation is eliminated from the code,
|
||||
and replaced by on-demand memory allocation.
|
||||
|
||||
25. A pattern such as /(?i)[A-`]/, where characters in the other case are
|
||||
adjacent to the end of the range, and the range contained characters with more
|
||||
than one other case, caused incorrect behaviour when compiled in UTF mode. In
|
||||
that example, the range a-j was left out of the class.
|
||||
|
||||
|
||||
Version 10.00 05-January-2015
|
||||
-----------------------------
|
||||
|
||||
Version 10.00 is the first release of PCRE2, a revised API for the PCRE
|
||||
library. Changes prior to 10.00 are logged in the ChangeLog file for the old
|
||||
API, up to item 20 for release 8.36.
|
||||
|
||||
The code of the library was heavily revised as part of the new API
|
||||
implementation. Details of each and every modification were not individually
|
||||
logged. In addition to the API changes, the following changes were made. They
|
||||
are either new functionality, or bug fixes and other noticeable changes of
|
||||
behaviour that were implemented after the code had been forked.
|
||||
|
||||
1. Including Unicode support at build time is now enabled by default, but it
|
||||
can optionally be disabled. It is not enabled by default at run time (no
|
||||
change).
|
||||
|
||||
2. The test program, now called pcre2test, was re-specified and almost
|
||||
completely re-written. Its input is not compatible with input for pcretest.
|
||||
|
||||
3. Patterns may start with (*NOTEMPTY) or (*NOTEMPTY_ATSTART) to set the
|
||||
PCRE2_NOTEMPTY or PCRE2_NOTEMPTY_ATSTART options for every subject line that is
|
||||
matched by that pattern.
|
||||
|
||||
4. For the benefit of those who use PCRE2 via some other application, that is,
|
||||
not writing the function calls themselves, it is possible to check the PCRE2
|
||||
version by matching a pattern such as /(?(VERSION>=10)yes|no)/ against a
|
||||
string such as "yesno".
|
||||
|
||||
5. There are case-equivalent Unicode characters whose encodings use different
|
||||
numbers of code units in UTF-8. U+023A and U+2C65 are one example. (It is
|
||||
theoretically possible for this to happen in UTF-16 too.) If a backreference to
|
||||
a group containing one of these characters was greedily repeated, and during
|
||||
the match a backtrack occurred, the subject might be backtracked by the wrong
|
||||
number of code units. For example, if /^(\x{23a})\1*(.)/ is matched caselessly
|
||||
(and in UTF-8 mode) against "\x{23a}\x{2c65}\x{2c65}\x{2c65}", group 2 should
|
||||
capture the final character, which is the three bytes E2, B1, and A5 in UTF-8.
|
||||
Incorrect backtracking meant that group 2 captured only the last two bytes.
|
||||
This bug has been fixed; the new code is slower, but it is used only when the
|
||||
strings matched by the repetition are not all the same length.
|
||||
|
||||
6. A pattern such as /()a/ was not setting the "first character must be 'a'"
|
||||
information. This applied to any pattern with a group that matched no
|
||||
characters, for example: /(?:(?=.)|(?<!x))a/.
|
||||
|
||||
7. When an (*ACCEPT) is triggered inside capturing parentheses, it arranges for
|
||||
those parentheses to be closed with whatever has been captured so far. However,
|
||||
it was failing to mark any other groups between the highest capture so far and
|
||||
the current group as "unset". Thus, the ovector for those groups contained
|
||||
whatever was previously there. An example is the pattern /(x)|((*ACCEPT))/ when
|
||||
matched against "abcd".
|
||||
|
||||
8. The pcre2_substitute() function has been implemented.
|
||||
|
||||
9. If an assertion used as a condition was quantified with a minimum of zero
|
||||
(an odd thing to do, but it happened), SIGSEGV or other misbehaviour could
|
||||
occur.
|
||||
|
||||
10. The PCRE2_NO_DOTSTAR_ANCHOR option has been implemented.
|
||||
|
||||
****
|
67
pcre2/CheckMan
Executable file
67
pcre2/CheckMan
Executable file
@ -0,0 +1,67 @@
|
||||
#! /usr/bin/perl
|
||||
|
||||
# A script to scan PCRE2's man pages to check for typos in the control
|
||||
# sequences. I use only a small set of the available repertoire, so it is
|
||||
# straightforward to check that nothing else has slipped in by mistake. This
|
||||
# script should be called in the doc directory.
|
||||
|
||||
use strict;
use warnings;

# Exit status of the script: stays 0 unless at least one problem is reported
# for any of the files.

my $yield = 0;

# Each command line argument names one man page source file to be checked.

while (scalar(@ARGV) > 0)
  {
  my $line = 0;
  my $file = shift @ARGV;

  # Use the three-argument form of open with a lexical handle so that the
  # file name is always treated as a plain name, never as a mode or a pipe.

  open(my $in, '<', $file) or die "Failed to open $file: $!\n";

  while (<$in>)
    {
    $line++;

    # Completely empty (or whitespace-only) lines are not allowed.
    # The messages are output with print rather than printf so that a '%'
    # in a file name is not interpreted as a format directive.

    if (/^\s*$/)
      {
      print "Empty line $line of $file\n";
      $yield = 1;
      }

    # A line that starts with a dot is a control line. It must be one of the
    # small set of requests that are listed in the pattern below.

    elsif (/^\./)
      {
      if (!/^\.\s*$|
            ^\.B\s+\S|
            ^\.TH\s\S|
            ^\.SH\s\S|
            ^\.SS\s\S|
            ^\.TP(?:\s?\d+)?\s*$|
            ^\.SM\s*$|
            ^\.br\s*$|
            ^\.rs\s*$|
            ^\.sp\s*$|
            ^\.nf\s*$|
            ^\.fi\s*$|
            ^\.P\s*$|
            ^\.PP\s*$|
            ^\.\\"(?:\ HREF)?\s*$|
            ^\.\\"\sHTML\s<a\shref="[^"]+?">\s*$|
            ^\.\\"\sHTML\s<a\sname="[^"]+?"><\/a>\s*$|
            ^\.\\"\s<\/a>\s*$|
            ^\.\\"\sJOINSH\s*$|
            ^\.\\"\sJOIN\s*$/x
         )
        {
        print "Bad control line $line of $file\n";
        $yield = 1;
        }
      }

    # In a text line, a backslash may be followed only by 'e' or by 'f' plus
    # one of the letters I, B, or P.

    else
      {
      if (/\\[^ef]|\\f[^IBP]/)
        {
        print "Bad backslash in line $line of $file\n";
        $yield = 1;
        }
      }
    }

  close($in);
  }

exit $yield;
|
||||
# End
|
113
pcre2/CleanTxt
Executable file
113
pcre2/CleanTxt
Executable file
@ -0,0 +1,113 @@
|
||||
#! /usr/bin/perl -w
|
||||
|
||||
# Script to take the output of nroff -man and remove all the backspacing and
|
||||
# the page footers and the screen commands etc so that it is more usefully
|
||||
# readable online. In fact, in the latest nroff, intermediate footers don't
|
||||
# seem to be generated any more.
|
||||
|
||||
# State that is carried from one input line to the next.

my $pending_blanks = 0;   # Blank lines seen but not yet output
my $after_cut = 0;        # True when the previous action was a cut
my $header_wanted = 1;    # True until the first header line has been printed
my $last_printed;         # The remembered output line; undefined at the start
my @lookahead;            # Three lines read ahead when looking for a footer

# Input on STDIN; output to STDOUT.

while (defined(my $line = <STDIN>))
  {
  $line =~ s/\x1b\[\d+m//g;   # Remove screen controls "ESC [ number m"
  $line =~ s/.\x8//g;         # Remove "char, backspace"

  # Header lines: only the first one encountered is kept. In every case the
  # line that follows a header (expected to be blank) is read and discarded.

  if ($line =~ /^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/)
    {
    if ($header_wanted)
      {
      print $line;
      $last_printed = $line;
      $header_wanted = 0;
      $after_cut = 0;
      }
    my $discarded = <STDIN>;  # Remove a blank that follows
    next;
    }

  # Blank lines are only counted here; they are output later if needed.

  if ($line =~ /^\s*$/)
    {
    $pending_blanks++;
    $after_cut = 0;
    next;
    }

  # Immediately after a cut, with no blank lines pending, insert one blank
  # line if the indentation differs from that of the remembered line.

  if ($after_cut && $pending_blanks < 1 && defined($last_printed))
    {
    my ($old_indent) = $last_printed =~ /^(\s*)/;
    my ($new_indent) = $line =~ /^(\s*)/;
    $pending_blanks++ if $old_indent ne $new_indent;
    }

  # A non-blank line is now in hand. When at least 3 blank lines precede it,
  # read 3 more lines in order to find out whether it is a page footer.

  if ($pending_blanks >= 3)
    {
    for my $slot (0 .. 2)
      {
      my $ahead = <STDIN>;
      $ahead = "" if !defined $ahead;
      $ahead =~ s/\x1b\[\d+m//g;   # Remove screen controls "ESC [ number m"
      $ahead =~ s/.\x8//g;         # Remove "char, backspace"
      $lookahead[$slot] = $ahead;
      }

    my $all_blank = !grep { $_ !~ /^\s*$/ } @lookahead;

    # <3 blanks><non-blank><3 blanks> is a footer: drop the current line and
    # the three lines read ahead, and give back three of the counted blanks.

    if ($all_blank)
      {
      $pending_blanks -= 3;
      $after_cut = 1;
      }

    # Not a footer: output the pending blanks, the current line, and the
    # three lines read ahead. Note that it is the current line, not the last
    # of the lines read ahead, that is remembered.

    else
      {
      print "\n" x $pending_blanks;
      print $line;
      foreach my $ahead (@lookahead)
        {
        $ahead =~ s/.\x8//g;
        print $ahead;
        }
      $last_printed = $line;
      $after_cut = 0;
      $pending_blanks = 0;
      }
    }

  # Fewer than 3 blank lines precede this non-blank line. Output whatever
  # blanks are pending and then the line itself, and remember it. A heading
  # (a line that starts in column one, other than the "Last updated" and
  # "Copyright" lines) is always preceded by exactly two blank lines, except
  # when nothing has been output yet.

  else
    {
    $pending_blanks = 2
      if $line =~ /^\S/ && $line !~ /^Last updated/ &&
         $line !~ /^Copyright/ && defined($last_printed);
    print "\n" x $pending_blanks;
    print $line;
    $last_printed = $line;
    $after_cut = 0;
    $pending_blanks = 0;
    }
  }
|
||||
|
||||
# End
|
35
pcre2/Detrail
Executable file
35
pcre2/Detrail
Executable file
@ -0,0 +1,35 @@
|
||||
#!/usr/bin/perl
|
||||
|
||||
# This is a script for removing trailing whitespace from lines in files that
|
||||
# are listed on the command line.
|
||||
|
||||
# This subroutine does the work for one file.
|
||||
|
||||
# Remove trailing whitespace from every line of one file. The file is read
# into memory and is rewritten in place only if at least one line changed.
#
# Argument:  the name of the file to process
# Dies if the file cannot be opened for reading, or cannot be rewritten.

sub detrail {
  my($file) = $_[0];
  my($changed) = 0;

  # The three-argument form of open is used so that the file name is always
  # taken literally, even if it starts or ends with white space or contains
  # characters such as '>' or '|'.

  open(my $in, '<', $file) or die "Can't open $file for input: $!";
  my @lines = <$in>;
  close($in);

  # The substitution yields true only when it removed something, so a file
  # that is already clean is never rewritten.

  foreach (@lines)
    {
    $changed = 1 if s/\s+\n$/\n/;
    }

  if ($changed)
    {
    open(my $out, '>', $file) or die "Can't open $file for output: $!";
    print {$out} @lines;

    # Buffered write errors (a full disk, for example) are reported only
    # when the handle is closed.

    close($out) or die "Can't finish writing $file: $!";
    }
}
|
||||
|
||||
# This is the main program
|
||||
|
||||
$, = "";   # No separator between the lines when a file is written back

# Process each file that is named on the command line, in order.

foreach my $path (@ARGV)
  {
  detrail($path);
  }
|
||||
|
||||
# End
|
604
pcre2/HACKING
Normal file
604
pcre2/HACKING
Normal file
@ -0,0 +1,604 @@
|
||||
Technical Notes about PCRE2
|
||||
---------------------------
|
||||
|
||||
These are very rough technical notes that record potentially useful information
|
||||
about PCRE2 internals. PCRE2 is a library based on the original PCRE library,
|
||||
but with a revised (and incompatible) API. To avoid confusion, the original
|
||||
library is referred to as PCRE1 below. For information about testing PCRE2, see
|
||||
the pcre2test documentation and the comment at the head of the RunTest file.
|
||||
|
||||
PCRE1 releases were up to 8.3x when PCRE2 was developed. The 8.xx series will
|
||||
continue for bugfixes if necessary. PCRE2 releases started at 10.00 to avoid
|
||||
confusion with PCRE1.
|
||||
|
||||
|
||||
Historical note 1
|
||||
-----------------
|
||||
|
||||
Many years ago I implemented some regular expression functions to an algorithm
|
||||
suggested by Martin Richards. These were not Unix-like in form, and were quite
|
||||
restricted in what they could do by comparison with Perl. The interesting part
|
||||
about the algorithm was that the amount of space required to hold the compiled
|
||||
form of an expression was known in advance. The code to apply an expression did
|
||||
not operate by backtracking, as the original Henry Spencer code and current
|
||||
PCRE2 and Perl code do, but instead checked all possibilities simultaneously
|
||||
by keeping a list of current states and checking all of them as it advanced
|
||||
through the subject string. In the terminology of Jeffrey Friedl's book, it was
|
||||
a "DFA algorithm", though it was not a traditional Finite State Machine (FSM).
|
||||
When the pattern was all used up, all remaining states were possible matches,
|
||||
and the one matching the longest subset of the subject string was chosen. This
|
||||
did not necessarily maximize the individual wild portions of the pattern, as is
|
||||
expected in Unix and Perl-style regular expressions.
|
||||
|
||||
|
||||
Historical note 2
|
||||
-----------------
|
||||
|
||||
By contrast, the code originally written by Henry Spencer (which was
|
||||
subsequently heavily modified for Perl) compiles the expression twice: once in
|
||||
a dummy mode in order to find out how much store will be needed, and then for
|
||||
real. (The Perl version probably doesn't do this any more; I'm talking about
|
||||
the original library.) The execution function operates by backtracking and
|
||||
maximizing (or, optionally, minimizing, in Perl) the amount of the subject that
|
||||
matches individual wild portions of the pattern. This is an "NFA algorithm" in
|
||||
Friedl's terminology.
|
||||
|
||||
|
||||
OK, here's the real stuff
|
||||
-------------------------
|
||||
|
||||
For the set of functions that formed the original PCRE1 library (which are
|
||||
unrelated to those mentioned above), I tried at first to invent an algorithm
|
||||
that used an amount of store bounded by a multiple of the number of characters
|
||||
in the pattern, to save on compiling time. However, because of the greater
|
||||
complexity in Perl regular expressions, I couldn't do this. In any case, a
|
||||
first pass through the pattern is helpful for other reasons.
|
||||
|
||||
|
||||
Support for 16-bit and 32-bit data strings
|
||||
-------------------------------------------
|
||||
|
||||
The library can be compiled in any combination of 8-bit, 16-bit or 32-bit
|
||||
modes, creating up to three different libraries. In the description that
|
||||
follows, the word "short" is used for a 16-bit data quantity, and the phrase
|
||||
"code unit" is used for a quantity that is a byte in 8-bit mode, a short in
|
||||
16-bit mode and a 32-bit word in 32-bit mode. The names of PCRE2 functions are
|
||||
given in generic form, without the _8, _16, or _32 suffix.
|
||||
|
||||
|
||||
Computing the memory requirement: how it was
|
||||
--------------------------------------------
|
||||
|
||||
Up to and including release 6.7, PCRE1 worked by running a very degenerate
|
||||
first pass to calculate a maximum memory requirement, and then a second pass to
|
||||
do the real compile - which might use a bit less than the predicted amount of
|
||||
memory. The idea was that this would turn out faster than the Henry Spencer
|
||||
code because the first pass is degenerate and the second pass can just store
|
||||
stuff straight into memory, which it knows is big enough.
|
||||
|
||||
|
||||
Computing the memory requirement: how it is
|
||||
-------------------------------------------
|
||||
|
||||
By the time I was working on a potential 6.8 release, the degenerate first pass
|
||||
had become very complicated and hard to maintain. Indeed one of the early
|
||||
things I did for 6.8 was to fix Yet Another Bug in the memory computation. Then
|
||||
I had a flash of inspiration as to how I could run the real compile function in
|
||||
a "fake" mode that enables it to compute how much memory it would need, while
|
||||
actually only ever using a few hundred bytes of working memory, and without too
|
||||
many tests of the mode that might slow it down. So I refactored the compiling
|
||||
functions to work this way. This got rid of about 600 lines of source. It
|
||||
should make future maintenance and development easier. As this was such a major
|
||||
change, I never released 6.8, instead upping the number to 7.0 (other quite
|
||||
major changes were also present in the 7.0 release).
|
||||
|
||||
A side effect of this work was that the previous limit of 200 on the nesting
|
||||
depth of parentheses was removed. However, there was a downside: compiling ran
|
||||
more slowly than before (30% or more, depending on the pattern) because it now
|
||||
did a full analysis of the pattern. My hope was that this would not be a big
|
||||
issue, and in the event, nobody has commented on it.
|
||||
|
||||
At release 8.34, a limit on the nesting depth of parentheses was re-introduced
|
||||
(default 250, settable at build time) so as to put a limit on the amount of
|
||||
system stack used by the compile function, which uses recursive function calls
|
||||
for nested parenthesized groups. This is a safety feature for environments with
|
||||
small stacks where the patterns are provided by users.
|
||||
|
||||
History repeated itself for release 10.20. A number of bugs relating to named
|
||||
subpatterns had been discovered by fuzzers. Most of these were related to the
|
||||
handling of forward references when it was not known if the named pattern was
|
||||
unique. (References to non-unique names use a different opcode and more
|
||||
memory.) The use of duplicate group numbers (the (?| facility) also caused
|
||||
issues.
|
||||
|
||||
To get around these problems I adopted a new approach by adding a third pass,
|
||||
really a "pre-pass", over the pattern, which does nothing other than identify
|
||||
all the named subpatterns and their corresponding group numbers. This means
|
||||
that the actual compile (both pre-pass and real compile) have full knowledge of
|
||||
group names and numbers throughout. Several dozen lines of messy code were
|
||||
eliminated, though the new pre-pass is not short (skipping over [] classes is
|
||||
complicated).
|
||||
|
||||
|
||||
Traditional matching function
|
||||
-----------------------------
|
||||
|
||||
The "traditional", and original, matching function is called pcre2_match(), and
|
||||
it implements an NFA algorithm, similar to the original Henry Spencer algorithm
|
||||
and the way that Perl works. This is not surprising, since it is intended to be
|
||||
as compatible with Perl as possible. This is the function most users of PCRE2
|
||||
will use most of the time. If PCRE2 is compiled with just-in-time (JIT)
|
||||
support, and studying a compiled pattern with JIT is successful, the JIT code
|
||||
is run instead of the normal pcre2_match() code, but the result is the same.
|
||||
|
||||
|
||||
Supplementary matching function
|
||||
-------------------------------
|
||||
|
||||
There is also a supplementary matching function called pcre2_dfa_match(). This
|
||||
implements a DFA matching algorithm that searches simultaneously for all
|
||||
possible matches that start at one point in the subject string. (Going back to
|
||||
my roots: see Historical Note 1 above.) This function interprets the same
|
||||
compiled pattern data as pcre2_match(); however, not all the facilities are
|
||||
available, and those that are do not always work in quite the same way. See the
|
||||
user documentation for details.
|
||||
|
||||
The algorithm that is used for pcre2_dfa_match() is not a traditional FSM,
|
||||
because it may have a number of states active at one time. More work would be
|
||||
needed at compile time to produce a traditional FSM where only one state is
|
||||
ever active at once. I believe some other regex matchers work this way. JIT
|
||||
support is not available for this kind of matching.
|
||||
|
||||
|
||||
Changeable options
|
||||
------------------
|
||||
|
||||
The /i, /m, or /s options (PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL, and
|
||||
some others) may change in the middle of patterns. Their processing is handled
|
||||
entirely at compile time by generating different opcodes for the different
|
||||
settings. The runtime functions do not need to keep track of an options state.
|
||||
|
||||
|
||||
Format of compiled patterns
|
||||
---------------------------
|
||||
|
||||
The compiled form of a pattern is a vector of unsigned code units (bytes in
|
||||
8-bit mode, shorts in 16-bit mode, 32-bit words in 32-bit mode), containing
|
||||
items of variable length. The first code unit in an item contains an opcode,
|
||||
and the length of the item is either implicit in the opcode or contained in the
|
||||
data that follows it.
|
||||
|
||||
In many cases listed below, LINK_SIZE data values are specified for offsets
|
||||
within the compiled pattern. LINK_SIZE always specifies a number of bytes. The
|
||||
default value for LINK_SIZE is 2, except for the 32-bit library, where it can
|
||||
only be 4. The 8-bit library can be compiled to use 3-byte or 4-byte values,
|
||||
and the 16-bit library can be compiled to use 4-byte values, though this
|
||||
impairs performance. Specifying a LINK_SIZE larger than 2 for these libraries is
|
||||
necessary only when patterns whose compiled length is greater than 64K code
|
||||
units are going to be processed. When a LINK_SIZE value uses more than one code
|
||||
unit, the most significant unit is first.
|
||||
|
||||
In this description, we assume the "normal" compilation options. Data values
|
||||
that are counts (e.g. quantifiers) are always two bytes long in 8-bit mode
|
||||
(most significant byte first), or one code unit in 16-bit and 32-bit modes.
|
||||
|
||||
|
||||
Opcodes with no following data
|
||||
------------------------------
|
||||
|
||||
These items are all just one unit long
|
||||
|
||||
OP_END end of pattern
|
||||
OP_ANY match any one character other than newline
|
||||
OP_ALLANY match any one character, including newline
|
||||
OP_ANYBYTE match any single code unit, even in UTF-8/16 mode
|
||||
OP_SOD match start of data: \A
|
||||
OP_SOM, start of match (subject + offset): \G
|
||||
OP_SET_SOM, set start of match (\K)
|
||||
OP_CIRC ^ (start of data)
|
||||
OP_CIRCM ^ multiline mode (start of data or after newline)
|
||||
OP_NOT_WORD_BOUNDARY \B
|
||||
OP_WORD_BOUNDARY \b
|
||||
OP_NOT_DIGIT \D
|
||||
OP_DIGIT \d
|
||||
OP_NOT_HSPACE \H
|
||||
OP_HSPACE \h
|
||||
OP_NOT_WHITESPACE \S
|
||||
OP_WHITESPACE \s
|
||||
OP_NOT_VSPACE \V
|
||||
OP_VSPACE \v
|
||||
OP_NOT_WORDCHAR \W
|
||||
OP_WORDCHAR \w
|
||||
OP_EODN match end of data or newline at end: \Z
|
||||
OP_EOD match end of data: \z
|
||||
OP_DOLL $ (end of data, or before final newline)
|
||||
OP_DOLLM $ multiline mode (end of data or before newline)
|
||||
OP_EXTUNI match an extended Unicode grapheme cluster
|
||||
OP_ANYNL match any Unicode newline sequence
|
||||
|
||||
OP_ASSERT_ACCEPT )
|
||||
OP_ACCEPT ) These are Perl 5.10's "backtracking control
|
||||
OP_COMMIT ) verbs". If OP_ACCEPT is inside capturing
|
||||
OP_FAIL ) parentheses, it may be preceded by one or more
|
||||
OP_PRUNE ) OP_CLOSE, each followed by a count that
|
||||
OP_SKIP ) indicates which parentheses must be closed.
|
||||
OP_THEN )
|
||||
|
||||
OP_ASSERT_ACCEPT is used when (*ACCEPT) is encountered within an assertion.
|
||||
This ends the assertion, not the entire pattern match. The assertion (?!) is
|
||||
always optimized to OP_FAIL.
|
||||
|
||||
|
||||
Backtracking control verbs with optional data
|
||||
---------------------------------------------
|
||||
|
||||
(*THEN) without an argument generates the opcode OP_THEN and no following data.
|
||||
OP_MARK is followed by the mark name, preceded by a length in one code unit,
|
||||
and followed by a binary zero. For (*PRUNE), (*SKIP), and (*THEN) with
|
||||
arguments, the opcodes OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used,
|
||||
with the name following in the same format as OP_MARK.
|
||||
|
||||
|
||||
Matching literal characters
|
||||
---------------------------
|
||||
|
||||
The OP_CHAR opcode is followed by a single character that is to be matched
|
||||
casefully. For caseless matching, OP_CHARI is used. In UTF-8 or UTF-16 modes,
|
||||
the character may be more than one code unit long. In UTF-32 mode, characters
|
||||
are always exactly one code unit long.
|
||||
|
||||
If there is only one character in a character class, OP_CHAR or OP_CHARI is
|
||||
used for a positive class, and OP_NOT or OP_NOTI for a negative one (that is,
|
||||
for something like [^a]).
|
||||
|
||||
|
||||
Repeating single characters
|
||||
---------------------------
|
||||
|
||||
The common repeats (*, +, ?), when applied to a single character, use the
|
||||
following opcodes, which come in caseful and caseless versions:
|
||||
|
||||
Caseful Caseless
|
||||
OP_STAR OP_STARI
|
||||
OP_MINSTAR OP_MINSTARI
|
||||
OP_POSSTAR OP_POSSTARI
|
||||
OP_PLUS OP_PLUSI
|
||||
OP_MINPLUS OP_MINPLUSI
|
||||
OP_POSPLUS OP_POSPLUSI
|
||||
OP_QUERY OP_QUERYI
|
||||
OP_MINQUERY OP_MINQUERYI
|
||||
OP_POSQUERY OP_POSQUERYI
|
||||
|
||||
Each opcode is followed by the character that is to be repeated. In ASCII or
|
||||
UTF-32 modes, these are two-code-unit items; in UTF-8 or UTF-16 modes, the
|
||||
length is variable. Those with "MIN" in their names are the minimizing
|
||||
versions. Those with "POS" in their names are possessive versions. Other kinds
|
||||
of repeat make use of these opcodes:
|
||||
|
||||
Caseful Caseless
|
||||
OP_UPTO OP_UPTOI
|
||||
OP_MINUPTO OP_MINUPTOI
|
||||
OP_POSUPTO OP_POSUPTOI
|
||||
OP_EXACT OP_EXACTI
|
||||
|
||||
Each of these is followed by a count and then the repeated character. The count
|
||||
is two bytes long in 8-bit mode (most significant byte first), or one code unit
|
||||
in 16-bit and 32-bit modes.
|
||||
|
||||
OP_UPTO matches from 0 to the given number. A repeat with a non-zero minimum
|
||||
and a fixed maximum is coded as an OP_EXACT followed by an OP_UPTO (or
|
||||
OP_MINUPTO or OP_POSUPTO).
|
||||
|
||||
Another set of matching repeating opcodes (called OP_NOTSTAR, OP_NOTSTARI,
|
||||
etc.) are used for repeated, negated, single-character classes such as [^a]*.
|
||||
The normal single-character opcodes (OP_STAR, etc.) are used for repeated
|
||||
positive single-character classes.
|
||||
|
||||
|
||||
Repeating character types
|
||||
-------------------------
|
||||
|
||||
Repeats of things like \d are done exactly as for single characters, except
|
||||
that instead of a character, the opcode for the type (e.g. OP_DIGIT) is stored
|
||||
in the next code unit. The opcodes are:
|
||||
|
||||
OP_TYPESTAR
|
||||
OP_TYPEMINSTAR
|
||||
OP_TYPEPOSSTAR
|
||||
OP_TYPEPLUS
|
||||
OP_TYPEMINPLUS
|
||||
OP_TYPEPOSPLUS
|
||||
OP_TYPEQUERY
|
||||
OP_TYPEMINQUERY
|
||||
OP_TYPEPOSQUERY
|
||||
OP_TYPEUPTO
|
||||
OP_TYPEMINUPTO
|
||||
OP_TYPEPOSUPTO
|
||||
OP_TYPEEXACT
|
||||
|
||||
|
||||
Match by Unicode property
|
||||
-------------------------
|
||||
|
||||
OP_PROP and OP_NOTPROP are used for positive and negative matches of a
|
||||
character by testing its Unicode property (the \p and \P escape sequences).
|
||||
Each is followed by two code units that encode the desired property as a type
|
||||
and a value. The types are a set of #defines of the form PT_xxx, and the values
|
||||
are enumerations of the form ucp_xx, defined in the pcre2_ucp.h source file.
|
||||
The value is relevant only for PT_GC (General Category), PT_PC (Particular
|
||||
Category), and PT_SC (Script).
|
||||
|
||||
Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
|
||||
three code units: OP_PROP or OP_NOTPROP, and then the desired property type and
|
||||
value.
|
||||
|
||||
|
||||
Character classes
|
||||
-----------------
|
||||
|
||||
If there is only one character in a class, OP_CHAR or OP_CHARI is used for a
|
||||
positive class, and OP_NOT or OP_NOTI for a negative one (that is, for
|
||||
something like [^a]).
|
||||
|
||||
A set of repeating opcodes (called OP_NOTSTAR etc.) are used for repeated,
|
||||
negated, single-character classes. The normal single-character opcodes
|
||||
(OP_STAR, etc.) are used for repeated positive single-character classes.
|
||||
|
||||
When there is more than one character in a class, and all the code points are
|
||||
less than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a
|
||||
negative one. In either case, the opcode is followed by a 32-byte (16-short,
|
||||
8-word) bit map containing a 1 bit for every character that is acceptable. The
|
||||
bits are counted from the least significant end of each unit. In caseless mode,
|
||||
bits for both cases are set.
|
||||
|
||||
The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8 and
|
||||
16-bit and 32-bit modes, subject characters with values greater than 255 can be
|
||||
handled correctly. For OP_CLASS they do not match, whereas for OP_NCLASS they
|
||||
do.
|
||||
|
||||
For classes containing characters with values greater than 255 or that contain
|
||||
\p or \P, OP_XCLASS is used. It optionally uses a bit map if any acceptable
|
||||
code points are less than 256, followed by a list of pairs (for a range) and/or
|
||||
single characters and/or properties. In caseless mode, both cases are
|
||||
explicitly listed.
|
||||
|
||||
OP_XCLASS is followed by a LINK_SIZE value containing the total length of the
|
||||
opcode and its data. This is followed by a code unit containing flag bits:
|
||||
XCL_NOT indicates that this is a negative class, and XCL_MAP indicates that a
|
||||
bit map is present. There follows the bit map, if XCL_MAP is set, and then a
|
||||
sequence of items coded as follows:
|
||||
|
||||
XCL_END marks the end of the list
|
||||
XCL_SINGLE one character follows
|
||||
XCL_RANGE two characters follow
|
||||
XCL_PROP a Unicode property (type, value) follows
|
||||
XCL_NOTPROP a Unicode property (type, value) follows
|
||||
|
||||
If a range starts with a code point less than 256 and ends with one greater
|
||||
than 255, it is split into two ranges, with characters less than 256 being
|
||||
indicated in the bit map, and the rest with XCL_RANGE.
|
||||
|
||||
When XCL_NOT is set, the bit map, if present, contains bits for characters that
|
||||
are allowed (exactly as for OP_NCLASS), but the list of items that follow it
|
||||
specifies characters and properties that are not allowed.
|
||||
|
||||
|
||||
Back references
|
||||
---------------
|
||||
|
||||
OP_REF (caseful) or OP_REFI (caseless) is followed by a count containing the
|
||||
reference number when the reference is to a unique capturing group (either by
|
||||
number or by name). When named groups are used, there may be more than one
|
||||
group with the same name. In this case, a reference to such a group by name
|
||||
generates OP_DNREF or OP_DNREFI. These are followed by two counts: the index
|
||||
(not the byte offset) in the group name table of the first entry for the
|
||||
required name, followed by the number of groups with the same name. The
|
||||
matching code can then search for the first one that is set.
|
||||
|
||||
|
||||
Repeating character classes and back references
|
||||
-----------------------------------------------
|
||||
|
||||
Single-character classes are handled specially (see above). This section
|
||||
applies to other classes and also to back references. In both cases, the repeat
|
||||
information follows the base item. The matching code looks at the following
|
||||
opcode to see if it is one of these:
|
||||
|
||||
OP_CRSTAR
|
||||
OP_CRMINSTAR
|
||||
OP_CRPOSSTAR
|
||||
OP_CRPLUS
|
||||
OP_CRMINPLUS
|
||||
OP_CRPOSPLUS
|
||||
OP_CRQUERY
|
||||
OP_CRMINQUERY
|
||||
OP_CRPOSQUERY
|
||||
OP_CRRANGE
|
||||
OP_CRMINRANGE
|
||||
OP_CRPOSRANGE
|
||||
|
||||
All but the last three are single-code-unit items, with no data. The others are
|
||||
followed by the minimum and maximum repeat counts.
|
||||
|
||||
|
||||
Brackets and alternation
|
||||
------------------------
|
||||
|
||||
A pair of non-capturing round brackets is wrapped round each expression at
|
||||
compile time, so alternation always happens in the context of brackets.
|
||||
|
||||
[Note for North Americans: "bracket" to some English speakers, including
|
||||
myself, can be round, square, curly, or pointy. Hence this usage rather than
|
||||
"parentheses".]
|
||||
|
||||
Non-capturing brackets use the opcode OP_BRA, capturing brackets use OP_CBRA. A
|
||||
bracket opcode is followed by a LINK_SIZE value which gives the offset to the
|
||||
next alternative OP_ALT or, if there aren't any branches, to the matching
|
||||
OP_KET opcode. Each OP_ALT is followed by a LINK_SIZE value giving the offset
|
||||
to the next one, or to the OP_KET opcode. For capturing brackets, the bracket
|
||||
number is a count that immediately follows the offset.
|
||||
|
||||
OP_KET is used for subpatterns that do not repeat indefinitely, and OP_KETRMIN
|
||||
and OP_KETRMAX are used for indefinite repetitions, minimally or maximally
|
||||
respectively (see below for possessive repetitions). All three are followed by
|
||||
a LINK_SIZE value giving (as a positive number) the offset back to the matching
|
||||
bracket opcode.
|
||||
|
||||
If a subpattern is quantified such that it is permitted to match zero times, it
|
||||
is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are
|
||||
single-unit opcodes that tell the matcher that skipping the following
|
||||
subpattern entirely is a valid match. In the case of the first two, not
|
||||
skipping the pattern is also valid (greedy and non-greedy). The third is used
|
||||
when a pattern has the quantifier {0,0}. It cannot be entirely discarded,
|
||||
because it may be called as a subroutine from elsewhere in the pattern.
|
||||
|
||||
A subpattern with an indefinite maximum repetition is replicated in the
|
||||
compiled data its minimum number of times (or once with OP_BRAZERO if the
|
||||
minimum is zero), with the final copy terminating with OP_KETRMIN or OP_KETRMAX
|
||||
as appropriate.
|
||||
|
||||
A subpattern with a bounded maximum repetition is replicated in a nested
|
||||
fashion up to the maximum number of times, with OP_BRAZERO or OP_BRAMINZERO
|
||||
before each replication after the minimum, so that, for example, (abc){2,5} is
|
||||
compiled as (abc)(abc)((abc)((abc)(abc)?)?)?, except that each bracketed group
|
||||
has the same number.
|
||||
|
||||
When a repeated subpattern has an unbounded upper limit, it is checked to see
|
||||
whether it could match an empty string. If this is the case, the opcode in the
|
||||
final replication is changed to OP_SBRA or OP_SCBRA. This tells the matcher
|
||||
that it needs to check for matching an empty string when it hits OP_KETRMIN or
|
||||
OP_KETRMAX, and if so, to break the loop.
|
||||
|
||||
|
||||
Possessive brackets
|
||||
-------------------
|
||||
|
||||
When a repeated group (capturing or non-capturing) is marked as possessive by
|
||||
the "+" notation, e.g. (abc)++, different opcodes are used. Their names all
|
||||
have POS on the end, e.g. OP_BRAPOS instead of OP_BRA and OP_SCBRAPOS instead
|
||||
of OP_SCBRA. The end of such a group is marked by OP_KETRPOS. If the minimum
|
||||
repetition is zero, the group is preceded by OP_BRAPOSZERO.
|
||||
|
||||
|
||||
Once-only (atomic) groups
|
||||
-------------------------
|
||||
|
||||
These are just like other subpatterns, but they start with the opcode
|
||||
OP_ONCE_NC or OP_ONCE. The former is used when there are no capturing brackets
|
||||
within the atomic group; the latter when there are. The distinction is needed
|
||||
for when there is a backtrack to before the group - any captures within the
|
||||
group must be reset, so it is necessary to retain backtracking points inside
|
||||
the group, even after it is complete, in order to do this. When there are no
|
||||
captures in an atomic group, all the backtracking can be discarded when it is
|
||||
complete. This is more efficient, and also uses less stack.
|
||||
|
||||
The check for matching an empty string in an unbounded repeat is handled
|
||||
entirely at runtime, so there are just these two opcodes for atomic groups.
|
||||
|
||||
|
||||
Assertions
|
||||
----------
|
||||
|
||||
Forward assertions are also just like other subpatterns, but starting with one
|
||||
of the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes
|
||||
OP_ASSERTBACK and OP_ASSERTBACK_NOT, and the first opcode inside the assertion
|
||||
is OP_REVERSE, followed by a count of the number of characters to move back the
|
||||
pointer in the subject string. In ASCII or UTF-32 mode, the count is also the
|
||||
number of code units, but in UTF-8/16 mode each character may occupy more than
|
||||
one code unit. A separate count is present in each alternative of a lookbehind
|
||||
assertion, allowing them to have different (but fixed) lengths.
|
||||
|
||||
|
||||
Conditional subpatterns
|
||||
-----------------------
|
||||
|
||||
These are like other subpatterns, but they start with the opcode OP_COND, or
|
||||
OP_SCOND for one that might match an empty string in an unbounded repeat.
|
||||
|
||||
If the condition is a back reference, this is stored at the start of the
|
||||
subpattern using the opcode OP_CREF followed by a count containing the
|
||||
reference number, provided that the reference is to a unique capturing group.
|
||||
If the reference was by name and there is more than one group with that name,
|
||||
OP_DNCREF is used instead. It is followed by two counts: the index in the group
|
||||
names table, and the number of groups with the same name. This allows the
|
||||
matcher to check if any group with the given name is set.
|
||||
|
||||
If the condition is "in recursion" (coded as "(?(R)"), or "in recursion of
|
||||
group x" (coded as "(?(Rx)"), the group number is stored at the start of the
|
||||
subpattern using the opcode OP_RREF (with a value of RREF_ANY (0xffff) for "the
|
||||
whole pattern") or OP_DNRREF (with data as for OP_DNCREF).
|
||||
|
||||
For a DEFINE condition, OP_FALSE is used (with no associated data). During
|
||||
compilation, however, a DEFINE condition is coded as OP_DEFINE so that, when
|
||||
the conditional group is complete, there can be a check to ensure that it
|
||||
contains only one top-level branch. Once this has happened, the opcode is
|
||||
changed to OP_FALSE, so the matcher never sees OP_DEFINE.
|
||||
|
||||
There is a special PCRE2-specific condition of the form (VERSION[>]=x.y), which
|
||||
tests the PCRE2 version number. This compiles into one of the opcodes OP_TRUE
|
||||
or OP_FALSE.
|
||||
|
||||
If a condition is not a back reference, recursion test, DEFINE, or VERSION, it
|
||||
must start with an assertion, whose opcode normally immediately follows OP_COND
|
||||
or OP_SCOND. However, if automatic callouts are enabled, a callout is inserted
|
||||
immediately before the assertion. It is also possible to insert a manual
|
||||
callout at this point. Only assertion conditions may have callouts preceding
|
||||
the condition.
|
||||
|
||||
A condition that is the negative assertion (?!) is optimized to OP_FAIL in all
|
||||
parts of the pattern, so this is another opcode that may appear as a condition.
|
||||
It is treated the same as OP_FALSE.
|
||||
|
||||
|
||||
Recursion
|
||||
---------
|
||||
|
||||
Recursion either matches the current pattern, or some subexpression. The opcode
|
||||
OP_RECURSE is followed by a LINK_SIZE value that is the offset to the starting
|
||||
bracket from the start of the whole pattern. OP_RECURSE is also used for
|
||||
"subroutine" calls, even though they are not strictly a recursion. Repeated
|
||||
recursions are automatically wrapped inside OP_ONCE brackets, because otherwise
|
||||
some patterns broke them. A non-repeated recursion is not wrapped in OP_ONCE
|
||||
brackets, but it is nevertheless still treated as an atomic group.
|
||||
|
||||
|
||||
Callout
|
||||
-------
|
||||
|
||||
A callout can nowadays have either a numerical argument or a string argument.
|
||||
These use OP_CALLOUT or OP_CALLOUT_STR, respectively. In each case these are
|
||||
followed by two LINK_SIZE values giving the offset in the pattern string to the
|
||||
start of the following item, and another count giving the length of this item.
|
||||
These values make it possible for pcre2test to output useful tracing
|
||||
information using callouts.
|
||||
|
||||
In the case of a numeric callout, after these two values there is a single code
|
||||
unit containing the callout number, in the range 0-255, with 255 being used for
|
||||
callouts that are automatically inserted as a result of the PCRE2_AUTO_CALLOUT
|
||||
option. Thus, this opcode item is of fixed length:
|
||||
|
||||
[OP_CALLOUT] [PATTERN_OFFSET] [PATTERN_LENGTH] [NUMBER]
|
||||
|
||||
For callouts with string arguments, OP_CALLOUT_STR has three more data items:
|
||||
a LINK_SIZE value giving the complete length of the entire opcode item, a
|
||||
LINK_SIZE item containing the offset within the pattern string to the start of
|
||||
the string argument, and the string itself, preceded by its starting delimiter
|
||||
and followed by a binary zero. When a callout function is called, a pointer to
|
||||
the actual string is passed, but the delimiter can be accessed as string[-1] if
|
||||
the application needs it. In the 8-bit library, the callout in /X(?C'abc')Y/ is
|
||||
compiled as the following bytes (decimal numbers represent binary values):
|
||||
|
||||
[OP_CALLOUT_STR] [0] [10] [0] [1] [0] [14] [0] [5] ['] [a] [b] [c] [0]
|
||||
-------- ------- -------- -------
|
||||
| | | |
|
||||
------- LINK_SIZE items ------
|
||||
|
||||
Opcode table checking
|
||||
---------------------
|
||||
|
||||
The last opcode that is defined in pcre2_internal.h is OP_TABLE_LENGTH. This is
|
||||
not a real opcode, but is used to check that tables indexed by opcode are the
|
||||
correct length, in order to catch updating errors.
|
||||
|
||||
Philip Hazel
|
||||
June 2015
|
370
pcre2/INSTALL
Normal file
370
pcre2/INSTALL
Normal file
@ -0,0 +1,370 @@
|
||||
Installation Instructions
|
||||
*************************
|
||||
|
||||
Copyright (C) 1994-1996, 1999-2002, 2004-2013 Free Software Foundation,
|
||||
Inc.
|
||||
|
||||
Copying and distribution of this file, with or without modification,
|
||||
are permitted in any medium without royalty provided the copyright
|
||||
notice and this notice are preserved. This file is offered as-is,
|
||||
without warranty of any kind.
|
||||
|
||||
Basic Installation
|
||||
==================
|
||||
|
||||
Briefly, the shell command `./configure && make && make install'
|
||||
should configure, build, and install this package. The following
|
||||
more-detailed instructions are generic; see the `README' file for
|
||||
instructions specific to this package. Some packages provide this
|
||||
`INSTALL' file but do not implement all of the features documented
|
||||
below. The lack of an optional feature in a given package is not
|
||||
necessarily a bug. More recommendations for GNU packages can be found
|
||||
in *note Makefile Conventions: (standards)Makefile Conventions.
|
||||
|
||||
The `configure' shell script attempts to guess correct values for
|
||||
various system-dependent variables used during compilation. It uses
|
||||
those values to create a `Makefile' in each directory of the package.
|
||||
It may also create one or more `.h' files containing system-dependent
|
||||
definitions. Finally, it creates a shell script `config.status' that
|
||||
you can run in the future to recreate the current configuration, and a
|
||||
file `config.log' containing compiler output (useful mainly for
|
||||
debugging `configure').
|
||||
|
||||
It can also use an optional file (typically called `config.cache'
|
||||
and enabled with `--cache-file=config.cache' or simply `-C') that saves
|
||||
the results of its tests to speed up reconfiguring. Caching is
|
||||
disabled by default to prevent problems with accidental use of stale
|
||||
cache files.
|
||||
|
||||
If you need to do unusual things to compile the package, please try
|
||||
to figure out how `configure' could check whether to do them, and mail
|
||||
diffs or instructions to the address given in the `README' so they can
|
||||
be considered for the next release. If you are using the cache, and at
|
||||
some point `config.cache' contains results you don't want to keep, you
|
||||
may remove or edit it.
|
||||
|
||||
The file `configure.ac' (or `configure.in') is used to create
|
||||
`configure' by a program called `autoconf'. You need `configure.ac' if
|
||||
you want to change it or regenerate `configure' using a newer version
|
||||
of `autoconf'.
|
||||
|
||||
The simplest way to compile this package is:
|
||||
|
||||
1. `cd' to the directory containing the package's source code and type
|
||||
`./configure' to configure the package for your system.
|
||||
|
||||
Running `configure' might take a while. While running, it prints
|
||||
some messages telling which features it is checking for.
|
||||
|
||||
2. Type `make' to compile the package.
|
||||
|
||||
3. Optionally, type `make check' to run any self-tests that come with
|
||||
the package, generally using the just-built uninstalled binaries.
|
||||
|
||||
4. Type `make install' to install the programs and any data files and
|
||||
documentation. When installing into a prefix owned by root, it is
|
||||
recommended that the package be configured and built as a regular
|
||||
user, and only the `make install' phase executed with root
|
||||
privileges.
|
||||
|
||||
5. Optionally, type `make installcheck' to repeat any self-tests, but
|
||||
this time using the binaries in their final installed location.
|
||||
This target does not install anything. Running this target as a
|
||||
regular user, particularly if the prior `make install' required
|
||||
root privileges, verifies that the installation completed
|
||||
correctly.
|
||||
|
||||
6. You can remove the program binaries and object files from the
|
||||
source code directory by typing `make clean'. To also remove the
|
||||
files that `configure' created (so you can compile the package for
|
||||
a different kind of computer), type `make distclean'. There is
|
||||
also a `make maintainer-clean' target, but that is intended mainly
|
||||
for the package's developers. If you use it, you may have to get
|
||||
all sorts of other programs in order to regenerate files that came
|
||||
with the distribution.
|
||||
|
||||
7. Often, you can also type `make uninstall' to remove the installed
|
||||
files again. In practice, not all packages have tested that
|
||||
uninstallation works correctly, even though it is required by the
|
||||
GNU Coding Standards.
|
||||
|
||||
8. Some packages, particularly those that use Automake, provide `make
|
||||
distcheck', which can be used by developers to test that all other
|
||||
targets like `make install' and `make uninstall' work correctly.
|
||||
This target is generally not run by end users.
|
||||
|
||||
Compilers and Options
|
||||
=====================
|
||||
|
||||
Some systems require unusual options for compilation or linking that
|
||||
the `configure' script does not know about. Run `./configure --help'
|
||||
for details on some of the pertinent environment variables.
|
||||
|
||||
You can give `configure' initial values for configuration parameters
|
||||
by setting variables in the command line or in the environment. Here
|
||||
is an example:
|
||||
|
||||
./configure CC=c99 CFLAGS=-g LIBS=-lposix
|
||||
|
||||
*Note Defining Variables::, for more details.
|
||||
|
||||
Compiling For Multiple Architectures
|
||||
====================================
|
||||
|
||||
You can compile the package for more than one kind of computer at the
|
||||
same time, by placing the object files for each architecture in their
|
||||
own directory. To do this, you can use GNU `make'. `cd' to the
|
||||
directory where you want the object files and executables to go and run
|
||||
the `configure' script. `configure' automatically checks for the
|
||||
source code in the directory that `configure' is in and in `..'. This
|
||||
is known as a "VPATH" build.
|
||||
|
||||
With a non-GNU `make', it is safer to compile the package for one
|
||||
architecture at a time in the source code directory. After you have
|
||||
installed the package for one architecture, use `make distclean' before
|
||||
reconfiguring for another architecture.
|
||||
|
||||
On MacOS X 10.5 and later systems, you can create libraries and
|
||||
executables that work on multiple system types--known as "fat" or
|
||||
"universal" binaries--by specifying multiple `-arch' options to the
|
||||
compiler but only a single `-arch' option to the preprocessor. Like
|
||||
this:
|
||||
|
||||
./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
|
||||
CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
|
||||
CPP="gcc -E" CXXCPP="g++ -E"
|
||||
|
||||
This is not guaranteed to produce working output in all cases; you
|
||||
may have to build one architecture at a time and combine the results
|
||||
using the `lipo' tool if you have problems.
|
||||
|
||||
Installation Names
|
||||
==================
|
||||
|
||||
By default, `make install' installs the package's commands under
|
||||
`/usr/local/bin', include files under `/usr/local/include', etc. You
|
||||
can specify an installation prefix other than `/usr/local' by giving
|
||||
`configure' the option `--prefix=PREFIX', where PREFIX must be an
|
||||
absolute file name.
|
||||
|
||||
You can specify separate installation prefixes for
|
||||
architecture-specific files and architecture-independent files. If you
|
||||
pass the option `--exec-prefix=PREFIX' to `configure', the package uses
|
||||
PREFIX as the prefix for installing programs and libraries.
|
||||
Documentation and other data files still use the regular prefix.
|
||||
|
||||
In addition, if you use an unusual directory layout you can give
|
||||
options like `--bindir=DIR' to specify different values for particular
|
||||
kinds of files. Run `configure --help' for a list of the directories
|
||||
you can set and what kinds of files go in them. In general, the
|
||||
default for these options is expressed in terms of `${prefix}', so that
|
||||
specifying just `--prefix' will affect all of the other directory
|
||||
specifications that were not explicitly provided.
|
||||
|
||||
The most portable way to affect installation locations is to pass the
|
||||
correct locations to `configure'; however, many packages provide one or
|
||||
both of the following shortcuts of passing variable assignments to the
|
||||
`make install' command line to change installation locations without
|
||||
having to reconfigure or recompile.
|
||||
|
||||
The first method involves providing an override variable for each
|
||||
affected directory. For example, `make install
|
||||
prefix=/alternate/directory' will choose an alternate location for all
|
||||
directory configuration variables that were expressed in terms of
|
||||
`${prefix}'. Any directories that were specified during `configure',
|
||||
but not in terms of `${prefix}', must each be overridden at install
|
||||
time for the entire installation to be relocated. The approach of
|
||||
makefile variable overrides for each directory variable is required by
|
||||
the GNU Coding Standards, and ideally causes no recompilation.
|
||||
However, some platforms have known limitations with the semantics of
|
||||
shared libraries that end up requiring recompilation when using this
|
||||
method, particularly noticeable in packages that use GNU Libtool.
|
||||
|
||||
The second method involves providing the `DESTDIR' variable. For
|
||||
example, `make install DESTDIR=/alternate/directory' will prepend
|
||||
`/alternate/directory' before all installation names. The approach of
|
||||
`DESTDIR' overrides is not required by the GNU Coding Standards, and
|
||||
does not work on platforms that have drive letters. On the other hand,
|
||||
it does better at avoiding recompilation issues, and works well even
|
||||
when some directory options were not specified in terms of `${prefix}'
|
||||
at `configure' time.
|
||||
|
||||
Optional Features
|
||||
=================
|
||||
|
||||
If the package supports it, you can cause programs to be installed
|
||||
with an extra prefix or suffix on their names by giving `configure' the
|
||||
option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
|
||||
|
||||
Some packages pay attention to `--enable-FEATURE' options to
|
||||
`configure', where FEATURE indicates an optional part of the package.
|
||||
They may also pay attention to `--with-PACKAGE' options, where PACKAGE
|
||||
is something like `gnu-as' or `x' (for the X Window System). The
|
||||
`README' should mention any `--enable-' and `--with-' options that the
|
||||
package recognizes.
|
||||
|
||||
For packages that use the X Window System, `configure' can usually
|
||||
find the X include and library files automatically, but if it doesn't,
|
||||
you can use the `configure' options `--x-includes=DIR' and
|
||||
`--x-libraries=DIR' to specify their locations.
|
||||
|
||||
Some packages offer the ability to configure how verbose the
|
||||
execution of `make' will be. For these packages, running `./configure
|
||||
--enable-silent-rules' sets the default to minimal output, which can be
|
||||
overridden with `make V=1'; while running `./configure
|
||||
--disable-silent-rules' sets the default to verbose, which can be
|
||||
overridden with `make V=0'.
|
||||
|
||||
Particular systems
|
||||
==================
|
||||
|
||||
On HP-UX, the default C compiler is not ANSI C compatible. If GNU
|
||||
CC is not installed, it is recommended to use the following options in
|
||||
order to use an ANSI C compiler:
|
||||
|
||||
./configure CC="cc -Ae -D_XOPEN_SOURCE=500"
|
||||
|
||||
and if that doesn't work, install pre-built binaries of GCC for HP-UX.
|
||||
|
||||
HP-UX `make' updates targets which have the same time stamps as
|
||||
their prerequisites, which makes it generally unusable when shipped
|
||||
generated files such as `configure' are involved. Use GNU `make'
|
||||
instead.
|
||||
|
||||
On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot
|
||||
parse its `<wchar.h>' header file. The option `-nodtk' can be used as
|
||||
a workaround. If GNU CC is not installed, it is therefore recommended
|
||||
to try
|
||||
|
||||
./configure CC="cc"
|
||||
|
||||
and if that doesn't work, try
|
||||
|
||||
./configure CC="cc -nodtk"
|
||||
|
||||
On Solaris, don't put `/usr/ucb' early in your `PATH'. This
|
||||
directory contains several dysfunctional programs; working variants of
|
||||
these programs are available in `/usr/bin'. So, if you need `/usr/ucb'
|
||||
in your `PATH', put it _after_ `/usr/bin'.
|
||||
|
||||
On Haiku, software installed for all users goes in `/boot/common',
|
||||
not `/usr/local'. It is recommended to use the following options:
|
||||
|
||||
./configure --prefix=/boot/common
|
||||
|
||||
Specifying the System Type
|
||||
==========================
|
||||
|
||||
There may be some features `configure' cannot figure out
|
||||
automatically, but needs to determine by the type of machine the package
|
||||
will run on. Usually, assuming the package is built to be run on the
|
||||
_same_ architectures, `configure' can figure that out, but if it prints
|
||||
a message saying it cannot guess the machine type, give it the
|
||||
`--build=TYPE' option. TYPE can either be a short name for the system
|
||||
type, such as `sun4', or a canonical name which has the form:
|
||||
|
||||
CPU-COMPANY-SYSTEM
|
||||
|
||||
where SYSTEM can have one of these forms:
|
||||
|
||||
OS
|
||||
KERNEL-OS
|
||||
|
||||
See the file `config.sub' for the possible values of each field. If
|
||||
`config.sub' isn't included in this package, then this package doesn't
|
||||
need to know the machine type.
|
||||
|
||||
If you are _building_ compiler tools for cross-compiling, you should
|
||||
use the option `--target=TYPE' to select the type of system they will
|
||||
produce code for.
|
||||
|
||||
If you want to _use_ a cross compiler, that generates code for a
|
||||
platform different from the build platform, you should specify the
|
||||
"host" platform (i.e., that on which the generated programs will
|
||||
eventually be run) with `--host=TYPE'.
|
||||
|
||||
Sharing Defaults
|
||||
================
|
||||
|
||||
If you want to set default values for `configure' scripts to share,
|
||||
you can create a site shell script called `config.site' that gives
|
||||
default values for variables like `CC', `cache_file', and `prefix'.
|
||||
`configure' looks for `PREFIX/share/config.site' if it exists, then
|
||||
`PREFIX/etc/config.site' if it exists. Or, you can set the
|
||||
`CONFIG_SITE' environment variable to the location of the site script.
|
||||
A warning: not all `configure' scripts look for a site script.
|
||||
|
||||
Defining Variables
|
||||
==================
|
||||
|
||||
Variables not defined in a site shell script can be set in the
|
||||
environment passed to `configure'. However, some packages may run
|
||||
configure again during the build, and the customized values of these
|
||||
variables may be lost. In order to avoid this problem, you should set
|
||||
them in the `configure' command line, using `VAR=value'. For example:
|
||||
|
||||
./configure CC=/usr/local2/bin/gcc
|
||||
|
||||
causes the specified `gcc' to be used as the C compiler (unless it is
|
||||
overridden in the site shell script).
|
||||
|
||||
Unfortunately, this technique does not work for `CONFIG_SHELL' due to
|
||||
an Autoconf limitation. Until the limitation is lifted, you can use
|
||||
this workaround:
|
||||
|
||||
CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash
|
||||
|
||||
`configure' Invocation
|
||||
======================
|
||||
|
||||
`configure' recognizes the following options to control how it
|
||||
operates.
|
||||
|
||||
`--help'
|
||||
`-h'
|
||||
Print a summary of all of the options to `configure', and exit.
|
||||
|
||||
`--help=short'
|
||||
`--help=recursive'
|
||||
Print a summary of the options unique to this package's
|
||||
`configure', and exit. The `short' variant lists options used
|
||||
only in the top level, while the `recursive' variant lists options
|
||||
also present in any nested packages.
|
||||
|
||||
`--version'
|
||||
`-V'
|
||||
Print the version of Autoconf used to generate the `configure'
|
||||
script, and exit.
|
||||
|
||||
`--cache-file=FILE'
|
||||
Enable the cache: use and save the results of the tests in FILE,
|
||||
traditionally `config.cache'. FILE defaults to `/dev/null' to
|
||||
disable caching.
|
||||
|
||||
`--config-cache'
|
||||
`-C'
|
||||
Alias for `--cache-file=config.cache'.
|
||||
|
||||
`--quiet'
|
||||
`--silent'
|
||||
`-q'
|
||||
Do not print messages saying which checks are being made. To
|
||||
suppress all normal output, redirect it to `/dev/null' (any error
|
||||
messages will still be shown).
|
||||
|
||||
`--srcdir=DIR'
|
||||
Look for the package's source code in directory DIR. Usually
|
||||
`configure' can determine that directory automatically.
|
||||
|
||||
`--prefix=DIR'
|
||||
Use DIR as the installation prefix.  See `Installation Names'
|
||||
for more details, including other options available for fine-tuning
|
||||
the installation locations.
|
||||
|
||||
`--no-create'
|
||||
`-n'
|
||||
Run the configure checks, but stop before creating any output
|
||||
files.
|
||||
|
||||
`configure' also accepts some other, not widely useful, options. Run
|
||||
`configure --help' for more details.
|
83
pcre2/LICENCE
Normal file
83
pcre2/LICENCE
Normal file
@ -0,0 +1,83 @@
|
||||
PCRE2 LICENCE
|
||||
-------------
|
||||
|
||||
PCRE2 is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Release 10 of PCRE2 is distributed under the terms of the "BSD" licence, as
|
||||
specified below. The documentation for PCRE2, supplied in the "doc"
|
||||
directory, is distributed under the same terms as the software itself. The data
|
||||
in the testdata directory is not copyrighted and is in the public domain.
|
||||
|
||||
The basic library functions are written in C and are freestanding. Also
|
||||
included in the distribution is a just-in-time compiler that can be used to
|
||||
optimize pattern matching. This is an optional feature that can be omitted when
|
||||
the library is built.
|
||||
|
||||
|
||||
THE BASIC LIBRARY FUNCTIONS
|
||||
---------------------------
|
||||
|
||||
Written by: Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2015 University of Cambridge
|
||||
All rights reserved.
|
||||
|
||||
|
||||
PCRE2 JUST-IN-TIME COMPILATION SUPPORT
|
||||
--------------------------------------
|
||||
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Email domain:     freemail.hu
|
||||
|
||||
Copyright(c) 2010-2015 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
STACK-LESS JUST-IN-TIME COMPILER
|
||||
--------------------------------
|
||||
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Email domain:     freemail.hu
|
||||
|
||||
Copyright(c) 2009-2015 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
THE "BSD" LICENCE
|
||||
-----------------
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of any
|
||||
contributors may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
End
|
774
pcre2/Makefile.am
Normal file
774
pcre2/Makefile.am
Normal file
@ -0,0 +1,774 @@
|
||||
## Process this file with automake to produce Makefile.in.
|
||||
|
||||
AUTOMAKE_OPTIONS = subdir-objects
|
||||
ACLOCAL_AMFLAGS = -I m4
|
||||
AM_CPPFLAGS = -I$(builddir)/src -I$(srcdir)/src
|
||||
|
||||
## Specify the documentation files that are distributed.
|
||||
|
||||
dist_doc_DATA = \
|
||||
AUTHORS \
|
||||
COPYING \
|
||||
ChangeLog \
|
||||
LICENCE \
|
||||
NEWS \
|
||||
README \
|
||||
doc/pcre2.txt \
|
||||
doc/pcre2-config.txt \
|
||||
doc/pcre2grep.txt \
|
||||
doc/pcre2test.txt
|
||||
|
||||
dist_html_DATA = \
|
||||
doc/html/NON-AUTOTOOLS-BUILD.txt \
|
||||
doc/html/README.txt \
|
||||
doc/html/index.html \
|
||||
doc/html/pcre2-config.html \
|
||||
doc/html/pcre2.html \
|
||||
doc/html/pcre2_callout_enumerate.html \
|
||||
doc/html/pcre2_code_free.html \
|
||||
doc/html/pcre2_compile.html \
|
||||
doc/html/pcre2_compile_context_copy.html \
|
||||
doc/html/pcre2_compile_context_create.html \
|
||||
doc/html/pcre2_compile_context_free.html \
|
||||
doc/html/pcre2_config.html \
|
||||
doc/html/pcre2_dfa_match.html \
|
||||
doc/html/pcre2_general_context_copy.html \
|
||||
doc/html/pcre2_general_context_create.html \
|
||||
doc/html/pcre2_general_context_free.html \
|
||||
doc/html/pcre2_get_error_message.html \
|
||||
doc/html/pcre2_get_mark.html \
|
||||
doc/html/pcre2_get_ovector_count.html \
|
||||
doc/html/pcre2_get_ovector_pointer.html \
|
||||
doc/html/pcre2_get_startchar.html \
|
||||
doc/html/pcre2_jit_compile.html \
|
||||
doc/html/pcre2_jit_free_unused_memory.html \
|
||||
doc/html/pcre2_jit_match.html \
|
||||
doc/html/pcre2_jit_stack_assign.html \
|
||||
doc/html/pcre2_jit_stack_create.html \
|
||||
doc/html/pcre2_jit_stack_free.html \
|
||||
doc/html/pcre2_maketables.html \
|
||||
doc/html/pcre2_match.html \
|
||||
doc/html/pcre2_match_context_copy.html \
|
||||
doc/html/pcre2_match_context_create.html \
|
||||
doc/html/pcre2_match_context_free.html \
|
||||
doc/html/pcre2_match_data_create.html \
|
||||
doc/html/pcre2_match_data_create_from_pattern.html \
|
||||
doc/html/pcre2_match_data_free.html \
|
||||
doc/html/pcre2_pattern_info.html \
|
||||
doc/html/pcre2_serialize_decode.html \
|
||||
doc/html/pcre2_serialize_encode.html \
|
||||
doc/html/pcre2_serialize_free.html \
|
||||
doc/html/pcre2_serialize_get_number_of_codes.html \
|
||||
doc/html/pcre2_set_bsr.html \
|
||||
doc/html/pcre2_set_callout.html \
|
||||
doc/html/pcre2_set_character_tables.html \
|
||||
doc/html/pcre2_set_compile_recursion_guard.html \
|
||||
doc/html/pcre2_set_match_limit.html \
|
||||
doc/html/pcre2_set_newline.html \
|
||||
doc/html/pcre2_set_parens_nest_limit.html \
|
||||
doc/html/pcre2_set_recursion_limit.html \
|
||||
doc/html/pcre2_set_recursion_memory_management.html \
|
||||
doc/html/pcre2_substitute.html \
|
||||
doc/html/pcre2_substring_copy_byname.html \
|
||||
doc/html/pcre2_substring_copy_bynumber.html \
|
||||
doc/html/pcre2_substring_free.html \
|
||||
doc/html/pcre2_substring_get_byname.html \
|
||||
doc/html/pcre2_substring_get_bynumber.html \
|
||||
doc/html/pcre2_substring_length_byname.html \
|
||||
doc/html/pcre2_substring_length_bynumber.html \
|
||||
doc/html/pcre2_substring_list_free.html \
|
||||
doc/html/pcre2_substring_list_get.html \
|
||||
doc/html/pcre2_substring_nametable_scan.html \
|
||||
doc/html/pcre2_substring_number_from_name.html \
|
||||
doc/html/pcre2api.html \
|
||||
doc/html/pcre2build.html \
|
||||
doc/html/pcre2callout.html \
|
||||
doc/html/pcre2compat.html \
|
||||
doc/html/pcre2demo.html \
|
||||
doc/html/pcre2grep.html \
|
||||
doc/html/pcre2jit.html \
|
||||
doc/html/pcre2limits.html \
|
||||
doc/html/pcre2matching.html \
|
||||
doc/html/pcre2partial.html \
|
||||
doc/html/pcre2pattern.html \
|
||||
doc/html/pcre2perform.html \
|
||||
doc/html/pcre2posix.html \
|
||||
doc/html/pcre2sample.html \
|
||||
doc/html/pcre2serialize.html \
|
||||
doc/html/pcre2stack.html \
|
||||
doc/html/pcre2syntax.html \
|
||||
doc/html/pcre2test.html \
|
||||
doc/html/pcre2unicode.html
|
||||
|
||||
dist_man_MANS = \
|
||||
doc/pcre2-config.1 \
|
||||
doc/pcre2.3 \
|
||||
doc/pcre2_callout_enumerate.3 \
|
||||
doc/pcre2_code_free.3 \
|
||||
doc/pcre2_compile.3 \
|
||||
doc/pcre2_compile_context_copy.3 \
|
||||
doc/pcre2_compile_context_create.3 \
|
||||
doc/pcre2_compile_context_free.3 \
|
||||
doc/pcre2_config.3 \
|
||||
doc/pcre2_dfa_match.3 \
|
||||
doc/pcre2_general_context_copy.3 \
|
||||
doc/pcre2_general_context_create.3 \
|
||||
doc/pcre2_general_context_free.3 \
|
||||
doc/pcre2_get_error_message.3 \
|
||||
doc/pcre2_get_mark.3 \
|
||||
doc/pcre2_get_ovector_count.3 \
|
||||
doc/pcre2_get_ovector_pointer.3 \
|
||||
doc/pcre2_get_startchar.3 \
|
||||
doc/pcre2_jit_compile.3 \
|
||||
doc/pcre2_jit_free_unused_memory.3 \
|
||||
doc/pcre2_jit_match.3 \
|
||||
doc/pcre2_jit_stack_assign.3 \
|
||||
doc/pcre2_jit_stack_create.3 \
|
||||
doc/pcre2_jit_stack_free.3 \
|
||||
doc/pcre2_maketables.3 \
|
||||
doc/pcre2_match.3 \
|
||||
doc/pcre2_match_context_copy.3 \
|
||||
doc/pcre2_match_context_create.3 \
|
||||
doc/pcre2_match_context_free.3 \
|
||||
doc/pcre2_match_data_create.3 \
|
||||
doc/pcre2_match_data_create_from_pattern.3 \
|
||||
doc/pcre2_match_data_free.3 \
|
||||
doc/pcre2_pattern_info.3 \
|
||||
doc/pcre2_serialize_decode.3 \
|
||||
doc/pcre2_serialize_encode.3 \
|
||||
doc/pcre2_serialize_free.3 \
|
||||
doc/pcre2_serialize_get_number_of_codes.3 \
|
||||
doc/pcre2_set_bsr.3 \
|
||||
doc/pcre2_set_callout.3 \
|
||||
doc/pcre2_set_character_tables.3 \
|
||||
doc/pcre2_set_compile_recursion_guard.3 \
|
||||
doc/pcre2_set_match_limit.3 \
|
||||
doc/pcre2_set_newline.3 \
|
||||
doc/pcre2_set_parens_nest_limit.3 \
|
||||
doc/pcre2_set_recursion_limit.3 \
|
||||
doc/pcre2_set_recursion_memory_management.3 \
|
||||
doc/pcre2_substitute.3 \
|
||||
doc/pcre2_substring_copy_byname.3 \
|
||||
doc/pcre2_substring_copy_bynumber.3 \
|
||||
doc/pcre2_substring_free.3 \
|
||||
doc/pcre2_substring_get_byname.3 \
|
||||
doc/pcre2_substring_get_bynumber.3 \
|
||||
doc/pcre2_substring_length_byname.3 \
|
||||
doc/pcre2_substring_length_bynumber.3 \
|
||||
doc/pcre2_substring_list_free.3 \
|
||||
doc/pcre2_substring_list_get.3 \
|
||||
doc/pcre2_substring_nametable_scan.3 \
|
||||
doc/pcre2_substring_number_from_name.3 \
|
||||
doc/pcre2api.3 \
|
||||
doc/pcre2build.3 \
|
||||
doc/pcre2callout.3 \
|
||||
doc/pcre2compat.3 \
|
||||
doc/pcre2demo.3 \
|
||||
doc/pcre2grep.1 \
|
||||
doc/pcre2jit.3 \
|
||||
doc/pcre2limits.3 \
|
||||
doc/pcre2matching.3 \
|
||||
doc/pcre2partial.3 \
|
||||
doc/pcre2pattern.3 \
|
||||
doc/pcre2perform.3 \
|
||||
doc/pcre2posix.3 \
|
||||
doc/pcre2sample.3 \
|
||||
doc/pcre2serialize.3 \
|
||||
doc/pcre2stack.3 \
|
||||
doc/pcre2syntax.3 \
|
||||
doc/pcre2test.1 \
|
||||
doc/pcre2unicode.3
|
||||
|
||||
# The Libtool libraries to install. We'll add to this later.
|
||||
|
||||
lib_LTLIBRARIES =
|
||||
|
||||
# Unit tests you want to run when people type 'make check'.
|
||||
# TESTS is for binary unit tests, check_SCRIPTS for script-based tests
|
||||
|
||||
TESTS =
|
||||
check_SCRIPTS =
|
||||
dist_noinst_SCRIPTS =
|
||||
|
||||
# Some of the binaries we make are to be installed, and others are
|
||||
# (non-user-visible) helper programs needed to build the libraries.
|
||||
|
||||
bin_PROGRAMS =
|
||||
noinst_PROGRAMS =
|
||||
|
||||
# Additional files to delete on 'make clean', 'make distclean',
|
||||
# and 'make maintainer-clean'.
|
||||
|
||||
CLEANFILES =
|
||||
DISTCLEANFILES = src/config.h.in~ config.h
|
||||
MAINTAINERCLEANFILES =
|
||||
|
||||
# Additional files to bundle with the distribution, over and above what
|
||||
# the Autotools include by default.
|
||||
|
||||
EXTRA_DIST =
|
||||
|
||||
# These files contain additional m4 macros that are used by autoconf.
|
||||
|
||||
EXTRA_DIST += \
|
||||
m4/ax_pthread.m4 m4/pcre2_visibility.m4
|
||||
|
||||
# These files contain maintenance information
|
||||
|
||||
EXTRA_DIST += \
|
||||
NON-AUTOTOOLS-BUILD \
|
||||
HACKING
|
||||
|
||||
# These files are used in the preparation of a release
|
||||
|
||||
EXTRA_DIST += \
|
||||
PrepareRelease \
|
||||
CheckMan \
|
||||
CleanTxt \
|
||||
Detrail \
|
||||
132html \
|
||||
doc/index.html.src
|
||||
|
||||
# These files are usable versions of pcre2.h and config.h that are distributed
|
||||
# for the benefit of people who are building PCRE2 manually, without the
|
||||
# Autotools support.
|
||||
|
||||
EXTRA_DIST += \
|
||||
src/pcre2.h.generic \
|
||||
src/config.h.generic
|
||||
|
||||
# The only difference between pcre2.h.in and pcre2.h is the setting of the PCRE
|
||||
# version number. Therefore, we can create the generic version just by copying.
|
||||
|
||||
src/pcre2.h.generic: src/pcre2.h.in configure.ac
|
||||
rm -f $@
|
||||
cp -p src/pcre2.h $@
|
||||
|
||||
# It is more complicated for config.h.generic. We need the version that results
|
||||
# from a default configuration so as to get all the default values for PCRE
|
||||
# configuration macros such as MATCH_LIMIT and NEWLINE. We can get this by
|
||||
# doing a configure in a temporary directory. However, some trickery is needed,
|
||||
# because the source directory may already be configured. If you just try
|
||||
# running configure in a new directory, it complains. For this reason, we move
|
||||
# config.status out of the way while doing the default configuration. The
|
||||
# resulting config.h is munged by perl to put #ifdefs round any #defines for
|
||||
# macros with values, and to #undef all boolean macros such as HAVE_xxx and
|
||||
# SUPPORT_xxx. We also get rid of any gcc-specific visibility settings. Make
|
||||
# sure that PCRE2_EXP_DEFN is unset (in case it has visibility settings).
|
||||
|
||||
src/config.h.generic: configure.ac
|
||||
rm -rf $@ _generic
|
||||
mkdir _generic
|
||||
cs=$(srcdir)/config.status; test ! -f $$cs || mv -f $$cs $$cs.aside
|
||||
cd _generic && $(abs_top_srcdir)/configure || :
|
||||
cs=$(srcdir)/config.status; test ! -f $$cs.aside || mv -f $$cs.aside $$cs
|
||||
test -f _generic/src/config.h
|
||||
perl -n \
|
||||
-e 'BEGIN{$$blank=0;}' \
|
||||
-e 'if(/PCRE2_EXP_DEFN/){print"/* #undef PCRE2_EXP_DEFN */\n";$$blank=0;next;}' \
|
||||
-e 'if(/to make a symbol visible/){next;}' \
|
||||
-e 'if(/__attribute__ \(\(visibility/){next;}' \
|
||||
-e 'if(/LT_OBJDIR/){print"/* This is ignored unless you are using libtool. */\n";}' \
|
||||
-e 'if(/^#define\s((?:HAVE|SUPPORT|STDC)_\w+)/){print"/* #undef $$1 */\n";$$blank=0;next;}' \
|
||||
-e 'if(/^#define\s(?!PACKAGE|VERSION)(\w+)/){print"#ifndef $$1\n$$_#endif\n";$$blank=0;next;}' \
|
||||
-e 'if(/^\s*$$/){print unless $$blank; $$blank=1;} else{print;$$blank=0;}' \
|
||||
_generic/src/config.h >$@
|
||||
rm -rf _generic
|
||||
|
||||
MAINTAINERCLEANFILES += src/pcre2.h.generic src/config.h.generic
|
||||
|
||||
# These are the header files we'll install. We do not distribute pcre2.h
|
||||
# because it is generated from pcre2.h.in.
|
||||
|
||||
nodist_include_HEADERS = src/pcre2.h
|
||||
include_HEADERS = src/pcre2posix.h
|
||||
|
||||
# This is the "config" script.
|
||||
|
||||
bin_SCRIPTS = pcre2-config
|
||||
|
||||
## ---------------------------------------------------------------
|
||||
## The dftables program is used to rebuild character tables before compiling
|
||||
## PCRE2, if --enable-rebuild-chartables is specified. It is not a user-visible
|
||||
## program. The default (when --enable-rebuild-chartables is not specified) is
|
||||
## to copy a distributed set of tables that are defined for ASCII code. In this
|
||||
## case, dftables is not needed.
|
||||
|
||||
if WITH_REBUILD_CHARTABLES
|
||||
noinst_PROGRAMS += dftables
|
||||
dftables_SOURCES = src/dftables.c
|
||||
src/pcre2_chartables.c: dftables$(EXEEXT)
|
||||
rm -f $@
|
||||
./dftables$(EXEEXT) $@
|
||||
else
|
||||
src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.dist
|
||||
rm -f $@
|
||||
$(LN_S) $(abs_srcdir)/src/pcre2_chartables.c.dist $(abs_builddir)/src/pcre2_chartables.c
|
||||
endif # WITH_REBUILD_CHARTABLES
|
||||
|
||||
BUILT_SOURCES = src/pcre2_chartables.c
|
||||
NODIST_SOURCES = src/pcre2_chartables.c
|
||||
|
||||
## Define the list of common sources, then arrange to build whichever of the
|
||||
## 8-, 16-, or 32-bit libraries are configured.
|
||||
|
||||
COMMON_SOURCES = \
|
||||
src/pcre2_auto_possess.c \
|
||||
src/pcre2_compile.c \
|
||||
src/pcre2_config.c \
|
||||
src/pcre2_context.c \
|
||||
src/pcre2_dfa_match.c \
|
||||
src/pcre2_error.c \
|
||||
src/pcre2_internal.h \
|
||||
src/pcre2_intmodedep.h \
|
||||
src/pcre2_jit_compile.c \
|
||||
src/pcre2_maketables.c \
|
||||
src/pcre2_match.c \
|
||||
src/pcre2_match_data.c \
|
||||
src/pcre2_newline.c \
|
||||
src/pcre2_ord2utf.c \
|
||||
src/pcre2_pattern_info.c \
|
||||
src/pcre2_serialize.c \
|
||||
src/pcre2_string_utils.c \
|
||||
src/pcre2_study.c \
|
||||
src/pcre2_substitute.c \
|
||||
src/pcre2_substring.c \
|
||||
src/pcre2_tables.c \
|
||||
src/pcre2_ucd.c \
|
||||
src/pcre2_ucp.h \
|
||||
src/pcre2_valid_utf.c \
|
||||
src/pcre2_xclass.c
|
||||
|
||||
if WITH_PCRE2_8
|
||||
lib_LTLIBRARIES += libpcre2-8.la
|
||||
libpcre2_8_la_SOURCES = \
|
||||
$(COMMON_SOURCES)
|
||||
nodist_libpcre2_8_la_SOURCES = \
|
||||
$(NODIST_SOURCES)
|
||||
libpcre2_8_la_CFLAGS = \
|
||||
-DPCRE2_CODE_UNIT_WIDTH=8 \
|
||||
$(VISIBILITY_CFLAGS) \
|
||||
$(AM_CFLAGS)
|
||||
libpcre2_8_la_LIBADD =
|
||||
endif # WITH_PCRE2_8
|
||||
|
||||
if WITH_PCRE2_16
|
||||
lib_LTLIBRARIES += libpcre2-16.la
|
||||
libpcre2_16_la_SOURCES = \
|
||||
$(COMMON_SOURCES)
|
||||
nodist_libpcre2_16_la_SOURCES = \
|
||||
$(NODIST_SOURCES)
|
||||
libpcre2_16_la_CFLAGS = \
|
||||
-DPCRE2_CODE_UNIT_WIDTH=16 \
|
||||
$(VISIBILITY_CFLAGS) \
|
||||
$(AM_CFLAGS)
|
||||
libpcre2_16_la_LIBADD =
|
||||
endif # WITH_PCRE2_16
|
||||
|
||||
if WITH_PCRE2_32
|
||||
lib_LTLIBRARIES += libpcre2-32.la
|
||||
libpcre2_32_la_SOURCES = \
|
||||
$(COMMON_SOURCES)
|
||||
nodist_libpcre2_32_la_SOURCES = \
|
||||
$(NODIST_SOURCES)
|
||||
libpcre2_32_la_CFLAGS = \
|
||||
-DPCRE2_CODE_UNIT_WIDTH=32 \
|
||||
$(VISIBILITY_CFLAGS) \
|
||||
$(AM_CFLAGS)
|
||||
libpcre2_32_la_LIBADD =
|
||||
endif # WITH_PCRE2_32
|
||||
|
||||
# The pcre2_chartables.c.dist file is the default version of
|
||||
# pcre2_chartables.c, used unless --enable-rebuild-chartables is specified.
|
||||
|
||||
EXTRA_DIST += src/pcre2_chartables.c.dist
|
||||
CLEANFILES += src/pcre2_chartables.c
|
||||
|
||||
# The JIT compiler lives in a separate directory, but its files are #included
|
||||
# when pcre2_jit_compile.c is processed, so they must be distributed.
|
||||
|
||||
EXTRA_DIST += \
|
||||
src/sljit/sljitConfig.h \
|
||||
src/sljit/sljitConfigInternal.h \
|
||||
src/sljit/sljitExecAllocator.c \
|
||||
src/sljit/sljitLir.c \
|
||||
src/sljit/sljitLir.h \
|
||||
src/sljit/sljitNativeARM_32.c \
|
||||
src/sljit/sljitNativeARM_64.c \
|
||||
src/sljit/sljitNativeARM_T2_32.c \
|
||||
src/sljit/sljitNativeMIPS_32.c \
|
||||
src/sljit/sljitNativeMIPS_64.c \
|
||||
src/sljit/sljitNativeMIPS_common.c \
|
||||
src/sljit/sljitNativePPC_32.c \
|
||||
src/sljit/sljitNativePPC_64.c \
|
||||
src/sljit/sljitNativePPC_common.c \
|
||||
src/sljit/sljitNativeSPARC_32.c \
|
||||
src/sljit/sljitNativeSPARC_common.c \
|
||||
src/sljit/sljitNativeTILEGX-encoder.c \
|
||||
src/sljit/sljitNativeTILEGX_64.c \
|
||||
src/sljit/sljitNativeX86_32.c \
|
||||
src/sljit/sljitNativeX86_64.c \
|
||||
src/sljit/sljitNativeX86_common.c \
|
||||
src/sljit/sljitUtils.c
|
||||
|
||||
# Some of the JIT sources are also in separate files that are #included.
|
||||
|
||||
EXTRA_DIST += \
|
||||
src/pcre2_jit_match.c \
|
||||
src/pcre2_jit_misc.c
|
||||
|
||||
if WITH_PCRE2_8
|
||||
libpcre2_8_la_LDFLAGS = $(EXTRA_LIBPCRE2_8_LDFLAGS)
|
||||
endif # WITH_PCRE2_8
|
||||
if WITH_PCRE2_16
|
||||
libpcre2_16_la_LDFLAGS = $(EXTRA_LIBPCRE2_16_LDFLAGS)
|
||||
endif # WITH_PCRE2_16
|
||||
if WITH_PCRE2_32
|
||||
libpcre2_32_la_LDFLAGS = $(EXTRA_LIBPCRE2_32_LDFLAGS)
|
||||
endif # WITH_PCRE2_32
|
||||
|
||||
if WITH_VALGRIND
|
||||
if WITH_PCRE2_8
|
||||
libpcre2_8_la_CFLAGS += $(VALGRIND_CFLAGS)
|
||||
endif # WITH_PCRE2_8
|
||||
if WITH_PCRE2_16
|
||||
libpcre2_16_la_CFLAGS += $(VALGRIND_CFLAGS)
|
||||
endif # WITH_PCRE2_16
|
||||
if WITH_PCRE2_32
|
||||
libpcre2_32_la_CFLAGS += $(VALGRIND_CFLAGS)
|
||||
endif # WITH_PCRE2_32
|
||||
endif # WITH_VALGRIND
|
||||
|
||||
if WITH_GCOV
|
||||
if WITH_PCRE2_8
|
||||
libpcre2_8_la_CFLAGS += $(GCOV_CFLAGS)
|
||||
endif # WITH_PCRE2_8
|
||||
if WITH_PCRE2_16
|
||||
libpcre2_16_la_CFLAGS += $(GCOV_CFLAGS)
|
||||
endif # WITH_PCRE2_16
|
||||
if WITH_PCRE2_32
|
||||
libpcre2_32_la_CFLAGS += $(GCOV_CFLAGS)
|
||||
endif # WITH_PCRE2_32
|
||||
endif # WITH_GCOV
|
||||
|
||||
## A version of the 8-bit library that has a POSIX API.
|
||||
|
||||
if WITH_PCRE2_8
|
||||
lib_LTLIBRARIES += libpcre2-posix.la
|
||||
libpcre2_posix_la_SOURCES = src/pcre2posix.c
|
||||
libpcre2_posix_la_CFLAGS = \
|
||||
-DPCRE2_CODE_UNIT_WIDTH=8 \
|
||||
$(VISIBILITY_CFLAGS) $(AM_CFLAGS)
|
||||
libpcre2_posix_la_LDFLAGS = $(EXTRA_LIBPCRE2_POSIX_LDFLAGS)
|
||||
libpcre2_posix_la_LIBADD = libpcre2-8.la
|
||||
if WITH_GCOV
|
||||
libpcre2_posix_la_CFLAGS += $(GCOV_CFLAGS)
|
||||
endif # WITH_GCOV
|
||||
endif # WITH_PCRE2_8
|
||||
|
||||
## Build pcre2grep if the 8-bit library is enabled
|
||||
|
||||
if WITH_PCRE2_8
|
||||
bin_PROGRAMS += pcre2grep
|
||||
pcre2grep_SOURCES = src/pcre2grep.c
|
||||
pcre2grep_CFLAGS = $(AM_CFLAGS)
|
||||
pcre2grep_LDADD = $(LIBZ) $(LIBBZ2)
|
||||
pcre2grep_LDADD += libpcre2-8.la
|
||||
if WITH_GCOV
|
||||
pcre2grep_CFLAGS += $(GCOV_CFLAGS)
|
||||
pcre2grep_LDADD += $(GCOV_LIBS)
|
||||
endif # WITH_GCOV
|
||||
endif # WITH_PCRE2_8
|
||||
|
||||
## -------- Testing ----------
|
||||
|
||||
## If JIT support is enabled, arrange for the JIT test program to run.
|
||||
|
||||
if WITH_JIT
|
||||
TESTS += pcre2_jit_test
|
||||
noinst_PROGRAMS += pcre2_jit_test
|
||||
pcre2_jit_test_SOURCES = src/pcre2_jit_test.c
|
||||
pcre2_jit_test_CFLAGS = $(AM_CFLAGS)
|
||||
pcre2_jit_test_LDADD =
|
||||
if WITH_PCRE2_8
|
||||
pcre2_jit_test_LDADD += libpcre2-8.la
|
||||
endif # WITH_PCRE2_8
|
||||
if WITH_PCRE2_16
|
||||
pcre2_jit_test_LDADD += libpcre2-16.la
|
||||
endif # WITH_PCRE2_16
|
||||
if WITH_PCRE2_32
|
||||
pcre2_jit_test_LDADD += libpcre2-32.la
|
||||
endif # WITH_PCRE2_32
|
||||
if WITH_GCOV
|
||||
pcre2_jit_test_CFLAGS += $(GCOV_CFLAGS)
|
||||
pcre2_jit_test_LDADD += $(GCOV_LIBS)
|
||||
endif # WITH_GCOV
|
||||
endif # WITH_JIT
|
||||
|
||||
# Build the general pcre2test program. The file src/pcre2_printint.c is
|
||||
# #included by pcre2test as many times as needed, at different code unit
|
||||
# widths.
|
||||
|
||||
bin_PROGRAMS += pcre2test
|
||||
EXTRA_DIST += src/pcre2_printint.c
|
||||
pcre2test_SOURCES = src/pcre2test.c
|
||||
pcre2test_CFLAGS = $(AM_CFLAGS)
|
||||
pcre2test_LDADD = $(LIBREADLINE)
|
||||
|
||||
if WITH_PCRE2_8
|
||||
pcre2test_LDADD += libpcre2-8.la libpcre2-posix.la
|
||||
endif # WITH_PCRE2_8
|
||||
|
||||
if WITH_PCRE2_16
|
||||
pcre2test_LDADD += libpcre2-16.la
|
||||
endif # WITH_PCRE2_16
|
||||
|
||||
if WITH_PCRE2_32
|
||||
pcre2test_LDADD += libpcre2-32.la
|
||||
endif # WITH_PCRE2_32
|
||||
|
||||
if WITH_VALGRIND
|
||||
pcre2test_CFLAGS += $(VALGRIND_CFLAGS)
|
||||
endif # WITH_VALGRIND
|
||||
|
||||
if WITH_GCOV
|
||||
pcre2test_CFLAGS += $(GCOV_CFLAGS)
|
||||
pcre2test_LDADD += $(GCOV_LIBS)
|
||||
endif # WITH_GCOV
|
||||
|
||||
## The main library tests. Each test is a binary plus a script that runs that
|
||||
## binary in various ways. We install these test binaries in case folks find it
|
||||
## helpful.
|
||||
|
||||
TESTS += RunTest
|
||||
dist_noinst_SCRIPTS += RunTest
|
||||
|
||||
EXTRA_DIST += RunTest.bat
|
||||
|
||||
## When the 8-bit library is configured, pcre2grep will have been built.
|
||||
|
||||
if WITH_PCRE2_8
|
||||
TESTS += RunGrepTest
|
||||
dist_noinst_SCRIPTS += RunGrepTest
|
||||
endif # WITH_PCRE2_8
|
||||
|
||||
## Distribute all the test data files
|
||||
|
||||
EXTRA_DIST += \
|
||||
testdata/grepbinary \
|
||||
testdata/grepfilelist \
|
||||
testdata/grepinput \
|
||||
testdata/grepinput3 \
|
||||
testdata/grepinput8 \
|
||||
testdata/grepinputv \
|
||||
testdata/grepinputx \
|
||||
testdata/greplist \
|
||||
testdata/grepoutput \
|
||||
testdata/grepoutput8 \
|
||||
testdata/grepoutputN \
|
||||
testdata/greppatN4 \
|
||||
testdata/testinput1 \
|
||||
testdata/testinput2 \
|
||||
testdata/testinput3 \
|
||||
testdata/testinput4 \
|
||||
testdata/testinput5 \
|
||||
testdata/testinput6 \
|
||||
testdata/testinput7 \
|
||||
testdata/testinput8 \
|
||||
testdata/testinput9 \
|
||||
testdata/testinput10 \
|
||||
testdata/testinput11 \
|
||||
testdata/testinput12 \
|
||||
testdata/testinput13 \
|
||||
testdata/testinput14 \
|
||||
testdata/testinput15 \
|
||||
testdata/testinput16 \
|
||||
testdata/testinput17 \
|
||||
testdata/testinput18 \
|
||||
testdata/testinput19 \
|
||||
testdata/testinputEBC \
|
||||
testdata/testoutput1 \
|
||||
testdata/testoutput2 \
|
||||
testdata/testoutput3 \
|
||||
testdata/testoutput3A \
|
||||
testdata/testoutput3B \
|
||||
testdata/testoutput4 \
|
||||
testdata/testoutput5 \
|
||||
testdata/testoutput6 \
|
||||
testdata/testoutput7 \
|
||||
testdata/testoutput8-16 \
|
||||
testdata/testoutput8-32 \
|
||||
testdata/testoutput8-8 \
|
||||
testdata/testoutput9 \
|
||||
testdata/testoutput10 \
|
||||
testdata/testoutput11-16 \
|
||||
testdata/testoutput11-32 \
|
||||
testdata/testoutput12-16 \
|
||||
testdata/testoutput12-32 \
|
||||
testdata/testoutput13 \
|
||||
testdata/testoutput14 \
|
||||
testdata/testoutput15 \
|
||||
testdata/testoutput16 \
|
||||
testdata/testoutput17 \
|
||||
testdata/testoutput18 \
|
||||
testdata/testoutput19 \
|
||||
testdata/testoutputEBC \
|
||||
testdata/wintestinput3 \
|
||||
testdata/wintestoutput3 \
|
||||
perltest.sh
|
||||
|
||||
# RunTest and RunGrepTest should clean up after themselves, but just in case
|
||||
# they don't, add their working files to CLEANFILES.
|
||||
|
||||
CLEANFILES += \
|
||||
testSinput \
|
||||
test3input \
|
||||
test3output \
|
||||
test3outputA \
|
||||
test3outputB \
|
||||
testtry \
|
||||
teststdout \
|
||||
teststderr \
|
||||
teststderrgrep \
|
||||
testtemp1grep \
|
||||
testtemp2grep \
|
||||
testtrygrep \
|
||||
testNinputgrep
|
||||
|
||||
## ------------ End of testing -------------
|
||||
|
||||
|
||||
# PCRE2 demonstration program. Not built automatically. The point is that the
|
||||
# users should build it themselves. So just distribute the source.
|
||||
|
||||
EXTRA_DIST += src/pcre2demo.c
|
||||
|
||||
|
||||
# We have .pc files for pkg-config users.
|
||||
|
||||
pkgconfigdir = $(libdir)/pkgconfig
|
||||
pkgconfig_DATA =
|
||||
|
||||
if WITH_PCRE2_8
|
||||
pkgconfig_DATA += libpcre2-8.pc libpcre2-posix.pc
|
||||
endif
|
||||
|
||||
if WITH_PCRE2_16
|
||||
pkgconfig_DATA += libpcre2-16.pc
|
||||
endif
|
||||
|
||||
if WITH_PCRE2_32
|
||||
pkgconfig_DATA += libpcre2-32.pc
|
||||
endif
|
||||
|
||||
|
||||
# gcov/lcov code coverage reporting
|
||||
#
|
||||
# Coverage reporting targets:
|
||||
#
|
||||
# coverage: Create a coverage report from 'make check'
|
||||
# coverage-baseline: Capture baseline coverage information
|
||||
# coverage-reset: This zeros the coverage counters only
|
||||
# coverage-report: This creates the coverage report only
|
||||
# coverage-clean-report: This removes the generated coverage report
|
||||
# without cleaning the coverage data itself
|
||||
# coverage-clean-data: This removes the captured coverage data without
|
||||
# removing the coverage files created at compile time (*.gcno)
|
||||
# coverage-clean: This cleans all coverage data including the generated
|
||||
# coverage report.
|
||||
|
||||
if WITH_GCOV
|
||||
COVERAGE_TEST_NAME = $(PACKAGE)
|
||||
COVERAGE_NAME = $(PACKAGE)-$(VERSION)
|
||||
COVERAGE_OUTPUT_FILE = $(COVERAGE_NAME)-coverage.info
|
||||
COVERAGE_OUTPUT_DIR = $(COVERAGE_NAME)-coverage
|
||||
COVERAGE_LCOV_EXTRA_FLAGS =
|
||||
COVERAGE_GENHTML_EXTRA_FLAGS =
|
||||
|
||||
coverage_quiet = $(coverage_quiet_$(V))
|
||||
coverage_quiet_ = $(coverage_quiet_$(AM_DEFAULT_VERBOSITY))
|
||||
coverage_quiet_0 = --quiet
|
||||
|
||||
coverage-check: all
|
||||
-$(MAKE) $(AM_MAKEFLAGS) -k check
|
||||
|
||||
coverage-baseline:
|
||||
$(LCOV) $(coverage_quiet) \
|
||||
--directory $(top_builddir) \
|
||||
--output-file "$(COVERAGE_OUTPUT_FILE)" \
|
||||
--capture \
|
||||
--initial
|
||||
|
||||
# Three stages:
#   1. lcov --capture collects the coverage data found under $(top_builddir)
#      into a temporary tracefile, "$(COVERAGE_OUTPUT_FILE).tmp".
#   2. lcov --remove writes the final tracefile without the entries matching
#      /tmp/*, /usr/include/* and $(includedir)/*; the temporary file is then
#      deleted (errors from rm are ignored).
#   3. genhtml renders the final tracefile as HTML in $(COVERAGE_OUTPUT_DIR),
#      and the location of index.html is echoed.
coverage-report:
|
||||
$(LCOV) $(coverage_quiet) \
|
||||
--directory $(top_builddir) \
|
||||
--capture \
|
||||
--output-file "$(COVERAGE_OUTPUT_FILE).tmp" \
|
||||
--test-name "$(COVERAGE_TEST_NAME)" \
|
||||
--no-checksum \
|
||||
--compat-libtool \
|
||||
$(COVERAGE_LCOV_EXTRA_FLAGS)
|
||||
$(LCOV) $(coverage_quiet) \
|
||||
--directory $(top_builddir) \
|
||||
--output-file "$(COVERAGE_OUTPUT_FILE)" \
|
||||
--remove "$(COVERAGE_OUTPUT_FILE).tmp" \
|
||||
"/tmp/*" \
|
||||
"/usr/include/*" \
|
||||
"$(includedir)/*"
|
||||
-@rm -f "$(COVERAGE_OUTPUT_FILE).tmp"
|
||||
LANG=C $(GENHTML) $(coverage_quiet) \
|
||||
--prefix $(top_builddir) \
|
||||
--output-directory "$(COVERAGE_OUTPUT_DIR)" \
|
||||
--title "$(PACKAGE) $(VERSION) Code Coverage Report" \
|
||||
--show-details "$(COVERAGE_OUTPUT_FILE)" \
|
||||
--legend \
|
||||
$(COVERAGE_GENHTML_EXTRA_FLAGS)
|
||||
@echo "Code coverage report written to file://$(abs_builddir)/$(COVERAGE_OUTPUT_DIR)/index.html"
|
||||
|
||||
coverage-reset:
|
||||
-$(LCOV) $(coverage_quiet) --zerocounters --directory $(top_builddir)
|
||||
|
||||
coverage-clean-report:
|
||||
-rm -f "$(COVERAGE_OUTPUT_FILE)" "$(COVERAGE_OUTPUT_FILE).tmp"
|
||||
-rm -rf "$(COVERAGE_OUTPUT_DIR)"
|
||||
|
||||
coverage-clean-data:
|
||||
-find $(top_builddir) -name "*.gcda" -delete
|
||||
|
||||
coverage-clean: coverage-reset coverage-clean-report coverage-clean-data
|
||||
-find $(top_builddir) -name "*.gcno" -delete
|
||||
|
||||
coverage-distclean: coverage-clean
|
||||
|
||||
coverage: coverage-reset coverage-baseline coverage-check coverage-report
|
||||
clean-local: coverage-clean
|
||||
distclean-local: coverage-distclean
|
||||
|
||||
.PHONY: coverage coverage-baseline coverage-check coverage-report coverage-reset coverage-clean-report coverage-clean-data coverage-clean coverage-distclean
|
||||
|
||||
# Without coverage support, still arrange for 'make distclean' to get rid of
|
||||
# any coverage files that may have been left from a different configuration.
|
||||
|
||||
else
|
||||
|
||||
coverage:
|
||||
@echo "Configuring with --enable-coverage is required to generate code coverage report."
|
||||
|
||||
DISTCLEANFILES += src/*.gcda src/*.gcno
|
||||
|
||||
distclean-local:
|
||||
rm -rf $(PACKAGE)-$(VERSION)-coverage*
|
||||
|
||||
endif # WITH_GCOV
|
||||
|
||||
## CMake support
|
||||
|
||||
EXTRA_DIST += \
|
||||
cmake/COPYING-CMAKE-SCRIPTS \
|
||||
cmake/FindPackageHandleStandardArgs.cmake \
|
||||
cmake/FindReadline.cmake \
|
||||
cmake/FindEditline.cmake \
|
||||
CMakeLists.txt \
|
||||
config-cmake.h.in
|
||||
|
||||
## end Makefile.am
|
3087
pcre2/Makefile.in
Normal file
3087
pcre2/Makefile.in
Normal file
File diff suppressed because it is too large
Load Diff
47
pcre2/NEWS
Normal file
47
pcre2/NEWS
Normal file
@ -0,0 +1,47 @@
|
||||
News about PCRE2 releases
|
||||
-------------------------
|
||||
|
||||
Version 10.20 30-June-2015
|
||||
--------------------------
|
||||
|
||||
1. Callouts with string arguments and the pcre2_callout_enumerate() function
|
||||
have been implemented.
|
||||
|
||||
2. The PCRE2_NEVER_BACKSLASH_C option, which locks out the use of \C, is added.
|
||||
|
||||
3. The PCRE2_ALT_CIRCUMFLEX option lets ^ match after a newline at the end of a
|
||||
subject in multiline mode.
|
||||
|
||||
4. The way named subpatterns are handled has been refactored. The previous
|
||||
approach had several bugs.
|
||||
|
||||
5. The handling of \c in EBCDIC environments has been changed to conform to the
|
||||
perlebcdic document. This is an incompatible change.
|
||||
|
||||
6. Bugs have been mended, many of them discovered by fuzzers.
|
||||
|
||||
|
||||
Version 10.10 06-March-2015
|
||||
---------------------------
|
||||
|
||||
1. Serialization and de-serialization functions have been added to the API,
|
||||
making it possible to save and restore sets of compiled patterns, though
|
||||
restoration must be done in the same environment that was used for compilation.
|
||||
|
||||
2. The (*NO_JIT) feature has been added; this makes it possible for a pattern
|
||||
creator to specify that JIT is not to be used.
|
||||
|
||||
3. A number of bugs have been fixed. In particular, bugs that caused building
|
||||
on Windows using CMake to fail have been mended.
|
||||
|
||||
|
||||
Version 10.00 05-January-2015
|
||||
-----------------------------
|
||||
|
||||
Version 10.00 is the first release of PCRE2, a revised API for the PCRE
|
||||
library. Changes prior to 10.00 are logged in the ChangeLog file for the old
|
||||
API, up to item 20 for release 8.36. New programs are recommended to use the
|
||||
new library. Programs that use the original (PCRE1) API will need changing
|
||||
before linking with the new library.
|
||||
|
||||
****
|
391
pcre2/NON-AUTOTOOLS-BUILD
Normal file
391
pcre2/NON-AUTOTOOLS-BUILD
Normal file
@ -0,0 +1,391 @@
|
||||
Building PCRE2 without using autotools
|
||||
--------------------------------------
|
||||
|
||||
This document has been converted from the PCRE1 document. I have removed a
|
||||
number of sections about building in various environments, as they applied only
|
||||
to PCRE1 and are probably out of date.
|
||||
|
||||
This document contains the following sections:
|
||||
|
||||
General
|
||||
Generic instructions for the PCRE2 C library
|
||||
Stack size in Windows environments
|
||||
Linking programs in Windows environments
|
||||
Calling conventions in Windows environments
|
||||
Comments about Win32 builds
|
||||
Building PCRE2 on Windows with CMake
|
||||
Testing with RunTest.bat
|
||||
Building PCRE2 on native z/OS and z/VM
|
||||
|
||||
|
||||
GENERAL
|
||||
|
||||
The basic PCRE2 library consists entirely of code written in Standard C, and so
|
||||
should compile successfully on any system that has a Standard C compiler and
|
||||
library.
|
||||
|
||||
The PCRE2 distribution includes a "configure" file for use by the
|
||||
configure/make (autotools) build system, as found in many Unix-like
|
||||
environments. The README file contains information about the options for
|
||||
"configure".
|
||||
|
||||
There is also support for CMake, which some users prefer, especially in Windows
|
||||
environments, though it can also be run in Unix-like environments. See the
|
||||
section entitled "Building PCRE2 on Windows with CMake" below.
|
||||
|
||||
Versions of src/config.h and src/pcre2.h are distributed in the PCRE2 tarballs
|
||||
under the names src/config.h.generic and src/pcre2.h.generic. These are
|
||||
provided for those who build PCRE2 without using "configure" or CMake. If you
|
||||
use "configure" or CMake, the .generic versions are not used.
|
||||
|
||||
|
||||
GENERIC INSTRUCTIONS FOR THE PCRE2 C LIBRARY
|
||||
|
||||
The following are generic instructions for building the PCRE2 C library "by
|
||||
hand". If you are going to use CMake, this section does not apply to you; you
|
||||
can skip ahead to the CMake section.
|
||||
|
||||
(1) Copy or rename the file src/config.h.generic as src/config.h, and edit the
|
||||
macro settings that it contains to whatever is appropriate for your
|
||||
environment. In particular, you can alter the definition of the NEWLINE
|
||||
macro to specify what character(s) you want to be interpreted as line
|
||||
terminators.
|
||||
|
||||
When you compile any of the PCRE2 modules, you must specify
|
||||
-DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
|
||||
sources.
|
||||
|
||||
An alternative approach is not to edit src/config.h, but to use -D on the
|
||||
compiler command line to make any changes that you need to the
|
||||
configuration options. In this case -DHAVE_CONFIG_H must not be set.
|
||||
|
||||
NOTE: There have been occasions when the way in which certain parameters
|
||||
in src/config.h are used has changed between releases. (In the
|
||||
configure/make world, this is handled automatically.) When upgrading to a
|
||||
new release, you are strongly advised to review src/config.h.generic
|
||||
before re-using what you had previously.
|
||||
|
||||
(2) Copy or rename the file src/pcre2.h.generic as src/pcre2.h.
|
||||
|
||||
(3) EITHER:
|
||||
Copy or rename file src/pcre2_chartables.c.dist as
|
||||
src/pcre2_chartables.c.
|
||||
|
||||
OR:
|
||||
Compile src/dftables.c as a stand-alone program (using -DHAVE_CONFIG_H
|
||||
if you have set up src/config.h), and then run it with the single
|
||||
argument "src/pcre2_chartables.c". This generates a set of standard
|
||||
character tables and writes them to that file. The tables are generated
|
||||
using the default C locale for your system. If you want to use a locale
|
||||
that is specified by LC_xxx environment variables, add the -L option to
|
||||
the dftables command. You must use this method if you are building on a
|
||||
system that uses EBCDIC code.
|
||||
|
||||
The tables in src/pcre2_chartables.c are defaults. The caller of PCRE2 can
|
||||
specify alternative tables at run time.
|
||||
|
||||
(4) For an 8-bit library, compile the following source files from the src
|
||||
directory, setting -DPCRE2_CODE_UNIT_WIDTH=8 as a compiler option. Also
|
||||
set -DHAVE_CONFIG_H if you have set up src/config.h with your
|
||||
configuration, or else use other -D settings to change the configuration
|
||||
as required.
|
||||
|
||||
pcre2_auto_possess.c
|
||||
pcre2_chartables.c
|
||||
pcre2_compile.c
|
||||
pcre2_config.c
|
||||
pcre2_context.c
|
||||
pcre2_dfa_match.c
|
||||
pcre2_error.c
|
||||
pcre2_jit_compile.c
|
||||
pcre2_maketables.c
|
||||
pcre2_match.c
|
||||
pcre2_match_data.c
|
||||
pcre2_newline.c
|
||||
pcre2_ord2utf.c
|
||||
pcre2_pattern_info.c
|
||||
pcre2_serialize.c
|
||||
pcre2_string_utils.c
|
||||
pcre2_study.c
|
||||
pcre2_substitute.c
|
||||
pcre2_substring.c
|
||||
pcre2_tables.c
|
||||
pcre2_ucd.c
|
||||
pcre2_valid_utf.c
|
||||
pcre2_xclass.c
|
||||
|
||||
Make sure that you include -I. in the compiler command (or equivalent for
|
||||
an unusual compiler) so that all included PCRE2 header files are first
|
||||
sought in the src directory under the current directory. Otherwise you run
|
||||
the risk of picking up a previously-installed file from somewhere else.
|
||||
|
||||
Note that you must compile pcre2_jit_compile.c, even if you have not
|
||||
defined SUPPORT_JIT in src/config.h, because when JIT support is not
|
||||
configured, dummy functions are compiled. When JIT support IS configured,
|
||||
pcre2_jit_compile.c #includes other files from the sljit subdirectory, where
|
||||
there should be 16 files, all of whose names begin with "sljit". It also
|
||||
#includes src/pcre2_jit_match.c and src/pcre2_jit_misc.c, so you should
|
||||
not compile these yourself.
|
||||
|
||||
(5) Now link all the compiled code into an object library in whichever form
|
||||
your system keeps such libraries. This is the basic PCRE2 C 8-bit library.
|
||||
If your system has static and shared libraries, you may have to do this
|
||||
once for each type.
|
||||
|
||||
(6) If you want to build a 16-bit library or 32-bit library (as well as, or
|
||||
instead of the 8-bit library) just supply 16 or 32 as the value of
|
||||
-DPCRE2_CODE_UNIT_WIDTH when you are compiling.
|
||||
|
||||
(7) If you want to build the POSIX wrapper functions (which apply only to the
|
||||
8-bit library), ensure that you have the src/pcre2posix.h file and then
|
||||
compile src/pcre2posix.c. Link the result (on its own) as the pcre2posix
|
||||
library.
|
||||
|
||||
(8) The pcre2test program can be linked with any combination of the 8-bit,
|
||||
16-bit and 32-bit libraries (depending on what you selected in
|
||||
src/config.h). Compile src/pcre2test.c; don't forget -DHAVE_CONFIG_H if
|
||||
necessary, but do NOT define PCRE2_CODE_UNIT_WIDTH. Then link with the
|
||||
appropriate library/ies. If you compiled an 8-bit library, pcre2test also
|
||||
needs the pcre2posix wrapper library.
|
||||
|
||||
(9) Run pcre2test on the testinput files in the testdata directory, and check
|
||||
that the output matches the corresponding testoutput files. There are
|
||||
comments about what each test does in the section entitled "Testing PCRE2"
|
||||
in the README file. If you compiled more than one of the 8-bit, 16-bit and
|
||||
32-bit libraries, you need to run pcre2test with the -16 option to do
|
||||
16-bit tests and with the -32 option to do 32-bit tests.
|
||||
|
||||
Some tests are relevant only when certain build-time options are selected.
|
||||
For example, test 4 is for Unicode support, and will not run if you have
|
||||
built PCRE2 without it. See the comments at the start of each testinput
|
||||
file. If you have a suitable Unix-like shell, the RunTest script will run
|
||||
the appropriate tests for you. The command "RunTest list" will output a
|
||||
list of all the tests.
|
||||
|
||||
Note that the supplied files are in Unix format, with just LF characters
|
||||
as line terminators. You may need to edit them to change this if your
|
||||
system uses a different convention.
|
||||
|
||||
(10) If you have built PCRE2 with SUPPORT_JIT, the JIT features can be tested
|
||||
by running pcre2test with the -jit option. This is done automatically by
|
||||
the RunTest script. You might also like to build and run the freestanding
|
||||
JIT test program, src/pcre2_jit_test.c.
|
||||
|
||||
(11) If you want to use the pcre2grep command, compile and link
|
||||
src/pcre2grep.c; it uses only the basic 8-bit PCRE2 library (it does not
|
||||
need the pcre2posix library).
|
||||
|
||||
|
||||
STACK SIZE IN WINDOWS ENVIRONMENTS
|
||||
|
||||
The default processor stack size of 1Mb in some Windows environments is too
|
||||
small for matching patterns that need much recursion. In particular, test 2 may
|
||||
fail because of this. Normally, running out of stack causes a crash, but there
|
||||
have been cases where the test program has just died silently. See your linker
|
||||
documentation for how to increase stack size if you experience problems. If you
|
||||
are using CMake (see "BUILDING PCRE2 ON WINDOWS WITH CMAKE" below) and the gcc
|
||||
compiler, you can increase the stack size for pcre2test and pcre2grep by
|
||||
setting the CMAKE_EXE_LINKER_FLAGS variable to "-Wl,--stack,8388608" (for
|
||||
example). The Linux default of 8Mb is a reasonable choice for the stack, though
|
||||
even that can be too small for some pattern/subject combinations.
|
||||
|
||||
PCRE2 has a compile configuration option to disable the use of stack for
|
||||
recursion so that heap is used instead. However, pattern matching is
|
||||
significantly slower when this is done. There is more about stack usage in the
|
||||
"pcre2stack" documentation.
|
||||
|
||||
|
||||
LINKING PROGRAMS IN WINDOWS ENVIRONMENTS
|
||||
|
||||
If you want to statically link a program against a PCRE2 library in the form of
|
||||
a non-dll .a file, you must define PCRE2_STATIC before including src/pcre2.h.
|
||||
|
||||
|
||||
CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS
|
||||
|
||||
It is possible to compile programs to use different calling conventions using
|
||||
MSVC. Search the web for "calling conventions" for more information. To make it
|
||||
easier to change the calling convention for the exported functions in the
|
||||
PCRE2 library, the macro PCRE2_CALL_CONVENTION is present in all the external
|
||||
definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is
|
||||
not set, it defaults to empty; the default calling convention is then used
|
||||
(which is what is wanted most of the time).
|
||||
|
||||
|
||||
COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE2 ON WINDOWS WITH CMAKE")
|
||||
|
||||
There are two ways of building PCRE2 using the "configure, make, make install"
|
||||
paradigm on Windows systems: using MinGW or using Cygwin. These are not at all
|
||||
the same thing; they are completely different from each other. There is also
|
||||
support for building using CMake, which some users find a more straightforward
|
||||
way of building PCRE2 under Windows.
|
||||
|
||||
The MinGW home page (http://www.mingw.org/) says this:
|
||||
|
||||
MinGW: A collection of freely available and freely distributable Windows
|
||||
specific header files and import libraries combined with GNU toolsets that
|
||||
allow one to produce native Windows programs that do not rely on any
|
||||
3rd-party C runtime DLLs.
|
||||
|
||||
The Cygwin home page (http://www.cygwin.com/) says this:
|
||||
|
||||
Cygwin is a Linux-like environment for Windows. It consists of two parts:
|
||||
|
||||
. A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing
|
||||
substantial Linux API functionality
|
||||
|
||||
. A collection of tools which provide Linux look and feel.
|
||||
|
||||
On both MinGW and Cygwin, PCRE2 should build correctly using:
|
||||
|
||||
./configure && make && make install
|
||||
|
||||
This should create two libraries called libpcre2-8 and libpcre2-posix. These
|
||||
are independent libraries: when you link with libpcre2-posix you must also link
|
||||
with libpcre2-8, which contains the basic functions.
|
||||
|
||||
Using Cygwin's compiler generates libraries and executables that depend on
|
||||
cygwin1.dll. If a library that is generated this way is distributed,
|
||||
cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL
|
||||
licence, this forces not only PCRE2 to be under the GPL, but also the entire
|
||||
application. A distributor who wants to keep their own code proprietary must
|
||||
purchase an appropriate Cygwin licence.
|
||||
|
||||
MinGW has no such restrictions. The MinGW compiler generates a library or
|
||||
executable that can run standalone on Windows without any third party dll or
|
||||
licensing issues.
|
||||
|
||||
But there is more complication:
|
||||
|
||||
If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is
|
||||
to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a
|
||||
front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's
|
||||
gcc and MinGW's gcc). So, a user can:
|
||||
|
||||
. Build native binaries by using MinGW or by getting Cygwin and using
|
||||
-mno-cygwin.
|
||||
|
||||
. Build binaries that depend on cygwin1.dll by using Cygwin with the normal
|
||||
compiler flags.
|
||||
|
||||
The test files that are supplied with PCRE2 are in UNIX format, with LF
|
||||
characters as line terminators. Unless your PCRE2 library uses a default
|
||||
newline option that includes LF as a valid newline, it may be necessary to
|
||||
change the line terminators in the test files to get some of the tests to work.
|
||||
|
||||
|
||||
BUILDING PCRE2 ON WINDOWS WITH CMAKE
|
||||
|
||||
CMake is an alternative configuration facility that can be used instead of
|
||||
"configure". CMake creates project files (make files, solution files, etc.)
|
||||
tailored to numerous development environments, including Visual Studio,
|
||||
Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
|
||||
spaces in the names for your CMake installation and your PCRE2 source and build
|
||||
directories.
|
||||
|
||||
The following instructions were contributed by a PCRE1 user, but they should
|
||||
also work for PCRE2. If they are not followed exactly, errors may occur. In the
|
||||
event that errors do occur, it is recommended that you delete the CMake cache
|
||||
before attempting to repeat the CMake build process. In the CMake GUI, the
|
||||
cache can be deleted by selecting "File > Delete Cache".
|
||||
|
||||
1. Install the latest CMake version available from http://www.cmake.org/, and
|
||||
ensure that cmake\bin is on your path.
|
||||
|
||||
2. Unzip (retaining folder structure) the PCRE2 source tree into a source
|
||||
directory such as C:\pcre2. You should ensure your local date and time
|
||||
is not earlier than the file dates in your source dir if the release is
|
||||
very new.
|
||||
|
||||
3. Create a new, empty build directory, preferably a subdirectory of the
|
||||
source dir. For example, C:\pcre2\pcre2-xx\build.
|
||||
|
||||
4. Run cmake-gui from the Shell environment of your build tool, for example,
|
||||
Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try
|
||||
to start Cmake from the Windows Start menu, as this can lead to errors.
|
||||
|
||||
5. Enter C:\pcre2\pcre2-xx and C:\pcre2\pcre2-xx\build for the source and
|
||||
build directories, respectively.
|
||||
|
||||
6. Hit the "Configure" button.
|
||||
|
||||
7. Select the particular IDE / build tool that you are using (Visual
|
||||
Studio, MSYS makefiles, MinGW makefiles, etc.)
|
||||
|
||||
8. The GUI will then list several configuration options. This is where
|
||||
you can disable Unicode support or select other PCRE2 optional features.
|
||||
|
||||
9. Hit "Configure" again. The adjacent "Generate" button should now be
|
||||
active.
|
||||
|
||||
10. Hit "Generate".
|
||||
|
||||
11. The build directory should now contain a usable build system, be it a
|
||||
solution file for Visual Studio, makefiles for MinGW, etc. Exit from
|
||||
cmake-gui and use the generated build system with your compiler or IDE.
|
||||
E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE2
|
||||
solution, select the desired configuration (Debug, or Release, etc.) and
|
||||
build the ALL_BUILD project.
|
||||
|
||||
12. If during configuration with cmake-gui you've elected to build the test
|
||||
programs, you can execute them by building the test project. E.g., for
|
||||
MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The
|
||||
most recent build configuration is targeted by the tests. A summary of
|
||||
test results is presented. Complete test output is subsequently
|
||||
available for review in Testing\Temporary under your build dir.
|
||||
|
||||
|
||||
TESTING WITH RUNTEST.BAT
|
||||
|
||||
If configured with CMake, building the test project ("make test" or building
|
||||
RUN_TESTS in Visual Studio) creates (and runs) pcre2_test.bat (and depending
|
||||
on your configuration options, possibly other test programs) in the build
|
||||
directory. The pcre2_test.bat script runs RunTest.bat with correct source and
|
||||
exe paths.
|
||||
|
||||
For manual testing with RunTest.bat, provided the build dir is a subdirectory
|
||||
of the source directory: Open command shell window. Chdir to the location
|
||||
of your pcre2test.exe and pcre2grep.exe programs. Call RunTest.bat with
|
||||
"..\RunTest.Bat" or "..\..\RunTest.bat" as appropriate.
|
||||
|
||||
To run only a particular test with RunTest.Bat provide a test number argument.
|
||||
|
||||
Otherwise:
|
||||
|
||||
1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe
|
||||
have been created.
|
||||
|
||||
2. Edit RunTest.bat to identify the full or relative location of
|
||||
the pcre2 source (in which the testdata folder resides), e.g.:
|
||||
|
||||
set srcdir=C:\pcre2\pcre2-10.00
|
||||
|
||||
3. In a Windows command environment, chdir to the location of your bat and
|
||||
exe programs.
|
||||
|
||||
4. Run RunTest.bat. Test outputs will automatically be compared to expected
|
||||
results, and discrepancies will be identified in the console output.
|
||||
|
||||
To independently test the just-in-time compiler, run pcre2_jit_test.exe.
|
||||
|
||||
|
||||
BUILDING PCRE2 ON NATIVE Z/OS AND Z/VM
|
||||
|
||||
z/OS and z/VM are operating systems for mainframe computers, produced by IBM.
|
||||
The character code used is EBCDIC, not ASCII or Unicode. In z/OS, UNIX APIs and
|
||||
applications can be supported through UNIX System Services, and in such an
|
||||
environment PCRE2 can be built in the same way as in other systems. However, in
|
||||
native z/OS (without UNIX System Services) and in z/VM, special ports are
|
||||
required. For details, please see this web site:
|
||||
|
||||
http://www.zaconsultants.net
|
||||
|
||||
The site currently has ports for PCRE1 releases, but PCRE2 should follow in due
|
||||
course.
|
||||
|
||||
You may also download PCRE1 from WWW.CBTTAPE.ORG, file 882. Everything, source
|
||||
and executable, is in EBCDIC and native z/OS file formats and this is the
|
||||
recommended download site.
|
||||
|
||||
=============================
|
||||
Last Updated: 15 June 2015
|
239
pcre2/PrepareRelease
Executable file
239
pcre2/PrepareRelease
Executable file
@ -0,0 +1,239 @@
|
||||
#!/bin/sh
|
||||
|
||||
# Script to prepare the files for building a PCRE2 release. It does some
|
||||
# processing of the documentation, detrails files, and creates pcre2.h.generic
|
||||
# and config.h.generic (for use by builders who can't run ./configure).
|
||||
|
||||
# You must run this script before running "make dist". If its first argument
|
||||
# is "doc", it stops after preparing the documentation. There are no other
|
||||
# arguments. The script makes use of the following files:
|
||||
|
||||
# 132html A Perl script that converts a .1 or .3 man page into HTML. It
|
||||
# "knows" the relevant troff constructs that are used in the PCRE2
|
||||
# man pages.
|
||||
|
||||
# CheckMan A Perl script that checks man pages for typos in the mark up.
|
||||
|
||||
# CleanTxt A Perl script that cleans up the output of "nroff -man" by
|
||||
# removing backspaces and other redundant text so as to produce
|
||||
# a readable .txt file.
|
||||
|
||||
# Detrail A Perl script that removes trailing spaces from files.
|
||||
|
||||
# doc/index.html.src
|
||||
# A file that is copied as index.html into the doc/html directory
|
||||
# when the HTML documentation is built. It works like this so that
|
||||
# doc/html can be deleted and re-created from scratch.
|
||||
|
||||
# README & NON-AUTOTOOLS-BUILD
|
||||
# These files are copied into the doc/html directory, with .txt
|
||||
# extensions so that they can be hyperlinked from the HTML
|
||||
# documentation, because some people just go to the HTML without
|
||||
# looking for text files.
|
||||
|
||||
|
||||
# First, sort out the documentation. Remove pcre2demo.3 first because it won't
|
||||
# pass the markup check (it is created below, using markup that none of the
|
||||
# other pages use).
|
||||
|
||||
# Everything below uses paths relative to doc/ and deletes files there, so
# stop immediately if the directory cannot be entered rather than carrying on
# in the wrong place.
cd doc || exit 1
|
||||
echo Processing documentation
|
||||
|
||||
/bin/rm -f pcre2demo.3
|
||||
|
||||
# Check the remaining man pages
|
||||
|
||||
perl ../CheckMan *.1 *.3
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
|
||||
# Make Text form of the documentation. It needs some mangling to make it
|
||||
# tidy for online reading. Concatenate all the .3 stuff, but omit the
|
||||
# individual function pages.
|
||||
|
||||
cat <<End >pcre2.txt
|
||||
-----------------------------------------------------------------------------
|
||||
This file contains a concatenation of the PCRE2 man pages, converted to plain
|
||||
text format for ease of searching with a text editor, or for use on systems
|
||||
that do not have a man page processor. The small individual files that give
|
||||
synopses of each function in the library have not been included. Neither has
|
||||
the pcre2demo program. There are separate text files for the pcre2grep and
|
||||
pcre2test commands.
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
|
||||
End
|
||||
|
||||
# Append each listed man page to pcre2.txt: format the .3 source with nroff,
# pass the result through CleanTxt, delete the intermediate .rawtxt file, and
# follow the page with a dashed separator line.
echo "Making pcre2.txt"
|
||||
for file in pcre2 pcre2api pcre2build pcre2callout pcre2compat pcre2jit \
|
||||
pcre2limits pcre2matching pcre2partial pcre2unicode ; do
|
||||
|
||||
#for file in \
|
||||
# pcre2syntax \
|
||||
# pcre2precompile pcre2perform pcre2posix pcre2sample \
|
||||
# pcre2stack ; do
|
||||
|
||||
echo " Processing $file.3"
|
||||
nroff -c -man $file.3 >$file.rawtxt
|
||||
perl ../CleanTxt <$file.rawtxt >>pcre2.txt
|
||||
/bin/rm $file.rawtxt
|
||||
echo "------------------------------------------------------------------------------" >>pcre2.txt
|
||||
# NOTE(review): pcre2sample appears only in the commented-out list above, so
# with the active list this test is always true and the two spacer lines are
# appended after every page, including the last one.
if [ "$file" != "pcre2sample" ] ; then
|
||||
echo " " >>pcre2.txt
|
||||
echo " " >>pcre2.txt
|
||||
fi
|
||||
done
|
||||
|
||||
# The three commands
|
||||
# Each command's .1 man page is formatted with nroff and cleaned with CleanTxt
# into its own <name>.txt file (overwritten, not appended to pcre2.txt); the
# intermediate .rawtxt file is removed afterwards.
for file in pcre2test pcre2grep pcre2-config ; do
|
||||
echo Making $file.txt
|
||||
nroff -c -man $file.1 >$file.rawtxt
|
||||
perl ../CleanTxt <$file.rawtxt >$file.txt
|
||||
/bin/rm $file.rawtxt
|
||||
done
|
||||
|
||||
|
||||
# Make pcre2demo.3 from the pcre2demo.c source file
|
||||
|
||||
# Generate pcre2demo.3 with an inline Perl program. The heredoc delimiter is
# quoted ("END"), so the shell passes the Perl text through without expanding
# anything in it. The Perl code first prints troff definitions of the .EX and
# .EE macros, then copies ../src/pcre2demo.c between .EX and .EE with every
# backslash rewritten as \e. The script exits with status 1 if perl fails.
# NOTE(review): the Perl code opens pcre2demo.3 itself through the OUT handle
# while stdout is also redirected to the same file; nothing is printed to
# stdout, so the shell redirection looks redundant - confirm before changing.
echo "Making pcre2demo.3"
|
||||
perl <<"END" >pcre2demo.3
|
||||
open(IN, "../src/pcre2demo.c") || die "Failed to open src/pcre2demo.c\n";
|
||||
open(OUT, ">pcre2demo.3") || die "Failed to open pcre2demo.3\n";
|
||||
print OUT ".\\\" Start example.\n" .
|
||||
".de EX\n" .
|
||||
". nr mE \\\\n(.f\n" .
|
||||
". nf\n" .
|
||||
". nh\n" .
|
||||
". ft CW\n" .
|
||||
"..\n" .
|
||||
".\n" .
|
||||
".\n" .
|
||||
".\\\" End example.\n" .
|
||||
".de EE\n" .
|
||||
". ft \\\\n(mE\n" .
|
||||
". fi\n" .
|
||||
". hy \\\\n(HY\n" .
|
||||
"..\n" .
|
||||
".\n" .
|
||||
".EX\n" ;
|
||||
while (<IN>)
|
||||
{
|
||||
s/\\/\\e/g;
|
||||
print OUT;
|
||||
}
|
||||
print OUT ".EE\n";
|
||||
close(IN);
|
||||
close(OUT);
|
||||
END
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
|
||||
|
||||
# Make HTML form of the documentation.
|
||||
|
||||
echo "Making HTML documentation"
|
||||
/bin/rm html/*
|
||||
cp index.html.src html/index.html
|
||||
cp ../README html/README.txt
|
||||
cp ../NON-AUTOTOOLS-BUILD html/NON-AUTOTOOLS-BUILD.txt
|
||||
|
||||
# Convert each command man page (.1) to HTML, always with a table of contents.
# Stop on the first conversion failure, as the .3 loop below does, so that a
# broken or truncated page is not silently left in html/.
for file in *.1 ; do
|
||||
base=`basename $file .1`
|
||||
echo " Making $base.html"
|
||||
perl ../132html -toc $base <$file >html/$base.html
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
done
|
||||
|
||||
# Exclude table of contents for function summaries. It seems that expr
|
||||
# forces an anchored regex. Also exclude them for small pages that have
|
||||
# only one section.
|
||||
|
||||
# Convert each .3 man page to HTML. A table of contents (-toc) is requested by
# default and dropped for the pages selected below. The script exits with
# status 1 as soon as one conversion fails.
for file in *.3 ; do
|
||||
base=`basename $file .3`
|
||||
toc=-toc
|
||||
# expr prints the length of the match of '.*_' against $base (0 when there is
# no match), so any name containing an underscore loses its table of contents.
if [ `expr $base : '.*_'` -ne 0 ] ; then toc="" ; fi
|
||||
if [ "$base" = "pcre2sample" ] || \
|
||||
[ "$base" = "pcre2stack" ] || \
|
||||
[ "$base" = "pcre2compat" ] || \
|
||||
[ "$base" = "pcre2limits" ] || \
|
||||
[ "$base" = "pcre2perform" ] || \
|
||||
[ "$base" = "pcre2unicode" ] ; then
|
||||
toc=""
|
||||
fi
|
||||
echo " Making $base.html"
|
||||
# $toc is unquoted so that an empty value contributes no argument at all.
perl ../132html $toc $base <$file >html/$base.html
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
done
|
||||
|
||||
# End of documentation processing; stop if only documentation required.
|
||||
|
||||
cd ..
|
||||
echo Documentation done
|
||||
if [ "$1" = "doc" ] ; then exit; fi
|
||||
|
||||
# These files are detrailed; do not detrail the test data because there may be
|
||||
# significant trailing spaces. Do not detrail RunTest.bat, because it has CRLF
|
||||
# line endings and the detrail script removes all trailing white space. The
|
||||
# configure files are also omitted from the detrailing.
|
||||
|
||||
files="\
|
||||
Makefile.am \
|
||||
configure.ac \
|
||||
README \
|
||||
LICENCE \
|
||||
COPYING \
|
||||
AUTHORS \
|
||||
NEWS \
|
||||
NON-AUTOTOOLS-BUILD \
|
||||
INSTALL \
|
||||
132html \
|
||||
CleanTxt \
|
||||
Detrail \
|
||||
ChangeLog \
|
||||
CMakeLists.txt \
|
||||
RunGrepTest \
|
||||
RunTest \
|
||||
pcre2-config.in \
|
||||
perltest.sh \
|
||||
libpcre2-8.pc.in \
|
||||
libpcre2-16.pc.in \
|
||||
libpcre2-32.pc.in \
|
||||
libpcre2-posix.pc.in \
|
||||
src/dftables.c \
|
||||
src/pcre2.h.in \
|
||||
src/pcre2_auto_possess.c \
|
||||
src/pcre2_compile.c \
|
||||
src/pcre2_config.c \
|
||||
src/pcre2_context.c \
|
||||
src/pcre2_dfa_match.c \
|
||||
src/pcre2_error.c \
|
||||
src/pcre2_internal.h \
|
||||
src/pcre2_intmodedep.h \
|
||||
src/pcre2_jit_compile.c \
|
||||
src/pcre2_jit_match.c \
|
||||
src/pcre2_jit_misc.c \
|
||||
src/pcre2_jit_test.c \
|
||||
src/pcre2_maketables.c \
|
||||
src/pcre2_match.c \
|
||||
src/pcre2_match_data.c \
|
||||
src/pcre2_newline.c \
|
||||
src/pcre2_ord2utf.c \
|
||||
src/pcre2_pattern_info.c \
|
||||
src/pcre2_printint.c \
|
||||
src/pcre2_string_utils.c \
|
||||
src/pcre2_study.c \
|
||||
src/pcre2_substring.c \
|
||||
src/pcre2_tables.c \
|
||||
src/pcre2_ucd.c \
|
||||
src/pcre2_ucp.h \
|
||||
src/pcre2_valid_utf.c \
|
||||
src/pcre2_xclass.c \
|
||||
src/pcre2demo.c \
|
||||
src/pcre2grep.c \
|
||||
src/pcre2posix.c \
|
||||
src/pcre2posix.h \
|
||||
src/pcre2test.c"
|
||||
|
||||
# Run Detrail over the files listed in $files plus doc/p* and doc/html/*.
# $files is unquoted so that the whitespace-separated list built above is
# split into one argument per file name.
echo Detrailing
|
||||
perl ./Detrail $files doc/p* doc/html/*
|
||||
|
||||
echo Done
|
||||
|
||||
#End
|
835
pcre2/README
Normal file
835
pcre2/README
Normal file
@ -0,0 +1,835 @@
|
||||
README file for PCRE2 (Perl-compatible regular expression library)
|
||||
------------------------------------------------------------------
|
||||
|
||||
PCRE2 is a re-working of the original PCRE library to provide an entirely new
|
||||
API. The latest release of PCRE2 is always available in three alternative
|
||||
formats from:
|
||||
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.tar.gz
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.tar.bz2
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.zip
|
||||
|
||||
There is a mailing list for discussion about the development of PCRE (both the
|
||||
original and new APIs) at pcre-dev@exim.org. You can access the archives and
|
||||
subscribe or manage your subscription here:
|
||||
|
||||
https://lists.exim.org/mailman/listinfo/pcre-dev
|
||||
|
||||
Please read the NEWS file if you are upgrading from a previous release.
|
||||
The contents of this README file are:
|
||||
|
||||
The PCRE2 APIs
|
||||
Documentation for PCRE2
|
||||
Contributions by users of PCRE2
|
||||
Building PCRE2 on non-Unix-like systems
|
||||
Building PCRE2 without using autotools
|
||||
Building PCRE2 using autotools
|
||||
Retrieving configuration information
|
||||
Shared libraries
|
||||
Cross-compiling using autotools
|
||||
Making new tarballs
|
||||
Testing PCRE2
|
||||
Character tables
|
||||
File manifest
|
||||
|
||||
|
||||
The PCRE2 APIs
|
||||
--------------
|
||||
|
||||
PCRE2 is written in C, and it has its own API. There are three sets of
|
||||
functions, one for the 8-bit library, which processes strings of bytes, one for
|
||||
the 16-bit library, which processes strings of 16-bit values, and one for the
|
||||
32-bit library, which processes strings of 32-bit values. There are no C++
|
||||
wrappers.
|
||||
|
||||
The distribution does contain a set of C wrapper functions for the 8-bit
|
||||
library that are based on the POSIX regular expression API (see the pcre2posix
|
||||
man page). These can be found in a library called libpcre2posix. Note that this
|
||||
just provides a POSIX calling interface to PCRE2; the regular expressions
|
||||
themselves still follow Perl syntax and semantics. The POSIX API is restricted,
|
||||
and does not give full access to all of PCRE2's facilities.
|
||||
|
||||
The header file for the POSIX-style functions is called pcre2posix.h. The
|
||||
official POSIX name is regex.h, but I did not want to risk possible problems
|
||||
with existing files of that name by distributing it that way. To use PCRE2 with
|
||||
an existing program that uses the POSIX API, pcre2posix.h will have to be
|
||||
renamed or pointed at by a link.
|
||||
|
||||
If you are using the POSIX interface to PCRE2 and there is already a POSIX
|
||||
regex library installed on your system, as well as worrying about the regex.h
|
||||
header file (as mentioned above), you must also take care when linking programs
|
||||
to ensure that they link with PCRE2's libpcre2posix library. Otherwise they may
|
||||
pick up the POSIX functions of the same name from the other library.
|
||||
|
||||
One way of avoiding this confusion is to compile PCRE2 with the addition of
|
||||
-Dregcomp=PCRE2regcomp (and similarly for the other POSIX functions) to the
|
||||
compiler flags (CFLAGS if you are using "configure" -- see below). This has the
|
||||
effect of renaming the functions so that the names no longer clash. Of course,
|
||||
you have to do the same thing for your applications, or write them using the
|
||||
new names.
|
||||
|
||||
|
||||
Documentation for PCRE2
|
||||
-----------------------
|
||||
|
||||
If you install PCRE2 in the normal way on a Unix-like system, you will end up
|
||||
with a set of man pages whose names all start with "pcre2". The one that is
|
||||
just called "pcre2" lists all the others. In addition to these man pages, the
|
||||
PCRE2 documentation is supplied in two other forms:
|
||||
|
||||
1. There are files called doc/pcre2.txt, doc/pcre2grep.txt, and
|
||||
doc/pcre2test.txt in the source distribution. The first of these is a
|
||||
concatenation of the text forms of all the section 3 man pages except the
|
||||
listing of pcre2demo.c and those that summarize individual functions. The
|
||||
other two are the text forms of the section 1 man pages for the pcre2grep
|
||||
and pcre2test commands. These text forms are provided for ease of scanning
|
||||
with text editors or similar tools. They are installed in
|
||||
<prefix>/share/doc/pcre2, where <prefix> is the installation prefix
|
||||
(defaulting to /usr/local).
|
||||
|
||||
2. A set of files containing all the documentation in HTML form, hyperlinked
|
||||
in various ways, and rooted in a file called index.html, is distributed in
|
||||
doc/html and installed in <prefix>/share/doc/pcre2/html.
|
||||
|
||||
|
||||
Building PCRE2 on non-Unix-like systems
|
||||
---------------------------------------
|
||||
|
||||
For a non-Unix-like system, please read the comments in the file
|
||||
NON-AUTOTOOLS-BUILD, though if your system supports the use of "configure" and
|
||||
"make" you may be able to build PCRE2 using autotools in the same way as for
|
||||
many Unix-like systems.
|
||||
|
||||
PCRE2 can also be configured using CMake, which can be run in various ways
|
||||
(command line, GUI, etc). This creates Makefiles, solution files, etc. The file
|
||||
NON-AUTOTOOLS-BUILD has information about CMake.
|
||||
|
||||
PCRE2 has been compiled on many different operating systems. It should be
|
||||
straightforward to build PCRE2 on any system that has a Standard C compiler and
|
||||
library, because it uses only Standard C functions.
|
||||
|
||||
|
||||
Building PCRE2 without using autotools
|
||||
--------------------------------------
|
||||
|
||||
The use of autotools (in particular, libtool) is problematic in some
|
||||
environments, even some that are Unix or Unix-like. See the NON-AUTOTOOLS-BUILD
|
||||
file for ways of building PCRE2 without using autotools.
|
||||
|
||||
|
||||
Building PCRE2 using autotools
|
||||
------------------------------
|
||||
|
||||
The following instructions assume the use of the widely used "configure; make;
|
||||
make install" (autotools) process.
|
||||
|
||||
To build PCRE2 on a system that supports autotools, first run the "configure"
|
||||
command from the PCRE2 distribution directory, with your current directory set
|
||||
to the directory where you want the files to be created. This command is a
|
||||
standard GNU "autoconf" configuration script, for which generic instructions
|
||||
are supplied in the file INSTALL.
|
||||
|
||||
Most commonly, people build PCRE2 within its own distribution directory, and in
|
||||
this case, on many systems, just running "./configure" is sufficient. However,
|
||||
the usual methods of changing standard defaults are available. For example:
|
||||
|
||||
CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local
|
||||
|
||||
This command specifies that the C compiler should be run with the flags '-O2
|
||||
-Wall' instead of the default, and that "make install" should install PCRE2
|
||||
under /opt/local instead of the default /usr/local.
|
||||
|
||||
If you want to build in a different directory, just run "configure" with that
|
||||
directory as current. For example, suppose you have unpacked the PCRE2 source
|
||||
into /source/pcre2/pcre2-xxx, but you want to build it in
|
||||
/build/pcre2/pcre2-xxx:
|
||||
|
||||
cd /build/pcre2/pcre2-xxx
|
||||
/source/pcre2/pcre2-xxx/configure
|
||||
|
||||
PCRE2 is written in C and is normally compiled as a C library. However, it is
|
||||
possible to build it as a C++ library, though the provided building apparatus
|
||||
does not have any features to support this.
|
||||
|
||||
There are some optional features that can be included or omitted from the PCRE2
|
||||
library. They are also documented in the pcre2build man page.
|
||||
|
||||
. By default, both shared and static libraries are built. You can change this
|
||||
by adding one of these options to the "configure" command:
|
||||
|
||||
--disable-shared
|
||||
--disable-static
|
||||
|
||||
(See also "Shared libraries" below.)
|
||||
|
||||
. By default, only the 8-bit library is built. If you add --enable-pcre2-16 to
|
||||
the "configure" command, the 16-bit library is also built. If you add
|
||||
--enable-pcre2-32 to the "configure" command, the 32-bit library is also
|
||||
built. If you want only the 16-bit or 32-bit library, use --disable-pcre2-8
|
||||
to disable building the 8-bit library.
|
||||
|
||||
. If you want to include support for just-in-time compiling, which can give
|
||||
large performance improvements on certain platforms, add --enable-jit to the
|
||||
"configure" command. This support is available only for certain hardware
|
||||
architectures. If you try to enable it on an unsupported architecture, there
|
||||
will be a compile time error.
|
||||
|
||||
. When JIT support is enabled, pcre2grep automatically makes use of it, unless
|
||||
you add --disable-pcre2grep-jit to the "configure" command.
|
||||
|
||||
. If you do not want to make use of the support for UTF-8 Unicode character
|
||||
strings in the 8-bit library, UTF-16 Unicode character strings in the 16-bit
|
||||
library, or UTF-32 Unicode character strings in the 32-bit library, you can
|
||||
add --disable-unicode to the "configure" command. This reduces the size of
|
||||
the libraries. It is not possible to configure one library with Unicode
|
||||
support, and another without, in the same configuration.
|
||||
|
||||
When Unicode support is available, the use of a UTF encoding still has to be
|
||||
enabled by setting the PCRE2_UTF option at run time or starting a pattern
|
||||
with (*UTF). When PCRE2 is compiled with Unicode support, its input can only
|
||||
be either ASCII or UTF-8/16/32, even when running on EBCDIC platforms. It is
|
||||
not possible to use both --enable-unicode and --enable-ebcdic at the same
|
||||
time.
|
||||
|
||||
As well as supporting UTF strings, Unicode support includes support for the
|
||||
\P, \p, and \X sequences that recognize Unicode character properties.
|
||||
However, only the basic two-letter properties such as Lu are supported.
|
||||
Escape sequences such as \d and \w in patterns do not by default make use of
|
||||
Unicode properties, but can be made to do so by setting the PCRE2_UCP option
|
||||
or starting a pattern with (*UCP).
|
||||
|
||||
. You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any
|
||||
of the preceding, or any of the Unicode newline sequences, as indicating the
|
||||
end of a line. Whatever you specify at build time is the default; the caller
|
||||
of PCRE2 can change the selection at run time. The default newline indicator
|
||||
is a single LF character (the Unix standard). You can specify the default
|
||||
newline indicator by adding --enable-newline-is-cr, --enable-newline-is-lf,
|
||||
--enable-newline-is-crlf, --enable-newline-is-anycrlf, or
|
||||
--enable-newline-is-any to the "configure" command, respectively.
|
||||
|
||||
If you specify --enable-newline-is-cr or --enable-newline-is-crlf, some of
|
||||
the standard tests will fail, because the lines in the test files end with
|
||||
LF. Even if the files are edited to change the line endings, there are likely
|
||||
to be some failures. With --enable-newline-is-anycrlf or
|
||||
--enable-newline-is-any, many tests should succeed, but there may be some
|
||||
failures.
|
||||
|
||||
. By default, the sequence \R in a pattern matches any Unicode line ending
|
||||
sequence. This is independent of the option specifying what PCRE2 considers
|
||||
to be the end of a line (see above). However, the caller of PCRE2 can
|
||||
restrict \R to match only CR, LF, or CRLF. You can make this the default by
|
||||
adding --enable-bsr-anycrlf to the "configure" command (bsr = "backslash R").
|
||||
|
||||
. PCRE2 has a counter that limits the depth of nesting of parentheses in a
|
||||
pattern. This limits the amount of system stack that a pattern uses when it
|
||||
is compiled. The default is 250, but you can change it by setting, for
|
||||
example,
|
||||
|
||||
--with-parens-nest-limit=500
|
||||
|
||||
. PCRE2 has a counter that can be set to limit the amount of resources it uses
|
||||
when matching a pattern. If the limit is exceeded during a match, the match
|
||||
fails. The default is ten million. You can change the default by setting, for
|
||||
example,
|
||||
|
||||
--with-match-limit=500000
|
||||
|
||||
on the "configure" command. This is just the default; individual calls to
|
||||
pcre2_match() can supply their own value. There is more discussion on the
|
||||
pcre2api man page.
|
||||
|
||||
. There is a separate counter that limits the depth of recursive function calls
|
||||
during a matching process. This also has a default of ten million, which is
|
||||
essentially "unlimited". You can change the default by setting, for example,
|
||||
|
||||
--with-match-limit-recursion=500000
|
||||
|
||||
Recursive function calls use up the runtime stack; running out of stack can
|
||||
cause programs to crash in strange ways. There is a discussion about stack
|
||||
sizes in the pcre2stack man page.
|
||||
|
||||
. In the 8-bit library, the default maximum compiled pattern size is around
|
||||
64K. You can increase this by adding --with-link-size=3 to the "configure"
|
||||
command. PCRE2 then uses three bytes instead of two for offsets to different
|
||||
parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
|
||||
the same as --with-link-size=4, which (in both libraries) uses four-byte
|
||||
offsets. Increasing the internal link size reduces performance in the 8-bit
|
||||
and 16-bit libraries. In the 32-bit library, the link size setting is
|
||||
ignored, as 4-byte offsets are always used.
|
||||
|
||||
. You can build PCRE2 so that its internal match() function that is called from
|
||||
pcre2_match() does not call itself recursively. Instead, it uses memory
|
||||
blocks obtained from the heap to save data that would otherwise be saved on
|
||||
the stack. To build PCRE2 like this, use
|
||||
|
||||
--disable-stack-for-recursion
|
||||
|
||||
on the "configure" command. PCRE2 runs more slowly in this mode, but it may
|
||||
be necessary in environments with limited stack sizes. This applies only to
|
||||
the normal execution of the pcre2_match() function; if JIT support is being
|
||||
successfully used, it is not relevant. Equally, it does not apply to
|
||||
pcre2_dfa_match(), which does not use deeply nested recursion. There is a
|
||||
discussion about stack sizes in the pcre2stack man page.
|
||||
|
||||
. For speed, PCRE2 uses four tables for manipulating and identifying characters
|
||||
whose code point values are less than 256. By default, it uses a set of
|
||||
tables for ASCII encoding that is part of the distribution. If you specify
|
||||
|
||||
--enable-rebuild-chartables
|
||||
|
||||
a program called dftables is compiled and run in the default C locale when
|
||||
you obey "make". It builds a source file called pcre2_chartables.c. If you do
|
||||
not specify this option, pcre2_chartables.c is created as a copy of
|
||||
pcre2_chartables.c.dist. See "Character tables" below for further
|
||||
information.
|
||||
|
||||
. It is possible to compile PCRE2 for use on systems that use EBCDIC as their
|
||||
character code (as opposed to ASCII/Unicode) by specifying
|
||||
|
||||
--enable-ebcdic --disable-unicode
|
||||
|
||||
This automatically implies --enable-rebuild-chartables (see above). However,
|
||||
when PCRE2 is built this way, it always operates in EBCDIC. It cannot support
|
||||
both EBCDIC and UTF-8/16/32. There is a second option, --enable-ebcdic-nl25,
|
||||
which specifies that the code value for the EBCDIC NL character is 0x25
|
||||
instead of the default 0x15.
|
||||
|
||||
. If you specify --enable-debug, additional debugging code is included in the
|
||||
build. This option is intended for use by the PCRE2 maintainers.
|
||||
|
||||
. In environments where valgrind is installed, if you specify
|
||||
|
||||
--enable-valgrind
|
||||
|
||||
PCRE2 will use valgrind annotations to mark certain memory regions as
|
||||
unaddressable. This allows it to detect invalid memory accesses, and is
|
||||
mostly useful for debugging PCRE2 itself.
|
||||
|
||||
. In environments where the gcc compiler is used and lcov version 1.6 or above
|
||||
is installed, if you specify
|
||||
|
||||
--enable-coverage
|
||||
|
||||
the build process implements a code coverage report for the test suite. The
|
||||
report is generated by running "make coverage". If ccache is installed on
|
||||
your system, it must be disabled when building PCRE2 for coverage reporting.
|
||||
You can do this by setting the environment variable CCACHE_DISABLE=1 before
|
||||
running "make" to build PCRE2. There is more information about coverage
|
||||
reporting in the "pcre2build" documentation.
|
||||
|
||||
. The pcre2grep program currently supports only 8-bit data files, and so
|
||||
requires the 8-bit PCRE2 library. It is possible to compile pcre2grep to use
|
||||
libz and/or libbz2, in order to read .gz and .bz2 files (respectively), by
|
||||
specifying one or both of
|
||||
|
||||
--enable-pcre2grep-libz
|
||||
--enable-pcre2grep-libbz2
|
||||
|
||||
Of course, the relevant libraries must be installed on your system.
|
||||
|
||||
. The default size (in bytes) of the internal buffer used by pcre2grep can be
|
||||
set by, for example:
|
||||
|
||||
--with-pcre2grep-bufsize=51200
|
||||
|
||||
The value must be a plain integer. The default is 20480.
|
||||
|
||||
. It is possible to compile pcre2test so that it links with the libreadline
|
||||
or libedit libraries, by specifying, respectively,
|
||||
|
||||
--enable-pcre2test-libreadline or --enable-pcre2test-libedit
|
||||
|
||||
If this is done, when pcre2test's input is from a terminal, it reads it using
|
||||
the readline() function. This provides line-editing and history facilities.
|
||||
Note that libreadline is GPL-licenced, so if you distribute a binary of
|
||||
pcre2test linked in this way, there may be licensing issues. These can be
|
||||
avoided by linking with libedit (which has a BSD licence) instead.
|
||||
|
||||
Enabling libreadline causes the -lreadline option to be added to the
|
||||
pcre2test build. In many operating environments with a system-installed
|
||||
readline library this is sufficient. However, in some environments (e.g. if
|
||||
an unmodified distribution version of readline is in use), it may be
|
||||
necessary to specify something like LIBS="-lncurses" as well. This is
|
||||
because, to quote the readline INSTALL, "Readline uses the termcap functions,
|
||||
but does not link with the termcap or curses library itself, allowing
|
||||
applications which link with readline the to choose an appropriate library."
|
||||
If you get error messages about missing functions tgetstr, tgetent, tputs,
|
||||
tgetflag, or tgoto, this is the problem, and linking with the ncurses library
|
||||
should fix it.
|
||||
|
||||
The "configure" script builds the following files for the basic C library:
|
||||
|
||||
. Makefile the makefile that builds the library
|
||||
. src/config.h build-time configuration options for the library
|
||||
. src/pcre2.h the public PCRE2 header file
|
||||
. pcre2-config script that shows the building settings such as CFLAGS
|
||||
that were set for "configure"
|
||||
. libpcre2-8.pc )
|
||||
. libpcre2-16.pc ) data for the pkg-config command
|
||||
. libpcre2-32.pc )
|
||||
. libpcre2-posix.pc )
|
||||
. libtool script that builds shared and/or static libraries
|
||||
|
||||
Versions of config.h and pcre2.h are distributed in the src directory of PCRE2
|
||||
tarballs under the names config.h.generic and pcre2.h.generic. These are
|
||||
provided for those who have to build PCRE2 without using "configure" or CMake.
|
||||
If you use "configure" or CMake, the .generic versions are not used.
|
||||
|
||||
The "configure" script also creates config.status, which is an executable
|
||||
script that can be run to recreate the configuration, and config.log, which
|
||||
contains compiler output from tests that "configure" runs.
|
||||
|
||||
Once "configure" has run, you can run "make". This builds whichever of the
|
||||
libraries libpcre2-8, libpcre2-16 and libpcre2-32 are configured, and a test
|
||||
program called pcre2test. If you enabled JIT support with --enable-jit, another
|
||||
test program called pcre2_jit_test is built as well. If the 8-bit library is
|
||||
built, libpcre2-posix and the pcre2grep command are also built. Running
|
||||
"make" with the -j option may speed up compilation on multiprocessor systems.
|
||||
|
||||
The command "make check" runs all the appropriate tests. Details of the PCRE2
|
||||
tests are given below in a separate section of this document. The -j option of
|
||||
"make" can also be used when running the tests.
|
||||
|
||||
You can use "make install" to install PCRE2 into live directories on your
|
||||
system. The following are installed (file names are all relative to the
|
||||
<prefix> that is set when "configure" is run):
|
||||
|
||||
Commands (bin):
|
||||
pcre2test
|
||||
pcre2grep (if 8-bit support is enabled)
|
||||
pcre2-config
|
||||
|
||||
Libraries (lib):
|
||||
libpcre2-8 (if 8-bit support is enabled)
|
||||
libpcre2-16 (if 16-bit support is enabled)
|
||||
libpcre2-32 (if 32-bit support is enabled)
|
||||
libpcre2-posix (if 8-bit support is enabled)
|
||||
|
||||
Configuration information (lib/pkgconfig):
|
||||
libpcre2-8.pc
|
||||
libpcre2-16.pc
|
||||
libpcre2-32.pc
|
||||
libpcre2-posix.pc
|
||||
|
||||
Header files (include):
|
||||
pcre2.h
|
||||
pcre2posix.h
|
||||
|
||||
Man pages (share/man/man{1,3}):
|
||||
pcre2grep.1
|
||||
pcre2test.1
|
||||
pcre2-config.1
|
||||
pcre2.3
|
||||
pcre2*.3 (lots more pages, all starting "pcre2")
|
||||
|
||||
HTML documentation (share/doc/pcre2/html):
|
||||
index.html
|
||||
*.html (lots more pages, hyperlinked from index.html)
|
||||
|
||||
Text file documentation (share/doc/pcre2):
|
||||
AUTHORS
|
||||
COPYING
|
||||
ChangeLog
|
||||
LICENCE
|
||||
NEWS
|
||||
README
|
||||
pcre2.txt (a concatenation of the man(3) pages)
|
||||
pcre2test.txt the pcre2test man page
|
||||
pcre2grep.txt the pcre2grep man page
|
||||
pcre2-config.txt the pcre2-config man page
|
||||
|
||||
If you want to remove PCRE2 from your system, you can run "make uninstall".
|
||||
This removes all the files that "make install" installed. However, it does not
|
||||
remove any directories, because these are often shared with other programs.
|
||||
|
||||
|
||||
Retrieving configuration information
|
||||
------------------------------------
|
||||
|
||||
Running "make install" installs the command pcre2-config, which can be used to
|
||||
recall information about the PCRE2 configuration and installation. For example:
|
||||
|
||||
pcre2-config --version
|
||||
|
||||
prints the version number, and
|
||||
|
||||
pcre2-config --libs8
|
||||
|
||||
outputs information about where the 8-bit library is installed. This command
|
||||
can be included in makefiles for programs that use PCRE2, saving the programmer
|
||||
from having to remember too many details. Run pcre2-config with no arguments to
|
||||
obtain a list of possible arguments.
|
||||
|
||||
The pkg-config command is another system for saving and retrieving information
|
||||
about installed libraries. Instead of separate commands for each library, a
|
||||
single command is used. For example:
|
||||
|
||||
pkg-config --libs libpcre2-16
|
||||
|
||||
The data is held in *.pc files that are installed in a directory called
|
||||
<prefix>/lib/pkgconfig.
|
||||
|
||||
|
||||
Shared libraries
|
||||
----------------
|
||||
|
||||
The default distribution builds PCRE2 as shared libraries and static libraries,
|
||||
as long as the operating system supports shared libraries. Shared library
|
||||
support relies on the "libtool" script which is built as part of the
|
||||
"configure" process.
|
||||
|
||||
The libtool script is used to compile and link both shared and static
|
||||
libraries. They are placed in a subdirectory called .libs when they are newly
|
||||
built. The programs pcre2test and pcre2grep are built to use these uninstalled
|
||||
libraries (by means of wrapper scripts in the case of shared libraries). When
|
||||
you use "make install" to install shared libraries, pcre2grep and pcre2test are
|
||||
automatically re-built to use the newly installed shared libraries before being
|
||||
installed themselves. However, the versions left in the build directory still
|
||||
use the uninstalled libraries.
|
||||
|
||||
To build PCRE2 using static libraries only you must use --disable-shared when
|
||||
configuring it. For example:
|
||||
|
||||
./configure --prefix=/usr/gnu --disable-shared
|
||||
|
||||
Then run "make" in the usual way. Similarly, you can use --disable-static to
|
||||
build only shared libraries.
|
||||
|
||||
|
||||
Cross-compiling using autotools
|
||||
-------------------------------
|
||||
|
||||
You can specify CC and CFLAGS in the normal way to the "configure" command, in
|
||||
order to cross-compile PCRE2 for some other host. However, you should NOT
|
||||
specify --enable-rebuild-chartables, because if you do, the dftables.c source
|
||||
file is compiled and run on the local host, in order to generate the inbuilt
|
||||
character tables (the pcre2_chartables.c file). This will probably not work,
|
||||
because dftables.c needs to be compiled with the local compiler, not the cross
|
||||
compiler.
|
||||
|
||||
When --enable-rebuild-chartables is not specified, pcre2_chartables.c is
|
||||
created by making a copy of pcre2_chartables.c.dist, which is a default set of
|
||||
tables that assumes ASCII code. Cross-compiling with the default tables should
|
||||
not be a problem.
|
||||
|
||||
If you need to modify the character tables when cross-compiling, you should
|
||||
move pcre2_chartables.c.dist out of the way, then compile dftables.c by hand
|
||||
and run it on the local host to make a new version of pcre2_chartables.c.dist.
|
||||
Then when you cross-compile PCRE2 this new version of the tables will be used.
|
||||
|
||||
|
||||
Making new tarballs
|
||||
-------------------
|
||||
|
||||
The command "make dist" creates three PCRE2 tarballs, in tar.gz, tar.bz2, and
|
||||
zip formats. The command "make distcheck" does the same, but then does a trial
|
||||
build of the new distribution to ensure that it works.
|
||||
|
||||
If you have modified any of the man page sources in the doc directory, you
|
||||
should first run the PrepareRelease script before making a distribution. This
|
||||
script creates the .txt and HTML forms of the documentation from the man pages.
|
||||
|
||||
|
||||
Testing PCRE2
|
||||
-------------
|
||||
|
||||
To test the basic PCRE2 library on a Unix-like system, run the RunTest script.
|
||||
There is another script called RunGrepTest that tests the pcre2grep command.
|
||||
When JIT support is enabled, a third test program called pcre2_jit_test is
|
||||
built. Both the scripts and all the program tests are run if you obey "make
|
||||
check". For other environments, see the instructions in NON-AUTOTOOLS-BUILD.
|
||||
|
||||
The RunTest script runs the pcre2test test program (which is documented in its
|
||||
own man page) on each of the relevant testinput files in the testdata
|
||||
directory, and compares the output with the contents of the corresponding
|
||||
testoutput files. RunTest uses a file called testtry to hold the main output
|
||||
from pcre2test. Other files whose names begin with "test" are used as working
|
||||
files in some tests.
|
||||
|
||||
Some tests are relevant only when certain build-time options were selected. For
|
||||
example, the tests for UTF-8/16/32 features are run only when Unicode support
|
||||
is available. RunTest outputs a comment when it skips a test.
|
||||
|
||||
Many (but not all) of the tests that are not skipped are run twice if JIT
|
||||
support is available. On the second run, JIT compilation is forced. This
|
||||
testing can be suppressed by putting "nojit" on the RunTest command line.
|
||||
|
||||
The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
|
||||
libraries that are enabled. If you want to run just one set of tests, call
|
||||
RunTest with either the -8, -16 or -32 option.
|
||||
|
||||
If valgrind is installed, you can run the tests under it by putting "valgrind"
|
||||
on the RunTest command line. To run pcre2test on just one or more specific test
|
||||
files, give their numbers as arguments to RunTest, for example:
|
||||
|
||||
RunTest 2 7 11
|
||||
|
||||
You can also specify ranges of tests such as 3-6 or 3- (meaning 3 to the
|
||||
end), or a number preceded by ~ to exclude a test. For example:
|
||||
|
||||
RunTest 3-15 ~10
|
||||
|
||||
This runs tests 3 to 15, excluding test 10, and just ~13 runs all the tests
|
||||
except test 13. Whatever order the arguments are in, the tests are always run
|
||||
in numerical order.
|
||||
|
||||
You can also call RunTest with the single argument "list" to cause it to output
|
||||
a list of tests.
|
||||
|
||||
The test sequence starts with "test 0", which is a special test that has no
|
||||
input file, and whose output is not checked. This is because it will be
|
||||
different on different hardware and with different configurations. The test
|
||||
exists in order to exercise some of pcre2test's code that would not otherwise
|
||||
be run.
|
||||
|
||||
Tests 1 and 2 can always be run, as they expect only plain text strings (not
|
||||
UTF) and make no use of Unicode properties. The first test file can be fed
|
||||
directly into the perltest.sh script to check that Perl gives the same results.
|
||||
The only difference you should see is in the first few lines, where the Perl
|
||||
version is given instead of the PCRE2 version. The second set of tests checks
|
||||
auxiliary functions, error detection, and run-time flags that are specific to
|
||||
PCRE2. It also uses the debugging flags to check some of the internals of
|
||||
pcre2_compile().
|
||||
|
||||
If you build PCRE2 with a locale setting that is not the standard C locale, the
|
||||
character tables may be different (see next paragraph). In some cases, this may
|
||||
cause failures in the second set of tests. For example, in a locale where the
|
||||
isprint() function yields TRUE for characters in the range 128-255, the use of
|
||||
[:isascii:] inside a character class defines a different set of characters, and
|
||||
this shows up in this test as a difference in the compiled code, which is being
|
||||
listed for checking. For example, where the comparison test output contains
|
||||
[\x00-\x7f] the test might contain [\x00-\xff], and similarly in some other
|
||||
cases. This is not a bug in PCRE2.
|
||||
|
||||
Test 3 checks pcre2_maketables(), the facility for building a set of character
|
||||
tables for a specific locale and using them instead of the default tables. The
|
||||
script uses the "locale" command to check for the availability of the "fr_FR",
|
||||
"french", or "fr" locale, and uses the first one that it finds. If the "locale"
|
||||
command fails, or if its output doesn't include "fr_FR", "french", or "fr" in
|
||||
the list of available locales, the third test cannot be run, and a comment is
|
||||
output to say why. If running this test produces an error like this:
|
||||
|
||||
** Failed to set locale "fr_FR"
|
||||
|
||||
it means that the given locale is not available on your system, despite being
|
||||
listed by "locale". This does not mean that PCRE2 is broken. There are three
|
||||
alternative output files for the third test, because three different versions
|
||||
of the French locale have been encountered. The test passes if its output
|
||||
matches any one of them.
|
||||
|
||||
Tests 4 and 5 check UTF and Unicode property support, test 4 being compatible
|
||||
with the perltest.sh script, and test 5 checking PCRE2-specific things.
|
||||
|
||||
Tests 6 and 7 check the pcre2_dfa_match() alternative matching function, in
|
||||
non-UTF mode and UTF-mode with Unicode property support, respectively.
|
||||
|
||||
Test 8 checks some internal offsets and code size features; it is run only when
|
||||
the default "link size" of 2 is set (in other cases the sizes change) and when
|
||||
Unicode support is enabled.
|
||||
|
||||
Tests 9 and 10 are run only in 8-bit mode, and tests 11 and 12 are run only in
|
||||
16-bit and 32-bit modes. These are tests that generate different output in
|
||||
8-bit mode. Each pair are for general cases and Unicode support, respectively.
|
||||
Test 13 checks the handling of non-UTF characters greater than 255 by
|
||||
pcre2_dfa_match() in 16-bit and 32-bit modes.
|
||||
|
||||
Test 14 contains a number of tests that must not be run with JIT. They check,
|
||||
among other non-JIT things, the match-limiting features of the interpretive
|
||||
matcher.
|
||||
|
||||
Test 15 is run only when JIT support is not available. It checks that an
|
||||
attempt to use JIT has the expected behaviour.
|
||||
|
||||
Test 16 is run only when JIT support is available. It checks JIT complete and
|
||||
partial modes, match-limiting under JIT, and other JIT-specific features.
|
||||
|
||||
Tests 17 and 18 are run only in 8-bit mode. They check the POSIX interface to
|
||||
the 8-bit library, without and with Unicode support, respectively.
|
||||
|
||||
Test 19 checks the serialization functions by writing a set of compiled
|
||||
patterns to a file, and then reloading and checking them.
|
||||
|
||||
|
||||
Character tables
|
||||
----------------
|
||||
|
||||
For speed, PCRE2 uses four tables for manipulating and identifying characters
|
||||
whose code point values are less than 256. By default, a set of tables that is
|
||||
built into the library is used. The pcre2_maketables() function can be called
|
||||
by an application to create a new set of tables in the current locale. These are
|
||||
passed to PCRE2 by calling pcre2_set_character_tables() to put a pointer into a
|
||||
compile context.
|
||||
|
||||
The source file called pcre2_chartables.c contains the default set of tables.
|
||||
By default, this is created as a copy of pcre2_chartables.c.dist, which
|
||||
contains tables for ASCII coding. However, if --enable-rebuild-chartables is
|
||||
specified for ./configure, a different version of pcre2_chartables.c is built
|
||||
by the program dftables (compiled from dftables.c), which uses the ANSI C
|
||||
character handling functions such as isalnum(), isalpha(), isupper(),
|
||||
islower(), etc. to build the table sources. This means that the default C
|
||||
locale which is set for your system will control the contents of these default
|
||||
tables. You can change the default tables by editing pcre2_chartables.c and
|
||||
then re-building PCRE2. If you do this, you should take care to ensure that the
|
||||
file does not get automatically re-generated. The best way to do this is to
|
||||
move pcre2_chartables.c.dist out of the way and replace it with your customized
|
||||
tables.
|
||||
|
||||
When the dftables program is run as a result of --enable-rebuild-chartables,
|
||||
it uses the default C locale that is set on your system. It does not pay
|
||||
attention to the LC_xxx environment variables. In other words, it uses the
|
||||
system's default locale rather than whatever the compiling user happens to have
|
||||
set. If you really do want to build a source set of character tables in a
|
||||
locale that is specified by the LC_xxx variables, you can run the dftables
|
||||
program by hand with the -L option. For example:
|
||||
|
||||
./dftables -L pcre2_chartables.c.special
|
||||
|
||||
The first two 256-byte tables provide lower casing and case flipping functions,
|
||||
respectively. The next table consists of three 32-byte bit maps which identify
|
||||
digits, "word" characters, and white space, respectively. These are used when
|
||||
building 32-byte bit maps that represent character classes for code points less
|
||||
than 256. The final 256-byte table has bits indicating various character types,
|
||||
as follows:
|
||||
|
||||
1 white space character
|
||||
2 letter
|
||||
4 decimal digit
|
||||
8 hexadecimal digit
|
||||
16 alphanumeric or '_'
|
||||
128 regular expression metacharacter or binary zero
|
||||
|
||||
You should not alter the set of characters that contain the 128 bit, as that
|
||||
will cause PCRE2 to malfunction.
|
||||
|
||||
|
||||
File manifest
|
||||
-------------
|
||||
|
||||
The distribution should contain the files listed below.
|
||||
|
||||
(A) Source files for the PCRE2 library functions and their headers are found in
|
||||
the src directory:
|
||||
|
||||
src/dftables.c auxiliary program for building pcre2_chartables.c
|
||||
when --enable-rebuild-chartables is specified
|
||||
|
||||
src/pcre2_chartables.c.dist a default set of character tables that assume
|
||||
ASCII coding; unless --enable-rebuild-chartables is
|
||||
specified, used by copying to pcre2_chartables.c
|
||||
|
||||
src/pcre2posix.c )
|
||||
src/pcre2_auto_possess.c )
|
||||
src/pcre2_compile.c )
|
||||
src/pcre2_config.c )
|
||||
src/pcre2_context.c )
|
||||
src/pcre2_dfa_match.c )
|
||||
src/pcre2_error.c )
|
||||
src/pcre2_jit_compile.c )
|
||||
src/pcre2_jit_match.c ) sources for the functions in the library,
|
||||
src/pcre2_jit_misc.c ) and some internal functions that they use
|
||||
src/pcre2_maketables.c )
|
||||
src/pcre2_match.c )
|
||||
src/pcre2_match_data.c )
|
||||
src/pcre2_newline.c )
|
||||
src/pcre2_ord2utf.c )
|
||||
src/pcre2_pattern_info.c )
|
||||
src/pcre2_serialize.c )
|
||||
src/pcre2_string_utils.c )
|
||||
src/pcre2_study.c )
|
||||
src/pcre2_substitute.c )
|
||||
src/pcre2_substring.c )
|
||||
src/pcre2_tables.c )
|
||||
src/pcre2_ucd.c )
|
||||
src/pcre2_valid_utf.c )
|
||||
src/pcre2_xclass.c )
|
||||
|
||||
src/pcre2_printint.c debugging function that is used by pcre2test
|
||||
|
||||
src/config.h.in template for config.h, when built by "configure"
|
||||
src/pcre2.h.in template for pcre2.h when built by "configure"
|
||||
src/pcre2posix.h header for the external POSIX wrapper API
|
||||
src/pcre2_internal.h header for internal use
|
||||
src/pcre2_intmodedep.h a mode-specific internal header
|
||||
src/pcre2_ucp.h header for Unicode property handling
|
||||
|
||||
sljit/* source files for the JIT compiler
|
||||
|
||||
(B) Source files for programs that use PCRE2:
|
||||
|
||||
src/pcre2demo.c simple demonstration of coding calls to PCRE2
|
||||
src/pcre2grep.c source of a grep utility that uses PCRE2
|
||||
src/pcre2test.c comprehensive test program
|
||||
src/pcre2_printint.c part of pcre2test
|
||||
src/pcre2_jit_test.c JIT test program
|
||||
|
||||
(C) Auxiliary files:
|
||||
|
||||
132html script to turn "man" pages into HTML
|
||||
AUTHORS information about the author of PCRE2
|
||||
ChangeLog log of changes to the code
|
||||
CleanTxt script to clean nroff output for txt man pages
|
||||
Detrail script to remove trailing spaces
|
||||
HACKING some notes about the internals of PCRE2
|
||||
INSTALL generic installation instructions
|
||||
LICENCE conditions for the use of PCRE2
|
||||
COPYING the same, using GNU's standard name
|
||||
Makefile.in ) template for Unix Makefile, which is built by
|
||||
) "configure"
|
||||
Makefile.am ) the automake input that was used to create
|
||||
) Makefile.in
|
||||
NEWS important changes in this release
|
||||
NON-AUTOTOOLS-BUILD notes on building PCRE2 without using autotools
|
||||
PrepareRelease script to make preparations for "make dist"
|
||||
README this file
|
||||
RunTest a Unix shell script for running tests
|
||||
RunGrepTest a Unix shell script for pcre2grep tests
|
||||
aclocal.m4 m4 macros (generated by "aclocal")
|
||||
config.guess ) files used by libtool,
|
||||
config.sub ) used only when building a shared library
|
||||
configure a configuring shell script (built by autoconf)
|
||||
configure.ac ) the autoconf input that was used to build
|
||||
) "configure" and config.h
|
||||
depcomp ) script to find program dependencies, generated by
|
||||
) automake
|
||||
doc/*.3 man page sources for PCRE2
|
||||
doc/*.1 man page sources for pcre2grep and pcre2test
|
||||
doc/index.html.src the base HTML page
|
||||
doc/html/* HTML documentation
|
||||
doc/pcre2.txt plain text version of the man pages
|
||||
doc/pcre2test.txt plain text documentation of test program
|
||||
install-sh a shell script for installing files
|
||||
libpcre2-8.pc.in template for libpcre2-8.pc for pkg-config
|
||||
libpcre2-16.pc.in template for libpcre2-16.pc for pkg-config
|
||||
libpcre2-32.pc.in template for libpcre2-32.pc for pkg-config
|
||||
libpcre2posix.pc.in template for libpcre2posix.pc for pkg-config
|
||||
ltmain.sh file used to build a libtool script
|
||||
missing ) common stub for a few missing GNU programs while
|
||||
) installing, generated by automake
|
||||
mkinstalldirs script for making install directories
|
||||
perltest.sh Script for running a Perl test program
|
||||
pcre2-config.in source of script which retains PCRE2 information
|
||||
testdata/testinput* test data for main library tests
|
||||
testdata/testoutput* expected test results
|
||||
testdata/grep* input and output for pcre2grep tests
|
||||
testdata/* other supporting test files
|
||||
|
||||
(D) Auxiliary files for cmake support
|
||||
|
||||
cmake/COPYING-CMAKE-SCRIPTS
|
||||
cmake/FindPackageHandleStandardArgs.cmake
|
||||
cmake/FindEditline.cmake
|
||||
cmake/FindReadline.cmake
|
||||
CMakeLists.txt
|
||||
config-cmake.h.in
|
||||
|
||||
(E) Auxiliary files for building PCRE2 "by hand"
|
||||
|
||||
pcre2.h.generic ) a version of the public PCRE2 header file
|
||||
) for use in non-"configure" environments
|
||||
config.h.generic ) a version of config.h for use in non-"configure"
|
||||
) environments
|
||||
|
||||
Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Last updated: 24 April 2015
|
612
pcre2/RunGrepTest
Executable file
612
pcre2/RunGrepTest
Executable file
@ -0,0 +1,612 @@
|
||||
#! /bin/sh

# Driver for the pcre2grep tests. The PCRE2 library itself is assumed to be
# covered by the PCRE2 tests; what is exercised here is the file handling and
# the options that pcre2grep supports. This script must be run in the build
# directory.

# Drop any colour settings inherited from the caller, and unset the names cp,
# ls, mv and rm so that caller-supplied definitions of them are not picked up.

unset cp ls mv rm
unset PCRE2GREP_COLOR
unset PCRE2GREP_COLOUR

# Force the C locale, so that sort(1) behaves predictably.

LC_ALL=C
export LC_ALL
|
||||
|
||||
# Remember the current (build) directory and derive from it the path of the
# pcre2grep binary that is to be tested. Give up straight away if that binary
# is missing or cannot be executed.

builddir=`pwd`
pcre2grep=$builddir/pcre2grep

# The path is quoted here so that the check itself is well-formed even when
# the build directory name contains white space.
if [ ! -x "$pcre2grep" ] ; then
  echo "** $pcre2grep does not exist or is not executable."
  exit 1
fi
|
||||
|
||||
# Process the command line. The only recognised argument is "valgrind", which
# sets $valgrind to the command prefix used to run pcre2grep under valgrind.
# $valgrind stays empty otherwise. Any other argument is reported and ends the
# script with status 1.

valgrind=
until [ $# -eq 0 ] ; do
  case $1 in
    valgrind)
      valgrind="valgrind -q --leak-check=no --smc-check=all"
      ;;
    *)
      echo "RunGrepTest: Unknown argument $1"
      exit 1
      ;;
  esac
  shift
done
|
||||
|
||||
# Print a banner showing what "pcre2grep -V" reports, noting whether the tests
# are being run under valgrind.

echo " "
pcre2grep_version=`$pcre2grep -V`
if [ -n "$valgrind" ] ; then
  echo "Testing $pcre2grep_version using valgrind"
else
  echo "Testing $pcre2grep_version"
fi
|
||||
|
||||
# Choose the "diff" command used for comparisons and store it in $cf. Some
# systems have a diff that lacks the -u option, and -b is probed as well.
# Each candidate is tried on /dev/null; the preference order is
# "diff -ub", then "diff -u", then "diff -b", then plain "diff".

if diff -ub /dev/null /dev/null 2>/dev/null ; then
  cf="diff -ub"
elif diff -u /dev/null /dev/null 2>/dev/null ; then
  cf="diff -u"
elif diff -b /dev/null /dev/null 2>/dev/null ; then
  cf="diff -b"
else
  cf="diff"
fi
|
||||
|
||||
# If this test is being run from "make check", $srcdir will be set. If it is
# unset, or does not contain a testdata directory, set it to the current or
# parent directory, whichever one contains the test data; give up if neither
# does. Subsequently, most of the pcre2grep tests are run in the source
# directory so that the file names in the output are always the same.
#
# Two separate [ ] commands joined by || are used instead of the "-o"
# operator, which POSIX marks as obsolescent and whose parsing is unspecified
# for this many operands.

if [ -z "$srcdir" ] || [ ! -d "$srcdir/testdata" ] ; then
  if [ -d "./testdata" ] ; then
    srcdir=.
  elif [ -d "../testdata" ] ; then
    srcdir=..
  else
    echo "Cannot find the testdata directory"
    exit 1
  fi
fi

# Check for the availability of UTF-8 support. The exit status of
# "pcre2test -C unicode" is saved in $utf8; its output is discarded.

./pcre2test -C unicode >/dev/null
utf8=$?
|
||||
|
||||
# ------ Function to run and check a special pcre2grep arguments test -------
#
# Usage: checkspecial 'ARGUMENTS' EXPECTED_STATUS
#   $1  the pcre2grep arguments; expanded unquoted on purpose, so that one
#       string can carry several separate options
#   $2  the exit status that pcre2grep is expected to return
#
# Both standard output and standard error of the run are appended to
# testtrygrep. If the exit status is not $2, a message is printed and the
# whole script exits with status 1.

checkspecial()
{
  # Run the binary through $pcre2grep, as the numbered tests do, rather than
  # through a hard-coded ./pcre2grep that depends on the current directory.
  $valgrind $pcre2grep $1 >>testtrygrep 2>&1
  if [ $? -ne $2 ] ; then
    echo "** pcre2grep $1 failed - check testtrygrep"
    exit 1
  fi
}
|
||||
|
||||
# ------ Normal tests ------
|
||||
|
||||
echo "Testing pcre2grep main features"
|
||||
|
||||
echo "---------------------------- Test 1 ------------------------------" >testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep PATTERN ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 2 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep '^PATTERN' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 3 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -in PATTERN ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 4 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -ic PATTERN ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 5 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -in PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 6 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -inh PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 7 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -il PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 8 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -l PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 9 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -q PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 10 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -q NEVER-PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 11 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -vn pattern ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 12 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -ix pattern ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 13 -----------------------------" >>testtrygrep
|
||||
echo seventeen >testtemp1grep
|
||||
(cd $srcdir; $valgrind $pcre2grep -f./testdata/greplist -f $builddir/testtemp1grep ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 14 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -w pat ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 15 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep 'abc^*' ./testdata/grepinput) 2>>testtrygrep >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 16 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep abc ./testdata/grepinput ./testdata/nonexistfile) 2>>testtrygrep >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 17 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -M 'the\noutput' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 18 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -Mn '(the\noutput|dog\.\n--)' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 19 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -Mix 'Pattern' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 20 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -Mixn 'complete pair\nof lines' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 21 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -nA3 'four' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 22 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -nB3 'four' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 23 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -C3 'four' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 24 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -A9 'four' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 25 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -nB9 'four' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 26 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -A9 -B9 'four' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 27 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -A10 'four' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 28 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -nB10 'four' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 29 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -C12 -B10 'four' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 30 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -inB3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 31 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -inA3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 32 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -L 'fox' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 33 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep 'fox' ./testdata/grepnonexist) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 34 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -s 'fox' ./testdata/grepnonexist) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 35 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -L -r --include=grepinputx --include grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 36 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -L -r --include=grepinput --exclude 'grepinput$' --exclude=grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 37 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep '^(a+)*\d' ./testdata/grepinput) >>testtrygrep 2>teststderrgrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
echo "======== STDERR ========" >>testtrygrep
|
||||
cat teststderrgrep >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 38 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep '>\x00<' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 39 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -A1 'before the binary zero' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 40 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -B1 'after the binary zero' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 41 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -B1 -o '\w+ the binary zero' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 42 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -B1 -onH '\w+ the binary zero' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 43 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -on 'before|zero|after' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 44 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -on -e before -ezero -e after ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 45 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -on -f ./testdata/greplist -e binary ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 46 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -eabc -e '(unclosed' ./testdata/grepinput) 2>>testtrygrep >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 47 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -Fx "AB.VE
|
||||
elephant" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 48 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -F "AB.VE
|
||||
elephant" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 49 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -F -e DATA -e "AB.VE
|
||||
elephant" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 50 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep "^(abc|def|ghi|jkl)" ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 51 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -Mv "brown\sfox" ./testdata/grepinputv) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 52 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --colour=always jumps ./testdata/grepinputv) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 53 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --file-offsets 'before|zero|after' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 54 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --line-offsets 'before|zero|after' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 55 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -f./testdata/greplist --color=always ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 56 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -c lazy ./testdata/grepinput*) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 57 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -c -l lazy ./testdata/grepinput*) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 58 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --regex=PATTERN ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 59 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --regexp=PATTERN ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 60 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --regex PATTERN ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 61 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --regexp PATTERN ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 62 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --match-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 63 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --recursion-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 64 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -o1 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 65 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -o2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 66 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -o3 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 67 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -o12 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 68 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --only-matching=2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 69 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -vn --colour=always pattern ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 70 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 71 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -o "^01|^02|^03" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 72 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --color=always "^01|^02|^03" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 73 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -o --colour=always "^01|^02|^03" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 74 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -o "^01|02|^03" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 75 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --color=always "^01|02|^03" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 76 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -o --colour=always "^01|02|^03" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 77 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -o "^01|^02|03" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 78 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --color=always "^01|^02|03" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 79 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -o --colour=always "^01|^02|03" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 80 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -o "\b01|\b02" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 81 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --color=always "\\b01|\\b02" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 82 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -o --colour=always "\\b01|\\b02" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 83 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --buffer-size=100 "^a" ./testdata/grepinput3) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 84 -----------------------------" >>testtrygrep
|
||||
echo testdata/grepinput3 >testtemp1grep
|
||||
(cd $srcdir; $valgrind $pcre2grep --file-list ./testdata/grepfilelist --file-list $builddir/testtemp1grep "fox|complete|t7") >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 85 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --file-list=./testdata/grepfilelist "dolor" ./testdata/grepinput3) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 86 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 87 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep "cat" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 88 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -v "cat" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 89 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -I "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 90 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --binary-files=without-match "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 91 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -a "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 92 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --binary-files=text "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 93 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --text "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 94 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -L -r --include=grepinputx --include grepinput8 'fox' ./testdata/grepinput* | sort) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 95 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --file-list ./testdata/grepfilelist --exclude grepinputv "fox|complete") >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 96 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -L -r --include-dir=testdata --exclude '^(?!grepinput)' 'fox' ./test* | sort) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 97 -----------------------------" >>testtrygrep
|
||||
echo "grepinput$" >testtemp1grep
|
||||
echo "grepinput8" >>testtemp1grep
|
||||
(cd $srcdir; $valgrind $pcre2grep -L -r --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 98 -----------------------------" >>testtrygrep
|
||||
echo "grepinput$" >testtemp1grep
|
||||
echo "grepinput8" >>testtemp1grep
|
||||
(cd $srcdir; $valgrind $pcre2grep -L -r --exclude=grepinput3 --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 99 -----------------------------" >>testtrygrep
|
||||
echo "grepinput$" >testtemp1grep
|
||||
echo "grepinput8" >testtemp2grep
|
||||
(cd $srcdir; $valgrind $pcre2grep -L -r --include grepinput --exclude-from $builddir/testtemp1grep --exclude-from=$builddir/testtemp2grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 100 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -Ho2 --only-matching=1 -o3 '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 101 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -o3 -Ho2 -o12 --only-matching=1 -o3 --colour=always --om-separator='|' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 102 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -n "^$" ./testdata/grepinput3) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 103 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --only-matching "^$" ./testdata/grepinput3) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 104 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -n --only-matching "^$" ./testdata/grepinput3) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 105 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --colour=always "ipsum|" ./testdata/grepinput3) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 106 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; echo "a" | $valgrind $pcre2grep -M "|a" ) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 107 -----------------------------" >>testtrygrep
|
||||
echo "a" >testtemp1grep
|
||||
echo "aaaaa" >>testtemp1grep
|
||||
(cd $srcdir; $valgrind $pcre2grep --line-offsets '(?<=\Ka)' $builddir/testtemp1grep) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
# Now compare the results.
|
||||
|
||||
$cf $srcdir/testdata/grepoutput testtrygrep
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
|
||||
|
||||
# These tests require UTF-8 support
|
||||
|
||||
if [ $utf8 -ne 0 ] ; then
|
||||
echo "Testing pcre2grep UTF-8 features"
|
||||
|
||||
echo "---------------------------- Test U1 ------------------------------" >testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -n -u --newline=any "^X" ./testdata/grepinput8) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test U2 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -n -u -C 3 --newline=any "Match" ./testdata/grepinput8) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test U3 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep --line-offsets -u --newline=any '(?<=\K\x{17f})' ./testdata/grepinput8) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
$cf $srcdir/testdata/grepoutput8 testtrygrep
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
|
||||
else
|
||||
echo "Skipping pcre2grep UTF-8 tests: no UTF-8 support in PCRE2 library"
|
||||
fi
|
||||
|
||||
|
||||
# We go to some contortions to try to ensure that the tests for the various
|
||||
# newline settings will work in environments where the normal newline sequence
|
||||
# is not \n. Do not use exported files, whose line endings might be changed.
|
||||
# Instead, create an input file using printf so that its contents are exactly
|
||||
# what we want. Note the messy fudge to get printf to write a string that
|
||||
# starts with a hyphen. These tests are run in the build directory.
|
||||
|
||||
echo "Testing pcre2grep newline settings"
|
||||
printf "abc\rdef\r\nghi\njkl" >testNinputgrep
|
||||
|
||||
printf "%c--------------------------- Test N1 ------------------------------\r\n" - >testtrygrep
|
||||
$valgrind $pcre2grep -n -N CR "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
|
||||
|
||||
printf "%c--------------------------- Test N2 ------------------------------\r\n" - >>testtrygrep
|
||||
$valgrind $pcre2grep -n --newline=crlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
|
||||
|
||||
printf "%c--------------------------- Test N3 ------------------------------\r\n" - >>testtrygrep
|
||||
pattern=`printf 'def\rjkl'`
|
||||
$valgrind $pcre2grep -n --newline=cr -F "$pattern" testNinputgrep >>testtrygrep
|
||||
|
||||
printf "%c--------------------------- Test N4 ------------------------------\r\n" - >>testtrygrep
|
||||
$valgrind $pcre2grep -n --newline=crlf -F -f $srcdir/testdata/greppatN4 testNinputgrep >>testtrygrep
|
||||
|
||||
printf "%c--------------------------- Test N5 ------------------------------\r\n" - >>testtrygrep
|
||||
$valgrind $pcre2grep -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
|
||||
|
||||
printf "%c--------------------------- Test N6 ------------------------------\r\n" - >>testtrygrep
|
||||
$valgrind $pcre2grep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
|
||||
|
||||
$cf $srcdir/testdata/grepoutputN testtrygrep
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
|
||||
|
||||
# Finally, some tests to exercise code that is not tested above, just to be
|
||||
# sure that it runs OK. Doing this improves the coverage statistics. The output
|
||||
# is not checked.
|
||||
|
||||
echo "Testing miscellaneous pcre2grep arguments (unchecked)"
|
||||
echo '' >testtrygrep
|
||||
checkspecial '-xxxxx' 2
|
||||
checkspecial '--help' 0
|
||||
checkspecial '--line-buffered --colour=auto abc /dev/null' 1
|
||||
|
||||
# Clean up local working files
|
||||
rm -f testNinputgrep teststderrgrep testtrygrep testtemp1grep testtemp2grep
|
||||
|
||||
exit 0
|
||||
|
||||
# End
|
749
pcre2/RunTest
Executable file
749
pcre2/RunTest
Executable file
@ -0,0 +1,749 @@
|
||||
#! /bin/sh
|
||||
|
||||
###############################################################################
|
||||
# Run the PCRE2 tests using the pcre2test program. The appropriate tests are
|
||||
# selected, depending on which build-time options were used.
|
||||
#
|
||||
# When JIT support is available, all appropriate tests are run with and without
|
||||
# JIT, unless "-nojit" is given on the command line. There are also two tests
|
||||
# for JIT-specific features, one to be run when JIT support is available
|
||||
# (unless "-nojit" is specified), and one when it is not.
|
||||
#
|
||||
# Whichever of the 8-, 16- and 32-bit libraries exist are tested. It is also
|
||||
# possible to select which to test by giving "-8", "-16" or "-32" on the
|
||||
# command line.
|
||||
#
|
||||
# As well as "-nojit", "-8", "-16", and "-32", arguments for this script are
|
||||
# individual test numbers, ranges of tests such as 3-6 or 3- (meaning 3 to the
|
||||
# end), or a number preceded by ~ to exclude a test. For example, "3-15 ~10"
|
||||
# runs tests 3 to 15, excluding test 10, and just "~10" runs all the tests
|
||||
# except test 10. Whatever order the arguments are in, the tests are always run
|
||||
# in numerical order.
|
||||
#
|
||||
# Inappropriate tests are automatically skipped (with a comment to say so). For
|
||||
# example, if JIT support is not compiled, test 16 is skipped, whereas if JIT
|
||||
# support is compiled, test 15 is skipped.
|
||||
#
|
||||
# Other arguments can be one of the words "-valgrind", "-valgrind-log", or
|
||||
# "-sim" followed by an argument to run cross-compiled executables under a
|
||||
# simulator, for example:
|
||||
#
|
||||
# RunTest 3 -sim "qemu-arm -s 8388608"
|
||||
#
|
||||
# For backwards compatibility, -nojit, -valgrind, -valgrind-log, and -sim may
|
||||
# be given without the leading "-" character.
|
||||
#
|
||||
# There are two special cases where only one argument is allowed:
|
||||
#
|
||||
# If the first and only argument is "ebcdic", the script runs the special
|
||||
# EBCDIC test that can be useful for checking certain EBCDIC features, even
|
||||
# when run in an ASCII environment. PCRE2 must be built with EBCDIC support for
|
||||
# this test to be run.
|
||||
#
|
||||
# If the script is obeyed as "RunTest list", a list of available tests is
|
||||
# output, but none of them are run.
|
||||
###############################################################################
|
||||
|
||||
# Define test titles in variables so that they can be output as a list. Some
|
||||
# of them are modified (e.g. with -8 or -16) when used in the actual tests.
|
||||
|
||||
title0="Test 0: Unchecked pcre2test argument tests (to improve coverage)"
|
||||
title1="Test 1: Main non-UTF, non-UCP functionality (compatible with Perl >= 5.10)"
|
||||
title2="Test 2: API, errors, internals, and non-Perl stuff"
|
||||
title3="Test 3: Locale-specific features"
|
||||
title4A="Test 4: UTF"
|
||||
title4B=" and Unicode property support (compatible with Perl >= 5.10)"
|
||||
title5A="Test 5: API, internals, and non-Perl stuff for UTF"
|
||||
title5B=" and UCP support"
|
||||
title6="Test 6: DFA matching main non-UTF, non-UCP functionality"
|
||||
title7A="Test 7: DFA matching with UTF"
|
||||
title7B=" and Unicode property support"
|
||||
title8="Test 8: Internal offsets and code size tests"
|
||||
title9="Test 9: Specials for the basic 8-bit library"
|
||||
title10="Test 10: Specials for the 8-bit library with UTF-8 and UCP support"
|
||||
title11="Test 11: Specials for the basic 16-bit and 32-bit libraries"
|
||||
title12="Test 12: Specials for the 16-bit and 32-bit libraries UTF and UCP support"
|
||||
title13="Test 13: DFA specials for the basic 16-bit and 32-bit libraries"
|
||||
title14="Test 14: Non-JIT limits and other non-JIT tests"
|
||||
title15="Test 15: JIT-specific features when JIT is not available"
|
||||
title16="Test 16: JIT-specific features when JIT is available"
|
||||
title17="Test 17: Tests of the POSIX interface, excluding UTF/UCP"
|
||||
title18="Test 18: Tests of the POSIX interface with UTF/UCP"
|
||||
title19="Test 19: Serialization tests"
|
||||
# Highest test number. Open-ended ranges such as "3-" expand up to this value
# and explicit ranges are rejected when they go beyond it, so it has to match
# the last test defined above (Test 19).
maxtest=19
|
||||
|
||||
# "RunTest list": print the title of every test, then stop without running
# anything. The loop variable is expanded unquoted on purpose so that each
# line is word-split by the shell before echo joins it again.
if [ $# -eq 1 ] && [ "$1" = "list" ] ; then
  for title in \
      "$title0" \
      "$title1" \
      "$title2 (not UTF or UCP)" \
      "$title3" \
      "$title4A $title4B" \
      "$title5A $title5B" \
      "$title6" \
      "$title7A $title7B" \
      "$title8" \
      "$title9" \
      "$title10" \
      "$title11" \
      "$title12" \
      "$title13" \
      "$title14" \
      "$title15" \
      "$title16" \
      "$title17" \
      "$title18" \
      "$title19"
  do
    echo $title
  done
  exit 0
fi
|
||||
|
||||
# Set up a suitable "diff" command for comparison. Some systems
|
||||
# have a diff that lacks a -u option. Try to deal with this.
|
||||
|
||||
# Use unified diffs when the local diff accepts -u, plain diff otherwise.
if diff -u /dev/null /dev/null 2>/dev/null ; then
  cf="diff -u"
else
  cf="diff"
fi
|
||||
|
||||
# Find the test data
|
||||
|
||||
# $srcdir wins when it names a directory; otherwise look for a testdata
# directory beside, then above, the current directory.
if test -n "$srcdir" && test -d "$srcdir" ; then
  testdata="$srcdir/testdata"
elif test -d "./testdata" ; then
  testdata=./testdata
elif test -d "../testdata" ; then
  testdata=../testdata
else
  echo "Cannot find the testdata directory"
  exit 1
fi
|
||||
|
||||
|
||||
# ------ Function to check results of a test -------
|
||||
|
||||
# This function is called with three parameters:
|
||||
#
|
||||
# $1 the value of $? after a call to pcre2test
|
||||
# $2 the suffix of the output file to compare with
|
||||
# $3 the $opt value (empty, -jit, or -dfa)
|
||||
#
|
||||
# Note: must define using name(), not "function name", for Solaris.
|
||||
|
||||
checkresult()
{
  # A non-zero status from pcre2test itself is fatal.
  if [ $1 -ne 0 ] ; then
    echo "** pcre2test failed - check testtry"
    exit 1
  fi

  # Suffix naming the matching mode, used in both the OK and failure messages.
  with=""
  case "$3" in
    -jit) with=" with JIT";;
    -dfa) with=" with DFA";;
  esac

  # Compare the fresh output in testtry with the expected output file.
  if $cf $testdata/testoutput$2 testtry ; then
    echo " OK$with"
  else
    echo ""
    echo "** Test $2 failed$with"
    exit 1
  fi
}
|
||||
|
||||
|
||||
# ------ Function to run and check a special pcre2test arguments test -------
|
||||
|
||||
# Run pcre2test with the arguments given in $1, appending its output to
# testtry; a non-zero exit status aborts the whole run.
#
# $1 is deliberately left unquoted so that a string such as
# '-S 1 -t 10 testSinput' is split into separate arguments. $sim is included
# so that the call also works when pcre2test has to run under a simulator,
# in line with the other pcre2test invocations made after option parsing.

checkspecial()
{
  $sim $valgrind ./pcre2test $1 >>testtry
  if [ $? -ne 0 ] ; then
    echo "** pcre2test $1 failed - check testtry"
    exit 1
  fi
}
|
||||
|
||||
|
||||
# ------ Special EBCDIC Test -------
|
||||
|
||||
if [ $# -eq 1 ] && [ "$1" = "ebcdic" ] ; then
  # "pcre2test -C ebcdic" answers through its exit status; only a status of 1
  # is accepted here as meaning that EBCDIC support was compiled in.
  $valgrind ./pcre2test -C ebcdic >/dev/null
  ebcdic=$?
  [ $ebcdic -eq 1 ] || {
    echo "Cannot run EBCDIC tests: EBCDIC support not compiled"
    exit 1
  }

  # Run the EBCDIC input once with the default matcher and once with -dfa.
  for opt in "" "-dfa"; do
    ./pcre2test -q $opt $testdata/testinputEBC >testtry
    checkresult $? EBC "$opt"
  done

  exit 0
fi
|
||||
|
||||
|
||||
# ------ Normal Tests ------
|
||||
|
||||
# Default values
|
||||
|
||||
# Option state: everything starts out empty.
arg8= arg16= arg32=
nojit= sim= skip= valgrind=

# Make sure these names are not overridden by something the caller has set up.
unset cp ls mv rm

# Process options and select which tests to run; for those that are explicitly
# requested, check that the necessary optional facilities are available.
# No test is selected to begin with.

for i in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19; do
  eval do$i=no
done
|
||||
|
||||
# Walk the command line. Each argument is one of:
#   N            select test N (0..19)
#   N-M or N-    select a range of tests; an open range ends at $maxtest
#   ~N           exclude test N (applied after selection, via $skip)
#   -8 -16 -32   restrict the run to the named library widths
#   nojit, sim CMD, valgrind, valgrind-log (each with or without a leading "-")

while [ $# -gt 0 ] ; do
  case $1 in
    [0-9]|1[0-9]) eval do$1=yes;;
    -8) arg8=yes;;
    -16) arg16=yes;;
    -32) arg32=yes;;
    nojit|-nojit) nojit=yes;;
    sim|-sim)
      # The simulator command is the next argument. Diagnose its absence here
      # rather than letting the "shift" at the bottom of the loop fail.
      if [ $# -lt 2 ] ; then
        echo "Missing simulator command after '$1'"; exit 1
      fi
      shift; sim=$1;;
    valgrind|-valgrind) valgrind="valgrind --tool=memcheck -q --smc-check=all";;
    valgrind-log|-valgrind-log) valgrind="valgrind --tool=memcheck --num-callers=30 --leak-check=no --error-limit=no --smc-check=all --log-file=report.%p ";;
    ~*)
      # Remember the number after "~"; the exclusions are applied later.
      if expr "$1" : '~[0-9][0-9]*$' >/dev/null; then
        skip="$skip `expr "$1" : '~\([0-9]*\)*$'`"
      else
        echo "Unknown option or test selector '$1'"; exit 1
      fi
    ;;
    *-*)
      if expr "$1" : '[0-9][0-9]*-[0-9]*$' >/dev/null; then
        tf=`expr "$1" : '\([0-9]*\)'`
        tt=`expr "$1" : '.*-\([0-9]*\)'`
        # "N-" means "from N to the last test".
        if [ "$tt" = "" ] ; then tt=$maxtest; fi
        if expr \( "$tt" ">" "$maxtest" \) >/dev/null; then
          echo "Invalid test range '$1'"; exit 1
        fi
        while expr "$tf" "<=" "$tt" >/dev/null; do
          eval do${tf}=yes
          tf=`expr $tf + 1`
        done
      else
        echo "Invalid test range '$1'"; exit 1
      fi
    ;;
    *) echo "Unknown option or test selector '$1'"; exit 1;;
  esac
  shift
done
|
||||
|
||||
# Find which optional facilities are available.
|
||||
|
||||
# The link size is reported through the exit status; only 2, 3 and 4 are
# accepted.
$sim ./pcre2test -C linksize >/dev/null
link_size=$?
case $link_size in
  2|3|4) ;;
  *) echo "RunTest: Failed to find internal link size"
     exit 1;;
esac
|
||||
|
||||
# If it is possible to set the system stack size, arrange to set a value for
|
||||
# test 2, which needs more than even the Linux default when PCRE2 has been
|
||||
# compiled with -fsanitize=address.
|
||||
|
||||
# Probe whether "-S" is usable; if so, test 2 is run with "-S 16".
if $sim ./pcre2test -S 1 /dev/null /dev/null ; then
  test2stack="-S 16"
else
  test2stack=""
fi
|
||||
|
||||
# All of 8-bit, 16-bit, and 32-bit character strings may be supported, but only
|
||||
# one need be.
|
||||
|
||||
# Record pcre2test's exit status for each code unit width in support8,
# support16 and support32; zero is treated below as "not compiled".
for width in 8 16 32; do
  $sim ./pcre2test -C pcre2-$width >/dev/null
  eval "support$width=\$?"
done
|
||||
|
||||
# Initialize all bitsizes skipped
|
||||
|
||||
# Every width starts out skipped.
test8=skip
test16=skip
test32=skip

if [ "$arg8$arg16$arg32" != "" ] ; then

  # Explicit requests: each requested width must actually be compiled in.
  if [ "$arg8" = yes ] ; then
    if [ $support8 -eq 0 ] ; then
      echo "Cannot run 8-bit library tests: 8-bit library not compiled"
      exit 1
    fi
    test8=-8
  fi

  if [ "$arg16" = yes ] ; then
    if [ $support16 -eq 0 ] ; then
      echo "Cannot run 16-bit library tests: 16-bit library not compiled"
      exit 1
    fi
    test16=-16
  fi

  if [ "$arg32" = yes ] ; then
    if [ $support32 -eq 0 ] ; then
      echo "Cannot run 32-bit library tests: 32-bit library not compiled"
      exit 1
    fi
    test32=-32
  fi

else

  # No width given on the command line: take every one that is available.
  if [ $support8 -ne 0 ] ; then test8=-8; fi
  if [ $support16 -ne 0 ] ; then test16=-16; fi
  if [ $support32 -ne 0 ] ; then test32=-32; fi

fi
|
||||
|
||||
# UTF support is implied by Unicode support, and it always applies to all bit
|
||||
# sizes if both are supported; we can't have UTF-8 support without UTF-16 or
|
||||
# UTF-32 support.
|
||||
|
||||
# Unicode support, as reported through pcre2test's exit status.
$sim ./pcre2test -C unicode >/dev/null
utf=$?

# JIT runs are added only when JIT is compiled in and "nojit" was not given.
jitopt=
$sim ./pcre2test -C jit >/dev/null
jit=$?
if [ $jit -ne 0 ] && [ "$nojit" != "yes" ] ; then
  jitopt=-jit
fi
|
||||
|
||||
# If no specific tests were requested, select all. Those that are not
|
||||
# relevant will be automatically skipped.
|
||||
|
||||
# Every doN holds either "yes" or "no", so the concatenation contains "yes"
# exactly when at least one test was explicitly requested.
case "$do0$do1$do2$do3$do4$do5$do6$do7$do8$do9$do10$do11$do12$do13$do14$do15$do16$do17$do18$do19" in
  *yes*)
    ;;
  *)
    # Nothing was requested: select everything.
    for i in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19; do
      eval do$i=yes
    done
    ;;
esac
|
||||
|
||||
# Handle any explicit skips at this stage, so that an argument list may consist
|
||||
# only of explicit skips.
|
||||
|
||||
# Switch off every test named by a "~N" argument.
for i in $skip
do
  eval do$i=no
done

# Show which release and which test data

echo
echo PCRE2 C library tests using test data from $testdata
$sim ./pcre2test /dev/null
echo
|
||||
|
||||
for bmode in "$test8" "$test16" "$test32"; do
|
||||
case "$bmode" in
|
||||
skip) continue;;
|
||||
-16) if [ "$test8$test32" != "skipskip" ] ; then echo ""; fi
|
||||
bits=16; echo "---- Testing 16-bit library ----"; echo "";;
|
||||
-32) if [ "$test8$test16" != "skipskip" ] ; then echo ""; fi
|
||||
bits=32; echo "---- Testing 32-bit library ----"; echo "";;
|
||||
-8) bits=8; echo "---- Testing 8-bit library ----"; echo "";;
|
||||
esac
|
||||
|
||||
# Test 0 is a special test. Its output is not checked, because it will
|
||||
# be different on different hardware and with different configurations.
|
||||
# Running this test just exercises the code.
|
||||
|
||||
if [ $do0 = yes ] ; then
|
||||
echo $title0
|
||||
echo '/abc/jit,memory' >testSinput
|
||||
echo ' abc' >>testSinput
|
||||
echo '' >testtry
|
||||
checkspecial '-C'
|
||||
checkspecial '--help'
|
||||
checkspecial '-S 1 -t 10 testSinput'
|
||||
echo " OK"
|
||||
fi
|
||||
|
||||
# Primary non-UTF test, compatible with JIT and all versions of Perl >= 5.8
|
||||
|
||||
if [ $do1 = yes ] ; then
|
||||
echo $title1
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput1 testtry
|
||||
checkresult $? 1 "$opt"
|
||||
done
|
||||
fi
|
||||
|
||||
# PCRE2 tests that are not Perl-compatible: API, errors, internals
|
||||
|
||||
if [ $do2 = yes ] ; then
|
||||
echo $title2 "(excluding UTF-$bits)"
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ./pcre2test -q $test2stack $bmode $opt $testdata/testinput2 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
checkresult $? 2 "$opt"
|
||||
else
|
||||
echo " "
|
||||
echo "** Test 2 requires a lot of stack. If it has crashed with a"
|
||||
echo "** segmentation fault, it may be that you do not have enough"
|
||||
echo "** stack available by default. Please see the 'pcre2stack' man"
|
||||
echo "** page for a discussion of PCRE2's stack usage."
|
||||
echo " "
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
# Locale-specific tests, provided that either the "fr_FR", "fr_CA", "french"
|
||||
# or "fr" locale is available. The first two are Unix-like standards; the
|
||||
# last two are for Windows. Unfortunately, different versions of the French
|
||||
# locale give different outputs for some items. This test passes if the
|
||||
# output matches any one of the alternative output files.
|
||||
|
||||
if [ $do3 = yes ] ; then
|
||||
locale=
|
||||
|
||||
# In some environments locales that are listed by the "locale -a"
|
||||
# command do not seem to work with setlocale(). Therefore, we do
|
||||
# a preliminary test to see if pcre2test can set one before going
|
||||
# on to use it.
|
||||
|
||||
for loc in 'fr_FR' 'french' 'fr' 'fr_CA'; do
|
||||
locale -a | grep "^$loc\$" >/dev/null
|
||||
if [ $? -eq 0 ] ; then
|
||||
echo "/a/locale=$loc" | \
|
||||
$sim $valgrind ./pcre2test -q $bmode | \
|
||||
grep "Failed to set locale" >/dev/null
|
||||
if [ $? -ne 0 ] ; then
|
||||
locale=$loc
|
||||
if [ "$locale" = "fr_FR" ] ; then
|
||||
infile=$testdata/testinput3
|
||||
outfile=$testdata/testoutput3
|
||||
outfile2=$testdata/testoutput3A
|
||||
outfile3=$testdata/testoutput3B
|
||||
else
|
||||
infile=test3input
|
||||
outfile=test3output
|
||||
outfile2=test3outputA
|
||||
outfile3=test3outputB
|
||||
sed "s/fr_FR/$loc/" $testdata/testinput3 >test3input
|
||||
sed "s/fr_FR/$loc/" $testdata/testoutput3 >test3output
|
||||
sed "s/fr_FR/$loc/" $testdata/testoutput3A >test3outputA
|
||||
sed "s/fr_FR/$loc/" $testdata/testoutput3B >test3outputB
|
||||
fi
|
||||
break
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
if [ "$locale" != "" ] ; then
|
||||
echo $title3 "(using '$locale' locale)"
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ./pcre2test -q $bmode $opt $infile testtry
|
||||
if [ $? = 0 ] ; then
|
||||
case "$opt" in
|
||||
-jit) with=" with JIT";;
|
||||
*) with="";;
|
||||
esac
|
||||
if $cf $outfile testtry >teststdout || \
|
||||
$cf $outfile2 testtry >teststdout || \
|
||||
$cf $outfile3 testtry >teststdout
|
||||
then
|
||||
echo " OK$with"
|
||||
else
|
||||
echo "** Locale test did not run successfully$with. The output did not match"
|
||||
echo " $outfile, $outfile2 or $outfile3."
|
||||
echo " This may mean that there is a problem with the locale settings rather"
|
||||
echo " than a bug in PCRE2."
|
||||
exit 1
|
||||
fi
|
||||
else exit 1
|
||||
fi
|
||||
done
|
||||
else
|
||||
echo "Cannot test locale-specific features - none of the 'fr_FR', 'fr_CA',"
|
||||
echo "'fr' or 'french' locales can be set, or the \"locale\" command is"
|
||||
echo "not available to check for them."
|
||||
echo " "
|
||||
fi
|
||||
fi
|
||||
|
||||
# Tests for UTF and Unicode property support
|
||||
|
||||
if [ $do4 = yes ] ; then
|
||||
echo ${title4A}-${bits}${title4B}
|
||||
if [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput4 testtry
|
||||
checkresult $? 4 "$opt"
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $do5 = yes ] ; then
|
||||
echo ${title5A}-${bits}$title5B
|
||||
if [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput5 testtry
|
||||
checkresult $? 5 "$opt"
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
# Tests for DFA matching support
|
||||
|
||||
if [ $do6 = yes ] ; then
|
||||
echo $title6
|
||||
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput6 testtry
|
||||
checkresult $? 6 ""
|
||||
fi
|
||||
|
||||
# Test 7 (titled "DFA matching with UTF-N and Unicode property support" in
# RunTest.bat). It is a single run, not a loop over "" and $jitopt, so no
# per-iteration option is passed: $opt would only hold whatever value an
# earlier test's loop happened to leave behind.
if [ $do7 = yes ] ; then
  echo ${title7A}-${bits}$title7B
  if [ $utf -eq 0 ] ; then
    echo " Skipped because UTF-$bits support is not available"
  else
    $sim $valgrind ./pcre2test -q $bmode $testdata/testinput7 testtry
    checkresult $? 7 ""
  fi
fi
|
||||
|
||||
# Test of internal offsets and code sizes. This test is run only when there
|
||||
# is UTF/UCP support and the link size is 2. The actual tests are
|
||||
# mostly the same as in some of the above, but in this test we inspect some
|
||||
# offsets and sizes that require a known link size. This is a doublecheck for
|
||||
# the maintainer, just in case something changes unexpectedly. The output from
|
||||
# this test is different in 8-bit, 16-bit, and 32-bit modes, so there are
|
||||
# mode-specific output files.
|
||||
|
||||
if [ $do8 = yes ] ; then
|
||||
echo $title8
|
||||
if [ $link_size -ne 2 ] ; then
|
||||
echo " Skipped because link size is not 2"
|
||||
elif [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput8 testtry
|
||||
checkresult $? 8-$bits ""
|
||||
fi
|
||||
fi
|
||||
|
||||
# Tests for 8-bit-specific features
|
||||
|
||||
if [ "$do9" = yes ] ; then
|
||||
echo $title9
|
||||
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
|
||||
echo " Skipped when running 16/32-bit tests"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput9 testtry
|
||||
checkresult $? 9 "$opt"
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
# Tests for UTF-8 and UCP 8-bit-specific features
|
||||
|
||||
if [ "$do10" = yes ] ; then
|
||||
echo $title10
|
||||
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
|
||||
echo " Skipped when running 16/32-bit tests"
|
||||
elif [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput10 testtry
|
||||
checkresult $? 10 "$opt"
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
# Tests for 16-bit and 32-bit features. Output is different for the two widths.
|
||||
|
||||
if [ $do11 = yes ] ; then
|
||||
echo $title11
|
||||
if [ "$bits" = "8" ] ; then
|
||||
echo " Skipped when running 8-bit tests"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput11 testtry
|
||||
checkresult $? 11-$bits "$opt"
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
# Tests for 16-bit and 32-bit features with UTF-16/32 and UCP support. Output
|
||||
# is different for the two widths.
|
||||
|
||||
if [ $do12 = yes ] ; then
|
||||
echo $title12
|
||||
if [ "$bits" = "8" ] ; then
|
||||
echo " Skipped when running 8-bit tests"
|
||||
elif [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput12 testtry
|
||||
checkresult $? 12-$bits "$opt"
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
# Tests for 16/32-bit-specific features in DFA non-UTF modes
|
||||
|
||||
if [ $do13 = yes ] ; then
|
||||
echo $title13
|
||||
if [ "$bits" = "8" ] ; then
|
||||
echo " Skipped when running 8-bit tests"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput13 testtry
|
||||
checkresult $? 13 ""
|
||||
fi
|
||||
fi
|
||||
|
||||
# Test non-JIT match and recursion limits
|
||||
|
||||
if [ $do14 = yes ] ; then
|
||||
echo $title14
|
||||
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput14 testtry
|
||||
checkresult $? 14 ""
|
||||
fi
|
||||
|
||||
# Test JIT-specific features when JIT is not available
|
||||
|
||||
if [ $do15 = yes ] ; then
|
||||
echo $title15
|
||||
if [ $jit -ne 0 ] ; then
|
||||
echo " Skipped because JIT is available"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput15 testtry
|
||||
checkresult $? 15 ""
|
||||
fi
|
||||
fi
|
||||
|
||||
# Test JIT-specific features when JIT is available
|
||||
|
||||
if [ $do16 = yes ] ; then
|
||||
echo $title16
|
||||
if [ $jit -eq 0 -o "$nojit" = "yes" ] ; then
|
||||
echo " Skipped because JIT is not available or nojit was specified"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput16 testtry
|
||||
checkresult $? 16 ""
|
||||
fi
|
||||
fi
|
||||
|
||||
# Tests for the POSIX interface without UTF/UCP (8-bit only)
|
||||
|
||||
if [ $do17 = yes ] ; then
|
||||
echo $title17
|
||||
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
|
||||
echo " Skipped when running 16/32-bit tests"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput17 testtry
|
||||
checkresult $? 17 ""
|
||||
fi
|
||||
fi
|
||||
|
||||
# Tests for the POSIX interface with UTF/UCP (8-bit only)
|
||||
|
||||
if [ $do18 = yes ] ; then
|
||||
echo $title18
|
||||
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
|
||||
echo " Skipped when running 16/32-bit tests"
|
||||
elif [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput18 testtry
|
||||
checkresult $? 18 ""
|
||||
fi
|
||||
fi
|
||||
|
||||
# Serialization tests
|
||||
|
||||
if [ $do19 = yes ] ; then
|
||||
echo $title19
|
||||
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput19 testtry
|
||||
checkresult $? 19 ""
|
||||
fi
|
||||
|
||||
# End of loop for 8/16/32-bit tests
|
||||
done
|
||||
|
||||
# Clean up local working files
|
||||
rm -f testSinput test3input testsaved1 testsaved2 test3output test3outputA test3outputB teststdout teststderr testtry
|
||||
|
||||
# End
|
463
pcre2/RunTest.bat
Normal file
463
pcre2/RunTest.bat
Normal file
@ -0,0 +1,463 @@
|
||||
@echo off
|
||||
@rem
|
||||
@rem MS Windows batch file to run pcre2test on testfiles with the correct
|
||||
@rem options. This file must use CRLF linebreaks to function properly,
|
||||
@rem and requires both pcre2test and pcre2grep.
|
||||
@rem
|
||||
@rem ------------------------ HISTORY ----------------------------------
|
||||
@rem This file was originally contributed to PCRE1 by Ralf Junker, and touched
|
||||
@rem up by Daniel Richard G. Tests 10-12 added by Philip H.
|
||||
@rem Philip H also changed test 3 to use "wintest" files.
|
||||
@rem
|
||||
@rem Updated by Tom Fortmann to support explicit test numbers on the command
|
||||
@rem line. Added argument validation and added error reporting.
|
||||
@rem
|
||||
@rem Sheri Pierce added logic to skip feature dependent tests
|
||||
@rem tests 4 5 9 15 and 18 require utf support
|
||||
@rem tests 6 7 10 16 and 19 require ucp support
|
||||
@rem 11 requires ucp and link size 2
|
||||
@rem 12 requires presence of jit support
|
||||
@rem 13 requires absence of jit support
|
||||
@rem Sheri P also added override tests for study and jit testing
|
||||
@rem Zoltan Herczeg added libpcre16 support
|
||||
@rem Zoltan Herczeg added libpcre32 support
|
||||
@rem -------------------------------------------------------------------
|
||||
@rem
|
||||
@rem The file was converted for PCRE2 by PH, February 2015.
|
||||
|
||||
|
||||
setlocal enabledelayedexpansion
|
||||
if [%srcdir%]==[] (
|
||||
if exist testdata\ set srcdir=.)
|
||||
if [%srcdir%]==[] (
|
||||
if exist ..\testdata\ set srcdir=..)
|
||||
if [%srcdir%]==[] (
|
||||
if exist ..\..\testdata\ set srcdir=..\..)
|
||||
@rem Stop early when no testdata folder exists under the chosen srcdir.
@rem The message must be printed with echo; "Error:" on its own is not a command.
if NOT exist %srcdir%\testdata\ (
  echo Error: distribution testdata folder not found!
  call :conferror
  exit /b 1
)
|
||||
|
||||
if [%pcre2test%]==[] set pcre2test=.\pcre2test.exe
|
||||
|
||||
echo source dir is %srcdir%
|
||||
echo pcre2test=%pcre2test%
|
||||
|
||||
if NOT exist %pcre2test% (
|
||||
echo Error: %pcre2test% not found!
|
||||
echo.
|
||||
call :conferror
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
%pcre2test% -C linksize >NUL
|
||||
set link_size=%ERRORLEVEL%
|
||||
%pcre2test% -C pcre2-8 >NUL
|
||||
set support8=%ERRORLEVEL%
|
||||
%pcre2test% -C pcre2-16 >NUL
|
||||
set support16=%ERRORLEVEL%
|
||||
%pcre2test% -C pcre2-32 >NUL
|
||||
set support32=%ERRORLEVEL%
|
||||
%pcre2test% -C unicode >NUL
|
||||
set unicode=%ERRORLEVEL%
|
||||
%pcre2test% -C jit >NUL
|
||||
set jit=%ERRORLEVEL%
|
||||
|
||||
@rem Create the output folders for every library width that pcre2test
@rem reported as supported (support8/16/32 hold the -C query results).
if %support8% EQU 1 (
  if not exist testout8 md testout8
  if not exist testoutjit8 md testoutjit8
)

if %support16% EQU 1 (
  if not exist testout16 md testout16
  if not exist testoutjit16 md testoutjit16
)

@rem The 32-bit folders are keyed on 32-bit support, not 16-bit support.
if %support32% EQU 1 (
  if not exist testout32 md testout32
  if not exist testoutjit32 md testoutjit32
)
|
||||
|
||||
set do1=no
|
||||
set do2=no
|
||||
set do3=no
|
||||
set do4=no
|
||||
set do5=no
|
||||
set do6=no
|
||||
set do7=no
|
||||
set do8=no
|
||||
set do9=no
|
||||
set do10=no
|
||||
set do11=no
|
||||
set do12=no
|
||||
set do13=no
|
||||
set do14=no
|
||||
set do15=no
|
||||
set do16=no
|
||||
set do17=no
|
||||
set do18=no
|
||||
set do19=no
|
||||
set all=yes
|
||||
|
||||
for %%a in (%*) do (
|
||||
set valid=no
|
||||
for %%v in (1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19) do if %%v == %%a set valid=yes
|
||||
if "!valid!" == "yes" (
|
||||
set do%%a=yes
|
||||
set all=no
|
||||
) else (
|
||||
echo Invalid test number - %%a!
|
||||
echo Usage %0 [ test_number ] ...
|
||||
echo Where test_number is one or more optional test numbers 1 through 19, default is all tests.
|
||||
exit /b 1
|
||||
)
|
||||
)
|
||||
set failed="no"
|
||||
|
||||
if "%all%" == "yes" (
|
||||
set do1=yes
|
||||
set do2=yes
|
||||
set do3=yes
|
||||
set do4=yes
|
||||
set do5=yes
|
||||
set do6=yes
|
||||
set do7=yes
|
||||
set do8=yes
|
||||
set do9=yes
|
||||
set do10=yes
|
||||
set do11=yes
|
||||
set do12=yes
|
||||
set do13=yes
|
||||
set do14=yes
|
||||
set do15=yes
|
||||
set do16=yes
|
||||
set do17=yes
|
||||
set do18=yes
|
||||
set do19=yes
|
||||
)
|
||||
|
||||
@echo RunTest.bat's pcre2test output is written to newly created subfolders
|
||||
@echo named testout{8,16,32} and testoutjit{8,16,32}.
|
||||
@echo.
|
||||
|
||||
set mode=
|
||||
set bits=8
|
||||
|
||||
:nextMode
|
||||
if "%mode%" == "" (
|
||||
if %support8% EQU 0 goto modeSkip
|
||||
echo.
|
||||
echo ---- Testing 8-bit library ----
|
||||
echo.
|
||||
)
|
||||
if "%mode%" == "-16" (
|
||||
if %support16% EQU 0 goto modeSkip
|
||||
echo.
|
||||
echo ---- Testing 16-bit library ----
|
||||
echo.
|
||||
)
|
||||
if "%mode%" == "-32" (
|
||||
if %support32% EQU 0 goto modeSkip
|
||||
echo.
|
||||
echo ---- Testing 32-bit library ----
|
||||
echo.
|
||||
)
|
||||
if "%do1%" == "yes" call :do1
|
||||
if "%do2%" == "yes" call :do2
|
||||
if "%do3%" == "yes" call :do3
|
||||
if "%do4%" == "yes" call :do4
|
||||
if "%do5%" == "yes" call :do5
|
||||
if "%do6%" == "yes" call :do6
|
||||
if "%do7%" == "yes" call :do7
|
||||
if "%do8%" == "yes" call :do8
|
||||
if "%do9%" == "yes" call :do9
|
||||
if "%do10%" == "yes" call :do10
|
||||
if "%do11%" == "yes" call :do11
|
||||
if "%do12%" == "yes" call :do12
|
||||
if "%do13%" == "yes" call :do13
|
||||
if "%do14%" == "yes" call :do14
|
||||
if "%do15%" == "yes" call :do15
|
||||
if "%do16%" == "yes" call :do16
|
||||
if "%do17%" == "yes" call :do17
|
||||
if "%do18%" == "yes" call :do18
|
||||
if "%do19%" == "yes" call :do19
|
||||
:modeSkip
|
||||
if "%mode%" == "" (
|
||||
set mode=-16
|
||||
set bits=16
|
||||
goto nextMode
|
||||
)
|
||||
if "%mode%" == "-16" (
|
||||
set mode=-32
|
||||
set bits=32
|
||||
goto nextMode
|
||||
)
|
||||
|
||||
@rem If mode is -32, testing is finished
|
||||
if %failed% == "yes" (
|
||||
echo In above output, one or more of the various tests failed!
|
||||
exit /b 1
|
||||
)
|
||||
echo All OK
|
||||
goto :eof
|
||||
|
||||
:runsub
@rem Function to execute pcre2test and compare the output
@rem Arguments are as follows:
@rem
@rem 1 = test number
@rem 2 = outputdir
@rem 3 = test name use double quotes
@rem 4 - 9 = pcre2test options
@rem
@rem Output goes to <outputdir><bits>\<testoutput> and is compared with the
@rem reference file in %srcdir%\testdata. On failure the variable "failed"
@rem is set to "yes", except for a test 3 comparison failure, which is only
@rem reported.

if [%1] == [] (
  echo Missing test number argument!
  exit /b 1
)

if [%2] == [] (
  echo Missing outputdir!
  exit /b 1
)

if [%3] == [] (
  echo Missing test name argument!
  exit /b 1
)

@rem Prefer the "win"-prefixed input/output pair when the testdata has one.
set testinput=testinput%1
set testoutput=testoutput%1
if exist %srcdir%\testdata\win%testinput% (
  set testinput=wintestinput%1
  set testoutput=wintestoutput%1
)

echo Test %1: %3
%pcre2test% %mode% %4 %5 %6 %7 %8 %9 %srcdir%\testdata\%testinput% >%2%bits%\%testoutput%
if errorlevel 1 (
  echo. failed executing command-line:
  echo. %pcre2test% %mode% %4 %5 %6 %7 %8 %9 %srcdir%\testdata\%testinput% ^>%2%bits%\%testoutput%
  set failed="yes"
  goto :eof
)

@rem Tests 8, 11 and 12 have one reference file per width, named with a
@rem -<bits> suffix.
set type=
if [%1]==[8] (
  set type=-%bits%
)
if [%1]==[11] (
  set type=-%bits%
)
if [%1]==[12] (
  set type=-%bits%
)

fc /n %srcdir%\testdata\%testoutput%%type% %2%bits%\%testoutput% >NUL

if errorlevel 1 (
  echo. failed comparison: fc /n %srcdir%\testdata\%testoutput%%type% %2%bits%\%testoutput%
  if [%1]==[2] (
    echo.
    echo ** Test 2 requires a lot of stack. PCRE2 can be configured to
    echo ** use heap for recursion. Otherwise, to pass Test 2
    echo ** you generally need to allocate 8 mb stack to PCRE2.
    echo ** See the 'pcre2stack' page for a discussion of PCRE2's
    echo ** stack usage.
    echo.
  )
  if [%1]==[3] (
    echo.
    echo ** Test 3 failure usually means french locale is not
    echo ** available on the system, rather than a bug or problem with PCRE2.
    echo.
    goto :eof
  )

  set failed="yes"
  goto :eof
)

echo. Passed.
goto :eof
|
||||
|
||||
:do1
|
||||
call :runsub 1 testout "Main non-UTF, non-UCP functionality (Compatible with Perl >= 5.10)" -q
|
||||
if %jit% EQU 1 call :runsub 1 testoutjit "Test with JIT Override" -q -jit
|
||||
goto :eof
|
||||
|
||||
:do2
|
||||
call :runsub 2 testout "API, errors, internals, and non-Perl stuff" -q
|
||||
if %jit% EQU 1 call :runsub 2 testoutjit "Test with JIT Override" -q -jit
|
||||
goto :eof
|
||||
|
||||
:do3
|
||||
call :runsub 3 testout "Locale-specific features" -q
|
||||
if %jit% EQU 1 call :runsub 3 testoutjit "Test with JIT Override" -q -jit
|
||||
goto :eof
|
||||
|
||||
:do4
|
||||
if %unicode% EQU 0 (
|
||||
echo Test 4 Skipped due to absence of Unicode support.
|
||||
goto :eof
|
||||
)
|
||||
call :runsub 4 testout "UTF-%bits% and Unicode property support - (Compatible with Perl >= 5.10)" -q
|
||||
if %jit% EQU 1 call :runsub 4 testoutjit "Test with JIT Override" -q -jit
|
||||
goto :eof
|
||||
|
||||
:do5
|
||||
if %unicode% EQU 0 (
|
||||
echo Test 5 Skipped due to absence of Unicode support.
|
||||
goto :eof
|
||||
)
|
||||
call :runsub 5 testout "API, internals, and non-Perl stuff for UTF-%bits% and UCP" -q
|
||||
if %jit% EQU 1 call :runsub 5 testoutjit "Test with JIT Override" -q -jit
|
||||
goto :eof
|
||||
|
||||
:do6
|
||||
call :runsub 6 testout "DFA matching main non-UTF, non-UCP functionality" -q -dfa
|
||||
goto :eof
|
||||
|
||||
:do7
|
||||
if %unicode% EQU 0 (
|
||||
echo Test 7 Skipped due to absence of Unicode support.
|
||||
goto :eof
|
||||
)
|
||||
call :runsub 7 testout "DFA matching with UTF-%bits% and Unicode property support" -q -dfa
|
||||
goto :eof
|
||||
|
||||
:do8
|
||||
if NOT %link_size% EQU 2 (
|
||||
echo Test 8 Skipped because link size is not 2.
|
||||
goto :eof
|
||||
)
|
||||
if %unicode% EQU 0 (
|
||||
echo Test 8 Skipped due to absence of Unicode support.
|
||||
goto :eof
|
||||
)
|
||||
call :runsub 8 testout "Internal offsets and code size tests" -q
|
||||
goto :eof
|
||||
|
||||
:do9
|
||||
if NOT %bits% EQU 8 (
|
||||
echo Test 9 Skipped when running 16/32-bit tests.
|
||||
goto :eof
|
||||
)
|
||||
call :runsub 9 testout "Specials for the basic 8-bit library" -q
|
||||
if %jit% EQU 1 call :runsub 9 testoutjit "Test with JIT Override" -q -jit
|
||||
goto :eof
|
||||
|
||||
:do10
|
||||
if NOT %bits% EQU 8 (
|
||||
echo Test 10 Skipped when running 16/32-bit tests.
|
||||
goto :eof
|
||||
)
|
||||
if %unicode% EQU 0 (
|
||||
echo Test 10 Skipped due to absence of Unicode support.
|
||||
goto :eof
|
||||
)
|
||||
call :runsub 10 testout "Specials for the 8-bit library with Unicode support" -q
|
||||
if %jit% EQU 1 call :runsub 10 testoutjit "Test with JIT Override" -q -jit
|
||||
goto :eof
|
||||
|
||||
:do11
|
||||
if %bits% EQU 8 (
|
||||
echo Test 11 Skipped when running 8-bit tests.
|
||||
goto :eof
|
||||
)
|
||||
call :runsub 11 testout "Specials for the basic 16/32-bit library" -q
|
||||
if %jit% EQU 1 call :runsub 11 testoutjit "Test with JIT Override" -q -jit
|
||||
goto :eof
|
||||
|
||||
:do12
|
||||
if %bits% EQU 8 (
|
||||
echo Test 12 Skipped when running 8-bit tests.
|
||||
goto :eof
|
||||
)
|
||||
if %unicode% EQU 0 (
|
||||
echo Test 12 Skipped due to absence of Unicode support.
|
||||
goto :eof
|
||||
)
|
||||
call :runsub 12 testout "Specials for the 16/32-bit library with Unicode support" -q
|
||||
if %jit% EQU 1 call :runsub 12 testoutjit "Test with JIT Override" -q -jit
|
||||
goto :eof
|
||||
|
||||
:do13
|
||||
if %bits% EQU 8 (
|
||||
echo Test 13 Skipped when running 8-bit tests.
|
||||
goto :eof
|
||||
)
|
||||
call :runsub 13 testout "DFA specials for the basic 16/32-bit library" -q -dfa
|
||||
goto :eof
|
||||
|
||||
:do14
|
||||
call :runsub 14 testout "Non-JIT limits and other non_JIT tests" -q
|
||||
goto :eof
|
||||
|
||||
:do15
|
||||
if %jit% EQU 1 (
|
||||
echo Test 15 Skipped due to presence of JIT support.
|
||||
goto :eof
|
||||
)
|
||||
call :runsub 15 testout "JIT-specific features when JIT is not available" -q
|
||||
goto :eof
|
||||
|
||||
:do16
|
||||
if %jit% EQU 0 (
|
||||
echo Test 16 Skipped due to absence of JIT support.
|
||||
goto :eof
|
||||
)
|
||||
call :runsub 16 testout "JIT-specific features when JIT is available" -q
|
||||
goto :eof
|
||||
|
||||
:do17
|
||||
if %bits% EQU 16 (
|
||||
echo Test 17 Skipped when running 16-bit tests.
|
||||
goto :eof
|
||||
)
|
||||
if %bits% EQU 32 (
|
||||
echo Test 17 Skipped when running 32-bit tests.
|
||||
goto :eof
|
||||
)
|
||||
call :runsub 17 testout "POSIX interface, excluding UTF-8 and UCP" -q
|
||||
goto :eof
|
||||
|
||||
:do18
@rem Test 18: POSIX interface with UTF-8 and UCP. Run only for the 8-bit
@rem library and only when Unicode support is present.
if %bits% EQU 16 (
  echo Test 18 Skipped when running 16-bit tests.
  goto :eof
)
if %bits% EQU 32 (
  echo Test 18 Skipped when running 32-bit tests.
  goto :eof
)
if %unicode% EQU 0 (
  echo Test 18 Skipped due to absence of Unicode support.
  goto :eof
)
@rem The first argument selects testinput18/testoutput18.
call :runsub 18 testout "POSIX interface with UTF-8 and UCP" -q
goto :eof
|
||||
|
||||
:do19
@rem Test 19: serialization tests. The first argument selects
@rem testinput19/testoutput19.
call :runsub 19 testout "Serialization tests" -q
goto :eof
|
||||
|
||||
:conferror
@rem Print guidance for the case where pcre2test or the testdata folder
@rem could not be found. Only prints; the caller decides how to exit.
@echo.
@echo Either your build is incomplete or you have a configuration error.
@echo.
@echo If configured with cmake and executed via "make test" or the MSVC "RUN_TESTS"
@echo project, pcre2_test.bat defines variables and automatically calls RunTest.bat.
@echo For manual testing of all available features, after configuring with cmake
@echo and building, you can run the built pcre2_test.bat. For best results with
@echo cmake builds and tests avoid directories with full path names that include
@echo spaces for source or build.
@echo.
@echo Otherwise, if the build dir is in a subdir of the source dir, testdata needed
@echo for input and verification should be found automatically when (from the
@echo location of the built exes) you call RunTest.bat. By default RunTest.bat
@echo runs all tests compatible with the linked pcre2 library but it can be given
@echo a test number as an argument.
@echo.
@echo If the build dir is not under the source dir you can either copy your exes
@echo to the source folder or copy RunTest.bat and the testdata folder to the
@echo location of your built exes and then run RunTest.bat.
@echo.
goto :eof
|
1434
pcre2/aclocal.m4
vendored
Normal file
1434
pcre2/aclocal.m4
vendored
Normal file
File diff suppressed because it is too large
Load Diff
270
pcre2/ar-lib
Executable file
270
pcre2/ar-lib
Executable file
@ -0,0 +1,270 @@
|
||||
#! /bin/sh
|
||||
# Wrapper for Microsoft lib.exe
|
||||
|
||||
me=ar-lib
|
||||
scriptversion=2012-03-01.08; # UTC
|
||||
|
||||
# Copyright (C) 2010-2014 Free Software Foundation, Inc.
|
||||
# Written by Peter Rosin <peda@lysator.liu.se>.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# As a special exception to the GNU General Public License, if you
|
||||
# distribute this file as part of a program that contains a
|
||||
# configuration script generated by Autoconf, you may include it under
|
||||
# the same distribution terms that you use for the rest of that program.
|
||||
|
||||
# This file is maintained in Automake, please report
|
||||
# bugs to <bug-automake@gnu.org> or send patches to
|
||||
# <automake-patches@gnu.org>.
|
||||
|
||||
|
||||
# func_error message
# Print MESSAGE on standard error, prefixed with the script name held in
# $me, and terminate the script with exit status 1.
func_error ()
{
  echo "$me: $1" >&2
  exit 1
}
|
||||
|
||||
file_conv=
|
||||
|
||||
# func_file_conv build_file
# Store in $file the $host form of the $build path BUILD_FILE.
# Only absolute, non-UNC paths are rewritten; any other name is stored
# unchanged. Currently only supports Windows hosts.
func_file_conv ()
{
  file=$1

  # Relative names and UNC names (leading "//") need no conversion.
  case $file in
    / | /[!/]*) ;;
    *) return 0 ;;
  esac

  # Decide once how absolute names are converted and remember the answer
  # in $file_conv for later calls.
  if test -z "$file_conv"; then
    case $(uname -s) in
      MINGW*)  file_conv=mingw ;;
      CYGWIN*) file_conv=cygwin ;;
      *)       file_conv=wine ;;
    esac
  fi

  case $file_conv in
    mingw)
      file=$(cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/')
      ;;
    cygwin)
      # Keep the original name if the helper fails.
      file=$(cygpath -m "$file" || echo "$file")
      ;;
    wine)
      # Keep the original name if the helper fails.
      file=$(winepath -w "$file" || echo "$file")
      ;;
  esac
}
|
||||
|
||||
# func_at_file at_file operation archive
# Run OPERATION on ARCHIVE once for every member named in AT_FILE.
# The names read from the @FILE are deliberately NOT passed through
# func_file_conv, since the user would need to supply preconverted file
# names to binutils ar, at least for MinGW.
func_at_file ()
{
  operation=$2
  archive=$3
  at_file_contents=$(cat "$1")

  # Split the file text into positional parameters using shell quoting
  # rules; the placeholder "x" is removed again by the shift.
  eval set x "$at_file_contents"
  shift

  for member in "$@"
  do
    $AR -NOLOGO $operation:"$member" "$archive" || exit $?
  done
}
|
||||
|
||||
case $1 in
|
||||
'')
|
||||
func_error "no command. Try '$0 --help' for more information."
|
||||
;;
|
||||
-h | --h*)
|
||||
cat <<EOF
|
||||
Usage: $me [--help] [--version] PROGRAM ACTION ARCHIVE [MEMBER...]
|
||||
|
||||
Members may be specified in a file named with @FILE.
|
||||
EOF
|
||||
exit $?
|
||||
;;
|
||||
-v | --v*)
|
||||
echo "$me, version $scriptversion"
|
||||
exit $?
|
||||
;;
|
||||
esac
|
||||
|
||||
if test $# -lt 3; then
|
||||
func_error "you must specify a program, an action and an archive"
|
||||
fi
|
||||
|
||||
AR=$1
|
||||
shift
|
||||
while :
|
||||
do
|
||||
if test $# -lt 2; then
|
||||
func_error "you must specify a program, an action and an archive"
|
||||
fi
|
||||
case $1 in
|
||||
-lib | -LIB \
|
||||
| -ltcg | -LTCG \
|
||||
| -machine* | -MACHINE* \
|
||||
| -subsystem* | -SUBSYSTEM* \
|
||||
| -verbose | -VERBOSE \
|
||||
| -wx* | -WX* )
|
||||
AR="$AR $1"
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
action=$1
|
||||
shift
|
||||
break
|
||||
;;
|
||||
esac
|
||||
done
|
||||
orig_archive=$1
|
||||
shift
|
||||
func_file_conv "$orig_archive"
|
||||
archive=$file
|
||||
|
||||
# strip leading dash in $action
|
||||
action=${action#-}
|
||||
|
||||
delete=
|
||||
extract=
|
||||
list=
|
||||
quick=
|
||||
replace=
|
||||
index=
|
||||
create=
|
||||
|
||||
while test -n "$action"
|
||||
do
|
||||
case $action in
|
||||
d*) delete=yes ;;
|
||||
x*) extract=yes ;;
|
||||
t*) list=yes ;;
|
||||
q*) quick=yes ;;
|
||||
r*) replace=yes ;;
|
||||
s*) index=yes ;;
|
||||
S*) ;; # the index is always updated implicitly
|
||||
c*) create=yes ;;
|
||||
u*) ;; # TODO: don't ignore the update modifier
|
||||
v*) ;; # TODO: don't ignore the verbose modifier
|
||||
*)
|
||||
func_error "unknown action specified"
|
||||
;;
|
||||
esac
|
||||
action=${action#?}
|
||||
done
|
||||
|
||||
case $delete$extract$list$quick$replace,$index in
|
||||
yes,* | ,yes)
|
||||
;;
|
||||
yesyes*)
|
||||
func_error "more than one action specified"
|
||||
;;
|
||||
*)
|
||||
func_error "no action specified"
|
||||
;;
|
||||
esac
|
||||
|
||||
# Carry out the requested action. Exactly one of delete/extract/quick/
# replace/list (or a bare index request) was selected above.

if test -n "$delete"; then
  if test ! -f "$orig_archive"; then
    func_error "archive not found"
  fi
  # Use the loop variable, not $1: the positional parameters are never
  # shifted in this loop, so $1 would name the first member every time.
  for member
  do
    case $member in
      @*)
        func_at_file "${member#@}" -REMOVE "$archive"
        ;;
      *)
        func_file_conv "$member"
        $AR -NOLOGO -REMOVE:"$file" "$archive" || exit $?
        ;;
    esac
  done

elif test -n "$extract"; then
  if test ! -f "$orig_archive"; then
    func_error "archive not found"
  fi
  if test $# -gt 0; then
    # Same as for delete: each iteration must handle its own member.
    for member
    do
      case $member in
        @*)
          func_at_file "${member#@}" -EXTRACT "$archive"
          ;;
        *)
          func_file_conv "$member"
          $AR -NOLOGO -EXTRACT:"$file" "$archive" || exit $?
          ;;
      esac
    done
  else
    # No members given: extract everything the archive lists. Backslashes
    # are doubled so that "read" hands each name back unchanged.
    $AR -NOLOGO -LIST "$archive" | sed -e 's/\\/\\\\/g' | while read member
    do
      $AR -NOLOGO -EXTRACT:"$member" "$archive" || exit $?
    done
  fi

elif test -n "$quick$replace"; then
  if test ! -f "$orig_archive"; then
    if test -z "$create"; then
      echo "$me: creating $orig_archive"
    fi
    orig_archive=
  else
    orig_archive=$archive
  fi

  # Rotate the argument list: each pass appends the converted form of $1
  # and drops the placeholder "x" plus the original $1, so after the loop
  # "$@" holds only converted names in their original order.
  for member
  do
    case $1 in
      @*)
        func_file_conv "${1#@}"
        set x "$@" "@$file"
        ;;
      *)
        func_file_conv "$1"
        set x "$@" "$file"
        ;;
    esac
    shift
    shift
  done

  if test -n "$orig_archive"; then
    $AR -NOLOGO -OUT:"$archive" "$orig_archive" "$@" || exit $?
  else
    $AR -NOLOGO -OUT:"$archive" "$@" || exit $?
  fi

elif test -n "$list"; then
  if test ! -f "$orig_archive"; then
    func_error "archive not found"
  fi
  $AR -NOLOGO -LIST "$archive" || exit $?
fi
|
22
pcre2/cmake/COPYING-CMAKE-SCRIPTS
Normal file
22
pcre2/cmake/COPYING-CMAKE-SCRIPTS
Normal file
@ -0,0 +1,22 @@
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
3. The name of the author may not be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
347
pcre2/compile
Executable file
347
pcre2/compile
Executable file
@ -0,0 +1,347 @@
|
||||
#! /bin/sh
|
||||
# Wrapper for compilers which do not understand '-c -o'.
|
||||
|
||||
scriptversion=2012-10-14.11; # UTC
|
||||
|
||||
# Copyright (C) 1999-2014 Free Software Foundation, Inc.
|
||||
# Written by Tom Tromey <tromey@cygnus.com>.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# As a special exception to the GNU General Public License, if you
|
||||
# distribute this file as part of a program that contains a
|
||||
# configuration script generated by Autoconf, you may include it under
|
||||
# the same distribution terms that you use for the rest of that program.
|
||||
|
||||
# This file is maintained in Automake, please report
|
||||
# bugs to <bug-automake@gnu.org> or send patches to
|
||||
# <automake-patches@gnu.org>.
|
||||
|
||||
nl='
|
||||
'
|
||||
|
||||
# We need space, tab and new line, in precisely that order. Quoting is
|
||||
# there to prevent tools from complaining about whitespace usage.
|
||||
IFS=" "" $nl"
|
||||
|
||||
file_conv=
|
||||
|
||||
# func_file_conv build_file lazy
|
||||
# Convert a $build file to $host form and store it in $file
|
||||
# Currently only supports Windows hosts. If the determined conversion
|
||||
# type is listed in (the comma separated) LAZY, no conversion will
|
||||
# take place.
|
||||
func_file_conv ()
{
  # Store $1 in $file, converting it to host form when it is an absolute,
  # non-UNC path.  $2 is an optional comma-separated list of conversion
  # types for which the path must be left untouched.
  file=$1

  # Relative paths and UNC paths (leading //) are passed through as-is.
  case $file in
    / | /[!/]*) ;;
    *) return 0 ;;
  esac

  # Work out the conversion type once and cache it in $file_conv.
  if test -z "$file_conv"; then
    case `uname -s` in
      MINGW*)  file_conv=mingw ;;
      CYGWIN*) file_conv=cygwin ;;
      *)       file_conv=wine ;;
    esac
  fi

  case $file_conv/,$2, in
    *,$file_conv,*)
      # Conversion type is listed in $2: leave the path alone.
      ;;
    mingw/*)
      file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
      ;;
    cygwin/*)
      file=`cygpath -m "$file" || echo "$file"`
      ;;
    wine/*)
      file=`winepath -w "$file" || echo "$file"`
      ;;
  esac
}
|
||||
|
||||
# func_cl_dashL linkdir
|
||||
# Make cl look for libraries in LINKDIR
|
||||
func_cl_dashL ()
{
  # Convert directory $1 with func_file_conv, append the result to the
  # semicolon-separated $lib_path, and add a matching -LIBPATH: option
  # to $linker_opts.
  func_file_conv "$1"
  # Prefix the existing list and a ';' only when the list is non-empty.
  lib_path="${lib_path:+$lib_path;}$file"
  linker_opts="$linker_opts -LIBPATH:$file"
}
|
||||
|
||||
# func_cl_dashl library
|
||||
# Do a library search-path lookup for cl
|
||||
func_cl_dashl ()
{
  # Resolve library name $1 to a file name, left in $lib.  Each directory
  # in the semicolon-separated lists $lib_path and $LIB is probed, in
  # order, for NAME.dll.lib (skipped when $shared is false), NAME.lib and
  # libNAME.a.  When nothing matches, $lib falls back to NAME.lib.
  lib=$1
  found=no
  save_IFS=$IFS
  IFS=';'
  for dir in $lib_path $LIB
  do
    # Only the directory lists are split on ';'; restore IFS for the body.
    IFS=$save_IFS
    if $shared && test -f "$dir/$lib.dll.lib"; then
      lib=$dir/$lib.dll.lib
      found=yes
    elif test -f "$dir/$lib.lib"; then
      lib=$dir/$lib.lib
      found=yes
    elif test -f "$dir/lib$lib.a"; then
      lib=$dir/lib$lib.a
      found=yes
    fi
    test "$found" = yes && break
  done
  IFS=$save_IFS

  test "$found" = yes || lib=$lib.lib
}
|
||||
|
||||
# func_cl_wrapper cl arg...
|
||||
# Adjust compile command to suit cl
|
||||
func_cl_wrapper ()
|
||||
{
|
||||
# Translate a Unix-style compile/link command line into one for cl and
# exec it.  Each original argument is examined once; its translation is
# appended to the end of the positional parameters and the original is
# shifted off, so when the loop finishes "$@" holds only the rewritten
# command.  Options meant for the linker are collected in $linker_opts
# and passed last, after a single -link marker.
# Assume a capable shell
|
||||
lib_path=
|
||||
shared=:
|
||||
linker_opts=
|
||||
for arg
|
||||
do
|
||||
# $eat is set when the previous option already used this argument as
# its value, so the argument is skipped here.
if test -n "$eat"; then
|
||||
eat=
|
||||
else
|
||||
case $1 in
|
||||
-o)
|
||||
# configure might choose to run compile as 'compile cc -o foo foo.c'.
|
||||
eat=1
|
||||
case $2 in
|
||||
*.o | *.[oO][bB][jJ])
|
||||
func_file_conv "$2"
|
||||
# The leading x keeps set from parsing a dash argument as one of its
# own options; the shift that follows removes the x again.
set x "$@" -Fo"$file"
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
func_file_conv "$2"
|
||||
set x "$@" -Fe"$file"
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
-I)
|
||||
eat=1
|
||||
func_file_conv "$2" mingw
|
||||
set x "$@" -I"$file"
|
||||
shift
|
||||
;;
|
||||
-I*)
|
||||
func_file_conv "${1#-I}" mingw
|
||||
set x "$@" -I"$file"
|
||||
shift
|
||||
;;
|
||||
-l)
|
||||
eat=1
|
||||
func_cl_dashl "$2"
|
||||
set x "$@" "$lib"
|
||||
shift
|
||||
;;
|
||||
-l*)
|
||||
func_cl_dashl "${1#-l}"
|
||||
set x "$@" "$lib"
|
||||
shift
|
||||
;;
|
||||
-L)
|
||||
eat=1
|
||||
func_cl_dashL "$2"
|
||||
;;
|
||||
-L*)
|
||||
func_cl_dashL "${1#-L}"
|
||||
;;
|
||||
-static)
|
||||
shared=false
|
||||
;;
|
||||
-Wl,*)
|
||||
# Split the comma-separated linker flags and queue each one.
arg=${1#-Wl,}
|
||||
save_ifs="$IFS"; IFS=','
|
||||
for flag in $arg; do
|
||||
IFS="$save_ifs"
|
||||
linker_opts="$linker_opts $flag"
|
||||
done
|
||||
IFS="$save_ifs"
|
||||
;;
|
||||
-Xlinker)
|
||||
eat=1
|
||||
linker_opts="$linker_opts $2"
|
||||
;;
|
||||
-*)
|
||||
set x "$@" "$1"
|
||||
shift
|
||||
;;
|
||||
*.cc | *.CC | *.cxx | *.CXX | *.[cC]++)
|
||||
func_file_conv "$1"
|
||||
set x "$@" -Tp"$file"
|
||||
shift
|
||||
;;
|
||||
*.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO])
|
||||
func_file_conv "$1" mingw
|
||||
set x "$@" "$file"
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set x "$@" "$1"
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
# Drop the original argument that was just handled.
shift
|
||||
done
|
||||
if test -n "$linker_opts"; then
|
||||
linker_opts="-link$linker_opts"
|
||||
fi
|
||||
exec "$@" $linker_opts
|
||||
# Only reached if the exec above did not replace the shell.
exit 1
|
||||
}
|
||||
|
||||
eat=
|
||||
|
||||
case $1 in
|
||||
'')
|
||||
echo "$0: No command. Try '$0 --help' for more information." 1>&2
|
||||
exit 1;
|
||||
;;
|
||||
-h | --h*)
|
||||
cat <<\EOF
|
||||
Usage: compile [--help] [--version] PROGRAM [ARGS]
|
||||
|
||||
Wrapper for compilers which do not understand '-c -o'.
|
||||
Remove '-o dest.o' from ARGS, run PROGRAM with the remaining
|
||||
arguments, and rename the output as expected.
|
||||
|
||||
If you are trying to build a whole package this is not the
|
||||
right script to run: please start by reading the file 'INSTALL'.
|
||||
|
||||
Report bugs to <bug-automake@gnu.org>.
|
||||
EOF
|
||||
exit $?
|
||||
;;
|
||||
-v | --v*)
|
||||
echo "compile $scriptversion"
|
||||
exit $?
|
||||
;;
|
||||
cl | *[/\\]cl | cl.exe | *[/\\]cl.exe )
|
||||
func_cl_wrapper "$@" # Doesn't return...
|
||||
;;
|
||||
esac
|
||||
|
||||
ofile=
|
||||
cfile=
|
||||
|
||||
for arg
|
||||
do
|
||||
if test -n "$eat"; then
|
||||
eat=
|
||||
else
|
||||
case $1 in
|
||||
-o)
|
||||
# configure might choose to run compile as 'compile cc -o foo foo.c'.
|
||||
# So we strip '-o arg' only if arg is an object.
|
||||
eat=1
|
||||
case $2 in
|
||||
*.o | *.obj)
|
||||
ofile=$2
|
||||
;;
|
||||
*)
|
||||
set x "$@" -o "$2"
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
*.c)
|
||||
cfile=$1
|
||||
set x "$@" "$1"
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set x "$@" "$1"
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
shift
|
||||
done
|
||||
|
||||
if test -z "$ofile" || test -z "$cfile"; then
|
||||
# If no '-o' option was seen then we might have been invoked from a
|
||||
# pattern rule where we don't need one. That is ok -- this is a
|
||||
# normal compilation that the losing compiler can handle. If no
|
||||
# '.c' file was seen then we are probably linking. That is also
|
||||
# ok.
|
||||
exec "$@"
|
||||
fi
|
||||
|
||||
# Name of file we expect compiler to create.
|
||||
cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'`
|
||||
|
||||
# Create the lock directory.
|
||||
# Note: use '[/\\:.-]' here to ensure that we don't use the same name
|
||||
# that we are using for the .o file. Also, base the name on the expected
|
||||
# object file name, since that is what matters with a parallel build.
|
||||
lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d
|
||||
while true; do
|
||||
if mkdir "$lockdir" >/dev/null 2>&1; then
|
||||
break
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
# FIXME: race condition here if user kills between mkdir and trap.
|
||||
trap "rmdir '$lockdir'; exit 1" 1 2 15
|
||||
|
||||
# Run the compile.
|
||||
"$@"
|
||||
ret=$?
|
||||
|
||||
if test -f "$cofile"; then
|
||||
test "$cofile" = "$ofile" || mv "$cofile" "$ofile"
|
||||
elif test -f "${cofile}bj"; then
|
||||
test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile"
|
||||
fi
|
||||
|
||||
rmdir "$lockdir"
|
||||
exit $ret
|
||||
|
||||
# Local Variables:
|
||||
# mode: shell-script
|
||||
# sh-indentation: 2
|
||||
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
||||
# time-stamp-start: "scriptversion="
|
||||
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||
# time-stamp-time-zone: "UTC"
|
||||
# time-stamp-end: "; # UTC"
|
||||
# End:
|
47
pcre2/config-cmake.h.in
Normal file
47
pcre2/config-cmake.h.in
Normal file
@ -0,0 +1,47 @@
|
||||
/* config.h for CMake builds */
|
||||
|
||||
#cmakedefine HAVE_DIRENT_H 1
|
||||
#cmakedefine HAVE_INTTYPES_H 1
|
||||
#cmakedefine HAVE_STDINT_H 1
|
||||
#cmakedefine HAVE_STRERROR 1
|
||||
#cmakedefine HAVE_SYS_STAT_H 1
|
||||
#cmakedefine HAVE_SYS_TYPES_H 1
|
||||
#cmakedefine HAVE_UNISTD_H 1
|
||||
#cmakedefine HAVE_WINDOWS_H 1
|
||||
|
||||
#cmakedefine HAVE_BCOPY 1
|
||||
#cmakedefine HAVE_MEMMOVE 1
|
||||
|
||||
#cmakedefine PCRE2_STATIC 1
|
||||
|
||||
#cmakedefine SUPPORT_PCRE2_8 1
|
||||
#cmakedefine SUPPORT_PCRE2_16 1
|
||||
#cmakedefine SUPPORT_PCRE2_32 1
|
||||
#cmakedefine PCRE2_DEBUG 1
|
||||
|
||||
#cmakedefine SUPPORT_LIBBZ2 1
|
||||
#cmakedefine SUPPORT_LIBEDIT 1
|
||||
#cmakedefine SUPPORT_LIBREADLINE 1
|
||||
#cmakedefine SUPPORT_LIBZ 1
|
||||
|
||||
#cmakedefine SUPPORT_JIT 1
|
||||
#cmakedefine SUPPORT_PCRE2GREP_JIT 1
|
||||
#cmakedefine SUPPORT_UNICODE 1
|
||||
#cmakedefine SUPPORT_VALGRIND 1
|
||||
|
||||
#cmakedefine BSR_ANYCRLF 1
|
||||
#cmakedefine EBCDIC 1
|
||||
#cmakedefine EBCDIC_NL25 1
|
||||
#cmakedefine HEAP_MATCH_RECURSE 1
|
||||
|
||||
#define LINK_SIZE @PCRE2_LINK_SIZE@
|
||||
#define MATCH_LIMIT @PCRE2_MATCH_LIMIT@
|
||||
#define MATCH_LIMIT_RECURSION @PCRE2_MATCH_LIMIT_RECURSION@
|
||||
#define NEWLINE_DEFAULT @NEWLINE_DEFAULT@
|
||||
#define PARENS_NEST_LIMIT @PCRE2_PARENS_NEST_LIMIT@
|
||||
#define PCRE2GREP_BUFSIZE @PCRE2GREP_BUFSIZE@
|
||||
|
||||
#define MAX_NAME_SIZE 32
|
||||
#define MAX_NAME_COUNT 10000
|
||||
|
||||
/* end config.h for CMake builds */
|
1421
pcre2/config.guess
vendored
Executable file
1421
pcre2/config.guess
vendored
Executable file
File diff suppressed because it is too large
Load Diff
1807
pcre2/config.sub
vendored
Executable file
1807
pcre2/config.sub
vendored
Executable file
File diff suppressed because it is too large
Load Diff
18234
pcre2/configure
vendored
Executable file
18234
pcre2/configure
vendored
Executable file
File diff suppressed because it is too large
Load Diff
905
pcre2/configure.ac
Normal file
905
pcre2/configure.ac
Normal file
@ -0,0 +1,905 @@
|
||||
dnl Process this file with autoconf to produce a configure script.
|
||||
|
||||
dnl NOTE FOR MAINTAINERS: Do not use minor version numbers 08 or 09 because
|
||||
dnl the leading zeros may cause them to be treated as invalid octal constants
|
||||
dnl if a PCRE2 user writes code that uses PCRE2_MINOR as a number. There is now
|
||||
dnl a check further down that throws an error if 08 or 09 are used.
|
||||
|
||||
dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might
|
||||
dnl be defined as -RC2, for example. For real releases, it should be empty.
|
||||
|
||||
m4_define(pcre2_major, [10])
|
||||
m4_define(pcre2_minor, [20])
|
||||
m4_define(pcre2_prerelease, [])
|
||||
m4_define(pcre2_date, [2015-06-30])
|
||||
|
||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||
# 50 lines of this file. Please update that if the variables above are moved.
|
||||
|
||||
# Libtool shared library interface versions (current:revision:age)
|
||||
m4_define(libpcre2_8_version, [2:0:2])
|
||||
m4_define(libpcre2_16_version, [2:0:2])
|
||||
m4_define(libpcre2_32_version, [2:0:2])
|
||||
m4_define(libpcre2_posix_version, [0:0:0])
|
||||
|
||||
AC_PREREQ(2.57)
|
||||
AC_INIT(PCRE2, pcre2_major.pcre2_minor[]pcre2_prerelease, , pcre2)
|
||||
AC_CONFIG_SRCDIR([src/pcre2.h.in])
|
||||
AM_INIT_AUTOMAKE([dist-bzip2 dist-zip])
|
||||
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
||||
AC_CONFIG_HEADERS(src/config.h)
|
||||
|
||||
# This is a new thing required to stop a warning from automake 1.12
|
||||
m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
|
||||
|
||||
# This was added at the suggestion of libtoolize (03-Jan-10)
|
||||
AC_CONFIG_MACRO_DIR([m4])
|
||||
|
||||
# The default CFLAGS in Autoconf are "-g -O2" for gcc and just "-g" for any
|
||||
# other compiler. There doesn't seem to be a standard way of getting rid of the
|
||||
# -g (which I don't think is needed for a production library). This fudge seems
|
||||
# to achieve the necessary. First, we remember the externally set values of
|
||||
# CFLAGS. Then call the AC_PROG_CC macro to find the compiler - if CFLAGS is
|
||||
# not set, it will be set to Autoconf's defaults. Afterwards, if the original
|
||||
# values were not set, remove the -g from the Autoconf defaults.
|
||||
|
||||
remember_set_CFLAGS="$CFLAGS"
|
||||
|
||||
AC_PROG_CC
|
||||
AM_PROG_CC_C_O
|
||||
|
||||
if test "x$remember_set_CFLAGS" = "x"
|
||||
then
|
||||
if test "$CFLAGS" = "-g -O2"
|
||||
then
|
||||
CFLAGS="-O2"
|
||||
elif test "$CFLAGS" = "-g"
|
||||
then
|
||||
CFLAGS=""
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check for a 64-bit integer type
|
||||
AC_TYPE_INT64_T
|
||||
|
||||
AC_PROG_INSTALL
|
||||
AC_LIBTOOL_WIN32_DLL
|
||||
LT_INIT
|
||||
AC_PROG_LN_S
|
||||
|
||||
# Check for GCC visibility feature
|
||||
|
||||
PCRE2_VISIBILITY
|
||||
|
||||
# Versioning
|
||||
|
||||
PCRE2_MAJOR="pcre2_major"
|
||||
PCRE2_MINOR="pcre2_minor"
|
||||
PCRE2_PRERELEASE="pcre2_prerelease"
|
||||
PCRE2_DATE="pcre2_date"
|
||||
|
||||
if test "$PCRE2_MINOR" = "08" -o "$PCRE2_MINOR" = "09"
|
||||
then
|
||||
echo "***"
|
||||
echo "*** Minor version number $PCRE2_MINOR must not be used. ***"
|
||||
echo "*** Use only 00 to 07 or 10 onwards, to avoid octal issues. ***"
|
||||
echo "***"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
AC_SUBST(PCRE2_MAJOR)
|
||||
AC_SUBST(PCRE2_MINOR)
|
||||
AC_SUBST(PCRE2_PRERELEASE)
|
||||
AC_SUBST(PCRE2_DATE)
|
||||
|
||||
# Set a more sensible default value for $(htmldir).
|
||||
if test "x$htmldir" = 'x${docdir}'
|
||||
then
|
||||
htmldir='${docdir}/html'
|
||||
fi
|
||||
|
||||
# Force an error for PCRE1 size options
|
||||
AC_ARG_ENABLE(pcre8,,,enable_pcre8=no)
|
||||
AC_ARG_ENABLE(pcre16,,,enable_pcre16=no)
|
||||
AC_ARG_ENABLE(pcre32,,,enable_pcre32=no)
|
||||
|
||||
if test "$enable_pcre8$enable_pcre16$enable_pcre32" != "nonono"
|
||||
then
|
||||
echo "** ERROR: Use --[[en|dis]]able-pcre2-[[8|16|32]], not --[[en|dis]]able-pcre[[8|16|32]]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Handle --disable-pcre2-8 (enabled by default)
|
||||
AC_ARG_ENABLE(pcre2-8,
|
||||
AS_HELP_STRING([--disable-pcre2-8],
|
||||
[disable 8 bit character support]),
|
||||
, enable_pcre2_8=unset)
|
||||
AC_SUBST(enable_pcre2_8)
|
||||
|
||||
# Handle --enable-pcre2-16 (disabled by default)
|
||||
AC_ARG_ENABLE(pcre2-16,
|
||||
AS_HELP_STRING([--enable-pcre2-16],
|
||||
[enable 16 bit character support]),
|
||||
, enable_pcre2_16=unset)
|
||||
AC_SUBST(enable_pcre2_16)
|
||||
|
||||
# Handle --enable-pcre2-32 (disabled by default)
|
||||
AC_ARG_ENABLE(pcre2-32,
|
||||
AS_HELP_STRING([--enable-pcre2-32],
|
||||
[enable 32 bit character support]),
|
||||
, enable_pcre2_32=unset)
|
||||
AC_SUBST(enable_pcre2_32)
|
||||
|
||||
# Handle --enable-debug (disabled by default)
|
||||
AC_ARG_ENABLE(debug,
|
||||
AS_HELP_STRING([--enable-debug],
|
||||
[enable debugging code]),
|
||||
, enable_debug=no)
|
||||
|
||||
# Handle --enable-jit (disabled by default)
|
||||
AC_ARG_ENABLE(jit,
|
||||
AS_HELP_STRING([--enable-jit],
|
||||
[enable Just-In-Time compiling support]),
|
||||
, enable_jit=no)
|
||||
|
||||
# Handle --disable-pcre2grep-jit (enabled by default)
|
||||
AC_ARG_ENABLE(pcre2grep-jit,
|
||||
AS_HELP_STRING([--disable-pcre2grep-jit],
|
||||
[disable JIT support in pcre2grep]),
|
||||
, enable_pcre2grep_jit=yes)
|
||||
|
||||
# Handle --enable-rebuild-chartables
|
||||
AC_ARG_ENABLE(rebuild-chartables,
|
||||
AS_HELP_STRING([--enable-rebuild-chartables],
|
||||
[rebuild character tables in current locale]),
|
||||
, enable_rebuild_chartables=no)
|
||||
|
||||
# Handle --disable-unicode (enabled by default)
|
||||
AC_ARG_ENABLE(unicode,
|
||||
AS_HELP_STRING([--disable-unicode],
|
||||
[disable Unicode support]),
|
||||
, enable_unicode=unset)
|
||||
|
||||
# Handle newline options
|
||||
ac_pcre2_newline=lf
|
||||
AC_ARG_ENABLE(newline-is-cr,
|
||||
AS_HELP_STRING([--enable-newline-is-cr],
|
||||
[use CR as newline character]),
|
||||
ac_pcre2_newline=cr)
|
||||
AC_ARG_ENABLE(newline-is-lf,
|
||||
AS_HELP_STRING([--enable-newline-is-lf],
|
||||
[use LF as newline character (default)]),
|
||||
ac_pcre2_newline=lf)
|
||||
AC_ARG_ENABLE(newline-is-crlf,
|
||||
AS_HELP_STRING([--enable-newline-is-crlf],
|
||||
[use CRLF as newline sequence]),
|
||||
ac_pcre2_newline=crlf)
|
||||
AC_ARG_ENABLE(newline-is-anycrlf,
|
||||
AS_HELP_STRING([--enable-newline-is-anycrlf],
|
||||
[use CR, LF, or CRLF as newline sequence]),
|
||||
ac_pcre2_newline=anycrlf)
|
||||
AC_ARG_ENABLE(newline-is-any,
|
||||
AS_HELP_STRING([--enable-newline-is-any],
|
||||
[use any valid Unicode newline sequence]),
|
||||
ac_pcre2_newline=any)
|
||||
enable_newline="$ac_pcre2_newline"
|
||||
|
||||
# Handle --enable-bsr-anycrlf
|
||||
AC_ARG_ENABLE(bsr-anycrlf,
|
||||
AS_HELP_STRING([--enable-bsr-anycrlf],
|
||||
[\R matches only CR, LF, CRLF by default]),
|
||||
, enable_bsr_anycrlf=no)
|
||||
|
||||
# Handle --enable-ebcdic
|
||||
AC_ARG_ENABLE(ebcdic,
|
||||
AS_HELP_STRING([--enable-ebcdic],
|
||||
[assume EBCDIC coding rather than ASCII; incompatible with --enable-utf; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]),
|
||||
, enable_ebcdic=no)
|
||||
|
||||
# Handle --enable-ebcdic-nl25
|
||||
AC_ARG_ENABLE(ebcdic-nl25,
|
||||
AS_HELP_STRING([--enable-ebcdic-nl25],
|
||||
[set EBCDIC code for NL to 0x25 instead of 0x15; it implies --enable-ebcdic]),
|
||||
, enable_ebcdic_nl25=no)
|
||||
|
||||
# Handle --disable-stack-for-recursion
|
||||
AC_ARG_ENABLE(stack-for-recursion,
|
||||
AS_HELP_STRING([--disable-stack-for-recursion],
|
||||
[don't use stack recursion when matching]),
|
||||
, enable_stack_for_recursion=yes)
|
||||
|
||||
# Handle --enable-pcre2grep-libz
|
||||
AC_ARG_ENABLE(pcre2grep-libz,
|
||||
AS_HELP_STRING([--enable-pcre2grep-libz],
|
||||
[link pcre2grep with libz to handle .gz files]),
|
||||
, enable_pcre2grep_libz=no)
|
||||
|
||||
# Handle --enable-pcre2grep-libbz2
|
||||
AC_ARG_ENABLE(pcre2grep-libbz2,
|
||||
AS_HELP_STRING([--enable-pcre2grep-libbz2],
|
||||
[link pcre2grep with libbz2 to handle .bz2 files]),
|
||||
, enable_pcre2grep_libbz2=no)
|
||||
|
||||
# Handle --with-pcre2grep-bufsize=N
|
||||
AC_ARG_WITH(pcre2grep-bufsize,
|
||||
AS_HELP_STRING([--with-pcre2grep-bufsize=N],
|
||||
[pcre2grep buffer size (default=20480, minimum=8192)]),
|
||||
, with_pcre2grep_bufsize=20480)
|
||||
|
||||
# Handle --enable-pcre2test-libedit
|
||||
AC_ARG_ENABLE(pcre2test-libedit,
|
||||
AS_HELP_STRING([--enable-pcre2test-libedit],
|
||||
[link pcre2test with libedit]),
|
||||
, enable_pcre2test_libedit=no)
|
||||
|
||||
# Handle --enable-pcre2test-libreadline
|
||||
AC_ARG_ENABLE(pcre2test-libreadline,
|
||||
AS_HELP_STRING([--enable-pcre2test-libreadline],
|
||||
[link pcre2test with libreadline]),
|
||||
, enable_pcre2test_libreadline=no)
|
||||
|
||||
# Handle --with-link-size=N
|
||||
AC_ARG_WITH(link-size,
|
||||
AS_HELP_STRING([--with-link-size=N],
|
||||
[internal link size (2, 3, or 4 allowed; default=2)]),
|
||||
, with_link_size=2)
|
||||
|
||||
# Handle --with-parens-nest-limit=N
|
||||
AC_ARG_WITH(parens-nest-limit,
|
||||
AS_HELP_STRING([--with-parens-nest-limit=N],
|
||||
[nested parentheses limit (default=250)]),
|
||||
, with_parens_nest_limit=250)
|
||||
|
||||
# Handle --with-match-limit=N
|
||||
AC_ARG_WITH(match-limit,
|
||||
AS_HELP_STRING([--with-match-limit=N],
|
||||
[default limit on internal looping (default=10000000)]),
|
||||
, with_match_limit=10000000)
|
||||
|
||||
# Handle --with-match-limit-recursion=N
|
||||
#
|
||||
# Note: In config.h, the default is to define MATCH_LIMIT_RECURSION
|
||||
# symbolically as MATCH_LIMIT, which in turn is defined to be some numeric
|
||||
# value (e.g. 10000000). MATCH_LIMIT_RECURSION can otherwise be set to some
|
||||
# different numeric value (or even the same numeric value as MATCH_LIMIT,
|
||||
# though no longer defined in terms of the latter).
|
||||
#
|
||||
AC_ARG_WITH(match-limit-recursion,
|
||||
AS_HELP_STRING([--with-match-limit-recursion=N],
|
||||
[default limit on internal recursion (default=MATCH_LIMIT)]),
|
||||
, with_match_limit_recursion=MATCH_LIMIT)
|
||||
|
||||
# Handle --enable-valgrind
|
||||
AC_ARG_ENABLE(valgrind,
|
||||
AS_HELP_STRING([--enable-valgrind],
|
||||
[valgrind support]),
|
||||
, enable_valgrind=no)
|
||||
|
||||
# Enable code coverage reports using gcov
|
||||
AC_ARG_ENABLE(coverage,
|
||||
AS_HELP_STRING([--enable-coverage],
|
||||
[enable code coverage reports using gcov]),
|
||||
, enable_coverage=no)
|
||||
|
||||
# Set the default value for pcre2-8
|
||||
if test "x$enable_pcre2_8" = "xunset"
|
||||
then
|
||||
enable_pcre2_8=yes
|
||||
fi
|
||||
|
||||
# Set the default value for pcre2-16
|
||||
if test "x$enable_pcre2_16" = "xunset"
|
||||
then
|
||||
enable_pcre2_16=no
|
||||
fi
|
||||
|
||||
# Set the default value for pcre2-32
|
||||
if test "x$enable_pcre2_32" = "xunset"
|
||||
then
|
||||
enable_pcre2_32=no
|
||||
fi
|
||||
|
||||
# Make sure at least one library is selected
|
||||
if test "x$enable_pcre2_8$enable_pcre2_16$enable_pcre2_32" = "xnonono"
|
||||
then
|
||||
AC_MSG_ERROR([At least one of the 8, 16 or 32 bit libraries must be enabled])
|
||||
fi
|
||||
|
||||
# Unicode is enabled by default.
|
||||
if test "x$enable_unicode" = "xunset"
|
||||
then
|
||||
enable_unicode=yes
|
||||
fi
|
||||
|
||||
# Convert the newline identifier into the appropriate integer value. These must
|
||||
# agree with the PCRE2_NEWLINE_xxx values in pcre2.h.
|
||||
|
||||
case "$enable_newline" in
|
||||
cr) ac_pcre2_newline_value=1 ;;
|
||||
lf) ac_pcre2_newline_value=2 ;;
|
||||
crlf) ac_pcre2_newline_value=3 ;;
|
||||
any) ac_pcre2_newline_value=4 ;;
|
||||
anycrlf) ac_pcre2_newline_value=5 ;;
|
||||
*)
|
||||
AC_MSG_ERROR([invalid argument \"$enable_newline\" to --enable-newline option])
|
||||
;;
|
||||
esac
|
||||
|
||||
# --enable-ebcdic-nl25 implies --enable-ebcdic
|
||||
if test "x$enable_ebcdic_nl25" = "xyes"; then
|
||||
enable_ebcdic=yes
|
||||
fi
|
||||
|
||||
# Make sure that if enable_ebcdic is set, rebuild_chartables is also enabled.
|
||||
# Also check that UTF support is not requested, because PCRE2 cannot handle
|
||||
# EBCDIC and UTF in the same build. To do so it would need to use different
|
||||
# character constants depending on the mode.
|
||||
#
|
||||
if test "x$enable_ebcdic" = "xyes"; then
|
||||
enable_rebuild_chartables=yes
|
||||
if test "x$enable_unicode" = "xyes"; then
|
||||
AC_MSG_ERROR([support for EBCDIC and Unicode cannot be enabled at the same time])
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check argument to --with-link-size
|
||||
case "$with_link_size" in
|
||||
2|3|4) ;;
|
||||
*)
|
||||
AC_MSG_ERROR([invalid argument \"$with_link_size\" to --with-link-size option])
|
||||
;;
|
||||
esac
|
||||
|
||||
AH_TOP([
|
||||
/* PCRE2 is written in Standard C, but there are a few non-standard things it
|
||||
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||
systems.
|
||||
|
||||
In environments that support the GNU autotools, config.h.in is converted into
|
||||
config.h by the "configure" script. In environments that use CMake,
|
||||
config-cmake.h.in is converted into config.h. If you are going to build PCRE2 "by
|
||||
hand" without using "configure" or CMake, you should copy the distributed
|
||||
config.h.generic to config.h, and edit the macro definitions to be the way you
|
||||
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
|
||||
so that config.h is included at the start of every source.
|
||||
|
||||
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
|
||||
but if you do, default values will be taken from config.h for non-boolean
|
||||
macros that are not defined on the command line.
|
||||
|
||||
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be defined
|
||||
(conventionally to 1) for TRUE, and not defined at all for FALSE. All such
|
||||
macros are listed as a commented #undef in config.h.generic. Macros such as
|
||||
MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
|
||||
|
||||
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
|
||||
sure both macros are undefined; an emulation function will then be used. */])
|
||||
|
||||
# Checks for header files.
|
||||
AC_HEADER_STDC
|
||||
AC_CHECK_HEADERS(limits.h sys/types.h sys/stat.h dirent.h)
|
||||
AC_CHECK_HEADERS([windows.h], [HAVE_WINDOWS_H=1])
|
||||
|
||||
# Conditional compilation
|
||||
AM_CONDITIONAL(WITH_PCRE2_8, test "x$enable_pcre2_8" = "xyes")
|
||||
AM_CONDITIONAL(WITH_PCRE2_16, test "x$enable_pcre2_16" = "xyes")
|
||||
AM_CONDITIONAL(WITH_PCRE2_32, test "x$enable_pcre2_32" = "xyes")
|
||||
AM_CONDITIONAL(WITH_DEBUG, test "x$enable_debug" = "xyes")
|
||||
AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes")
|
||||
AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes")
|
||||
AM_CONDITIONAL(WITH_UNICODE, test "x$enable_unicode" = "xyes")
|
||||
AM_CONDITIONAL(WITH_VALGRIND, test "x$enable_valgrind" = "xyes")
|
||||
|
||||
# Checks for typedefs, structures, and compiler characteristics.
|
||||
|
||||
AC_C_CONST
|
||||
AC_TYPE_SIZE_T
|
||||
|
||||
# Checks for library functions.
|
||||
|
||||
AC_CHECK_FUNCS(bcopy memmove strerror)
|
||||
|
||||
# Check for the availability of libz (aka zlib)
|
||||
|
||||
AC_CHECK_HEADERS([zlib.h], [HAVE_ZLIB_H=1])
|
||||
AC_CHECK_LIB([z], [gzopen], [HAVE_LIBZ=1])
|
||||
|
||||
# Check for the availability of libbz2. Originally we just used AC_CHECK_LIB,
|
||||
# as for libz. However, this had the following problem, diagnosed and fixed by
|
||||
# a user:
|
||||
#
|
||||
# - libbz2 uses the Pascal calling convention (WINAPI) for the functions
|
||||
# under Win32.
|
||||
# - The standard autoconf AC_CHECK_LIB fails to include "bzlib.h",
|
||||
# therefore missing the function definition.
|
||||
# - The compiler thus generates a "C" signature for the test function.
|
||||
# - The linker fails to find the "C" function.
|
||||
# - PCRE2 fails to configure if asked to do so against libbz2.
|
||||
#
|
||||
# Solution:
|
||||
#
|
||||
# - Replace the AC_CHECK_LIB test with a custom test.
|
||||
|
||||
AC_CHECK_HEADERS([bzlib.h], [HAVE_BZLIB_H=1])
|
||||
# Original test
|
||||
# AC_CHECK_LIB([bz2], [BZ2_bzopen], [HAVE_LIBBZ2=1])
|
||||
#
|
||||
# Custom test follows
|
||||
|
||||
AC_MSG_CHECKING([for libbz2])
# Try to link a small program that calls BZ2_bzopen with -lbz2 temporarily
# added to LIBS.  bzlib.h is included when available so that the call is
# compiled against the declared prototype.  HAVE_LIBBZ2 is set to 1 on
# success; LIBS is restored afterwards either way.
OLD_LIBS="$LIBS"
LIBS="$LIBS -lbz2"
AC_LINK_IFELSE([AC_LANG_PROGRAM([[
#ifdef HAVE_BZLIB_H
#include <bzlib.h>
#endif]],
[[return (int)BZ2_bzopen("conftest", "rb");]])],
[AC_MSG_RESULT([yes]); HAVE_LIBBZ2=1],
[AC_MSG_RESULT([no])])
LIBS="$OLD_LIBS"
|
||||
|
||||
# Check for the availability of libreadline
|
||||
|
||||
if test "$enable_pcre2test_libreadline" = "yes"; then
|
||||
AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_H=1])
|
||||
AC_CHECK_HEADERS([readline/history.h], [HAVE_HISTORY_H=1])
|
||||
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lreadline"],
|
||||
[unset ac_cv_lib_readline_readline;
|
||||
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-ltinfo"],
|
||||
[unset ac_cv_lib_readline_readline;
|
||||
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lcurses"],
|
||||
[unset ac_cv_lib_readline_readline;
|
||||
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lncurses"],
|
||||
[unset ac_cv_lib_readline_readline;
|
||||
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lncursesw"],
|
||||
[unset ac_cv_lib_readline_readline;
|
||||
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-ltermcap"],
|
||||
[LIBREADLINE=""],
|
||||
[-ltermcap])],
|
||||
[-lncursesw])],
|
||||
[-lncurses])],
|
||||
[-lcurses])],
|
||||
[-ltinfo])])
|
||||
AC_SUBST(LIBREADLINE)
|
||||
if test -n "$LIBREADLINE"; then
|
||||
if test "$LIBREADLINE" != "-lreadline"; then
|
||||
echo "-lreadline needs $LIBREADLINE"
|
||||
LIBREADLINE="-lreadline $LIBREADLINE"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
|
||||
# Check for the availability of libedit. Different distributions put its
|
||||
# headers in different places. Try to cover the most common ones.
|
||||
|
||||
if test "$enable_pcre2test_libedit" = "yes"; then
|
||||
AC_CHECK_HEADERS([editline/readline.h], [HAVE_EDITLINE_READLINE_H=1],
|
||||
[AC_CHECK_HEADERS([edit/readline/readline.h], [HAVE_READLINE_READLINE_H=1],
|
||||
[AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_READLINE_H=1])])])
|
||||
AC_CHECK_LIB([edit], [readline], [LIBEDIT="-ledit"])
|
||||
fi
|
||||
|
||||
# This facilitates -ansi builds under Linux
|
||||
dnl AC_DEFINE([_GNU_SOURCE], [], [Enable GNU extensions in glibc])
|
||||
|
||||
PCRE2_STATIC_CFLAG=""
|
||||
if test "x$enable_shared" = "xno" ; then
|
||||
AC_DEFINE([PCRE2_STATIC], [1], [
|
||||
Define to any value if linking statically (TODO: make nice with Libtool)])
|
||||
PCRE2_STATIC_CFLAG="-DPCRE2_STATIC"
|
||||
fi
|
||||
AC_SUBST(PCRE2_STATIC_CFLAG)
|
||||
|
||||
# Here is where PCRE2-specific defines are handled
|
||||
|
||||
if test "$enable_pcre2_8" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_PCRE2_8], [], [
|
||||
Define to any value to enable the 8 bit PCRE2 library.])
|
||||
fi
|
||||
|
||||
if test "$enable_pcre2_16" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_PCRE2_16], [], [
|
||||
Define to any value to enable the 16 bit PCRE2 library.])
|
||||
fi
|
||||
|
||||
if test "$enable_pcre2_32" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_PCRE2_32], [], [
|
||||
Define to any value to enable the 32 bit PCRE2 library.])
|
||||
fi
|
||||
|
||||
if test "$enable_debug" = "yes"; then
|
||||
AC_DEFINE([PCRE2_DEBUG], [], [
|
||||
Define to any value to include debugging code.])
|
||||
fi
|
||||
|
||||
# Unless running under Windows, JIT support requires pthreads.
|
||||
|
||||
if test "$enable_jit" = "yes"; then
|
||||
if test "$HAVE_WINDOWS_H" != "1"; then
|
||||
AX_PTHREAD([], [AC_MSG_ERROR([JIT support requires pthreads])])
|
||||
CC="$PTHREAD_CC"
|
||||
CFLAGS="$PTHREAD_CFLAGS $CFLAGS"
|
||||
LIBS="$PTHREAD_LIBS $LIBS"
|
||||
fi
|
||||
AC_DEFINE([SUPPORT_JIT], [], [
|
||||
Define to any value to enable support for Just-In-Time compiling.])
|
||||
else
|
||||
enable_pcre2grep_jit="no"
|
||||
fi
|
||||
|
||||
if test "$enable_pcre2grep_jit" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_PCRE2GREP_JIT], [], [
|
||||
Define to any value to enable JIT support in pcre2grep.])
|
||||
fi
|
||||
|
||||
if test "$enable_unicode" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_UNICODE], [], [
|
||||
Define to any value to enable support for Unicode and UTF encoding.
|
||||
This will work even in an EBCDIC environment, but it is incompatible
|
||||
with the EBCDIC macro. That is, PCRE2 can support *either* EBCDIC
|
||||
code *or* ASCII/Unicode, but not both at once.])
|
||||
fi
|
||||
|
||||
if test "$enable_stack_for_recursion" = "no"; then
|
||||
AC_DEFINE([HEAP_MATCH_RECURSE], [], [
|
||||
PCRE2 uses recursive function calls to handle backtracking while
|
||||
matching. This can sometimes be a problem on systems that have
|
||||
stacks of limited size. Define HEAP_MATCH_RECURSE to any value to get a
|
||||
version that doesn't use recursion in the match() function; instead
|
||||
it creates its own stack by steam using memory from the heap. For more
|
||||
detail, see the comments and other stuff just above the match() function.])
|
||||
fi
|
||||
|
||||
if test "$enable_pcre2grep_libz" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_LIBZ], [], [
|
||||
Define to any value to allow pcre2grep to be linked with libz, so that it is
|
||||
able to handle .gz files.])
|
||||
fi
|
||||
|
||||
if test "$enable_pcre2grep_libbz2" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_LIBBZ2], [], [
|
||||
Define to any value to allow pcre2grep to be linked with libbz2, so that it
|
||||
is able to handle .bz2 files.])
|
||||
fi
|
||||
|
||||
if test $with_pcre2grep_bufsize -lt 8192 ; then
|
||||
AC_MSG_WARN([$with_pcre2grep_bufsize is too small for --with-pcre2grep-bufsize; using 8192])
|
||||
with_pcre2grep_bufsize="8192"
|
||||
else
|
||||
if test $? -gt 1 ; then
|
||||
AC_MSG_ERROR([Bad value for --with-pcre2grep-bufsize])
|
||||
fi
|
||||
fi
|
||||
|
||||
AC_DEFINE_UNQUOTED([PCRE2GREP_BUFSIZE], [$with_pcre2grep_bufsize], [
|
||||
The value of PCRE2GREP_BUFSIZE determines the size of buffer used by pcre2grep
|
||||
to hold parts of the file it is searching. This is also the minimum value.
|
||||
The actual amount of memory used by pcre2grep is three times this number,
|
||||
because it allows for the buffering of "before" and "after" lines.])
|
||||
|
||||
if test "$enable_pcre2test_libedit" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_LIBEDIT], [], [
|
||||
Define to any value to allow pcre2test to be linked with libedit.])
|
||||
LIBREADLINE="$LIBEDIT"
|
||||
elif test "$enable_pcre2test_libreadline" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_LIBREADLINE], [], [
|
||||
Define to any value to allow pcre2test to be linked with libreadline.])
|
||||
fi
|
||||
|
||||
AC_DEFINE_UNQUOTED([NEWLINE_DEFAULT], [$ac_pcre2_newline_value], [
|
||||
The value of NEWLINE_DEFAULT determines the default newline character
|
||||
sequence. PCRE2 client programs can override this by selecting other values
|
||||
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY),
|
||||
and 5 (ANYCRLF).])
|
||||
|
||||
if test "$enable_bsr_anycrlf" = "yes"; then
|
||||
AC_DEFINE([BSR_ANYCRLF], [], [
|
||||
By default, the \R escape sequence matches any Unicode line ending
|
||||
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
||||
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
|
||||
The build-time default can be overridden by the user of PCRE2 at runtime.])
|
||||
fi
|
||||
|
||||
AC_DEFINE_UNQUOTED([LINK_SIZE], [$with_link_size], [
|
||||
The value of LINK_SIZE determines the number of bytes used to store
|
||||
links as offsets within the compiled regex. The default is 2, which
|
||||
allows for compiled patterns up to 64K long. This covers the vast
|
||||
majority of cases. However, PCRE2 can also be compiled to use 3 or 4
|
||||
bytes instead. This allows for longer patterns in extreme cases.])
|
||||
|
||||
AC_DEFINE_UNQUOTED([PARENS_NEST_LIMIT], [$with_parens_nest_limit], [
|
||||
The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
stack that is used while compiling a pattern.])
|
||||
|
||||
AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
|
||||
The value of MATCH_LIMIT determines the default number of times the
|
||||
internal match() function can be called during a single execution of
|
||||
pcre2_match(). There is a runtime interface for setting a different
|
||||
limit. The limit exists in order to catch runaway regular
|
||||
expressions that take for ever to determine that they do not match.
|
||||
The default is set very large so that it does not accidentally catch
|
||||
legitimate cases.])
|
||||
|
||||
AC_DEFINE_UNQUOTED([MATCH_LIMIT_RECURSION], [$with_match_limit_recursion], [
|
||||
The above limit applies to all calls of match(), whether or not they
|
||||
increase the recursion depth. In some environments it is desirable
|
||||
to limit the depth of recursive calls of match() more strictly, in
|
||||
order to restrict the maximum amount of stack (or heap, if
|
||||
HEAP_MATCH_RECURSE is defined) that is used. The value of
|
||||
MATCH_LIMIT_RECURSION applies only to recursive calls of match(). To
|
||||
have any useful effect, it must be less than the value of
|
||||
MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT.
|
||||
There is a runtime method for setting a different limit.])
|
||||
|
||||
AC_DEFINE([MAX_NAME_SIZE], [32], [
|
||||
This limit is parameterized just in case anybody ever wants to
|
||||
change it. Care must be taken if it is increased, because it guards
|
||||
against integer overflow caused by enormously large patterns.])
|
||||
|
||||
AC_DEFINE([MAX_NAME_COUNT], [10000], [
|
||||
This limit is parameterized just in case anybody ever wants to
|
||||
change it. Care must be taken if it is increased, because it guards
|
||||
against integer overflow caused by enormously large patterns.])
|
||||
|
||||
AH_VERBATIM([PCRE2_EXP_DEFN], [
|
||||
/* If you are compiling for a system other than a Unix-like system or
|
||||
Win32, and it needs some magic to be inserted before the definition
|
||||
of a function that is exported by the library, define this macro to
|
||||
contain the relevant magic. If you do not define this macro, a suitable
|
||||
__declspec value is used for Windows systems; in other environments
|
||||
"extern" is used for a C compiler and "extern C" for a C++ compiler.
|
||||
This macro appears at the start of every exported function that is part
|
||||
of the external API. It does not appear on functions that are "external"
|
||||
in the C sense, but which are internal to the library. */
|
||||
#undef PCRE2_EXP_DEFN])
|
||||
|
||||
if test "$enable_ebcdic" = "yes"; then
|
||||
AC_DEFINE_UNQUOTED([EBCDIC], [], [
|
||||
If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||
character codes, define this macro to any value. When EBCDIC is set, PCRE2
|
||||
assumes that all input strings are in EBCDIC. If you do not define this
|
||||
macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It
|
||||
is not possible to build a version of PCRE2 that supports both EBCDIC and
|
||||
UTF-8/16/32.])
|
||||
fi
|
||||
|
||||
if test "$enable_ebcdic_nl25" = "yes"; then
|
||||
AC_DEFINE_UNQUOTED([EBCDIC_NL25], [], [
|
||||
In an EBCDIC environment, define this macro to any value to arrange for
|
||||
the NL character to be 0x25 instead of the default 0x15. NL plays the role
|
||||
that LF does in an ASCII/Unicode environment.])
|
||||
fi
|
||||
|
||||
if test "$enable_valgrind" = "yes"; then
|
||||
AC_DEFINE_UNQUOTED([SUPPORT_VALGRIND], [], [
|
||||
Define to any value for valgrind support to find invalid memory reads.])
|
||||
fi
|
||||
|
||||
# Platform specific issues
|
||||
NO_UNDEFINED=
|
||||
EXPORT_ALL_SYMBOLS=
|
||||
case $host_os in
|
||||
cygwin* | mingw* )
|
||||
if test X"$enable_shared" = Xyes; then
|
||||
NO_UNDEFINED="-no-undefined"
|
||||
EXPORT_ALL_SYMBOLS="-Wl,--export-all-symbols"
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
|
||||
# The extra LDFLAGS for each particular library. The libpcre2*_version values
|
||||
# are m4 variables, assigned above.
|
||||
|
||||
EXTRA_LIBPCRE2_8_LDFLAGS="$EXTRA_LIBPCRE2_8_LDFLAGS \
|
||||
$NO_UNDEFINED -version-info libpcre2_8_version"
|
||||
|
||||
EXTRA_LIBPCRE2_16_LDFLAGS="$EXTRA_LIBPCRE2_16_LDFLAGS \
|
||||
$NO_UNDEFINED -version-info libpcre2_16_version"
|
||||
|
||||
EXTRA_LIBPCRE2_32_LDFLAGS="$EXTRA_LIBPCRE2_32_LDFLAGS \
|
||||
$NO_UNDEFINED -version-info libpcre2_32_version"
|
||||
|
||||
EXTRA_LIBPCRE2_POSIX_LDFLAGS="$EXTRA_LIBPCRE2_POSIX_LDFLAGS \
|
||||
$NO_UNDEFINED -version-info libpcre2_posix_version"
|
||||
|
||||
AC_SUBST(EXTRA_LIBPCRE2_8_LDFLAGS)
|
||||
AC_SUBST(EXTRA_LIBPCRE2_16_LDFLAGS)
|
||||
AC_SUBST(EXTRA_LIBPCRE2_32_LDFLAGS)
|
||||
AC_SUBST(EXTRA_LIBPCRE2_POSIX_LDFLAGS)
|
||||
|
||||
# When we run 'make distcheck', use these arguments. Turning off compiler
# optimization makes it run faster.
# Unicode support is requested with --enable-unicode, which is the option
# that sets the enable_unicode variable tested elsewhere in this script.
DISTCHECK_CONFIGURE_FLAGS="CFLAGS='' CXXFLAGS='' --enable-pcre2-16 --enable-pcre2-32 --enable-jit --enable-unicode"
AC_SUBST(DISTCHECK_CONFIGURE_FLAGS)
|
||||
|
||||
# Check that, if --enable-pcre2grep-libz or --enable-pcre2grep-libbz2 is
|
||||
# specified, the relevant library is available.
|
||||
|
||||
if test "$enable_pcre2grep_libz" = "yes"; then
|
||||
if test "$HAVE_ZLIB_H" != "1"; then
|
||||
echo "** Cannot --enable-pcre2grep-libz because zlib.h was not found"
|
||||
exit 1
|
||||
fi
|
||||
if test "$HAVE_LIBZ" != "1"; then
|
||||
echo "** Cannot --enable-pcre2grep-libz because libz was not found"
|
||||
exit 1
|
||||
fi
|
||||
LIBZ="-lz"
|
||||
fi
|
||||
AC_SUBST(LIBZ)
|
||||
|
||||
if test "$enable_pcre2grep_libbz2" = "yes"; then
|
||||
if test "$HAVE_BZLIB_H" != "1"; then
|
||||
echo "** Cannot --enable-pcre2grep-libbz2 because bzlib.h was not found"
|
||||
exit 1
|
||||
fi
|
||||
if test "$HAVE_LIBBZ2" != "1"; then
|
||||
echo "** Cannot --enable-pcre2grep-libbz2 because libbz2 was not found"
|
||||
exit 1
|
||||
fi
|
||||
LIBBZ2="-lbz2"
|
||||
fi
|
||||
AC_SUBST(LIBBZ2)
|
||||
|
||||
# Similarly for --enable-pcre2test-libedit: it cannot be combined with
# --enable-pcre2test-libreadline, and it needs both a readline-compatible
# header and the libedit library to have been found by the earlier probes.

if test "$enable_pcre2test_libedit" = "yes"; then
  if test "$enable_pcre2test_libreadline" = "yes"; then
    echo "** Cannot use both --enable-pcre2test-libedit and --enable-pcre2test-libreadline"
    exit 1
  fi
  # Two test commands joined with && are used here because the
  # "test ... -a ..." form is not portable across shells.
  if test "$HAVE_EDITLINE_READLINE_H" != "1" &&
     test "$HAVE_READLINE_READLINE_H" != "1"; then
    echo "** Cannot --enable-pcre2test-libedit because none of editline/readline.h,"
    echo "** edit/readline/readline.h or readline/readline.h was found."
    exit 1
  fi
  if test -z "$LIBEDIT"; then
    echo "** Cannot --enable-pcre2test-libedit because libedit library was not found."
    exit 1
  fi
fi
|
||||
|
||||
# Sanity checks for --enable-pcre2test-libreadline: both readline headers and
# a usable readline library must have been found by the earlier probes.
# The messages name the option exactly as the user has to type it.
if test "$enable_pcre2test_libreadline" = "yes"; then
  if test "$HAVE_READLINE_H" != "1"; then
    echo "** Cannot --enable-pcre2test-libreadline because readline/readline.h was not found."
    exit 1
  fi
  if test "$HAVE_HISTORY_H" != "1"; then
    echo "** Cannot --enable-pcre2test-libreadline because readline/history.h was not found."
    exit 1
  fi
  if test -z "$LIBREADLINE"; then
    echo "** Cannot --enable-pcre2test-libreadline because readline library was not found."
    exit 1
  fi
fi
|
||||
|
||||
# Handle valgrind support
|
||||
|
||||
if test "$enable_valgrind" = "yes"; then
|
||||
m4_ifdef([PKG_CHECK_MODULES],
|
||||
[PKG_CHECK_MODULES([VALGRIND],[valgrind])],
|
||||
[AC_MSG_ERROR([pkg-config not supported])])
|
||||
fi
|
||||
|
||||
# Handle code coverage reporting support
|
||||
if test "$enable_coverage" = "yes"; then
|
||||
if test "x$GCC" != "xyes"; then
|
||||
AC_MSG_ERROR([Code coverage reports can only be generated when using GCC])
|
||||
fi
|
||||
|
||||
# ccache is incompatible with gcov
|
||||
AC_PATH_PROG([SHTOOL],[shtool],[false])
|
||||
case `$SHTOOL path $CC` in
|
||||
*ccache*) cc_ccache=yes;;
|
||||
*) cc_ccache=no;;
|
||||
esac
|
||||
|
||||
# When the compiler resolves to a ccache wrapper, insist that ccache is
# switched off through CCACHE_DISABLE=1 before coverage is enabled.
# An unset or empty CCACHE_DISABLE already compares unequal to 1, so one
# comparison covers both cases and avoids the non-portable "test ... -o ...".
if test "$cc_ccache" = "yes"; then
  if test "$CCACHE_DISABLE" != "1"; then
    AC_MSG_ERROR([must export CCACHE_DISABLE=1 to disable ccache for code coverage])
  fi
fi
|
||||
|
||||
AC_ARG_VAR([LCOV],[the ltp lcov program])
|
||||
AC_PATH_PROG([LCOV],[lcov],[false])
|
||||
if test "x$LCOV" = "xfalse"; then
|
||||
AC_MSG_ERROR([lcov not found])
|
||||
fi
|
||||
|
||||
AC_ARG_VAR([GENHTML],[the ltp genhtml program])
|
||||
AC_PATH_PROG([GENHTML],[genhtml],[false])
|
||||
if test "x$GENHTML" = "xfalse"; then
|
||||
AC_MSG_ERROR([genhtml not found])
|
||||
fi
|
||||
|
||||
# Set flags needed for gcov
|
||||
GCOV_CFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
|
||||
GCOV_CXXFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
|
||||
GCOV_LIBS="-lgcov"
|
||||
AC_SUBST([GCOV_CFLAGS])
|
||||
AC_SUBST([GCOV_CXXFLAGS])
|
||||
AC_SUBST([GCOV_LIBS])
|
||||
fi # enable_coverage
|
||||
|
||||
AM_CONDITIONAL([WITH_GCOV],[test "x$enable_coverage" = "xyes"])
|
||||
|
||||
# Produce these files, in addition to config.h.
|
||||
AC_CONFIG_FILES(
|
||||
Makefile
|
||||
libpcre2-8.pc
|
||||
libpcre2-16.pc
|
||||
libpcre2-32.pc
|
||||
libpcre2-posix.pc
|
||||
pcre2-config
|
||||
src/pcre2.h
|
||||
)
|
||||
|
||||
# Make the generated script files executable.
|
||||
AC_CONFIG_COMMANDS([script-chmod], [chmod a+x pcre2-config])
|
||||
|
||||
# Make sure that pcre2_chartables.c is removed in case the method for
|
||||
# creating it was changed by reconfiguration.
|
||||
AC_CONFIG_COMMANDS([delete-old-chartables], [rm -f pcre2_chartables.c])
|
||||
|
||||
AC_OUTPUT
|
||||
|
||||
# Print out a nice little message after configure is run displaying the
|
||||
# chosen options.
|
||||
|
||||
ebcdic_nl_code=n/a
|
||||
if test "$enable_ebcdic_nl25" = "yes"; then
|
||||
ebcdic_nl_code=0x25
|
||||
elif test "$enable_ebcdic" = "yes"; then
|
||||
ebcdic_nl_code=0x15
|
||||
fi
|
||||
|
||||
cat <<EOF
|
||||
|
||||
$PACKAGE-$VERSION configuration summary:
|
||||
|
||||
Install prefix .................. : ${prefix}
|
||||
C preprocessor .................. : ${CPP}
|
||||
C compiler ...................... : ${CC}
|
||||
Linker .......................... : ${LD}
|
||||
C preprocessor flags ............ : ${CPPFLAGS}
|
||||
C compiler flags ................ : ${CFLAGS} ${VISIBILITY_CFLAGS}
|
||||
Linker flags .................... : ${LDFLAGS}
|
||||
Extra libraries ................. : ${LIBS}
|
||||
|
||||
Build 8-bit pcre2 library ....... : ${enable_pcre2_8}
|
||||
Build 16-bit pcre2 library ...... : ${enable_pcre2_16}
|
||||
Build 32-bit pcre2 library ...... : ${enable_pcre2_32}
|
||||
Include debugging code .......... : ${enable_debug}
|
||||
Enable JIT compiling support .... : ${enable_jit}
|
||||
Enable Unicode support .......... : ${enable_unicode}
|
||||
Newline char/sequence ........... : ${enable_newline}
|
||||
\R matches only ANYCRLF ......... : ${enable_bsr_anycrlf}
|
||||
EBCDIC coding ................... : ${enable_ebcdic}
|
||||
EBCDIC code for NL .............. : ${ebcdic_nl_code}
|
||||
Rebuild char tables ............. : ${enable_rebuild_chartables}
|
||||
Use stack recursion ............. : ${enable_stack_for_recursion}
|
||||
Internal link size .............. : ${with_link_size}
|
||||
Nested parentheses limit ........ : ${with_parens_nest_limit}
|
||||
Match limit ..................... : ${with_match_limit}
|
||||
Match limit recursion ........... : ${with_match_limit_recursion}
|
||||
Build shared libs ............... : ${enable_shared}
|
||||
Build static libs ............... : ${enable_static}
|
||||
Use JIT in pcre2grep ............ : ${enable_pcre2grep_jit}
|
||||
Buffer size for pcre2grep ....... : ${with_pcre2grep_bufsize}
|
||||
Link pcre2grep with libz ........ : ${enable_pcre2grep_libz}
|
||||
Link pcre2grep with libbz2 ...... : ${enable_pcre2grep_libbz2}
|
||||
Link pcre2test with libedit ..... : ${enable_pcre2test_libedit}
|
||||
Link pcre2test with libreadline . : ${enable_pcre2test_libreadline}
|
||||
Valgrind support ................ : ${enable_valgrind}
|
||||
Code coverage ................... : ${enable_coverage}
|
||||
|
||||
EOF
|
||||
|
||||
dnl end configure.ac
|
791
pcre2/depcomp
Executable file
791
pcre2/depcomp
Executable file
@ -0,0 +1,791 @@
|
||||
#! /bin/sh
|
||||
# depcomp - compile a program generating dependencies as side-effects
|
||||
|
||||
scriptversion=2013-05-30.07; # UTC
|
||||
|
||||
# Copyright (C) 1999-2014 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2, or (at your option)
|
||||
# any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# As a special exception to the GNU General Public License, if you
|
||||
# distribute this file as part of a program that contains a
|
||||
# configuration script generated by Autoconf, you may include it under
|
||||
# the same distribution terms that you use for the rest of that program.
|
||||
|
||||
# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
|
||||
|
||||
case $1 in
|
||||
'')
|
||||
echo "$0: No command. Try '$0 --help' for more information." 1>&2
|
||||
exit 1;
|
||||
;;
|
||||
-h | --h*)
|
||||
cat <<\EOF
|
||||
Usage: depcomp [--help] [--version] PROGRAM [ARGS]
|
||||
|
||||
Run PROGRAMS ARGS to compile a file, generating dependencies
|
||||
as side-effects.
|
||||
|
||||
Environment variables:
|
||||
depmode Dependency tracking mode.
|
||||
source Source file read by 'PROGRAMS ARGS'.
|
||||
object Object file output by 'PROGRAMS ARGS'.
|
||||
DEPDIR directory where to store dependencies.
|
||||
depfile Dependency file to output.
|
||||
tmpdepfile Temporary file to use when outputting dependencies.
|
||||
libtool Whether libtool is used (yes/no).
|
||||
|
||||
Report bugs to <bug-automake@gnu.org>.
|
||||
EOF
|
||||
exit $?
|
||||
;;
|
||||
-v | --v*)
|
||||
echo "depcomp $scriptversion"
|
||||
exit $?
|
||||
;;
|
||||
esac
|
||||
|
||||
# Compute the directory component of the path given as $1 and store it in
# the global variable '$dir'.  The result is either empty (the path holds
# no '/') or ends with a '/' character.  This is deliberate.
set_dir_from ()
{
  dir=
  case $1 in
    */*)
      # Replace the last '/' and everything after it by a single '/'.
      dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`
      ;;
  esac
}
|
||||
|
||||
# Compute the suffix-stripped basename of the path given as $1 and save it
# in the global variable '$base'.
set_base_from ()
{
  # First stage drops everything up to the last '/', second stage drops the
  # final '.suffix' if there is one.
  base=`echo "$1" | sed -e 's|^.*/||' | sed -e 's/\.[^.]*$//'`
}
|
||||
|
||||
# Write a placeholder dependency file to '$depfile'.  It is used when the
# compiler invocation did not create a dependency file itself, to avoid
# errors with the Makefile "include basename.Plo" scheme.
make_dummy_depfile ()
{
  printf '%s\n' "#dummy" > "$depfile"
}
|
||||
|
||||
# Shared post-processing of the dependency file generated by the compiler.
# Reads the global variables '$tmpdepfile', '$depfile', '$object', '$lower'
# and '$tab'; '$tmpdepfile' must be set before calling.
aix_post_process_depfile ()
{
  # The compiler produced no dependency file: write a placeholder and stop.
  if test ! -f "$tmpdepfile"; then
    make_dummy_depfile
    return
  fi

  # Each input line is of the form 'foo.o: dependency.h'.  Two passes over
  # the file are written to the depfile, one after the other:
  #   1. '$object: dependency.h'  - the target is renamed to $object;
  #   2. 'dependency.h:'          - an empty rule per header, which is
  #                                 needed to avoid the deleted-header problem.
  {
    sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile"
    sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile"
  } > "$depfile"
  rm -f "$tmpdepfile"
}
|
||||
|
||||
# A tabulation character.
|
||||
tab=' '
|
||||
# A newline character.
|
||||
nl='
|
||||
'
|
||||
# Character ranges might be problematic outside the C locale.
|
||||
# These definitions help.
|
||||
upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ
|
||||
lower=abcdefghijklmnopqrstuvwxyz
|
||||
digits=0123456789
|
||||
alpha=${upper}${lower}
|
||||
|
||||
if test -z "$depmode" || test -z "$source" || test -z "$object"; then
|
||||
echo "depcomp: Variables source, object and depmode must be set" 1>&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po.
|
||||
depfile=${depfile-`echo "$object" |
|
||||
sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`}
|
||||
tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
|
||||
|
||||
rm -f "$tmpdepfile"
|
||||
|
||||
# Avoid interferences from the environment.
|
||||
gccflag= dashmflag=
|
||||
|
||||
# Some modes work just like other modes, but use different flags. We
|
||||
# parameterize here, but still list the modes in the big case below,
|
||||
# to make depend.m4 easier to write. Note that we *cannot* use a case
|
||||
# here, because this file can only contain one case statement.
|
||||
if test "$depmode" = hp; then
|
||||
# HP compiler uses -M and no extra arg.
|
||||
gccflag=-M
|
||||
depmode=gcc
|
||||
fi
|
||||
|
||||
if test "$depmode" = dashXmstdout; then
|
||||
# This is just like dashmstdout with a different argument.
|
||||
dashmflag=-xM
|
||||
depmode=dashmstdout
|
||||
fi
|
||||
|
||||
cygpath_u="cygpath -u -f -"
|
||||
if test "$depmode" = msvcmsys; then
|
||||
# This is just like msvisualcpp but w/o cygpath translation.
|
||||
# Just convert the backslash-escaped backslashes to single forward
|
||||
# slashes to satisfy depend.m4
|
||||
cygpath_u='sed s,\\\\,/,g'
|
||||
depmode=msvisualcpp
|
||||
fi
|
||||
|
||||
if test "$depmode" = msvc7msys; then
|
||||
# This is just like msvc7 but w/o cygpath translation.
|
||||
# Just convert the backslash-escaped backslashes to single forward
|
||||
# slashes to satisfy depend.m4
|
||||
cygpath_u='sed s,\\\\,/,g'
|
||||
depmode=msvc7
|
||||
fi
|
||||
|
||||
if test "$depmode" = xlc; then
|
||||
# IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information.
|
||||
gccflag=-qmakedep=gcc,-MF
|
||||
depmode=gcc
|
||||
fi
|
||||
|
||||
case "$depmode" in
|
||||
gcc3)
|
||||
## gcc 3 implements dependency tracking that does exactly what
|
||||
## we want. Yay! Note: for some reason libtool 1.4 doesn't like
|
||||
## it if -MD -MP comes after the -MF stuff. Hmm.
|
||||
## Unfortunately, FreeBSD c89 acceptance of flags depends upon
|
||||
## the command line argument order; so add the flags where they
|
||||
## appear in depend2.am. Note that the slowdown incurred here
|
||||
## affects only configure: in makefiles, %FASTDEP% shortcuts this.
|
||||
for arg
|
||||
do
|
||||
case $arg in
|
||||
-c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;;
|
||||
*) set fnord "$@" "$arg" ;;
|
||||
esac
|
||||
shift # fnord
|
||||
shift # $arg
|
||||
done
|
||||
"$@"
|
||||
stat=$?
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
mv "$tmpdepfile" "$depfile"
|
||||
;;
|
||||
|
||||
gcc)
|
||||
## Note that this doesn't just cater to obsolete pre-3.x GCC compilers,
|
||||
## but also to in-use compilers like IBM xlc/xlC and the HP C compiler.
|
||||
## (see the conditional assignment to $gccflag above).
|
||||
## There are various ways to get dependency output from gcc. Here's
|
||||
## why we pick this rather obscure method:
|
||||
## - Don't want to use -MD because we'd like the dependencies to end
|
||||
## up in a subdir. Having to rename by hand is ugly.
|
||||
## (We might end up doing this anyway to support other compilers.)
|
||||
## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
|
||||
## -MM, not -M (despite what the docs say). Also, it might not be
|
||||
## supported by the other compilers which use the 'gcc' depmode.
|
||||
## - Using -M directly means running the compiler twice (even worse
|
||||
## than renaming).
|
||||
if test -z "$gccflag"; then
|
||||
gccflag=-MD,
|
||||
fi
|
||||
"$@" -Wp,"$gccflag$tmpdepfile"
|
||||
stat=$?
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
echo "$object : \\" > "$depfile"
|
||||
# The second -e expression handles DOS-style file names with drive
|
||||
# letters.
|
||||
sed -e 's/^[^:]*: / /' \
|
||||
-e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
|
||||
## This next piece of magic avoids the "deleted header file" problem.
|
||||
## The problem is that when a header file which appears in a .P file
|
||||
## is deleted, the dependency causes make to die (because there is
|
||||
## typically no way to rebuild the header). We avoid this by adding
|
||||
## dummy dependencies for each header file. Too bad gcc doesn't do
|
||||
## this for us directly.
|
||||
## Some versions of gcc put a space before the ':'. On the theory
|
||||
## that the space means something, we add a space to the output as
|
||||
## well. hp depmode also adds that space, but also prefixes the VPATH
|
||||
## to the object. Take care to not repeat it in the output.
|
||||
## Some versions of the HPUX 10.20 sed can't process this invocation
|
||||
## correctly. Breaking it into two sed invocations is a workaround.
|
||||
tr ' ' "$nl" < "$tmpdepfile" \
|
||||
| sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \
|
||||
| sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
hp)
|
||||
# This case exists only to let depend.m4 do its work. It works by
|
||||
# looking at the text of this script. This case will never be run,
|
||||
# since it is checked for above.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
sgi)
|
||||
if test "$libtool" = yes; then
|
||||
"$@" "-Wp,-MDupdate,$tmpdepfile"
|
||||
else
|
||||
"$@" -MDupdate "$tmpdepfile"
|
||||
fi
|
||||
stat=$?
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
|
||||
if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files
|
||||
echo "$object : \\" > "$depfile"
|
||||
# Clip off the initial element (the dependent). Don't try to be
|
||||
# clever and replace this with sed code, as IRIX sed won't handle
|
||||
# lines with more than a fixed number of characters (4096 in
|
||||
# IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines;
|
||||
# the IRIX cc adds comments like '#:fec' to the end of the
|
||||
# dependency line.
|
||||
tr ' ' "$nl" < "$tmpdepfile" \
|
||||
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \
|
||||
| tr "$nl" ' ' >> "$depfile"
|
||||
echo >> "$depfile"
|
||||
# The second pass generates a dummy entry for each header file.
|
||||
tr ' ' "$nl" < "$tmpdepfile" \
|
||||
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
|
||||
>> "$depfile"
|
||||
else
|
||||
make_dummy_depfile
|
||||
fi
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
xlc)
|
||||
# This case exists only to let depend.m4 do its work. It works by
|
||||
# looking at the text of this script. This case will never be run,
|
||||
# since it is checked for above.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
aix)
|
||||
# The C for AIX Compiler uses -M and outputs the dependencies
|
||||
# in a .u file. In older versions, this file always lives in the
|
||||
# current directory. Also, the AIX compiler puts '$object:' at the
|
||||
# start of each line; $object doesn't have directory information.
|
||||
# Version 6 uses the directory in both cases.
|
||||
set_dir_from "$object"
|
||||
set_base_from "$object"
|
||||
if test "$libtool" = yes; then
|
||||
tmpdepfile1=$dir$base.u
|
||||
tmpdepfile2=$base.u
|
||||
tmpdepfile3=$dir.libs/$base.u
|
||||
"$@" -Wc,-M
|
||||
else
|
||||
tmpdepfile1=$dir$base.u
|
||||
tmpdepfile2=$dir$base.u
|
||||
tmpdepfile3=$dir$base.u
|
||||
"$@" -M
|
||||
fi
|
||||
stat=$?
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||
exit $stat
|
||||
fi
|
||||
|
||||
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||
do
|
||||
test -f "$tmpdepfile" && break
|
||||
done
|
||||
aix_post_process_depfile
|
||||
;;
|
||||
|
||||
tcc)
|
||||
# tcc (Tiny C Compiler) understands '-MD -MF file' since version 0.9.26
|
||||
# FIXME: That version still under development at the moment of writing.
|
||||
# Make sure that this statement remains true also for stable, released
|
||||
# versions.
|
||||
# It will wrap lines (doesn't matter whether long or short) with a
|
||||
# trailing '\', as in:
|
||||
#
|
||||
# foo.o : \
|
||||
# foo.c \
|
||||
# foo.h \
|
||||
#
|
||||
# It will put a trailing '\' even on the last line, and will use leading
|
||||
# spaces rather than leading tabs (at least since its commit 0394caf7
|
||||
# "Emit spaces for -MD").
|
||||
"$@" -MD -MF "$tmpdepfile"
|
||||
stat=$?
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
# Each non-empty line is of the form 'foo.o : \' or ' dep.h \'.
|
||||
# We have to change lines of the first kind to '$object: \'.
|
||||
sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile"
|
||||
# And for each line of the second kind, we have to emit a 'dep.h:'
|
||||
# dummy dependency, to avoid the deleted-header problem.
|
||||
sed -n -e 's|^ *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
## The order of this option in the case statement is important, since the
|
||||
## shell code in configure will try each of these formats in the order
|
||||
## listed in this file. A plain '-MD' option would be understood by many
|
||||
## compilers, so we must ensure this comes after the gcc and icc options.
|
||||
pgcc)
|
||||
# Portland's C compiler understands '-MD'.
|
||||
# Will always output deps to 'file.d' where file is the root name of the
|
||||
# source file under compilation, even if file resides in a subdirectory.
|
||||
# The object file name does not affect the name of the '.d' file.
|
||||
# pgcc 10.2 will output
|
||||
# foo.o: sub/foo.c sub/foo.h
|
||||
# and will wrap long lines using '\' :
|
||||
# foo.o: sub/foo.c ... \
|
||||
# sub/foo.h ... \
|
||||
# ...
|
||||
set_dir_from "$object"
|
||||
# Use the source, not the object, to determine the base name, since
|
||||
# that's sadly what pgcc will do too.
|
||||
set_base_from "$source"
|
||||
tmpdepfile=$base.d
|
||||
|
||||
# For projects that build the same source file twice into different object
|
||||
# files, the pgcc approach of using the *source* file root name can cause
|
||||
# problems in parallel builds. Use a locking strategy to avoid stomping on
|
||||
# the same $tmpdepfile.
|
||||
lockdir=$base.d-lock
|
||||
trap "
|
||||
echo '$0: caught signal, cleaning up...' >&2
|
||||
rmdir '$lockdir'
|
||||
exit 1
|
||||
" 1 2 13 15
|
||||
numtries=100
|
||||
i=$numtries
|
||||
while test $i -gt 0; do
|
||||
# mkdir is a portable test-and-set.
|
||||
if mkdir "$lockdir" 2>/dev/null; then
|
||||
# This process acquired the lock.
|
||||
"$@" -MD
|
||||
stat=$?
|
||||
# Release the lock.
|
||||
rmdir "$lockdir"
|
||||
break
|
||||
else
|
||||
# If the lock is being held by a different process, wait
|
||||
# until the winning process is done or we timeout.
|
||||
while test -d "$lockdir" && test $i -gt 0; do
|
||||
sleep 1
|
||||
i=`expr $i - 1`
|
||||
done
|
||||
fi
|
||||
i=`expr $i - 1`
|
||||
done
|
||||
trap - 1 2 13 15
|
||||
if test $i -le 0; then
|
||||
echo "$0: failed to acquire lock after $numtries attempts" >&2
|
||||
echo "$0: check lockdir '$lockdir'" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
# Each line is of the form `foo.o: dependent.h',
|
||||
# or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
|
||||
# Do two passes, one to just change these to
|
||||
# `$object: dependent.h' and one to simply `dependent.h:'.
|
||||
sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
|
||||
# Some versions of the HPUX 10.20 sed can't process this invocation
|
||||
# correctly. Breaking it into two sed invocations is a workaround.
|
||||
sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \
|
||||
| sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
hp2)
|
||||
# The "hp" stanza above does not work with aCC (C++) and HP's ia64
|
||||
# compilers, which have integrated preprocessors. The correct option
|
||||
# to use with these is +Maked; it writes dependencies to a file named
|
||||
# 'foo.d', which lands next to the object file, wherever that
|
||||
# happens to be.
|
||||
# Much of this is similar to the tru64 case; see comments there.
|
||||
set_dir_from "$object"
|
||||
set_base_from "$object"
|
||||
if test "$libtool" = yes; then
|
||||
tmpdepfile1=$dir$base.d
|
||||
tmpdepfile2=$dir.libs/$base.d
|
||||
"$@" -Wc,+Maked
|
||||
else
|
||||
tmpdepfile1=$dir$base.d
|
||||
tmpdepfile2=$dir$base.d
|
||||
"$@" +Maked
|
||||
fi
|
||||
stat=$?
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile1" "$tmpdepfile2"
|
||||
exit $stat
|
||||
fi
|
||||
|
||||
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2"
|
||||
do
|
||||
test -f "$tmpdepfile" && break
|
||||
done
|
||||
if test -f "$tmpdepfile"; then
|
||||
sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile"
|
||||
# Add 'dependent.h:' lines.
|
||||
sed -ne '2,${
|
||||
s/^ *//
|
||||
s/ \\*$//
|
||||
s/$/:/
|
||||
p
|
||||
}' "$tmpdepfile" >> "$depfile"
|
||||
else
|
||||
make_dummy_depfile
|
||||
fi
|
||||
rm -f "$tmpdepfile" "$tmpdepfile2"
|
||||
;;
|
||||
|
||||
tru64)
|
||||
# The Tru64 compiler uses -MD to generate dependencies as a side
|
||||
# effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'.
|
||||
# At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
|
||||
# dependencies in 'foo.d' instead, so we check for that too.
|
||||
# Subdirectories are respected.
|
||||
set_dir_from "$object"
|
||||
set_base_from "$object"
|
||||
|
||||
if test "$libtool" = yes; then
|
||||
# Libtool generates 2 separate objects for the 2 libraries. These
|
||||
# two compilations output dependencies in $dir.libs/$base.o.d and
|
||||
# in $dir$base.o.d. We have to check for both files, because
|
||||
# one of the two compilations can be disabled. We should prefer
|
||||
# $dir$base.o.d over $dir.libs/$base.o.d because the latter is
|
||||
# automatically cleaned when .libs/ is deleted, while ignoring
|
||||
# the former would cause a distcleancheck panic.
|
||||
tmpdepfile1=$dir$base.o.d # libtool 1.5
|
||||
tmpdepfile2=$dir.libs/$base.o.d # Likewise.
|
||||
tmpdepfile3=$dir.libs/$base.d # Compaq CCC V6.2-504
|
||||
"$@" -Wc,-MD
|
||||
else
|
||||
tmpdepfile1=$dir$base.d
|
||||
tmpdepfile2=$dir$base.d
|
||||
tmpdepfile3=$dir$base.d
|
||||
"$@" -MD
|
||||
fi
|
||||
|
||||
stat=$?
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||
exit $stat
|
||||
fi
|
||||
|
||||
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||
do
|
||||
test -f "$tmpdepfile" && break
|
||||
done
|
||||
# Same post-processing that is required for AIX mode.
|
||||
aix_post_process_depfile
|
||||
;;
|
||||
|
||||
msvc7)
|
||||
if test "$libtool" = yes; then
|
||||
showIncludes=-Wc,-showIncludes
|
||||
else
|
||||
showIncludes=-showIncludes
|
||||
fi
|
||||
"$@" $showIncludes > "$tmpdepfile"
|
||||
stat=$?
|
||||
grep -v '^Note: including file: ' "$tmpdepfile"
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
echo "$object : \\" > "$depfile"
|
||||
# The first sed program below extracts the file names and escapes
|
||||
# backslashes for cygpath. The second sed program outputs the file
|
||||
# name when reading, but also accumulates all include files in the
|
||||
# hold buffer in order to output them again at the end. This only
|
||||
# works with sed implementations that can handle large buffers.
|
||||
sed < "$tmpdepfile" -n '
|
||||
/^Note: including file: *\(.*\)/ {
|
||||
s//\1/
|
||||
s/\\/\\\\/g
|
||||
p
|
||||
}' | $cygpath_u | sort -u | sed -n '
|
||||
s/ /\\ /g
|
||||
s/\(.*\)/'"$tab"'\1 \\/p
|
||||
s/.\(.*\) \\/\1:/
|
||||
H
|
||||
$ {
|
||||
s/.*/'"$tab"'/
|
||||
G
|
||||
p
|
||||
}' >> "$depfile"
|
||||
echo >> "$depfile" # make sure the fragment doesn't end with a backslash
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
msvc7msys)
|
||||
# This case exists only to let depend.m4 do its work. It works by
|
||||
# looking at the text of this script. This case will never be run,
|
||||
# since it is checked for above.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
#nosideeffect)
|
||||
# This comment above is used by automake to tell side-effect
|
||||
# dependency tracking mechanisms from slower ones.
|
||||
|
||||
dashmstdout)
|
||||
# Important note: in order to support this mode, a compiler *must*
|
||||
# always write the preprocessed file to stdout, regardless of -o.
|
||||
"$@" || exit $?
|
||||
|
||||
# Remove the call to Libtool.
|
||||
if test "$libtool" = yes; then
|
||||
while test "X$1" != 'X--mode=compile'; do
|
||||
shift
|
||||
done
|
||||
shift
|
||||
fi
|
||||
|
||||
# Remove '-o $object'.
|
||||
IFS=" "
|
||||
for arg
|
||||
do
|
||||
case $arg in
|
||||
-o)
|
||||
shift
|
||||
;;
|
||||
$object)
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set fnord "$@" "$arg"
|
||||
shift # fnord
|
||||
shift # $arg
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
test -z "$dashmflag" && dashmflag=-M
|
||||
# Require at least two characters before searching for ':'
|
||||
# in the target name. This is to cope with DOS-style filenames:
|
||||
# a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise.
|
||||
"$@" $dashmflag |
|
||||
sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile"
|
||||
rm -f "$depfile"
|
||||
cat < "$tmpdepfile" > "$depfile"
|
||||
# Some versions of the HPUX 10.20 sed can't process this sed invocation
|
||||
# correctly. Breaking it into two sed invocations is a workaround.
|
||||
tr ' ' "$nl" < "$tmpdepfile" \
|
||||
| sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
|
||||
| sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
dashXmstdout)
|
||||
# This case only exists to satisfy depend.m4. It is never actually
|
||||
# run, as this mode is specially recognized in the preamble.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
makedepend)
|
||||
"$@" || exit $?
|
||||
# Remove any Libtool call
|
||||
if test "$libtool" = yes; then
|
||||
while test "X$1" != 'X--mode=compile'; do
|
||||
shift
|
||||
done
|
||||
shift
|
||||
fi
|
||||
# X makedepend
|
||||
shift
|
||||
cleared=no eat=no
|
||||
for arg
|
||||
do
|
||||
case $cleared in
|
||||
no)
|
||||
set ""; shift
|
||||
cleared=yes ;;
|
||||
esac
|
||||
if test $eat = yes; then
|
||||
eat=no
|
||||
continue
|
||||
fi
|
||||
case "$arg" in
|
||||
-D*|-I*)
|
||||
set fnord "$@" "$arg"; shift ;;
|
||||
# Strip any option that makedepend may not understand. Remove
|
||||
# the object too, otherwise makedepend will parse it as a source file.
|
||||
-arch)
|
||||
eat=yes ;;
|
||||
-*|$object)
|
||||
;;
|
||||
*)
|
||||
set fnord "$@" "$arg"; shift ;;
|
||||
esac
|
||||
done
|
||||
obj_suffix=`echo "$object" | sed 's/^.*\././'`
|
||||
touch "$tmpdepfile"
|
||||
${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
|
||||
rm -f "$depfile"
|
||||
# makedepend may prepend the VPATH from the source file name to the object.
|
||||
# No need to regex-escape $object, excess matching of '.' is harmless.
|
||||
sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile"
|
||||
# Some versions of the HPUX 10.20 sed can't process the last invocation
|
||||
# correctly. Breaking it into two sed invocations is a workaround.
|
||||
sed '1,2d' "$tmpdepfile" \
|
||||
| tr ' ' "$nl" \
|
||||
| sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
|
||||
| sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile" "$tmpdepfile".bak
|
||||
;;
|
||||
|
||||
cpp)
|
||||
# Important note: in order to support this mode, a compiler *must*
|
||||
# always write the preprocessed file to stdout.
|
||||
"$@" || exit $?
|
||||
|
||||
# Remove the call to Libtool.
|
||||
if test "$libtool" = yes; then
|
||||
while test "X$1" != 'X--mode=compile'; do
|
||||
shift
|
||||
done
|
||||
shift
|
||||
fi
|
||||
|
||||
# Remove '-o $object'.
|
||||
IFS=" "
|
||||
for arg
|
||||
do
|
||||
case $arg in
|
||||
-o)
|
||||
shift
|
||||
;;
|
||||
$object)
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set fnord "$@" "$arg"
|
||||
shift # fnord
|
||||
shift # $arg
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
"$@" -E \
|
||||
| sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
|
||||
-e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
|
||||
| sed '$ s: \\$::' > "$tmpdepfile"
|
||||
rm -f "$depfile"
|
||||
echo "$object : \\" > "$depfile"
|
||||
cat < "$tmpdepfile" >> "$depfile"
|
||||
sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
msvisualcpp)
|
||||
# Important note: in order to support this mode, a compiler *must*
|
||||
# always write the preprocessed file to stdout.
|
||||
"$@" || exit $?
|
||||
|
||||
# Remove the call to Libtool.
|
||||
if test "$libtool" = yes; then
|
||||
while test "X$1" != 'X--mode=compile'; do
|
||||
shift
|
||||
done
|
||||
shift
|
||||
fi
|
||||
|
||||
IFS=" "
|
||||
for arg
|
||||
do
|
||||
case "$arg" in
|
||||
-o)
|
||||
shift
|
||||
;;
|
||||
$object)
|
||||
shift
|
||||
;;
|
||||
"-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
|
||||
set fnord "$@"
|
||||
shift
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set fnord "$@" "$arg"
|
||||
shift
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
done
|
||||
"$@" -E 2>/dev/null |
|
||||
sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile"
|
||||
rm -f "$depfile"
|
||||
echo "$object : \\" > "$depfile"
|
||||
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile"
|
||||
echo "$tab" >> "$depfile"
|
||||
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
msvcmsys)
|
||||
# This case exists only to let depend.m4 do its work. It works by
|
||||
# looking at the text of this script. This case will never be run,
|
||||
# since it is checked for above.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
none)
|
||||
exec "$@"
|
||||
;;
|
||||
|
||||
*)
|
||||
echo "Unknown depmode $depmode" 1>&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
exit 0
|
||||
|
||||
# Local Variables:
|
||||
# mode: shell-script
|
||||
# sh-indentation: 2
|
||||
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
||||
# time-stamp-start: "scriptversion="
|
||||
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||
# time-stamp-time-zone: "UTC"
|
||||
# time-stamp-end: "; # UTC"
|
||||
# End:
|
391
pcre2/doc/html/NON-AUTOTOOLS-BUILD.txt
Normal file
391
pcre2/doc/html/NON-AUTOTOOLS-BUILD.txt
Normal file
@ -0,0 +1,391 @@
|
||||
Building PCRE2 without using autotools
|
||||
--------------------------------------
|
||||
|
||||
This document has been converted from the PCRE1 document. I have removed a
|
||||
number of sections about building in various environments, as they applied only
|
||||
to PCRE1 and are probably out of date.
|
||||
|
||||
This document contains the following sections:
|
||||
|
||||
General
|
||||
Generic instructions for the PCRE2 C library
|
||||
Stack size in Windows environments
|
||||
Linking programs in Windows environments
|
||||
Calling conventions in Windows environments
|
||||
Comments about Win32 builds
|
||||
Building PCRE2 on Windows with CMake
|
||||
Testing with RunTest.bat
|
||||
Building PCRE2 on native z/OS and z/VM
|
||||
|
||||
|
||||
GENERAL
|
||||
|
||||
The basic PCRE2 library consists entirely of code written in Standard C, and so
|
||||
should compile successfully on any system that has a Standard C compiler and
|
||||
library.
|
||||
|
||||
The PCRE2 distribution includes a "configure" file for use by the
|
||||
configure/make (autotools) build system, as found in many Unix-like
|
||||
environments. The README file contains information about the options for
|
||||
"configure".
|
||||
|
||||
There is also support for CMake, which some users prefer, especially in Windows
|
||||
environments, though it can also be run in Unix-like environments. See the
|
||||
section entitled "Building PCRE2 on Windows with CMake" below.
|
||||
|
||||
Versions of src/config.h and src/pcre2.h are distributed in the PCRE2 tarballs
|
||||
under the names src/config.h.generic and src/pcre2.h.generic. These are
|
||||
provided for those who build PCRE2 without using "configure" or CMake. If you
|
||||
use "configure" or CMake, the .generic versions are not used.
|
||||
|
||||
|
||||
GENERIC INSTRUCTIONS FOR THE PCRE2 C LIBRARY
|
||||
|
||||
The following are generic instructions for building the PCRE2 C library "by
|
||||
hand". If you are going to use CMake, this section does not apply to you; you
|
||||
can skip ahead to the CMake section.
|
||||
|
||||
(1) Copy or rename the file src/config.h.generic as src/config.h, and edit the
|
||||
macro settings that it contains to whatever is appropriate for your
|
||||
environment. In particular, you can alter the definition of the NEWLINE
|
||||
macro to specify what character(s) you want to be interpreted as line
|
||||
terminators.
|
||||
|
||||
When you compile any of the PCRE2 modules, you must specify
|
||||
-DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
|
||||
sources.
|
||||
|
||||
An alternative approach is not to edit src/config.h, but to use -D on the
|
||||
compiler command line to make any changes that you need to the
|
||||
configuration options. In this case -DHAVE_CONFIG_H must not be set.
|
||||
|
||||
NOTE: There have been occasions when the way in which certain parameters
|
||||
in src/config.h are used has changed between releases. (In the
|
||||
configure/make world, this is handled automatically.) When upgrading to a
|
||||
new release, you are strongly advised to review src/config.h.generic
|
||||
before re-using what you had previously.
|
||||
|
||||
(2) Copy or rename the file src/pcre2.h.generic as src/pcre2.h.
|
||||
|
||||
(3) EITHER:
|
||||
Copy or rename file src/pcre2_chartables.c.dist as
|
||||
src/pcre2_chartables.c.
|
||||
|
||||
OR:
|
||||
Compile src/dftables.c as a stand-alone program (using -DHAVE_CONFIG_H
|
||||
if you have set up src/config.h), and then run it with the single
|
||||
argument "src/pcre2_chartables.c". This generates a set of standard
|
||||
character tables and writes them to that file. The tables are generated
|
||||
using the default C locale for your system. If you want to use a locale
|
||||
that is specified by LC_xxx environment variables, add the -L option to
|
||||
the dftables command. You must use this method if you are building on a
|
||||
system that uses EBCDIC code.
|
||||
|
||||
The tables in src/pcre2_chartables.c are defaults. The caller of PCRE2 can
|
||||
specify alternative tables at run time.
|
||||
|
||||
(4) For an 8-bit library, compile the following source files from the src
|
||||
directory, setting -DPCRE2_CODE_UNIT_WIDTH=8 as a compiler option. Also
|
||||
set -DHAVE_CONFIG_H if you have set up src/config.h with your
|
||||
configuration, or else use other -D settings to change the configuration
|
||||
as required.
|
||||
|
||||
pcre2_auto_possess.c
|
||||
pcre2_chartables.c
|
||||
pcre2_compile.c
|
||||
pcre2_config.c
|
||||
pcre2_context.c
|
||||
pcre2_dfa_match.c
|
||||
pcre2_error.c
|
||||
pcre2_jit_compile.c
|
||||
pcre2_maketables.c
|
||||
pcre2_match.c
|
||||
pcre2_match_data.c
|
||||
pcre2_newline.c
|
||||
pcre2_ord2utf.c
|
||||
pcre2_pattern_info.c
|
||||
pcre2_serialize.c
|
||||
pcre2_string_utils.c
|
||||
pcre2_study.c
|
||||
pcre2_substitute.c
|
||||
pcre2_substring.c
|
||||
pcre2_tables.c
|
||||
pcre2_ucd.c
|
||||
pcre2_valid_utf.c
|
||||
pcre2_xclass.c
|
||||
|
||||
Make sure that you include -I. in the compiler command (or equivalent for
|
||||
an unusual compiler) so that all included PCRE2 header files are first
|
||||
sought in the src directory under the current directory. Otherwise you run
|
||||
the risk of picking up a previously-installed file from somewhere else.
|
||||
|
||||
Note that you must compile pcre2_jit_compile.c, even if you have not
|
||||
defined SUPPORT_JIT in src/config.h, because when JIT support is not
|
||||
configured, dummy functions are compiled. When JIT support IS configured,
|
||||
pcre2_jit_compile.c #includes other files from the sljit subdirectory, where
|
||||
there should be 16 files, all of whose names begin with "sljit". It also
|
||||
#includes src/pcre2_jit_match.c and src/pcre2_jit_misc.c, so you should
|
||||
not compile these yourself.
|
||||
|
||||
(5) Now link all the compiled code into an object library in whichever form
|
||||
your system keeps such libraries. This is the basic PCRE2 C 8-bit library.
|
||||
If your system has static and shared libraries, you may have to do this
|
||||
once for each type.
|
||||
|
||||
(6) If you want to build a 16-bit library or 32-bit library (as well as, or
|
||||
instead of the 8-bit library) just supply 16 or 32 as the value of
|
||||
-DPCRE2_CODE_UNIT_WIDTH when you are compiling.
|
||||
|
||||
(7) If you want to build the POSIX wrapper functions (which apply only to the
|
||||
8-bit library), ensure that you have the src/pcre2posix.h file and then
|
||||
compile src/pcre2posix.c. Link the result (on its own) as the pcre2posix
|
||||
library.
|
||||
|
||||
(8) The pcre2test program can be linked with any combination of the 8-bit,
|
||||
16-bit and 32-bit libraries (depending on what you selected in
|
||||
src/config.h). Compile src/pcre2test.c; don't forget -DHAVE_CONFIG_H if
|
||||
necessary, but do NOT define PCRE2_CODE_UNIT_WIDTH. Then link with the
|
||||
appropriate library/ies. If you compiled an 8-bit library, pcre2test also
|
||||
needs the pcre2posix wrapper library.
|
||||
|
||||
(9) Run pcre2test on the testinput files in the testdata directory, and check
|
||||
that the output matches the corresponding testoutput files. There are
|
||||
comments about what each test does in the section entitled "Testing PCRE2"
|
||||
in the README file. If you compiled more than one of the 8-bit, 16-bit and
|
||||
32-bit libraries, you need to run pcre2test with the -16 option to do
|
||||
16-bit tests and with the -32 option to do 32-bit tests.
|
||||
|
||||
Some tests are relevant only when certain build-time options are selected.
|
||||
For example, test 4 is for Unicode support, and will not run if you have
|
||||
built PCRE2 without it. See the comments at the start of each testinput
|
||||
file. If you have a suitable Unix-like shell, the RunTest script will run
|
||||
the appropriate tests for you. The command "RunTest list" will output a
|
||||
list of all the tests.
|
||||
|
||||
Note that the supplied files are in Unix format, with just LF characters
|
||||
as line terminators. You may need to edit them to change this if your
|
||||
system uses a different convention.
|
||||
|
||||
(10) If you have built PCRE2 with SUPPORT_JIT, the JIT features can be tested
|
||||
by running pcre2test with the -jit option. This is done automatically by
|
||||
the RunTest script. You might also like to build and run the freestanding
|
||||
JIT test program, src/pcre2_jit_test.c.
|
||||
|
||||
(11) If you want to use the pcre2grep command, compile and link
|
||||
src/pcre2grep.c; it uses only the basic 8-bit PCRE2 library (it does not
|
||||
need the pcre2posix library).
|
||||
|
||||
|
||||
STACK SIZE IN WINDOWS ENVIRONMENTS
|
||||
|
||||
The default processor stack size of 1Mb in some Windows environments is too
|
||||
small for matching patterns that need much recursion. In particular, test 2 may
|
||||
fail because of this. Normally, running out of stack causes a crash, but there
|
||||
have been cases where the test program has just died silently. See your linker
|
||||
documentation for how to increase stack size if you experience problems. If you
|
||||
are using CMake (see "BUILDING PCRE2 ON WINDOWS WITH CMAKE" below) and the gcc
|
||||
compiler, you can increase the stack size for pcre2test and pcre2grep by
|
||||
setting the CMAKE_EXE_LINKER_FLAGS variable to "-Wl,--stack,8388608" (for
|
||||
example). The Linux default of 8Mb is a reasonable choice for the stack, though
|
||||
even that can be too small for some pattern/subject combinations.
|
||||
|
||||
PCRE2 has a compile configuration option to disable the use of stack for
|
||||
recursion so that heap is used instead. However, pattern matching is
|
||||
significantly slower when this is done. There is more about stack usage in the
|
||||
"pcre2stack" documentation.
|
||||
|
||||
|
||||
LINKING PROGRAMS IN WINDOWS ENVIRONMENTS
|
||||
|
||||
If you want to statically link a program against a PCRE2 library in the form of
|
||||
a non-dll .a file, you must define PCRE2_STATIC before including src/pcre2.h.
|
||||
|
||||
|
||||
CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS
|
||||
|
||||
It is possible to compile programs to use different calling conventions using
|
||||
MSVC. Search the web for "calling conventions" for more information. To make it
|
||||
easier to change the calling convention for the exported functions in the
|
||||
PCRE2 library, the macro PCRE2_CALL_CONVENTION is present in all the external
|
||||
definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is
|
||||
not set, it defaults to empty; the default calling convention is then used
|
||||
(which is what is wanted most of the time).
|
||||
|
||||
|
||||
COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE2 ON WINDOWS WITH CMAKE")
|
||||
|
||||
There are two ways of building PCRE2 using the "configure, make, make install"
|
||||
paradigm on Windows systems: using MinGW or using Cygwin. These are not at all
|
||||
the same thing; they are completely different from each other. There is also
|
||||
support for building using CMake, which some users find a more straightforward
|
||||
way of building PCRE2 under Windows.
|
||||
|
||||
The MinGW home page (http://www.mingw.org/) says this:
|
||||
|
||||
MinGW: A collection of freely available and freely distributable Windows
|
||||
specific header files and import libraries combined with GNU toolsets that
|
||||
allow one to produce native Windows programs that do not rely on any
|
||||
3rd-party C runtime DLLs.
|
||||
|
||||
The Cygwin home page (http://www.cygwin.com/) says this:
|
||||
|
||||
Cygwin is a Linux-like environment for Windows. It consists of two parts:
|
||||
|
||||
. A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing
|
||||
substantial Linux API functionality
|
||||
|
||||
. A collection of tools which provide Linux look and feel.
|
||||
|
||||
On both MinGW and Cygwin, PCRE2 should build correctly using:
|
||||
|
||||
./configure && make && make install
|
||||
|
||||
This should create two libraries called libpcre2-8 and libpcre2-posix. These
|
||||
are independent libraries: when you link with libpcre2-posix you must also link
|
||||
with libpcre2-8, which contains the basic functions.
|
||||
|
||||
Using Cygwin's compiler generates libraries and executables that depend on
|
||||
cygwin1.dll. If a library that is generated this way is distributed,
|
||||
cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL
|
||||
licence, this forces not only PCRE2 to be under the GPL, but also the entire
|
||||
application. A distributor who wants to keep their own code proprietary must
|
||||
purchase an appropriate Cygwin licence.
|
||||
|
||||
MinGW has no such restrictions. The MinGW compiler generates a library or
|
||||
executable that can run standalone on Windows without any third party dll or
|
||||
licensing issues.
|
||||
|
||||
But there is more complication:
|
||||
|
||||
If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is
|
||||
to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a
|
||||
front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's
|
||||
gcc and MinGW's gcc). So, a user can:
|
||||
|
||||
. Build native binaries by using MinGW or by getting Cygwin and using
|
||||
-mno-cygwin.
|
||||
|
||||
. Build binaries that depend on cygwin1.dll by using Cygwin with the normal
|
||||
compiler flags.
|
||||
|
||||
The test files that are supplied with PCRE2 are in UNIX format, with LF
|
||||
characters as line terminators. Unless your PCRE2 library uses a default
|
||||
newline option that includes LF as a valid newline, it may be necessary to
|
||||
change the line terminators in the test files to get some of the tests to work.
|
||||
|
||||
|
||||
BUILDING PCRE2 ON WINDOWS WITH CMAKE
|
||||
|
||||
CMake is an alternative configuration facility that can be used instead of
|
||||
"configure". CMake creates project files (make files, solution files, etc.)
|
||||
tailored to numerous development environments, including Visual Studio,
|
||||
Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
|
||||
spaces in the names for your CMake installation and your PCRE2 source and build
|
||||
directories.
|
||||
|
||||
The following instructions were contributed by a PCRE1 user, but they should
|
||||
also work for PCRE2. If they are not followed exactly, errors may occur. In the
|
||||
event that errors do occur, it is recommended that you delete the CMake cache
|
||||
before attempting to repeat the CMake build process. In the CMake GUI, the
|
||||
cache can be deleted by selecting "File > Delete Cache".
|
||||
|
||||
1. Install the latest CMake version available from http://www.cmake.org/, and
|
||||
ensure that cmake\bin is on your path.
|
||||
|
||||
2. Unzip (retaining folder structure) the PCRE2 source tree into a source
|
||||
directory such as C:\pcre2. You should ensure your local date and time
|
||||
is not earlier than the file dates in your source dir if the release is
|
||||
very new.
|
||||
|
||||
3. Create a new, empty build directory, preferably a subdirectory of the
|
||||
source dir. For example, C:\pcre2\pcre2-xx\build.
|
||||
|
||||
4. Run cmake-gui from the Shell environment of your build tool, for example,
|
||||
Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try
|
||||
to start CMake from the Windows Start menu, as this can lead to errors.
|
||||
|
||||
5. Enter C:\pcre2\pcre2-xx and C:\pcre2\pcre2-xx\build for the source and
|
||||
build directories, respectively.
|
||||
|
||||
6. Hit the "Configure" button.
|
||||
|
||||
7. Select the particular IDE / build tool that you are using (Visual
|
||||
Studio, MSYS makefiles, MinGW makefiles, etc.)
|
||||
|
||||
8. The GUI will then list several configuration options. This is where
|
||||
you can disable Unicode support or select other PCRE2 optional features.
|
||||
|
||||
9. Hit "Configure" again. The adjacent "Generate" button should now be
|
||||
active.
|
||||
|
||||
10. Hit "Generate".
|
||||
|
||||
11. The build directory should now contain a usable build system, be it a
|
||||
solution file for Visual Studio, makefiles for MinGW, etc. Exit from
|
||||
cmake-gui and use the generated build system with your compiler or IDE.
|
||||
E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE2
|
||||
solution, select the desired configuration (Debug, or Release, etc.) and
|
||||
build the ALL_BUILD project.
|
||||
|
||||
12. If during configuration with cmake-gui you've elected to build the test
|
||||
programs, you can execute them by building the test project. E.g., for
|
||||
MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The
|
||||
most recent build configuration is targeted by the tests. A summary of
|
||||
test results is presented. Complete test output is subsequently
|
||||
available for review in Testing\Temporary under your build dir.
|
||||
|
||||
|
||||
TESTING WITH RUNTEST.BAT
|
||||
|
||||
If configured with CMake, building the test project ("make test" or building
|
||||
RUN_TESTS in Visual Studio) creates (and runs) pcre2_test.bat (and depending
|
||||
on your configuration options, possibly other test programs) in the build
|
||||
directory. The pcre2_test.bat script runs RunTest.bat with correct source and
|
||||
exe paths.
|
||||
|
||||
For manual testing with RunTest.bat, provided the build dir is a subdirectory
|
||||
of the source directory: Open command shell window. Chdir to the location
|
||||
of your pcre2test.exe and pcre2grep.exe programs. Call RunTest.bat with
|
||||
"..\RunTest.Bat" or "..\..\RunTest.bat" as appropriate.
|
||||
|
||||
To run only a particular test with RunTest.Bat provide a test number argument.
|
||||
|
||||
Otherwise:
|
||||
|
||||
1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe
|
||||
have been created.
|
||||
|
||||
2. Edit RunTest.bat to identify the full or relative location of
|
||||
the pcre2 source (in which the testdata folder resides), e.g.:
|
||||
|
||||
set srcdir=C:\pcre2\pcre2-10.00
|
||||
|
||||
3. In a Windows command environment, chdir to the location of your bat and
|
||||
exe programs.
|
||||
|
||||
4. Run RunTest.bat. Test outputs will automatically be compared to expected
|
||||
results, and discrepancies will be identified in the console output.
|
||||
|
||||
To independently test the just-in-time compiler, run pcre2_jit_test.exe.
|
||||
|
||||
|
||||
BUILDING PCRE2 ON NATIVE Z/OS AND Z/VM
|
||||
|
||||
z/OS and z/VM are operating systems for mainframe computers, produced by IBM.
|
||||
The character code used is EBCDIC, not ASCII or Unicode. In z/OS, UNIX APIs and
|
||||
applications can be supported through UNIX System Services, and in such an
|
||||
environment PCRE2 can be built in the same way as in other systems. However, in
|
||||
native z/OS (without UNIX System Services) and in z/VM, special ports are
|
||||
required. For details, please see this web site:
|
||||
|
||||
http://www.zaconsultants.net
|
||||
|
||||
The site currently has ports for PCRE1 releases, but PCRE2 should follow in due
|
||||
course.
|
||||
|
||||
You may also download PCRE1 from WWW.CBTTAPE.ORG, file 882. Everything, source
|
||||
and executable, is in EBCDIC and native z/OS file formats and this is the
|
||||
recommended download site.
|
||||
|
||||
=============================
|
||||
Last Updated: 15 June 2015
|
835
pcre2/doc/html/README.txt
Normal file
835
pcre2/doc/html/README.txt
Normal file
@ -0,0 +1,835 @@
|
||||
README file for PCRE2 (Perl-compatible regular expression library)
|
||||
------------------------------------------------------------------
|
||||
|
||||
PCRE2 is a re-working of the original PCRE library to provide an entirely new
|
||||
API. The latest release of PCRE2 is always available in three alternative
|
||||
formats from:
|
||||
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.tar.gz
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.tar.bz2
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.zip
|
||||
|
||||
There is a mailing list for discussion about the development of PCRE (both the
|
||||
original and new APIs) at pcre-dev@exim.org. You can access the archives and
|
||||
subscribe or manage your subscription here:
|
||||
|
||||
https://lists.exim.org/mailman/listinfo/pcre-dev
|
||||
|
||||
Please read the NEWS file if you are upgrading from a previous release.
|
||||
The contents of this README file are:
|
||||
|
||||
The PCRE2 APIs
|
||||
Documentation for PCRE2
|
||||
Contributions by users of PCRE2
|
||||
Building PCRE2 on non-Unix-like systems
|
||||
Building PCRE2 without using autotools
|
||||
Building PCRE2 using autotools
|
||||
Retrieving configuration information
|
||||
Shared libraries
|
||||
Cross-compiling using autotools
|
||||
Making new tarballs
|
||||
Testing PCRE2
|
||||
Character tables
|
||||
File manifest
|
||||
|
||||
|
||||
The PCRE2 APIs
|
||||
--------------
|
||||
|
||||
PCRE2 is written in C, and it has its own API. There are three sets of
|
||||
functions, one for the 8-bit library, which processes strings of bytes, one for
|
||||
the 16-bit library, which processes strings of 16-bit values, and one for the
|
||||
32-bit library, which processes strings of 32-bit values. There are no C++
|
||||
wrappers.
|
||||
|
||||
The distribution does contain a set of C wrapper functions for the 8-bit
|
||||
library that are based on the POSIX regular expression API (see the pcre2posix
|
||||
man page). These can be found in a library called libpcre2posix. Note that this
|
||||
just provides a POSIX calling interface to PCRE2; the regular expressions
|
||||
themselves still follow Perl syntax and semantics. The POSIX API is restricted,
|
||||
and does not give full access to all of PCRE2's facilities.
|
||||
|
||||
The header file for the POSIX-style functions is called pcre2posix.h. The
|
||||
official POSIX name is regex.h, but I did not want to risk possible problems
|
||||
with existing files of that name by distributing it that way. To use PCRE2 with
|
||||
an existing program that uses the POSIX API, pcre2posix.h will have to be
|
||||
renamed or pointed at by a link.
|
||||
|
||||
If you are using the POSIX interface to PCRE2 and there is already a POSIX
|
||||
regex library installed on your system, as well as worrying about the regex.h
|
||||
header file (as mentioned above), you must also take care when linking programs
|
||||
to ensure that they link with PCRE2's libpcre2posix library. Otherwise they may
|
||||
pick up the POSIX functions of the same name from the other library.
|
||||
|
||||
One way of avoiding this confusion is to compile PCRE2 with the addition of
|
||||
-Dregcomp=PCRE2regcomp (and similarly for the other POSIX functions) to the
|
||||
compiler flags (CFLAGS if you are using "configure" -- see below). This has the
|
||||
effect of renaming the functions so that the names no longer clash. Of course,
|
||||
you have to do the same thing for your applications, or write them using the
|
||||
new names.
|
||||
|
||||
|
||||
Documentation for PCRE2
|
||||
-----------------------
|
||||
|
||||
If you install PCRE2 in the normal way on a Unix-like system, you will end up
|
||||
with a set of man pages whose names all start with "pcre2". The one that is
|
||||
just called "pcre2" lists all the others. In addition to these man pages, the
|
||||
PCRE2 documentation is supplied in two other forms:
|
||||
|
||||
1. There are files called doc/pcre2.txt, doc/pcre2grep.txt, and
|
||||
doc/pcre2test.txt in the source distribution. The first of these is a
|
||||
concatenation of the text forms of all the section 3 man pages except the
|
||||
listing of pcre2demo.c and those that summarize individual functions. The
|
||||
other two are the text forms of the section 1 man pages for the pcre2grep
|
||||
and pcre2test commands. These text forms are provided for ease of scanning
|
||||
with text editors or similar tools. They are installed in
|
||||
<prefix>/share/doc/pcre2, where <prefix> is the installation prefix
|
||||
(defaulting to /usr/local).
|
||||
|
||||
2. A set of files containing all the documentation in HTML form, hyperlinked
|
||||
in various ways, and rooted in a file called index.html, is distributed in
|
||||
doc/html and installed in <prefix>/share/doc/pcre2/html.
|
||||
|
||||
|
||||
Building PCRE2 on non-Unix-like systems
|
||||
---------------------------------------
|
||||
|
||||
For a non-Unix-like system, please read the comments in the file
|
||||
NON-AUTOTOOLS-BUILD, though if your system supports the use of "configure" and
|
||||
"make" you may be able to build PCRE2 using autotools in the same way as for
|
||||
many Unix-like systems.
|
||||
|
||||
PCRE2 can also be configured using CMake, which can be run in various ways
|
||||
(command line, GUI, etc). This creates Makefiles, solution files, etc. The file
|
||||
NON-AUTOTOOLS-BUILD has information about CMake.
|
||||
|
||||
PCRE2 has been compiled on many different operating systems. It should be
|
||||
straightforward to build PCRE2 on any system that has a Standard C compiler and
|
||||
library, because it uses only Standard C functions.
|
||||
|
||||
|
||||
Building PCRE2 without using autotools
|
||||
--------------------------------------
|
||||
|
||||
The use of autotools (in particular, libtool) is problematic in some
|
||||
environments, even some that are Unix or Unix-like. See the NON-AUTOTOOLS-BUILD
|
||||
file for ways of building PCRE2 without using autotools.
|
||||
|
||||
|
||||
Building PCRE2 using autotools
|
||||
------------------------------
|
||||
|
||||
The following instructions assume the use of the widely used "configure; make;
|
||||
make install" (autotools) process.
|
||||
|
||||
To build PCRE2 on a system that supports autotools, first run the "configure"
|
||||
command from the PCRE2 distribution directory, with your current directory set
|
||||
to the directory where you want the files to be created. This command is a
|
||||
standard GNU "autoconf" configuration script, for which generic instructions
|
||||
are supplied in the file INSTALL.
|
||||
|
||||
Most commonly, people build PCRE2 within its own distribution directory, and in
|
||||
this case, on many systems, just running "./configure" is sufficient. However,
|
||||
the usual methods of changing standard defaults are available. For example:
|
||||
|
||||
CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local
|
||||
|
||||
This command specifies that the C compiler should be run with the flags '-O2
|
||||
-Wall' instead of the default, and that "make install" should install PCRE2
|
||||
under /opt/local instead of the default /usr/local.
|
||||
|
||||
If you want to build in a different directory, just run "configure" with that
|
||||
directory as current. For example, suppose you have unpacked the PCRE2 source
|
||||
into /source/pcre2/pcre2-xxx, but you want to build it in
|
||||
/build/pcre2/pcre2-xxx:
|
||||
|
||||
cd /build/pcre2/pcre2-xxx
|
||||
/source/pcre2/pcre2-xxx/configure
|
||||
|
||||
PCRE2 is written in C and is normally compiled as a C library. However, it is
|
||||
possible to build it as a C++ library, though the provided building apparatus
|
||||
does not have any features to support this.
|
||||
|
||||
There are some optional features that can be included or omitted from the PCRE2
|
||||
library. They are also documented in the pcre2build man page.
|
||||
|
||||
. By default, both shared and static libraries are built. You can change this
|
||||
by adding one of these options to the "configure" command:
|
||||
|
||||
--disable-shared
|
||||
--disable-static
|
||||
|
||||
(See also "Shared libraries" below.)
|
||||
|
||||
. By default, only the 8-bit library is built. If you add --enable-pcre2-16 to
|
||||
the "configure" command, the 16-bit library is also built. If you add
|
||||
--enable-pcre2-32 to the "configure" command, the 32-bit library is also
|
||||
built. If you want only the 16-bit or 32-bit library, use --disable-pcre2-8
|
||||
to disable building the 8-bit library.
|
||||
|
||||
. If you want to include support for just-in-time compiling, which can give
|
||||
large performance improvements on certain platforms, add --enable-jit to the
|
||||
"configure" command. This support is available only for certain hardware
|
||||
architectures. If you try to enable it on an unsupported architecture, there
|
||||
will be a compile time error.
|
||||
|
||||
. When JIT support is enabled, pcre2grep automatically makes use of it, unless
|
||||
you add --disable-pcre2grep-jit to the "configure" command.
|
||||
|
||||
. If you do not want to make use of the support for UTF-8 Unicode character
|
||||
strings in the 8-bit library, UTF-16 Unicode character strings in the 16-bit
|
||||
library, or UTF-32 Unicode character strings in the 32-bit library, you can
|
||||
add --disable-unicode to the "configure" command. This reduces the size of
|
||||
the libraries. It is not possible to configure one library with Unicode
|
||||
support, and another without, in the same configuration.
|
||||
|
||||
When Unicode support is available, the use of a UTF encoding still has to be
|
||||
enabled by setting the PCRE2_UTF option at run time or starting a pattern
|
||||
with (*UTF). When PCRE2 is compiled with Unicode support, its input can only
|
||||
either be ASCII or UTF-8/16/32, even when running on EBCDIC platforms. It is
|
||||
not possible to use both --enable-unicode and --enable-ebcdic at the same
|
||||
time.
|
||||
|
||||
As well as supporting UTF strings, Unicode support includes support for the
|
||||
\P, \p, and \X sequences that recognize Unicode character properties.
|
||||
However, only the basic two-letter properties such as Lu are supported.
|
||||
Escape sequences such as \d and \w in patterns do not by default make use of
|
||||
Unicode properties, but can be made to do so by setting the PCRE2_UCP option
|
||||
or starting a pattern with (*UCP).
|
||||
|
||||
. You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any
|
||||
of the preceding, or any of the Unicode newline sequences, as indicating the
|
||||
end of a line. Whatever you specify at build time is the default; the caller
|
||||
of PCRE2 can change the selection at run time. The default newline indicator
|
||||
is a single LF character (the Unix standard). You can specify the default
|
||||
newline indicator by adding --enable-newline-is-cr, --enable-newline-is-lf,
|
||||
--enable-newline-is-crlf, --enable-newline-is-anycrlf, or
|
||||
--enable-newline-is-any to the "configure" command, respectively.
|
||||
|
||||
If you specify --enable-newline-is-cr or --enable-newline-is-crlf, some of
|
||||
the standard tests will fail, because the lines in the test files end with
|
||||
LF. Even if the files are edited to change the line endings, there are likely
|
||||
to be some failures. With --enable-newline-is-anycrlf or
|
||||
--enable-newline-is-any, many tests should succeed, but there may be some
|
||||
failures.
|
||||
|
||||
. By default, the sequence \R in a pattern matches any Unicode line ending
|
||||
sequence. This is independent of the option specifying what PCRE2 considers
|
||||
to be the end of a line (see above). However, the caller of PCRE2 can
|
||||
restrict \R to match only CR, LF, or CRLF. You can make this the default by
|
||||
adding --enable-bsr-anycrlf to the "configure" command (bsr = "backslash R").
|
||||
|
||||
. PCRE2 has a counter that limits the depth of nesting of parentheses in a
|
||||
pattern. This limits the amount of system stack that a pattern uses when it
|
||||
is compiled. The default is 250, but you can change it by setting, for
|
||||
example,
|
||||
|
||||
--with-parens-nest-limit=500
|
||||
|
||||
. PCRE2 has a counter that can be set to limit the amount of resources it uses
|
||||
when matching a pattern. If the limit is exceeded during a match, the match
|
||||
fails. The default is ten million. You can change the default by setting, for
|
||||
example,
|
||||
|
||||
--with-match-limit=500000
|
||||
|
||||
on the "configure" command. This is just the default; individual calls to
|
||||
pcre2_match() can supply their own value. There is more discussion on the
|
||||
pcre2api man page.
|
||||
|
||||
. There is a separate counter that limits the depth of recursive function calls
|
||||
during a matching process. This also has a default of ten million, which is
|
||||
essentially "unlimited". You can change the default by setting, for example,
|
||||
|
||||
--with-match-limit-recursion=500000
|
||||
|
||||
Recursive function calls use up the runtime stack; running out of stack can
|
||||
cause programs to crash in strange ways. There is a discussion about stack
|
||||
sizes in the pcre2stack man page.
|
||||
|
||||
. In the 8-bit library, the default maximum compiled pattern size is around
|
||||
64K. You can increase this by adding --with-link-size=3 to the "configure"
|
||||
command. PCRE2 then uses three bytes instead of two for offsets to different
|
||||
parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
|
||||
the same as --with-link-size=4, which (in both libraries) uses four-byte
|
||||
offsets. Increasing the internal link size reduces performance in the 8-bit
|
||||
and 16-bit libraries. In the 32-bit library, the link size setting is
|
||||
ignored, as 4-byte offsets are always used.
|
||||
|
||||
. You can build PCRE2 so that its internal match() function that is called from
|
||||
pcre2_match() does not call itself recursively. Instead, it uses memory
|
||||
blocks obtained from the heap to save data that would otherwise be saved on
|
||||
the stack. To build PCRE2 like this, use
|
||||
|
||||
--disable-stack-for-recursion
|
||||
|
||||
on the "configure" command. PCRE2 runs more slowly in this mode, but it may
|
||||
be necessary in environments with limited stack sizes. This applies only to
|
||||
the normal execution of the pcre2_match() function; if JIT support is being
|
||||
successfully used, it is not relevant. Equally, it does not apply to
|
||||
pcre2_dfa_match(), which does not use deeply nested recursion. There is a
|
||||
discussion about stack sizes in the pcre2stack man page.
|
||||
|
||||
. For speed, PCRE2 uses four tables for manipulating and identifying characters
|
||||
whose code point values are less than 256. By default, it uses a set of
|
||||
tables for ASCII encoding that is part of the distribution. If you specify
|
||||
|
||||
--enable-rebuild-chartables
|
||||
|
||||
a program called dftables is compiled and run in the default C locale when
|
||||
you obey "make". It builds a source file called pcre2_chartables.c. If you do
|
||||
not specify this option, pcre2_chartables.c is created as a copy of
|
||||
pcre2_chartables.c.dist. See "Character tables" below for further
|
||||
information.
|
||||
|
||||
. It is possible to compile PCRE2 for use on systems that use EBCDIC as their
|
||||
character code (as opposed to ASCII/Unicode) by specifying
|
||||
|
||||
--enable-ebcdic --disable-unicode
|
||||
|
||||
This automatically implies --enable-rebuild-chartables (see above). However,
|
||||
when PCRE2 is built this way, it always operates in EBCDIC. It cannot support
|
||||
both EBCDIC and UTF-8/16/32. There is a second option, --enable-ebcdic-nl25,
|
||||
which specifies that the code value for the EBCDIC NL character is 0x25
|
||||
instead of the default 0x15.
|
||||
|
||||
. If you specify --enable-debug, additional debugging code is included in the
|
||||
build. This option is intended for use by the PCRE2 maintainers.
|
||||
|
||||
. In environments where valgrind is installed, if you specify
|
||||
|
||||
--enable-valgrind
|
||||
|
||||
PCRE2 will use valgrind annotations to mark certain memory regions as
|
||||
unaddressable. This allows it to detect invalid memory accesses, and is
|
||||
mostly useful for debugging PCRE2 itself.
|
||||
|
||||
. In environments where the gcc compiler is used and lcov version 1.6 or above
|
||||
is installed, if you specify
|
||||
|
||||
--enable-coverage
|
||||
|
||||
the build process implements a code coverage report for the test suite. The
|
||||
report is generated by running "make coverage". If ccache is installed on
|
||||
your system, it must be disabled when building PCRE2 for coverage reporting.
|
||||
You can do this by setting the environment variable CCACHE_DISABLE=1 before
|
||||
running "make" to build PCRE2. There is more information about coverage
|
||||
reporting in the "pcre2build" documentation.
|
||||
|
||||
. The pcre2grep program currently supports only 8-bit data files, and so
|
||||
requires the 8-bit PCRE2 library. It is possible to compile pcre2grep to use
|
||||
libz and/or libbz2, in order to read .gz and .bz2 files (respectively), by
|
||||
specifying one or both of
|
||||
|
||||
--enable-pcre2grep-libz
|
||||
--enable-pcre2grep-libbz2
|
||||
|
||||
Of course, the relevant libraries must be installed on your system.
|
||||
|
||||
. The default size (in bytes) of the internal buffer used by pcre2grep can be
|
||||
set by, for example:
|
||||
|
||||
--with-pcre2grep-bufsize=51200
|
||||
|
||||
The value must be a plain integer. The default is 20480.
|
||||
|
||||
. It is possible to compile pcre2test so that it links with the libreadline
|
||||
or libedit libraries, by specifying, respectively,
|
||||
|
||||
--enable-pcre2test-libreadline or --enable-pcre2test-libedit
|
||||
|
||||
If this is done, when pcre2test's input is from a terminal, it reads it using
|
||||
the readline() function. This provides line-editing and history facilities.
|
||||
Note that libreadline is GPL-licenced, so if you distribute a binary of
|
||||
pcre2test linked in this way, there may be licensing issues. These can be
|
||||
avoided by linking with libedit (which has a BSD licence) instead.
|
||||
|
||||
Enabling libreadline causes the -lreadline option to be added to the
|
||||
pcre2test build. In many operating environments with a system-installed
|
||||
readline library this is sufficient. However, in some environments (e.g. if
|
||||
an unmodified distribution version of readline is in use), it may be
|
||||
necessary to specify something like LIBS="-lncurses" as well. This is
|
||||
because, to quote the readline INSTALL, "Readline uses the termcap functions,
|
||||
but does not link with the termcap or curses library itself, allowing
|
||||
applications which link with readline the to choose an appropriate library."
|
||||
If you get error messages about missing functions tgetstr, tgetent, tputs,
|
||||
tgetflag, or tgoto, this is the problem, and linking with the ncurses library
|
||||
should fix it.
|
||||
|
||||
The "configure" script builds the following files for the basic C library:
|
||||
|
||||
. Makefile the makefile that builds the library
|
||||
. src/config.h build-time configuration options for the library
|
||||
. src/pcre2.h the public PCRE2 header file
|
||||
. pcre2-config script that shows the building settings such as CFLAGS
|
||||
that were set for "configure"
|
||||
. libpcre2-8.pc )
|
||||
. libpcre2-16.pc ) data for the pkg-config command
|
||||
. libpcre2-32.pc )
|
||||
. libpcre2-posix.pc )
|
||||
. libtool script that builds shared and/or static libraries
|
||||
|
||||
Versions of config.h and pcre2.h are distributed in the src directory of PCRE2
|
||||
tarballs under the names config.h.generic and pcre2.h.generic. These are
|
||||
provided for those who have to build PCRE2 without using "configure" or CMake.
|
||||
If you use "configure" or CMake, the .generic versions are not used.
|
||||
|
||||
The "configure" script also creates config.status, which is an executable
|
||||
script that can be run to recreate the configuration, and config.log, which
|
||||
contains compiler output from tests that "configure" runs.
|
||||
|
||||
Once "configure" has run, you can run "make". This builds whichever of the
|
||||
libraries libpcre2-8, libpcre2-16 and libpcre2-32 are configured, and a test
|
||||
program called pcre2test. If you enabled JIT support with --enable-jit, another
|
||||
test program called pcre2_jit_test is built as well. If the 8-bit library is
|
||||
built, libpcre2-posix and the pcre2grep command are also built. Running
|
||||
"make" with the -j option may speed up compilation on multiprocessor systems.
|
||||
|
||||
The command "make check" runs all the appropriate tests. Details of the PCRE2
|
||||
tests are given below in a separate section of this document. The -j option of
|
||||
"make" can also be used when running the tests.
|
||||
|
||||
You can use "make install" to install PCRE2 into live directories on your
|
||||
system. The following are installed (file names are all relative to the
|
||||
<prefix> that is set when "configure" is run):
|
||||
|
||||
Commands (bin):
|
||||
pcre2test
|
||||
pcre2grep (if 8-bit support is enabled)
|
||||
pcre2-config
|
||||
|
||||
Libraries (lib):
|
||||
libpcre2-8 (if 8-bit support is enabled)
|
||||
libpcre2-16 (if 16-bit support is enabled)
|
||||
libpcre2-32 (if 32-bit support is enabled)
|
||||
libpcre2-posix (if 8-bit support is enabled)
|
||||
|
||||
Configuration information (lib/pkgconfig):
|
||||
libpcre2-8.pc
|
||||
libpcre2-16.pc
|
||||
libpcre2-32.pc
|
||||
libpcre2-posix.pc
|
||||
|
||||
Header files (include):
|
||||
pcre2.h
|
||||
pcre2posix.h
|
||||
|
||||
Man pages (share/man/man{1,3}):
|
||||
pcre2grep.1
|
||||
pcre2test.1
|
||||
pcre2-config.1
|
||||
pcre2.3
|
||||
pcre2*.3 (lots more pages, all starting "pcre2")
|
||||
|
||||
HTML documentation (share/doc/pcre2/html):
|
||||
index.html
|
||||
*.html (lots more pages, hyperlinked from index.html)
|
||||
|
||||
Text file documentation (share/doc/pcre2):
|
||||
AUTHORS
|
||||
COPYING
|
||||
ChangeLog
|
||||
LICENCE
|
||||
NEWS
|
||||
README
|
||||
pcre2.txt (a concatenation of the man(3) pages)
|
||||
pcre2test.txt the pcre2test man page
|
||||
pcre2grep.txt the pcre2grep man page
|
||||
pcre2-config.txt the pcre2-config man page
|
||||
|
||||
If you want to remove PCRE2 from your system, you can run "make uninstall".
|
||||
This removes all the files that "make install" installed. However, it does not
|
||||
remove any directories, because these are often shared with other programs.
|
||||
|
||||
|
||||
Retrieving configuration information
|
||||
------------------------------------
|
||||
|
||||
Running "make install" installs the command pcre2-config, which can be used to
|
||||
recall information about the PCRE2 configuration and installation. For example:
|
||||
|
||||
pcre2-config --version
|
||||
|
||||
prints the version number, and
|
||||
|
||||
pcre2-config --libs8
|
||||
|
||||
outputs information about where the 8-bit library is installed. This command
|
||||
can be included in makefiles for programs that use PCRE2, saving the programmer
|
||||
from having to remember too many details. Run pcre2-config with no arguments to
|
||||
obtain a list of possible arguments.
|
||||
|
||||
The pkg-config command is another system for saving and retrieving information
|
||||
about installed libraries. Instead of separate commands for each library, a
|
||||
single command is used. For example:
|
||||
|
||||
pkg-config --libs libpcre2-16
|
||||
|
||||
The data is held in *.pc files that are installed in a directory called
|
||||
<prefix>/lib/pkgconfig.
|
||||
|
||||
|
||||
Shared libraries
|
||||
----------------
|
||||
|
||||
The default distribution builds PCRE2 as shared libraries and static libraries,
|
||||
as long as the operating system supports shared libraries. Shared library
|
||||
support relies on the "libtool" script which is built as part of the
|
||||
"configure" process.
|
||||
|
||||
The libtool script is used to compile and link both shared and static
|
||||
libraries. They are placed in a subdirectory called .libs when they are newly
|
||||
built. The programs pcre2test and pcre2grep are built to use these uninstalled
|
||||
libraries (by means of wrapper scripts in the case of shared libraries). When
|
||||
you use "make install" to install shared libraries, pcre2grep and pcre2test are
|
||||
automatically re-built to use the newly installed shared libraries before being
|
||||
installed themselves. However, the versions left in the build directory still
|
||||
use the uninstalled libraries.
|
||||
|
||||
To build PCRE2 using static libraries only you must use --disable-shared when
|
||||
configuring it. For example:
|
||||
|
||||
./configure --prefix=/usr/gnu --disable-shared
|
||||
|
||||
Then run "make" in the usual way. Similarly, you can use --disable-static to
|
||||
build only shared libraries.
|
||||
|
||||
|
||||
Cross-compiling using autotools
|
||||
-------------------------------
|
||||
|
||||
You can specify CC and CFLAGS in the normal way to the "configure" command, in
|
||||
order to cross-compile PCRE2 for some other host. However, you should NOT
|
||||
specify --enable-rebuild-chartables, because if you do, the dftables.c source
|
||||
file is compiled and run on the local host, in order to generate the inbuilt
|
||||
character tables (the pcre2_chartables.c file). This will probably not work,
|
||||
because dftables.c needs to be compiled with the local compiler, not the cross
|
||||
compiler.
|
||||
|
||||
When --enable-rebuild-chartables is not specified, pcre2_chartables.c is
|
||||
created by making a copy of pcre2_chartables.c.dist, which is a default set of
|
||||
tables that assumes ASCII code. Cross-compiling with the default tables should
|
||||
not be a problem.
|
||||
|
||||
If you need to modify the character tables when cross-compiling, you should
|
||||
move pcre2_chartables.c.dist out of the way, then compile dftables.c by hand
|
||||
and run it on the local host to make a new version of pcre2_chartables.c.dist.
|
||||
Then when you cross-compile PCRE2 this new version of the tables will be used.
|
||||
|
||||
|
||||
Making new tarballs
|
||||
-------------------
|
||||
|
||||
The command "make dist" creates three PCRE2 tarballs, in tar.gz, tar.bz2, and
|
||||
zip formats. The command "make distcheck" does the same, but then does a trial
|
||||
build of the new distribution to ensure that it works.
|
||||
|
||||
If you have modified any of the man page sources in the doc directory, you
|
||||
should first run the PrepareRelease script before making a distribution. This
|
||||
script creates the .txt and HTML forms of the documentation from the man pages.
|
||||
|
||||
|
||||
Testing PCRE2
|
||||
-------------
|
||||
|
||||
To test the basic PCRE2 library on a Unix-like system, run the RunTest script.
|
||||
There is another script called RunGrepTest that tests the pcre2grep command.
|
||||
When JIT support is enabled, a third test program called pcre2_jit_test is
|
||||
built. Both the scripts and all the program tests are run if you obey "make
|
||||
check". For other environments, see the instructions in NON-AUTOTOOLS-BUILD.
|
||||
|
||||
The RunTest script runs the pcre2test test program (which is documented in its
|
||||
own man page) on each of the relevant testinput files in the testdata
|
||||
directory, and compares the output with the contents of the corresponding
|
||||
testoutput files. RunTest uses a file called testtry to hold the main output
|
||||
from pcre2test. Other files whose names begin with "test" are used as working
|
||||
files in some tests.
|
||||
|
||||
Some tests are relevant only when certain build-time options were selected. For
|
||||
example, the tests for UTF-8/16/32 features are run only when Unicode support
|
||||
is available. RunTest outputs a comment when it skips a test.
|
||||
|
||||
Many (but not all) of the tests that are not skipped are run twice if JIT
|
||||
support is available. On the second run, JIT compilation is forced. This
|
||||
testing can be suppressed by putting "nojit" on the RunTest command line.
|
||||
|
||||
The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
|
||||
libraries that are enabled. If you want to run just one set of tests, call
|
||||
RunTest with either the -8, -16 or -32 option.
|
||||
|
||||
If valgrind is installed, you can run the tests under it by putting "valgrind"
|
||||
on the RunTest command line. To run pcre2test on just one or more specific test
|
||||
files, give their numbers as arguments to RunTest, for example:
|
||||
|
||||
RunTest 2 7 11
|
||||
|
||||
You can also specify ranges of tests such as 3-6 or 3- (meaning 3 to the
|
||||
end), or a number preceded by ~ to exclude a test. For example:
|
||||
|
||||
RunTest 3-15 ~10
|
||||
|
||||
This runs tests 3 to 15, excluding test 10, and just ~13 runs all the tests
|
||||
except test 13. Whatever order the arguments are in, the tests are always run
|
||||
in numerical order.
|
||||
|
||||
You can also call RunTest with the single argument "list" to cause it to output
|
||||
a list of tests.
|
||||
|
||||
The test sequence starts with "test 0", which is a special test that has no
|
||||
input file, and whose output is not checked. This is because it will be
|
||||
different on different hardware and with different configurations. The test
|
||||
exists in order to exercise some of pcre2test's code that would not otherwise
|
||||
be run.
|
||||
|
||||
Tests 1 and 2 can always be run, as they expect only plain text strings (not
|
||||
UTF) and make no use of Unicode properties. The first test file can be fed
|
||||
directly into the perltest.sh script to check that Perl gives the same results.
|
||||
The only difference you should see is in the first few lines, where the Perl
|
||||
version is given instead of the PCRE2 version. The second set of tests checks
|
||||
auxiliary functions, error detection, and run-time flags that are specific to
|
||||
PCRE2. It also uses the debugging flags to check some of the internals of
|
||||
pcre2_compile().
|
||||
|
||||
If you build PCRE2 with a locale setting that is not the standard C locale, the
|
||||
character tables may be different (see next paragraph). In some cases, this may
|
||||
cause failures in the second set of tests. For example, in a locale where the
|
||||
isprint() function yields TRUE for characters in the range 128-255, the use of
|
||||
[:isascii:] inside a character class defines a different set of characters, and
|
||||
this shows up in this test as a difference in the compiled code, which is being
|
||||
listed for checking. For example, where the comparison test output contains
|
||||
[\x00-\x7f] the test might contain [\x00-\xff], and similarly in some other
|
||||
cases. This is not a bug in PCRE2.
|
||||
|
||||
Test 3 checks pcre2_maketables(), the facility for building a set of character
|
||||
tables for a specific locale and using them instead of the default tables. The
|
||||
script uses the "locale" command to check for the availability of the "fr_FR",
|
||||
"french", or "fr" locale, and uses the first one that it finds. If the "locale"
|
||||
command fails, or if its output doesn't include "fr_FR", "french", or "fr" in
|
||||
the list of available locales, the third test cannot be run, and a comment is
|
||||
output to say why. If running this test produces an error like this:
|
||||
|
||||
** Failed to set locale "fr_FR"
|
||||
|
||||
it means that the given locale is not available on your system, despite being
|
||||
listed by "locale". This does not mean that PCRE2 is broken. There are three
|
||||
alternative output files for the third test, because three different versions
|
||||
of the French locale have been encountered. The test passes if its output
|
||||
matches any one of them.
|
||||
|
||||
Tests 4 and 5 check UTF and Unicode property support, test 4 being compatible
|
||||
with the perltest.sh script, and test 5 checking PCRE2-specific things.
|
||||
|
||||
Tests 6 and 7 check the pcre2_dfa_match() alternative matching function, in
|
||||
non-UTF mode and UTF-mode with Unicode property support, respectively.
|
||||
|
||||
Test 8 checks some internal offsets and code size features; it is run only when
|
||||
the default "link size" of 2 is set (in other cases the sizes change) and when
|
||||
Unicode support is enabled.
|
||||
|
||||
Tests 9 and 10 are run only in 8-bit mode, and tests 11 and 12 are run only in
|
||||
16-bit and 32-bit modes. These are tests that generate different output in
|
||||
8-bit mode. Each pair are for general cases and Unicode support, respectively.
|
||||
Test 13 checks the handling of non-UTF characters greater than 255 by
|
||||
pcre2_dfa_match() in 16-bit and 32-bit modes.
|
||||
|
||||
Test 14 contains a number of tests that must not be run with JIT. They check,
|
||||
among other non-JIT things, the match-limiting features of the interpretive
|
||||
matcher.
|
||||
|
||||
Test 15 is run only when JIT support is not available. It checks that an
|
||||
attempt to use JIT has the expected behaviour.
|
||||
|
||||
Test 16 is run only when JIT support is available. It checks JIT complete and
|
||||
partial modes, match-limiting under JIT, and other JIT-specific features.
|
||||
|
||||
Tests 17 and 18 are run only in 8-bit mode. They check the POSIX interface to
|
||||
the 8-bit library, without and with Unicode support, respectively.
|
||||
|
||||
Test 19 checks the serialization functions by writing a set of compiled
|
||||
patterns to a file, and then reloading and checking them.
|
||||
|
||||
|
||||
Character tables
|
||||
----------------
|
||||
|
||||
For speed, PCRE2 uses four tables for manipulating and identifying characters
|
||||
whose code point values are less than 256. By default, a set of tables that is
|
||||
built into the library is used. The pcre2_maketables() function can be called
|
||||
by an application to create a new set of tables in the current locale. These are
|
||||
passed to PCRE2 by calling pcre2_set_character_tables() to put a pointer into a
|
||||
compile context.
|
||||
|
||||
The source file called pcre2_chartables.c contains the default set of tables.
|
||||
By default, this is created as a copy of pcre2_chartables.c.dist, which
|
||||
contains tables for ASCII coding. However, if --enable-rebuild-chartables is
|
||||
specified for ./configure, a different version of pcre2_chartables.c is built
|
||||
by the program dftables (compiled from dftables.c), which uses the ANSI C
|
||||
character handling functions such as isalnum(), isalpha(), isupper(),
|
||||
islower(), etc. to build the table sources. This means that the default C
|
||||
locale which is set for your system will control the contents of these default
|
||||
tables. You can change the default tables by editing pcre2_chartables.c and
|
||||
then re-building PCRE2. If you do this, you should take care to ensure that the
|
||||
file does not get automatically re-generated. The best way to do this is to
|
||||
move pcre2_chartables.c.dist out of the way and replace it with your customized
|
||||
tables.
|
||||
|
||||
When the dftables program is run as a result of --enable-rebuild-chartables,
|
||||
it uses the default C locale that is set on your system. It does not pay
|
||||
attention to the LC_xxx environment variables. In other words, it uses the
|
||||
system's default locale rather than whatever the compiling user happens to have
|
||||
set. If you really do want to build a source set of character tables in a
|
||||
locale that is specified by the LC_xxx variables, you can run the dftables
|
||||
program by hand with the -L option. For example:
|
||||
|
||||
./dftables -L pcre2_chartables.c.special
|
||||
|
||||
The first two 256-byte tables provide lower casing and case flipping functions,
|
||||
respectively. The next table consists of three 32-byte bit maps which identify
|
||||
digits, "word" characters, and white space, respectively. These are used when
|
||||
building 32-byte bit maps that represent character classes for code points less
|
||||
than 256. The final 256-byte table has bits indicating various character types,
|
||||
as follows:
|
||||
|
||||
1 white space character
|
||||
2 letter
|
||||
4 decimal digit
|
||||
8 hexadecimal digit
|
||||
16 alphanumeric or '_'
|
||||
128 regular expression metacharacter or binary zero
|
||||
|
||||
You should not alter the set of characters that contain the 128 bit, as that
|
||||
will cause PCRE2 to malfunction.
|
||||
|
||||
|
||||
File manifest
|
||||
-------------
|
||||
|
||||
The distribution should contain the files listed below.
|
||||
|
||||
(A) Source files for the PCRE2 library functions and their headers are found in
|
||||
the src directory:
|
||||
|
||||
src/dftables.c auxiliary program for building pcre2_chartables.c
|
||||
when --enable-rebuild-chartables is specified
|
||||
|
||||
src/pcre2_chartables.c.dist a default set of character tables that assume
|
||||
ASCII coding; unless --enable-rebuild-chartables is
|
||||
specified, used by copying to pcre2_chartables.c
|
||||
|
||||
src/pcre2posix.c )
|
||||
src/pcre2_auto_possess.c )
|
||||
src/pcre2_compile.c )
|
||||
src/pcre2_config.c )
|
||||
src/pcre2_context.c )
|
||||
src/pcre2_dfa_match.c )
|
||||
src/pcre2_error.c )
|
||||
src/pcre2_jit_compile.c )
|
||||
src/pcre2_jit_match.c ) sources for the functions in the library,
|
||||
src/pcre2_jit_misc.c ) and some internal functions that they use
|
||||
src/pcre2_maketables.c )
|
||||
src/pcre2_match.c )
|
||||
src/pcre2_match_data.c )
|
||||
src/pcre2_newline.c )
|
||||
src/pcre2_ord2utf.c )
|
||||
src/pcre2_pattern_info.c )
|
||||
src/pcre2_serialize.c )
|
||||
src/pcre2_string_utils.c )
|
||||
src/pcre2_study.c )
|
||||
src/pcre2_substitute.c )
|
||||
src/pcre2_substring.c )
|
||||
src/pcre2_tables.c )
|
||||
src/pcre2_ucd.c )
|
||||
src/pcre2_valid_utf.c )
|
||||
src/pcre2_xclass.c )
|
||||
|
||||
src/pcre2_printint.c debugging function that is used by pcre2test,
|
||||
|
||||
src/config.h.in template for config.h, when built by "configure"
|
||||
src/pcre2.h.in template for pcre2.h when built by "configure"
|
||||
src/pcre2posix.h header for the external POSIX wrapper API
|
||||
src/pcre2_internal.h header for internal use
|
||||
src/pcre2_intmodedep.h a mode-specific internal header
|
||||
src/pcre2_ucp.h header for Unicode property handling
|
||||
|
||||
sljit/* source files for the JIT compiler
|
||||
|
||||
(B) Source files for programs that use PCRE2:
|
||||
|
||||
src/pcre2demo.c simple demonstration of coding calls to PCRE2
|
||||
src/pcre2grep.c source of a grep utility that uses PCRE2
|
||||
src/pcre2test.c comprehensive test program
|
||||
src/pcre2_printint.c part of pcre2test
|
||||
src/pcre2_jit_test.c JIT test program
|
||||
|
||||
(C) Auxiliary files:
|
||||
|
||||
132html script to turn "man" pages into HTML
|
||||
AUTHORS information about the author of PCRE2
|
||||
ChangeLog log of changes to the code
|
||||
CleanTxt script to clean nroff output for txt man pages
|
||||
Detrail script to remove trailing spaces
|
||||
HACKING some notes about the internals of PCRE2
|
||||
INSTALL generic installation instructions
|
||||
LICENCE conditions for the use of PCRE2
|
||||
COPYING the same, using GNU's standard name
|
||||
Makefile.in ) template for Unix Makefile, which is built by
|
||||
) "configure"
|
||||
Makefile.am ) the automake input that was used to create
|
||||
) Makefile.in
|
||||
NEWS important changes in this release
|
||||
NON-AUTOTOOLS-BUILD notes on building PCRE2 without using autotools
|
||||
PrepareRelease script to make preparations for "make dist"
|
||||
README this file
|
||||
RunTest a Unix shell script for running tests
|
||||
RunGrepTest a Unix shell script for pcre2grep tests
|
||||
aclocal.m4 m4 macros (generated by "aclocal")
|
||||
config.guess ) files used by libtool,
|
||||
config.sub ) used only when building a shared library
|
||||
configure a configuring shell script (built by autoconf)
|
||||
configure.ac ) the autoconf input that was used to build
|
||||
) "configure" and config.h
|
||||
depcomp ) script to find program dependencies, generated by
|
||||
) automake
|
||||
doc/*.3 man page sources for PCRE2
|
||||
doc/*.1 man page sources for pcre2grep and pcre2test
|
||||
doc/index.html.src the base HTML page
|
||||
doc/html/* HTML documentation
|
||||
doc/pcre2.txt plain text version of the man pages
|
||||
doc/pcre2test.txt plain text documentation of test program
|
||||
install-sh a shell script for installing files
|
||||
libpcre2-8.pc.in template for libpcre2-8.pc for pkg-config
|
||||
libpcre2-16.pc.in template for libpcre2-16.pc for pkg-config
|
||||
libpcre2-32.pc.in template for libpcre2-32.pc for pkg-config
|
||||
libpcre2posix.pc.in template for libpcre2posix.pc for pkg-config
|
||||
ltmain.sh file used to build a libtool script
|
||||
missing ) common stub for a few missing GNU programs while
|
||||
) installing, generated by automake
|
||||
mkinstalldirs script for making install directories
|
||||
perltest.sh Script for running a Perl test program
|
||||
pcre2-config.in source of script which retains PCRE2 information
|
||||
testdata/testinput* test data for main library tests
|
||||
testdata/testoutput* expected test results
|
||||
testdata/grep* input and output for pcre2grep tests
|
||||
testdata/* other supporting test files
|
||||
|
||||
(D) Auxiliary files for cmake support
|
||||
|
||||
cmake/COPYING-CMAKE-SCRIPTS
|
||||
cmake/FindPackageHandleStandardArgs.cmake
|
||||
cmake/FindEditline.cmake
|
||||
cmake/FindReadline.cmake
|
||||
CMakeLists.txt
|
||||
config-cmake.h.in
|
||||
|
||||
(E) Auxiliary files for building PCRE2 "by hand"
|
||||
|
||||
pcre2.h.generic ) a version of the public PCRE2 header file
|
||||
) for use in non-"configure" environments
|
||||
config.h.generic ) a version of config.h for use in non-"configure"
|
||||
) environments
|
||||
|
||||
Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Last updated: 24 April 2015
|
264
pcre2/doc/html/index.html
Normal file
264
pcre2/doc/html/index.html
Normal file
@ -0,0 +1,264 @@
|
||||
<html>
|
||||
<!-- This is a manually maintained file that is the root of the HTML version of
|
||||
the PCRE2 documentation. When the HTML documents are built from the man
|
||||
page versions, the entire doc/html directory is emptied, this file is then
|
||||
copied into doc/html/index.html, and the remaining files therein are
|
||||
created by the 132html script.
|
||||
-->
|
||||
<head>
|
||||
<title>PCRE2 specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>Perl-compatible Regular Expressions (revised API: PCRE2)</h1>
|
||||
<p>
|
||||
The HTML documentation for PCRE2 consists of a number of pages that are listed
|
||||
below in alphabetical order. If you are new to PCRE2, please read the first one
|
||||
first.
|
||||
</p>
|
||||
|
||||
<table>
|
||||
<tr><td><a href="pcre2.html">pcre2</a></td>
|
||||
<td> Introductory page</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2-config.html">pcre2-config</a></td>
|
||||
<td> Information about the installation configuration</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2api.html">pcre2api</a></td>
|
||||
<td> PCRE2's native API</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2build.html">pcre2build</a></td>
|
||||
<td> Building PCRE2</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2callout.html">pcre2callout</a></td>
|
||||
<td> The <i>callout</i> facility</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2compat.html">pcre2compat</a></td>
|
||||
<td> Compatibility with Perl</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2demo.html">pcre2demo</a></td>
|
||||
<td> A demonstration C program that uses the PCRE2 library</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2grep.html">pcre2grep</a></td>
|
||||
<td> The <b>pcre2grep</b> command</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2jit.html">pcre2jit</a></td>
|
||||
<td> Discussion of the just-in-time optimization support</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2limits.html">pcre2limits</a></td>
|
||||
<td> Details of size and other limits</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2matching.html">pcre2matching</a></td>
|
||||
<td> Discussion of the two matching algorithms</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2partial.html">pcre2partial</a></td>
|
||||
<td> Using PCRE2 for partial matching</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2pattern.html">pcre2pattern</a></td>
|
||||
<td> Specification of the regular expressions supported by PCRE2</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2perform.html">pcre2perform</a></td>
|
||||
<td> Some comments on performance</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2posix.html">pcre2posix</a></td>
|
||||
<td> The POSIX API to the PCRE2 8-bit library</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2sample.html">pcre2sample</a></td>
|
||||
<td> Discussion of the pcre2demo program</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2serialize.html">pcre2serialize</a></td>
|
||||
<td> Serializing functions for saving precompiled patterns</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2stack.html">pcre2stack</a></td>
|
||||
<td> Discussion of PCRE2's stack usage</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2syntax.html">pcre2syntax</a></td>
|
||||
<td> Syntax quick-reference summary</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2test.html">pcre2test</a></td>
|
||||
<td> The <b>pcre2test</b> command for testing PCRE2</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2unicode.html">pcre2unicode</a></td>
|
||||
<td> Discussion of Unicode and UTF-8/UTF-16/UTF-32 support</td></tr>
|
||||
</table>
|
||||
|
||||
<p>
|
||||
There are also individual pages that summarize the interface for each function
|
||||
in the library.
|
||||
</p>
|
||||
|
||||
<table>
|
||||
|
||||
<tr><td><a href="pcre2_callout_enumerate.html">pcre2_callout_enumerate</a></td>
|
||||
<td> Enumerate callouts in a compiled pattern</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_code_free.html">pcre2_code_free</a></td>
|
||||
<td> Free a compiled pattern</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_compile.html">pcre2_compile</a></td>
|
||||
<td> Compile a regular expression pattern</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_compile_context_copy.html">pcre2_compile_context_copy</a></td>
|
||||
<td> Copy a compile context</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_compile_context_create.html">pcre2_compile_context_create</a></td>
|
||||
<td> Create a compile context</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_compile_context_free.html">pcre2_compile_context_free</a></td>
|
||||
<td> Free a compile context</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_config.html">pcre2_config</a></td>
|
||||
<td> Show build-time configuration options</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_dfa_match.html">pcre2_dfa_match</a></td>
|
||||
<td> Match a compiled pattern to a subject string
|
||||
(DFA algorithm; <i>not</i> Perl compatible)</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_general_context_copy.html">pcre2_general_context_copy</a></td>
|
||||
<td> Copy a general context</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_general_context_create.html">pcre2_general_context_create</a></td>
|
||||
<td> Create a general context</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_general_context_free.html">pcre2_general_context_free</a></td>
|
||||
<td> Free a general context</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_get_error_message.html">pcre2_get_error_message</a></td>
|
||||
<td> Get textual error message for error number</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_get_mark.html">pcre2_get_mark</a></td>
|
||||
<td> Get a (*MARK) name</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_get_ovector_count.html">pcre2_get_ovector_count</a></td>
|
||||
<td> Get the ovector count</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_get_ovector_pointer.html">pcre2_get_ovector_pointer</a></td>
|
||||
<td> Get a pointer to the ovector</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_get_startchar.html">pcre2_get_startchar</a></td>
|
||||
<td> Get the starting character offset</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_jit_compile.html">pcre2_jit_compile</a></td>
|
||||
<td> Process a compiled pattern with the JIT compiler</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_jit_free_unused_memory.html">pcre2_jit_free_unused_memory</a></td>
|
||||
<td> Free unused JIT memory</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_jit_match.html">pcre2_jit_match</a></td>
|
||||
<td> Fast path interface to JIT matching</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_jit_stack_assign.html">pcre2_jit_stack_assign</a></td>
|
||||
<td> Assign stack for JIT matching</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_jit_stack_create.html">pcre2_jit_stack_create</a></td>
|
||||
<td> Create a stack for JIT matching</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_jit_stack_free.html">pcre2_jit_stack_free</a></td>
|
||||
<td> Free a JIT matching stack</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_maketables.html">pcre2_maketables</a></td>
|
||||
<td> Build character tables in current locale</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_match.html">pcre2_match</a></td>
|
||||
<td> Match a compiled pattern to a subject string
|
||||
(Perl compatible)</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_match_context_copy.html">pcre2_match_context_copy</a></td>
|
||||
<td> Copy a match context</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_match_context_create.html">pcre2_match_context_create</a></td>
|
||||
<td> Create a match context</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_match_context_free.html">pcre2_match_context_free</a></td>
|
||||
<td> Free a match context</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_match_data_create.html">pcre2_match_data_create</a></td>
|
||||
<td> Create a match data block</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_match_data_create_from_pattern.html">pcre2_match_data_create_from_pattern</a></td>
|
||||
<td> Create a match data block getting size from pattern</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_match_data_free.html">pcre2_match_data_free</a></td>
|
||||
<td> Free a match data block</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_pattern_info.html">pcre2_pattern_info</a></td>
|
||||
<td> Extract information about a pattern</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_serialize_decode.html">pcre2_serialize_decode</a></td>
|
||||
<td> Decode serialized compiled patterns</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_serialize_encode.html">pcre2_serialize_encode</a></td>
|
||||
<td> Serialize compiled patterns for save/restore</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_serialize_free.html">pcre2_serialize_free</a></td>
|
||||
<td> Free serialized compiled patterns</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_serialize_get_number_of_codes.html">pcre2_serialize_get_number_of_codes</a></td>
|
||||
<td> Get number of serialized compiled patterns</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_bsr.html">pcre2_set_bsr</a></td>
|
||||
<td> Set \R convention</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_callout.html">pcre2_set_callout</a></td>
|
||||
<td> Set up a callout function</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_character_tables.html">pcre2_set_character_tables</a></td>
|
||||
<td> Set character tables</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_compile_recursion_guard.html">pcre2_set_compile_recursion_guard</a></td>
|
||||
<td> Set up a compile recursion guard function</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_match_limit.html">pcre2_set_match_limit</a></td>
|
||||
<td> Set the match limit</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_newline.html">pcre2_set_newline</a></td>
|
||||
<td> Set the newline convention</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_parens_nest_limit.html">pcre2_set_parens_nest_limit</a></td>
|
||||
<td> Set the parentheses nesting limit</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_recursion_limit.html">pcre2_set_recursion_limit</a></td>
|
||||
<td> Set the match recursion limit</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_recursion_memory_management.html">pcre2_set_recursion_memory_management</a></td>
|
||||
<td> Set match recursion memory management</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_substitute.html">pcre2_substitute</a></td>
|
||||
<td> Match a compiled pattern to a subject string and do
|
||||
substitutions</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_substring_copy_byname.html">pcre2_substring_copy_byname</a></td>
|
||||
<td> Extract named substring into given buffer</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_substring_copy_bynumber.html">pcre2_substring_copy_bynumber</a></td>
|
||||
<td> Extract numbered substring into given buffer</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_substring_free.html">pcre2_substring_free</a></td>
|
||||
<td> Free extracted substring</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_substring_get_byname.html">pcre2_substring_get_byname</a></td>
|
||||
<td> Extract named substring into new memory</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_substring_get_bynumber.html">pcre2_substring_get_bynumber</a></td>
|
||||
<td> Extract numbered substring into new memory</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_substring_length_byname.html">pcre2_substring_length_byname</a></td>
|
||||
<td> Find length of named substring</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_substring_length_bynumber.html">pcre2_substring_length_bynumber</a></td>
|
||||
<td> Find length of numbered substring</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_substring_list_free.html">pcre2_substring_list_free</a></td>
|
||||
<td> Free list of extracted substrings</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_substring_list_get.html">pcre2_substring_list_get</a></td>
|
||||
<td> Extract all substrings into new memory</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_substring_nametable_scan.html">pcre2_substring_nametable_scan</a></td>
|
||||
<td> Find table entries for given string name</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_substring_number_from_name.html">pcre2_substring_number_from_name</a></td>
|
||||
<td> Convert captured string name to number</td></tr>
|
||||
</table>
|
||||
|
||||
</html>
|
||||
|
102
pcre2/doc/html/pcre2-config.html
Normal file
102
pcre2/doc/html/pcre2-config.html
Normal file
@ -0,0 +1,102 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2-config specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2-config man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<ul>
|
||||
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
|
||||
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
|
||||
<li><a name="TOC3" href="#SEC3">OPTIONS</a>
|
||||
<li><a name="TOC4" href="#SEC4">SEE ALSO</a>
|
||||
<li><a name="TOC5" href="#SEC5">AUTHOR</a>
|
||||
<li><a name="TOC6" href="#SEC6">REVISION</a>
|
||||
</ul>
|
||||
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
|
||||
<P>
|
||||
<b>pcre2-config [--prefix] [--exec-prefix] [--version]</b>
|
||||
<b> [--libs8] [--libs16] [--libs32] [--libs-posix]</b>
|
||||
<b> [--cflags] [--cflags-posix]</b>
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
|
||||
<P>
|
||||
<b>pcre2-config</b> returns the configuration of the installed PCRE2 libraries
|
||||
and the options required to compile a program to use them. Some of the options
|
||||
apply only to the 8-bit, or 16-bit, or 32-bit libraries, respectively, and are
|
||||
not available for libraries that have not been built. If an unavailable option
|
||||
is encountered, the "usage" information is output.
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">OPTIONS</a><br>
|
||||
<P>
|
||||
<b>--prefix</b>
|
||||
Writes the directory prefix used in the PCRE2 installation for architecture
|
||||
independent files (<i>/usr</i> on many systems, <i>/usr/local</i> on some
|
||||
systems) to the standard output.
|
||||
</P>
|
||||
<P>
|
||||
<b>--exec-prefix</b>
|
||||
Writes the directory prefix used in the PCRE2 installation for architecture
|
||||
dependent files (normally the same as <b>--prefix</b>) to the standard output.
|
||||
</P>
|
||||
<P>
|
||||
<b>--version</b>
|
||||
Writes the version number of the installed PCRE2 libraries to the standard
|
||||
output.
|
||||
</P>
|
||||
<P>
|
||||
<b>--libs8</b>
|
||||
Writes to the standard output the command line options required to link
|
||||
with the 8-bit PCRE2 library (<b>-lpcre2-8</b> on many systems).
|
||||
</P>
|
||||
<P>
|
||||
<b>--libs16</b>
|
||||
Writes to the standard output the command line options required to link
|
||||
with the 16-bit PCRE2 library (<b>-lpcre2-16</b> on many systems).
|
||||
</P>
|
||||
<P>
|
||||
<b>--libs32</b>
|
||||
Writes to the standard output the command line options required to link
|
||||
with the 32-bit PCRE2 library (<b>-lpcre2-32</b> on many systems).
|
||||
</P>
|
||||
<P>
|
||||
<b>--libs-posix</b>
|
||||
Writes to the standard output the command line options required to link with
|
||||
PCRE2's POSIX API wrapper library (<b>-lpcre2-posix</b> <b>-lpcre2-8</b> on many
|
||||
systems).
|
||||
</P>
|
||||
<P>
|
||||
<b>--cflags</b>
|
||||
Writes to the standard output the command line options required to compile
|
||||
files that use PCRE2 (this may include some <b>-I</b> options, but is blank on
|
||||
many systems).
|
||||
</P>
|
||||
<P>
|
||||
<b>--cflags-posix</b>
|
||||
Writes to the standard output the command line options required to compile
|
||||
files that use PCRE2's POSIX API wrapper library (this may include some
|
||||
<b>-I</b> options, but is blank on many systems).
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">SEE ALSO</a><br>
|
||||
<P>
|
||||
<b>pcre2(3)</b>
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
This manual page was originally written by Mark Baker for the Debian GNU/Linux
|
||||
system. It has been subsequently revised as a generic PCRE2 man page.
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 28 September 2014
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
196
pcre2/doc/html/pcre2.html
Normal file
196
pcre2/doc/html/pcre2.html
Normal file
@ -0,0 +1,196 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2 specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2 man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<ul>
|
||||
<li><a name="TOC1" href="#SEC1">INTRODUCTION</a>
|
||||
<li><a name="TOC2" href="#SEC2">SECURITY CONSIDERATIONS</a>
|
||||
<li><a name="TOC3" href="#SEC3">USER DOCUMENTATION</a>
|
||||
<li><a name="TOC4" href="#SEC4">AUTHOR</a>
|
||||
<li><a name="TOC5" href="#SEC5">REVISION</a>
|
||||
</ul>
|
||||
<br><a name="SEC1" href="#TOC1">INTRODUCTION</a><br>
|
||||
<P>
|
||||
PCRE2 is the name used for a revised API for the PCRE library, which is a set
|
||||
of functions, written in C, that implement regular expression pattern matching
|
||||
using the same syntax and semantics as Perl, with just a few differences. Some
|
||||
features that appeared in Python and the original PCRE before they appeared in
|
||||
Perl are also available using the Python syntax. There is also some support for
|
||||
one or two .NET and Oniguruma syntax items, and there are options for
|
||||
requesting some minor changes that give better ECMAScript (aka JavaScript)
|
||||
compatibility.
|
||||
</P>
|
||||
<P>
|
||||
The source code for PCRE2 can be compiled to support 8-bit, 16-bit, or 32-bit
|
||||
code units, which means that up to three separate libraries may be installed.
|
||||
The original work to extend PCRE to 16-bit and 32-bit code units was done by
|
||||
Zoltan Herczeg and Christian Persch, respectively. In all three cases, strings
|
||||
can be interpreted either as one character per code unit, or as UTF-encoded
|
||||
Unicode, with support for Unicode general category properties. Unicode support
|
||||
is optional at build time (but is the default). However, processing strings as
|
||||
UTF code units must be enabled explicitly at run time. The version of Unicode
|
||||
in use can be discovered by running
|
||||
<pre>
|
||||
pcre2test -C
|
||||
</PRE>
|
||||
</P>
|
||||
<P>
|
||||
The three libraries contain identical sets of functions, with names ending in
|
||||
_8, _16, or _32, respectively (for example, <b>pcre2_compile_8()</b>). However,
|
||||
by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just
|
||||
one code unit width can be written using generic names such as
|
||||
<b>pcre2_compile()</b>, and the documentation is written assuming that this is
|
||||
the case.
|
||||
</P>
|
||||
<P>
|
||||
In addition to the Perl-compatible matching function, PCRE2 contains an
|
||||
alternative function that matches the same compiled patterns in a different
|
||||
way. In certain circumstances, the alternative function has some advantages.
|
||||
For a discussion of the two matching algorithms, see the
|
||||
<a href="pcre2matching.html"><b>pcre2matching</b></a>
|
||||
page.
|
||||
</P>
|
||||
<P>
|
||||
Details of exactly which Perl regular expression features are and are not
|
||||
supported by PCRE2 are given in separate documents. See the
|
||||
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
|
||||
and
|
||||
<a href="pcre2compat.html"><b>pcre2compat</b></a>
|
||||
pages. There is a syntax summary in the
|
||||
<a href="pcre2syntax.html"><b>pcre2syntax</b></a>
|
||||
page.
|
||||
</P>
|
||||
<P>
|
||||
Some features of PCRE2 can be included, excluded, or changed when the library
|
||||
is built. The
|
||||
<a href="pcre2_config.html"><b>pcre2_config()</b></a>
|
||||
function makes it possible for a client to discover which features are
|
||||
available. The features themselves are described in the
|
||||
<a href="pcre2build.html"><b>pcre2build</b></a>
|
||||
page. Documentation about building PCRE2 for various operating systems can be
|
||||
found in the
|
||||
<a href="README.txt"><b>README</b></a>
|
||||
and
|
||||
<a href="NON-AUTOTOOLS-BUILD.txt"><b>NON-AUTOTOOLS-BUILD</b></a>
|
||||
files in the source distribution.
|
||||
</P>
|
||||
<P>
|
||||
The libraries contain a number of undocumented internal functions and data
|
||||
tables that are used by more than one of the exported external functions, but
|
||||
which are not intended for use by external callers. Their names all begin with
|
||||
"_pcre2", which hopefully will not provoke any name clashes. In some
|
||||
environments, it is possible to control which external symbols are exported
|
||||
when a shared library is built, and in these cases the undocumented symbols are
|
||||
not exported.
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">SECURITY CONSIDERATIONS</a><br>
|
||||
<P>
|
||||
If you are using PCRE2 in a non-UTF application that permits users to supply
|
||||
arbitrary patterns for compilation, you should be aware of a feature that
|
||||
allows users to turn on UTF support from within a pattern. For example, an
|
||||
8-bit pattern that begins with "(*UTF)" turns on UTF-8 mode, which interprets
|
||||
patterns and subjects as strings of UTF-8 code units instead of individual
|
||||
8-bit characters. This causes both the pattern and any data against which it is
|
||||
matched to be checked for UTF-8 validity. If the data string is very long, such
|
||||
a check might use enough resources to cause your application to
|
||||
lose performance.
|
||||
</P>
|
||||
<P>
|
||||
One way of guarding against this possibility is to use the
|
||||
<b>pcre2_pattern_info()</b> function to check the compiled pattern's options for
|
||||
PCRE2_UTF. Alternatively, you can set the PCRE2_NEVER_UTF option when calling
|
||||
<b>pcre2_compile()</b>. This causes a compile-time error if a pattern contains
|
||||
a UTF-setting sequence.
|
||||
</P>
|
||||
<P>
|
||||
The use of Unicode properties for character types such as \d can also be
|
||||
enabled from within the pattern, by specifying "(*UCP)". This feature can be
|
||||
disallowed by setting the PCRE2_NEVER_UCP option.
|
||||
</P>
|
||||
<P>
|
||||
If your application is one that supports UTF, be aware that validity checking
|
||||
can take time. If the same data string is to be matched many times, you can use
|
||||
the PCRE2_NO_UTF_CHECK option for the second and subsequent matches to avoid
|
||||
running redundant checks.
|
||||
</P>
|
||||
<P>
|
||||
The use of the \C escape sequence in a UTF-8 or UTF-16 pattern can lead to
|
||||
problems, because it may leave the current matching point in the middle of a
|
||||
multi-code-unit character. The PCRE2_NEVER_BACKSLASH_C option can be used to
|
||||
lock out the use of \C, causing a compile-time error if it is encountered.
|
||||
</P>
|
||||
<P>
|
||||
Another way that performance can be hit is by running a pattern that has a very
|
||||
large search tree against a string that will never match. Nested unlimited
|
||||
repeats in a pattern are a common example. PCRE2 provides some protection
|
||||
against this: see the <b>pcre2_set_match_limit()</b> function in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page.
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">USER DOCUMENTATION</a><br>
|
||||
<P>
|
||||
The user documentation for PCRE2 comprises a number of different sections. In
|
||||
the "man" format, each of these is a separate "man page". In the HTML format,
|
||||
each is a separate page, linked from the index page. In the plain text format,
|
||||
the descriptions of the <b>pcre2grep</b> and <b>pcre2test</b> programs are in
|
||||
files called <b>pcre2grep.txt</b> and <b>pcre2test.txt</b>, respectively. The
|
||||
remaining sections, except for the <b>pcre2demo</b> section (which is a program
|
||||
listing), and the short pages for individual functions, are concatenated in
|
||||
<b>pcre2.txt</b>, for ease of searching. The sections are as follows:
|
||||
<pre>
|
||||
pcre2 this document
|
||||
pcre2-config show PCRE2 installation configuration information
|
||||
pcre2api details of PCRE2's native C API
|
||||
pcre2build building PCRE2
|
||||
pcre2callout details of the callout feature
|
||||
pcre2compat discussion of Perl compatibility
|
||||
pcre2demo a demonstration C program that uses PCRE2
|
||||
pcre2grep description of the <b>pcre2grep</b> command (8-bit only)
|
||||
pcre2jit discussion of just-in-time optimization support
|
||||
pcre2limits details of size and other limits
|
||||
pcre2matching discussion of the two matching algorithms
|
||||
pcre2partial details of the partial matching facility
|
||||
pcre2pattern syntax and semantics of supported regular expression patterns
|
||||
pcre2perform discussion of performance issues
|
||||
pcre2posix the POSIX-compatible C API for the 8-bit library
|
||||
pcre2sample discussion of the pcre2demo program
|
||||
pcre2stack discussion of stack usage
|
||||
pcre2syntax quick syntax reference
|
||||
pcre2test description of the <b>pcre2test</b> command
|
||||
pcre2unicode discussion of Unicode and UTF support
|
||||
</pre>
|
||||
In the "man" and HTML formats, there is also a short page for each C library
|
||||
function, listing its arguments and results.
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
University Computing Service
|
||||
<br>
|
||||
Cambridge, England.
|
||||
<br>
|
||||
</P>
|
||||
<P>
|
||||
Putting an actual email address here is a spam magnet. If you want to email me,
|
||||
use my two initials, followed by the two digits 10, at the domain cam.ac.uk.
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 13 April 2015
|
||||
<br>
|
||||
Copyright © 1997-2015 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
62
pcre2/doc/html/pcre2_callout_enumerate.html
Normal file
62
pcre2/doc/html/pcre2_callout_enumerate.html
Normal file
@ -0,0 +1,62 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_callout_enumerate specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_callout_enumerate man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_callout_enumerate(const pcre2_code *<i>code</i>,</b>
|
||||
<b> int (*<i>callback</i>)(pcre2_callout_enumerate_block *, void *),</b>
|
||||
<b> void *<i>callout_data</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function scans a compiled regular expression and calls the <i>callback()</i>
|
||||
function for each callout within the pattern. The yield of the function is zero
|
||||
for success and non-zero otherwise. The arguments are:
|
||||
<pre>
|
||||
<i>code</i> Points to the compiled pattern
|
||||
<i>callback</i> The callback function
|
||||
<i>callout_data</i> User data that is passed to the callback
|
||||
</pre>
|
||||
The <i>callback()</i> function is passed a pointer to a data block containing
|
||||
the following fields:
|
||||
<pre>
|
||||
<i>version</i> Block version number
|
||||
<i>pattern_position</i> Offset to next item in pattern
|
||||
<i>next_item_length</i> Length of next item in pattern
|
||||
<i>callout_number</i> Number for numbered callouts
|
||||
<i>callout_string_offset</i> Offset to string within pattern
|
||||
<i>callout_string_length</i> Length of callout string
|
||||
<i>callout_string</i> Points to callout string or is NULL
|
||||
</pre>
|
||||
The second argument is the callout data that was passed to
|
||||
<b>pcre2_callout_enumerate()</b>. The <i>callback()</i> function must return zero
|
||||
for success. Any other value causes the pattern scan to stop, with the value
|
||||
being passed back as the result of <b>pcre2_callout_enumerate()</b>.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
39
pcre2/doc/html/pcre2_code_free.html
Normal file
39
pcre2/doc/html/pcre2_code_free.html
Normal file
@ -0,0 +1,39 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_code_free specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_code_free man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>pcre2_code_free(pcre2_code *<i>code</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function frees the memory used for a compiled pattern, including any
|
||||
memory used by the JIT compiler.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
91
pcre2/doc/html/pcre2_compile.html
Normal file
91
pcre2/doc/html/pcre2_compile.html
Normal file
@ -0,0 +1,91 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_compile specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_compile man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>pcre2_code *pcre2_compile(PCRE2_SPTR <i>pattern</i>, PCRE2_SIZE <i>length</i>,</b>
|
||||
<b> uint32_t <i>options</i>, int *<i>errorcode</i>, PCRE2_SIZE *<i>erroroffset</i>,</b>
|
||||
<b> pcre2_compile_context *<i>ccontext</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function compiles a regular expression pattern into an internal form. Its
|
||||
arguments are:
|
||||
<pre>
|
||||
<i>pattern</i> A string containing the expression to be compiled
|
||||
<i>length</i> The length of the string or PCRE2_ZERO_TERMINATED
|
||||
<i>options</i> Option bits
|
||||
<i>errorcode</i> Where to put an error code
|
||||
<i>erroroffset</i> Where to put an error offset
|
||||
<i>ccontext</i> Pointer to a compile context or NULL
|
||||
</pre>
|
||||
The length of the string and any error offset that is returned are in code
|
||||
units, not characters. A compile context is needed only if you want to change
|
||||
<pre>
|
||||
What \R matches (Unicode newlines or CR, LF, CRLF only)
|
||||
PCRE2's character tables
|
||||
The newline character sequence
|
||||
The compile time nested parentheses limit
|
||||
</pre>
|
||||
or provide an external function for stack size checking. The option bits are:
|
||||
<pre>
|
||||
PCRE2_ANCHORED Force pattern anchoring
|
||||
PCRE2_ALT_BSUX Alternative handling of \u, \U, and \x
|
||||
PCRE2_ALT_CIRCUMFLEX Alternative handling of ^ in multiline mode
|
||||
PCRE2_AUTO_CALLOUT Compile automatic callouts
|
||||
PCRE2_CASELESS Do caseless matching
|
||||
PCRE2_DOLLAR_ENDONLY $ not to match newline at end
|
||||
PCRE2_DOTALL . matches anything including NL
|
||||
PCRE2_DUPNAMES Allow duplicate names for subpatterns
|
||||
PCRE2_EXTENDED Ignore white space and # comments
|
||||
PCRE2_FIRSTLINE Force matching to be before newline
|
||||
PCRE2_MATCH_UNSET_BACKREF Match unset back references
|
||||
PCRE2_MULTILINE ^ and $ match newlines within data
|
||||
PCRE2_NEVER_BACKSLASH_C Lock out the use of \C in patterns
|
||||
PCRE2_NEVER_UCP Lock out PCRE2_UCP, e.g. via (*UCP)
|
||||
PCRE2_NEVER_UTF Lock out PCRE2_UTF, e.g. via (*UTF)
|
||||
PCRE2_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
||||
theses (named ones available)
|
||||
PCRE2_NO_AUTO_POSSESS Disable auto-possessification
|
||||
PCRE2_NO_DOTSTAR_ANCHOR Disable automatic anchoring for .*
|
||||
PCRE2_NO_START_OPTIMIZE Disable match-time start optimizations
|
||||
PCRE2_NO_UTF_CHECK Do not check the pattern for UTF validity
|
||||
(only relevant if PCRE2_UTF is set)
|
||||
PCRE2_UCP Use Unicode properties for \d, \w, etc.
|
||||
PCRE2_UNGREEDY Invert greediness of quantifiers
|
||||
PCRE2_UTF Treat pattern and subjects as UTF strings
|
||||
</pre>
|
||||
PCRE2 must be built with Unicode support in order to use PCRE2_UTF, PCRE2_UCP
|
||||
and related options.
|
||||
</P>
|
||||
<P>
|
||||
The yield of the function is a pointer to a private data structure that
|
||||
contains the compiled pattern, or NULL if an error was detected.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
41
pcre2/doc/html/pcre2_compile_context_copy.html
Normal file
41
pcre2/doc/html/pcre2_compile_context_copy.html
Normal file
@ -0,0 +1,41 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_compile_context_copy specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_compile_context_copy man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>pcre2_compile_context *pcre2_compile_context_copy(</b>
|
||||
<b> pcre2_compile_context *<i>ccontext</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function makes a new copy of a compile context, using the memory
|
||||
allocation function that was used for the original context. The result is NULL
|
||||
if the memory cannot be obtained.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
42
pcre2/doc/html/pcre2_compile_context_create.html
Normal file
42
pcre2/doc/html/pcre2_compile_context_create.html
Normal file
@ -0,0 +1,42 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_compile_context_create specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_compile_context_create man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>pcre2_compile_context *pcre2_compile_context_create(</b>
|
||||
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function creates and initializes a new compile context. If its argument is
|
||||
NULL, <b>malloc()</b> is used to get the necessary memory; otherwise the memory
|
||||
allocation function within the general context is used. The result is NULL if
|
||||
the memory could not be obtained.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
40
pcre2/doc/html/pcre2_compile_context_free.html
Normal file
40
pcre2/doc/html/pcre2_compile_context_free.html
Normal file
@ -0,0 +1,40 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_compile_context_free specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_compile_context_free man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>void pcre2_compile_context_free(pcre2_compile_context *<i>ccontext</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function frees the memory occupied by a compile context, using the memory
|
||||
freeing function from the general context with which it was created, or
|
||||
<b>free()</b> if that was not set.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
83
pcre2/doc/html/pcre2_config.html
Normal file
83
pcre2/doc/html/pcre2_config.html
Normal file
@ -0,0 +1,83 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_config specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_config man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_config(uint32_t <i>what</i>, void *<i>where</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function makes it possible for a client program to find out which optional
|
||||
features are available in the version of the PCRE2 library it is using. The
|
||||
arguments are as follows:
|
||||
<pre>
|
||||
<i>what</i> A code specifying what information is required
|
||||
<i>where</i> Points to where to put the information
|
||||
</pre>
|
||||
If <i>where</i> is NULL, the function returns the amount of memory needed for
|
||||
the requested information. When the information is a string, the value is in
|
||||
code units; for other types of data it is in bytes.
|
||||
</P>
|
||||
<P>
|
||||
If <i>where</i> is not NULL, for PCRE2_CONFIG_JITTARGET,
|
||||
PCRE2_CONFIG_UNICODE_VERSION, and PCRE2_CONFIG_VERSION it must point to a
|
||||
buffer that is large enough to hold the string. For all other codes it must
|
||||
point to a uint32_t integer variable. The available codes are:
|
||||
<pre>
|
||||
PCRE2_CONFIG_BSR Indicates what \R matches by default:
|
||||
PCRE2_BSR_UNICODE
|
||||
PCRE2_BSR_ANYCRLF
|
||||
PCRE2_CONFIG_JIT Availability of just-in-time compiler
|
||||
support (1=yes 0=no)
|
||||
PCRE2_CONFIG_JITTARGET Information about the target archi-
|
||||
tecture for the JIT compiler
|
||||
PCRE2_CONFIG_LINKSIZE Configured internal link size (2, 3, 4)
|
||||
PCRE2_CONFIG_MATCHLIMIT Default internal resource limit
|
||||
PCRE2_CONFIG_NEWLINE Code for the default newline sequence:
|
||||
PCRE2_NEWLINE_CR
|
||||
PCRE2_NEWLINE_LF
|
||||
PCRE2_NEWLINE_CRLF
|
||||
PCRE2_NEWLINE_ANY
|
||||
PCRE2_NEWLINE_ANYCRLF
|
||||
PCRE2_CONFIG_PARENSLIMIT Default parentheses nesting limit
|
||||
PCRE2_CONFIG_RECURSIONLIMIT Internal recursion depth limit
|
||||
PCRE2_CONFIG_STACKRECURSE Recursion implementation (1=stack
|
||||
0=heap)
|
||||
PCRE2_CONFIG_UNICODE Availability of Unicode support (1=yes
|
||||
0=no)
|
||||
PCRE2_CONFIG_UNICODE_VERSION The Unicode version (a string)
|
||||
PCRE2_CONFIG_VERSION The PCRE2 version (a string)
|
||||
</pre>
|
||||
The function yields a non-negative value on success or the negative value
|
||||
PCRE2_ERROR_BADOPTION otherwise. This is also the result for the
|
||||
PCRE2_CONFIG_JITTARGET code if JIT support is not available. When a string is
|
||||
requested, the function returns the number of code units used, including the
|
||||
terminating zero.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
79
pcre2/doc/html/pcre2_dfa_match.html
Normal file
79
pcre2/doc/html/pcre2_dfa_match.html
Normal file
@ -0,0 +1,79 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_dfa_match specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_dfa_match man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_dfa_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
||||
<b> uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
|
||||
<b> pcre2_match_context *<i>mcontext</i>,</b>
|
||||
<b> int *<i>workspace</i>, PCRE2_SIZE <i>wscount</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function matches a compiled regular expression against a given subject
|
||||
string, using an alternative matching algorithm that scans the subject string
|
||||
just once (<i>not</i> Perl-compatible). (The Perl-compatible matching function
|
||||
is <b>pcre2_match()</b>.) The arguments for this function are:
|
||||
<pre>
|
||||
<i>code</i> Points to the compiled pattern
|
||||
<i>subject</i> Points to the subject string
|
||||
<i>length</i> Length of the subject string
|
||||
<i>startoffset</i> Offset in the subject at which to start matching
|
||||
<i>options</i> Option bits
|
||||
<i>match_data</i> Points to a match data block, for results
|
||||
<i>mcontext</i> Points to a match context, or is NULL
|
||||
<i>workspace</i> Points to a vector of ints used as working space
|
||||
<i>wscount</i> Number of elements in the vector
|
||||
</pre>
|
||||
For <b>pcre2_dfa_match()</b>, a match context is needed only if you want to set
|
||||
up a callout function. The <i>length</i> and <i>startoffset</i> values are code
|
||||
units, not characters. The options are:
|
||||
<pre>
|
||||
PCRE2_ANCHORED Match only at the first position
|
||||
PCRE2_NOTBOL Subject is not the beginning of a line
|
||||
PCRE2_NOTEOL Subject is not the end of a line
|
||||
PCRE2_NOTEMPTY An empty string is not a valid match
|
||||
PCRE2_NOTEMPTY_ATSTART An empty string at the start of the subject
|
||||
is not a valid match
|
||||
PCRE2_NO_UTF_CHECK Do not check the subject for UTF
|
||||
validity (only relevant if PCRE2_UTF
|
||||
was set at compile time)
|
||||
PCRE2_PARTIAL_SOFT Return PCRE2_ERROR_PARTIAL for a partial
|
||||
match if no full matches are found
|
||||
PCRE2_PARTIAL_HARD Return PCRE2_ERROR_PARTIAL for a partial match
|
||||
even if there is a full match as well
|
||||
PCRE2_DFA_RESTART Restart after a partial match
|
||||
PCRE2_DFA_SHORTEST Return only the shortest match
|
||||
</pre>
|
||||
There are restrictions on what may appear in a pattern when using this matching
|
||||
function. Details are given in the
|
||||
<a href="pcre2matching.html"><b>pcre2matching</b></a>
|
||||
documentation. For details of partial matching, see the
|
||||
<a href="pcre2partial.html"><b>pcre2partial</b></a>
|
||||
page. There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
42
pcre2/doc/html/pcre2_general_context_copy.html
Normal file
42
pcre2/doc/html/pcre2_general_context_copy.html
Normal file
@ -0,0 +1,42 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_general_context_copy specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_general_context_copy man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>pcre2_general_context *pcre2_general_context_copy(</b>
|
||||
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function makes a new copy of a general context, using the memory
|
||||
allocation functions in the context, if set, to get the necessary memory.
|
||||
Otherwise <b>malloc()</b> is used. The result is NULL if the memory cannot be
|
||||
obtained.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
44
pcre2/doc/html/pcre2_general_context_create.html
Normal file
44
pcre2/doc/html/pcre2_general_context_create.html
Normal file
@ -0,0 +1,44 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_general_context_create specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_general_context_create man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>pcre2_general_context *pcre2_general_context_create(</b>
|
||||
<b> void *(*<i>private_malloc</i>)(PCRE2_SIZE, void *),</b>
|
||||
<b> void (*<i>private_free</i>)(void *, void *), void *<i>memory_data</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function creates and initializes a general context. The arguments define
|
||||
custom memory management functions and a data value that is passed to them when
|
||||
they are called. The <b>private_malloc()</b> function is used to get memory for
|
||||
the context. If either of the first two arguments is NULL, the system memory
|
||||
management function is used. The result is NULL if no memory could be obtained.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
39
pcre2/doc/html/pcre2_general_context_free.html
Normal file
39
pcre2/doc/html/pcre2_general_context_free.html
Normal file
@ -0,0 +1,39 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_general_context_free specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_general_context_free man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>void pcre2_general_context_free(pcre2_general_context *<i>gcontext</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function frees the memory occupied by a general context, using the memory
|
||||
freeing function within the context, if set.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
48
pcre2/doc/html/pcre2_get_error_message.html
Normal file
48
pcre2/doc/html/pcre2_get_error_message.html
Normal file
@ -0,0 +1,48 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_get_error_message specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_get_error_message man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_get_error_message(int <i>errorcode</i>, PCRE2_UCHAR *<i>buffer</i>,</b>
|
||||
<b> PCRE2_SIZE <i>bufflen</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function provides a textual error message for each PCRE2 error code.
|
||||
Compilation errors are positive numbers; UTF formatting errors and matching
|
||||
errors are negative numbers. The arguments are:
|
||||
<pre>
|
||||
<i>errorcode</i> an error code (positive or negative)
|
||||
<i>buffer</i> where to put the message
|
||||
<i>bufflen</i> the length of the buffer (code units)
|
||||
</pre>
|
||||
The function returns the length of the message, excluding the trailing zero, or
|
||||
a negative error code if the buffer is too small.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
43
pcre2/doc/html/pcre2_get_mark.html
Normal file
43
pcre2/doc/html/pcre2_get_mark.html
Normal file
@ -0,0 +1,43 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_get_mark specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_get_mark man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>PCRE2_SPTR pcre2_get_mark(pcre2_match_data *<i>match_data</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
After a call of <b>pcre2_match()</b> that was passed the match block that is
|
||||
this function's argument, this function returns a pointer to the last (*MARK)
|
||||
name that was encountered. The name is zero-terminated, and is within the
|
||||
compiled pattern. If no (*MARK) name is available, NULL is returned. A (*MARK)
|
||||
name may be available after a failed match or a partial match, as well as after
|
||||
a successful one.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
39
pcre2/doc/html/pcre2_get_ovector_count.html
Normal file
39
pcre2/doc/html/pcre2_get_ovector_count.html
Normal file
@ -0,0 +1,39 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_get_ovector_count specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_get_ovector_count man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>uint32_t pcre2_get_ovector_count(pcre2_match_data *<i>match_data</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function returns the number of pairs of offsets in the ovector that forms
|
||||
part of the given match data block.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
40
pcre2/doc/html/pcre2_get_ovector_pointer.html
Normal file
40
pcre2/doc/html/pcre2_get_ovector_pointer.html
Normal file
@ -0,0 +1,40 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_get_ovector_pointer specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_get_ovector_pointer man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *<i>match_data</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function returns a pointer to the vector of offsets that forms part of the
|
||||
given match data block. The number of pairs can be found by calling
|
||||
<b>pcre2_get_ovector_count()</b>.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
44
pcre2/doc/html/pcre2_get_startchar.html
Normal file
44
pcre2/doc/html/pcre2_get_startchar.html
Normal file
@ -0,0 +1,44 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_get_startchar specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_get_startchar man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *<i>match_data</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
After a successful call of <b>pcre2_match()</b> that was passed the match block
|
||||
that is this function's argument, this function returns the code unit offset of
|
||||
the character at which the successful match started. For a non-partial match,
|
||||
this can be different to the value of <i>ovector[0]</i> if the pattern contains
|
||||
the \K escape sequence. After a partial match, however, this value is always
|
||||
the same as <i>ovector[0]</i> because \K does not affect the result of a
|
||||
partial match.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
56
pcre2/doc/html/pcre2_jit_compile.html
Normal file
56
pcre2/doc/html/pcre2_jit_compile.html
Normal file
@ -0,0 +1,56 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_jit_compile specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_jit_compile man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_jit_compile(pcre2_code *<i>code</i>, uint32_t <i>options</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function requests JIT compilation, which, if the just-in-time compiler is
|
||||
available, further processes a compiled pattern into machine code that executes
|
||||
much faster than the <b>pcre2_match()</b> interpretive matching function. Full
|
||||
details are given in the
|
||||
<a href="pcre2jit.html"><b>pcre2jit</b></a>
|
||||
documentation.
|
||||
</P>
|
||||
<P>
|
||||
The first argument is a pointer that was returned by a successful call to
|
||||
<b>pcre2_compile()</b>, and the second must contain one or more of the following
|
||||
bits:
|
||||
<pre>
|
||||
PCRE2_JIT_COMPLETE compile code for full matching
|
||||
PCRE2_JIT_PARTIAL_SOFT compile code for soft partial matching
|
||||
PCRE2_JIT_PARTIAL_HARD compile code for hard partial matching
|
||||
</pre>
|
||||
The yield of the function is 0 for success, or a negative error code otherwise.
|
||||
In particular, PCRE2_ERROR_JIT_BADOPTION is returned if JIT is not supported or
|
||||
if an unknown bit is set in <i>options</i>.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
43
pcre2/doc/html/pcre2_jit_free_unused_memory.html
Normal file
43
pcre2/doc/html/pcre2_jit_free_unused_memory.html
Normal file
@ -0,0 +1,43 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_jit_free_unused_memory specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_jit_free_unused_memory man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>void pcre2_jit_free_unused_memory(pcre2_general_context *<i>gcontext</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function frees unused JIT executable memory. The argument is a general
|
||||
context, for custom memory management, or NULL for standard memory management.
|
||||
JIT memory allocation retains some memory in order to improve future JIT
|
||||
compilation speed. In low memory conditions,
|
||||
<b>pcre2_jit_free_unused_memory()</b> can be used to cause this memory to be
|
||||
freed.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
58
pcre2/doc/html/pcre2_jit_match.html
Normal file
58
pcre2/doc/html/pcre2_jit_match.html
Normal file
@ -0,0 +1,58 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_jit_match specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_jit_match man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_jit_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
||||
<b> uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
|
||||
<b> pcre2_match_context *<i>mcontext</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function matches a compiled regular expression that has been successfully
|
||||
processed by the JIT compiler against a given subject string, using a matching
|
||||
algorithm that is similar to Perl's. It is a "fast path" interface to JIT, and
|
||||
it bypasses some of the sanity checks that <b>pcre2_match()</b> applies.
|
||||
Its arguments are exactly the same as for
|
||||
<a href="pcre2_match.html"><b>pcre2_match()</b></a>.
|
||||
</P>
|
||||
<P>
|
||||
The supported options are PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY,
|
||||
PCRE2_NOTEMPTY_ATSTART, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Unsupported
|
||||
options are ignored. The subject string is not checked for UTF validity.
|
||||
</P>
|
||||
<P>
|
||||
The return values are the same as for <b>pcre2_match()</b> plus
|
||||
PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or complete) is requested
|
||||
that was not compiled. For details of partial matching, see the
|
||||
<a href="pcre2partial.html"><b>pcre2partial</b></a>
|
||||
page.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the JIT API in the
|
||||
<a href="pcre2jit.html"><b>pcre2jit</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
70
pcre2/doc/html/pcre2_jit_stack_assign.html
Normal file
70
pcre2/doc/html/pcre2_jit_stack_assign.html
Normal file
@ -0,0 +1,70 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_jit_stack_assign specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_jit_stack_assign man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>void pcre2_jit_stack_assign(pcre2_match_context *<i>mcontext</i>,</b>
|
||||
<b> pcre2_jit_callback <i>callback_function</i>, void *<i>callback_data</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function provides control over the memory used by JIT as a run-time stack
|
||||
when <b>pcre2_match()</b> or <b>pcre2_jit_match()</b> is called with a pattern
|
||||
that has been successfully processed by the JIT compiler. The information that
|
||||
determines which stack is used is put into a match context that is subsequently
|
||||
passed to a matching function. The arguments of this function are:
|
||||
<pre>
|
||||
mcontext a pointer to a match context
|
||||
callback a callback function
|
||||
callback_data a JIT stack or a value to be passed to the callback
|
||||
</PRE>
|
||||
</P>
|
||||
<P>
|
||||
If <i>callback</i> is NULL and <i>callback_data</i> is NULL, an internal 32K
|
||||
block on the machine stack is used.
|
||||
</P>
|
||||
<P>
|
||||
If <i>callback</i> is NULL and <i>callback_data</i> is not NULL,
|
||||
<i>callback_data</i> must be a valid JIT stack, the result of calling
|
||||
<b>pcre2_jit_stack_create()</b>.
|
||||
</P>
|
||||
<P>
|
||||
If <i>callback</i> is not NULL, it is called with <i>callback_data</i> as an
|
||||
argument at the start of matching, in order to set up a JIT stack. If the
|
||||
result is NULL, the internal 32K stack is used; otherwise the return value must
|
||||
be a valid JIT stack, the result of calling <b>pcre2_jit_stack_create()</b>.
|
||||
</P>
|
||||
<P>
|
||||
You may safely use the same JIT stack for multiple patterns, as long as they
|
||||
are all matched in the same thread. In a multithread application, each thread
|
||||
must use its own JIT stack. For more details, see the
|
||||
<a href="pcre2jit.html"><b>pcre2jit</b></a>
|
||||
page.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
50
pcre2/doc/html/pcre2_jit_stack_create.html
Normal file
50
pcre2/doc/html/pcre2_jit_stack_create.html
Normal file
@ -0,0 +1,50 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_jit_stack_create specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_jit_stack_create man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>pcre2_jit_stack *pcre2_jit_stack_create(PCRE2_SIZE <i>startsize</i>,</b>
|
||||
<b> PCRE2_SIZE <i>maxsize</i>, pcre2_general_context *<i>gcontext</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function is used to create a stack for use by the code compiled by the JIT
|
||||
compiler. The first two arguments are a starting size for the stack, and a
|
||||
maximum size to which it is allowed to grow. The final argument is a general
|
||||
context, for memory allocation functions, or NULL for standard memory
|
||||
allocation. The result can be passed to the JIT run-time code by calling
|
||||
<b>pcre2_jit_stack_assign()</b> to associate the stack with a compiled pattern,
|
||||
which can then be processed by <b>pcre2_match()</b>. If the "fast path" JIT
|
||||
matcher, <b>pcre2_jit_match()</b> is used, the stack can be passed directly as
|
||||
an argument. A maximum stack size of 512K to 1M should be more than enough for
|
||||
any pattern. For more details, see the
|
||||
<a href="pcre2jit.html"><b>pcre2jit</b></a>
|
||||
page.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
42
pcre2/doc/html/pcre2_jit_stack_free.html
Normal file
42
pcre2/doc/html/pcre2_jit_stack_free.html
Normal file
@ -0,0 +1,42 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_jit_stack_free specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_jit_stack_free man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>void pcre2_jit_stack_free(pcre2_jit_stack *<i>jit_stack</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function is used to free a JIT stack that was created by
|
||||
<b>pcre2_jit_stack_create()</b> when it is no longer needed. For more details,
|
||||
see the
|
||||
<a href="pcre2jit.html"><b>pcre2jit</b></a>
|
||||
page.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
48
pcre2/doc/html/pcre2_maketables.html
Normal file
48
pcre2/doc/html/pcre2_maketables.html
Normal file
@ -0,0 +1,48 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_maketables specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_maketables man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>const unsigned char *pcre2_maketables(pcre2_general_context *<i>gcontext</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function builds a set of character tables for character values less than
|
||||
256. These can be passed to <b>pcre2_compile()</b> in a compile context in order
|
||||
to override the internal, built-in tables (which were either defaulted or made
|
||||
by <b>pcre2_maketables()</b> when PCRE2 was compiled). See the
|
||||
<a href="pcre2_set_character_tables.html"><b>pcre2_set_character_tables()</b></a>
|
||||
page. You might want to do this if you are using a non-standard locale.
|
||||
</P>
|
||||
<P>
|
||||
If the argument is NULL, <b>malloc()</b> is used to get memory for the tables.
|
||||
Otherwise it must point to a general context, which can supply pointers to a
|
||||
custom memory manager. The function yields a pointer to the tables.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
76
pcre2/doc/html/pcre2_match.html
Normal file
76
pcre2/doc/html/pcre2_match.html
Normal file
@ -0,0 +1,76 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_match specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_match man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
||||
<b> uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
|
||||
<b> pcre2_match_context *<i>mcontext</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function matches a compiled regular expression against a given subject
|
||||
string, using a matching algorithm that is similar to Perl's. It returns
|
||||
offsets to captured substrings. Its arguments are:
|
||||
<pre>
|
||||
<i>code</i> Points to the compiled pattern
|
||||
<i>subject</i> Points to the subject string
|
||||
<i>length</i> Length of the subject string
|
||||
<i>startoffset</i> Offset in the subject at which to start matching
|
||||
<i>options</i> Option bits
|
||||
<i>match_data</i> Points to a match data block, for results
|
||||
<i>mcontext</i> Points to a match context, or is NULL
|
||||
</pre>
|
||||
A match context is needed only if you want to:
|
||||
<pre>
|
||||
Set up a callout function
|
||||
Change the limit for calling the internal function <i>match()</i>
|
||||
Change the limit for calling <i>match()</i> recursively
|
||||
Set custom memory management when the heap is used for recursion
|
||||
</pre>
|
||||
The <i>length</i> and <i>startoffset</i> values are code
|
||||
units, not characters. The options are:
|
||||
<pre>
|
||||
PCRE2_ANCHORED Match only at the first position
|
||||
PCRE2_NOTBOL Subject string is not the beginning of a line
|
||||
PCRE2_NOTEOL Subject string is not the end of a line
|
||||
PCRE2_NOTEMPTY An empty string is not a valid match
|
||||
PCRE2_NOTEMPTY_ATSTART An empty string at the start of the subject
|
||||
is not a valid match
|
||||
PCRE2_NO_UTF_CHECK Do not check the subject for UTF
|
||||
validity (only relevant if PCRE2_UTF
|
||||
was set at compile time)
|
||||
PCRE2_PARTIAL_SOFT Return PCRE2_ERROR_PARTIAL for a partial
|
||||
match if no full matches are found
|
||||
PCRE2_PARTIAL_HARD Return PCRE2_ERROR_PARTIAL for a partial match
|
||||
if that is found before a full match
|
||||
</pre>
|
||||
For details of partial matching, see the
|
||||
<a href="pcre2partial.html"><b>pcre2partial</b></a>
|
||||
page. There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
41
pcre2/doc/html/pcre2_match_context_copy.html
Normal file
41
pcre2/doc/html/pcre2_match_context_copy.html
Normal file
@ -0,0 +1,41 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_match_context_copy specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_match_context_copy man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>pcre2_match_context *pcre2_match_context_copy(</b>
|
||||
<b> pcre2_match_context *<i>mcontext</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function makes a new copy of a match context, using the memory
|
||||
allocation function that was used for the original context. The result is NULL
|
||||
if the memory cannot be obtained.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
42
pcre2/doc/html/pcre2_match_context_create.html
Normal file
42
pcre2/doc/html/pcre2_match_context_create.html
Normal file
@ -0,0 +1,42 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_match_context_create specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_match_context_create man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include <pcre2.h></b>
|
||||
</P>
|
||||
<P>
|
||||
<b>pcre2_match_context *pcre2_match_context_create(</b>
|
||||
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function creates and initializes a new match context. If its argument is
|
||||
NULL, <b>malloc()</b> is used to get the necessary memory; otherwise the memory
|
||||
allocation function within the general context is used. The result is NULL if
|
||||
the memory could not be obtained.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
40
pcre2/doc/html/pcre2_match_context_free.html
Normal file
40
pcre2/doc/html/pcre2_match_context_free.html
Normal file
@ -0,0 +1,40 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_match_context_free specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_match_context_free man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include <pcre2.h></b>
|
||||
</P>
|
||||
<P>
|
||||
<b>void pcre2_match_context_free(pcre2_match_context *<i>mcontext</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function frees the memory occupied by a match context, using the memory
|
||||
freeing function from the general context with which it was created, or
|
||||
<b>free()</b> if that was not set.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
49
pcre2/doc/html/pcre2_match_data_create.html
Normal file
49
pcre2/doc/html/pcre2_match_data_create.html
Normal file
@ -0,0 +1,49 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_match_data_create specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_match_data_create man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include <pcre2.h></b>
|
||||
</P>
|
||||
<P>
|
||||
<b>pcre2_match_data *pcre2_match_data_create(uint32_t <i>ovecsize</i>,</b>
|
||||
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function creates a new match data block, which is used for holding the
|
||||
result of a match. The first argument specifies the number of pairs of offsets
|
||||
that are required. These form the "output vector" (ovector) within the match
|
||||
data block, and are used to identify the matched string and any captured
|
||||
substrings. There is always one pair of offsets; if <b>ovecsize</b> is zero, it
|
||||
is treated as one.
|
||||
</P>
|
||||
<P>
|
||||
The second argument points to a general context, for custom memory management,
|
||||
or is NULL for system memory management. The result of the function is NULL if
|
||||
the memory for the block could not be obtained.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
50
pcre2/doc/html/pcre2_match_data_create_from_pattern.html
Normal file
50
pcre2/doc/html/pcre2_match_data_create_from_pattern.html
Normal file
@ -0,0 +1,50 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_match_data_create_from_pattern specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_match_data_create_from_pattern man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include <pcre2.h></b>
|
||||
</P>
|
||||
<P>
|
||||
<b>pcre2_match_data *pcre2_match_data_create_from_pattern(const pcre2_code *<i>code</i>,</b>
|
||||
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function creates a new match data block, which is used for holding the
|
||||
result of a match. The first argument points to a compiled pattern. The number
|
||||
of capturing parentheses within the pattern is used to compute the number of
|
||||
pairs of offsets that are required in the match data block. These form the
|
||||
"output vector" (ovector) within the match data block, and are used to identify
|
||||
the matched string and any captured substrings.
|
||||
</P>
|
||||
<P>
|
||||
The second argument points to a general context, for custom memory management,
|
||||
or is NULL to use the same memory allocator as was used for the compiled
|
||||
pattern. The result of the function is NULL if the memory for the block could
|
||||
not be obtained.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
40
pcre2/doc/html/pcre2_match_data_free.html
Normal file
40
pcre2/doc/html/pcre2_match_data_free.html
Normal file
@ -0,0 +1,40 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_match_data_free specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_match_data_free man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include <pcre2.h></b>
|
||||
</P>
|
||||
<P>
|
||||
<b>void pcre2_match_data_free(pcre2_match_data *<i>match_data</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function frees the memory occupied by a match data block, using the memory
|
||||
freeing function from the general context with which it was created, or
|
||||
<b>free()</b> if that was not set.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
106
pcre2/doc/html/pcre2_pattern_info.html
Normal file
106
pcre2/doc/html/pcre2_pattern_info.html
Normal file
@ -0,0 +1,106 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_pattern_info specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_pattern_info man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include <pcre2.h></b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_pattern_info(const pcre2_code *<i>code</i>, uint32_t <i>what</i>, void *<i>where</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function returns information about a compiled pattern. Its arguments are:
|
||||
<pre>
|
||||
<i>code</i> Pointer to a compiled regular expression
|
||||
<i>what</i> What information is required
|
||||
<i>where</i> Where to put the information
|
||||
</pre>
|
||||
The recognized values for the <i>what</i> argument, and the information they
|
||||
request are as follows:
|
||||
<pre>
|
||||
PCRE2_INFO_ALLOPTIONS Final options after compiling
|
||||
PCRE2_INFO_ARGOPTIONS Options passed to <b>pcre2_compile()</b>
|
||||
PCRE2_INFO_BACKREFMAX Number of highest back reference
|
||||
PCRE2_INFO_BSR What \R matches:
|
||||
PCRE2_BSR_UNICODE: Unicode line endings
|
||||
PCRE2_BSR_ANYCRLF: CR, LF, or CRLF only
|
||||
PCRE2_INFO_CAPTURECOUNT Number of capturing subpatterns
|
||||
PCRE2_INFO_FIRSTBITMAP Bitmap of first code units, or NULL
|
||||
PCRE2_INFO_FIRSTCODEUNIT First code unit when type is 1
|
||||
PCRE2_INFO_FIRSTCODETYPE Type of start-of-match information
|
||||
0 nothing set
|
||||
1 first code unit is set
|
||||
2 start of string or after newline
|
||||
PCRE2_INFO_HASCRORLF Return 1 if explicit CR or LF matches
|
||||
exist in the pattern
|
||||
PCRE2_INFO_JCHANGED Return 1 if (?J) or (?-J) was used
|
||||
PCRE2_INFO_JITSIZE Size of JIT compiled code, or 0
|
||||
PCRE2_INFO_LASTCODEUNIT Last code unit when type is 1
|
||||
PCRE2_INFO_LASTCODETYPE Type of must-be-present information
|
||||
0 nothing set
|
||||
1 code unit is set
|
||||
PCRE2_INFO_MATCHEMPTY 1 if the pattern can match an
|
||||
empty string, 0 otherwise
|
||||
PCRE2_INFO_MATCHLIMIT Match limit if set,
|
||||
otherwise PCRE2_ERROR_UNSET
|
||||
PCRE2_INFO_MAXLOOKBEHIND Length (in characters) of the longest
|
||||
lookbehind assertion
|
||||
PCRE2_INFO_MINLENGTH Lower bound length of matching strings
|
||||
PCRE2_INFO_NAMEENTRYSIZE Size of name table entries
|
||||
PCRE2_INFO_NAMECOUNT Number of named subpatterns
|
||||
PCRE2_INFO_NAMETABLE Pointer to name table
|
||||
  PCRE2_INFO_NEWLINE Code for the newline sequence:
|
||||
PCRE2_NEWLINE_CR
|
||||
PCRE2_NEWLINE_LF
|
||||
PCRE2_NEWLINE_CRLF
|
||||
PCRE2_NEWLINE_ANY
|
||||
PCRE2_NEWLINE_ANYCRLF
|
||||
PCRE2_INFO_RECURSIONLIMIT Recursion limit if set,
|
||||
otherwise PCRE2_ERROR_UNSET
|
||||
PCRE2_INFO_SIZE Size of compiled pattern
|
||||
</pre>
|
||||
If <i>where</i> is NULL, the function returns the amount of memory needed for
|
||||
the requested information, in bytes. Otherwise, the <i>where</i> argument must
|
||||
point to an unsigned 32-bit integer (uint32_t variable), except for the
|
||||
following <i>what</i> values, when it must point to a variable of the type
|
||||
shown:
|
||||
<pre>
|
||||
PCRE2_INFO_FIRSTBITMAP const uint8_t *
|
||||
PCRE2_INFO_JITSIZE size_t
|
||||
PCRE2_INFO_NAMETABLE PCRE2_SPTR
|
||||
PCRE2_INFO_SIZE size_t
|
||||
</pre>
|
||||
The yield of the function is zero on success or:
|
||||
<pre>
|
||||
PCRE2_ERROR_NULL the argument <i>code</i> is NULL
|
||||
PCRE2_ERROR_BADMAGIC the "magic number" was not found
|
||||
PCRE2_ERROR_BADOPTION the value of <i>what</i> is invalid
|
||||
PCRE2_ERROR_BADMODE the pattern was compiled in the wrong mode
|
||||
PCRE2_ERROR_UNSET the requested information is not set
|
||||
</PRE>
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
62
pcre2/doc/html/pcre2_serialize_decode.html
Normal file
62
pcre2/doc/html/pcre2_serialize_decode.html
Normal file
@ -0,0 +1,62 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_serialize_decode specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_serialize_decode man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include <pcre2.h></b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int32_t pcre2_serialize_decode(pcre2_code **<i>codes</i>,</b>
|
||||
<b> int32_t <i>number_of_codes</i>, const uint8_t *<i>bytes</i>,</b>
|
||||
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function decodes a serialized set of compiled patterns back into a list of
|
||||
individual patterns. Its arguments are:
|
||||
<pre>
|
||||
<i>codes</i> pointer to a vector in which to build the list
|
||||
<i>number_of_codes</i> number of slots in the vector
|
||||
<i>bytes</i> the serialized byte stream
|
||||
<i>gcontext</i> pointer to a general context or NULL
|
||||
</pre>
|
||||
The <i>bytes</i> argument must point to a block of data that was originally
|
||||
created by <b>pcre2_serialize_encode()</b>, though it may have been saved on
|
||||
disc or elsewhere in the meantime. If there are more codes in the serialized
|
||||
data than slots in the list, only those compiled patterns that will fit are
|
||||
decoded. The yield of the function is the number of decoded patterns, or one of
|
||||
the following negative error codes:
|
||||
<pre>
|
||||
PCRE2_ERROR_BADDATA <i>number_of_codes</i> is zero or less
|
||||
PCRE2_ERROR_BADMAGIC mismatch of id bytes in <i>bytes</i>
|
||||
PCRE2_ERROR_BADMODE mismatch of variable unit size or PCRE version
|
||||
PCRE2_ERROR_MEMORY memory allocation failed
|
||||
PCRE2_ERROR_NULL <i>codes</i> or <i>bytes</i> is NULL
|
||||
</pre>
|
||||
PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was compiled
|
||||
on a system with different endianness.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
61
pcre2/doc/html/pcre2_serialize_encode.html
Normal file
61
pcre2/doc/html/pcre2_serialize_encode.html
Normal file
@ -0,0 +1,61 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_serialize_encode specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_serialize_encode man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include <pcre2.h></b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int32_t pcre2_serialize_encode(pcre2_code **<i>codes</i>,</b>
|
||||
<b> int32_t <i>number_of_codes</i>, uint8_t **<i>serialized_bytes</i>,</b>
|
||||
<b> PCRE2_SIZE *<i>serialized_size</i>, pcre2_general_context *<i>gcontext</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function encodes a list of compiled patterns into a byte stream that can
|
||||
be saved on disc or elsewhere. Its arguments are:
|
||||
<pre>
|
||||
<i>codes</i> pointer to a vector containing the list
|
||||
<i>number_of_codes</i> number of slots in the vector
|
||||
<i>serialized_bytes</i> set to point to the serialized byte stream
|
||||
<i>serialized_size</i> set to the number of bytes in the byte stream
|
||||
<i>gcontext</i> pointer to a general context or NULL
|
||||
</pre>
|
||||
The context argument is used to obtain memory for the byte stream. When the
|
||||
serialized data is no longer needed, it must be freed by calling
|
||||
<b>pcre2_serialize_free()</b>. The yield of the function is the number of
|
||||
serialized patterns, or one of the following negative error codes:
|
||||
<pre>
|
||||
PCRE2_ERROR_BADDATA <i>number_of_codes</i> is zero or less
|
||||
PCRE2_ERROR_BADMAGIC mismatch of id bytes in one of the patterns
|
||||
PCRE2_ERROR_MEMORY memory allocation failed
|
||||
PCRE2_ERROR_MIXEDTABLES the patterns do not all use the same tables
|
||||
PCRE2_ERROR_NULL an argument other than <i>gcontext</i> is NULL
|
||||
</pre>
|
||||
PCRE2_ERROR_BADMAGIC means either that a pattern's code has been corrupted, or
|
||||
that a slot in the vector does not point to a compiled pattern.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
40
pcre2/doc/html/pcre2_serialize_free.html
Normal file
40
pcre2/doc/html/pcre2_serialize_free.html
Normal file
@ -0,0 +1,40 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_serialize_free specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_serialize_free man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include <pcre2.h></b>
|
||||
</P>
|
||||
<P>
|
||||
<b>void pcre2_serialize_free(uint8_t *<i>bytes</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function frees the memory that was obtained by
|
||||
<b>pcre2_serialize_encode()</b> to hold a serialized byte stream. The argument
|
||||
must point to such a byte stream.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
49
pcre2/doc/html/pcre2_serialize_get_number_of_codes.html
Normal file
49
pcre2/doc/html/pcre2_serialize_get_number_of_codes.html
Normal file
@ -0,0 +1,49 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_serialize_get_number_of_codes specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_serialize_get_number_of_codes man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include <pcre2.h></b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int32_t pcre2_serialize_get_number_of_codes(const uint8_t *<i>bytes</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
The <i>bytes</i> argument must point to a serialized byte stream that was
|
||||
originally created by <b>pcre2_serialize_encode()</b> (though it may have been
|
||||
saved on disc or elsewhere in the meantime). The function returns the number of
|
||||
serialized patterns in the byte stream, or one of the following negative error
|
||||
codes:
|
||||
<pre>
|
||||
PCRE2_ERROR_BADMAGIC mismatch of id bytes in <i>bytes</i>
|
||||
PCRE2_ERROR_BADMODE mismatch of variable unit size or PCRE version
|
||||
PCRE2_ERROR_NULL the argument is NULL
|
||||
</pre>
|
||||
PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was compiled
|
||||
on a system with different endianness.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
42
pcre2/doc/html/pcre2_set_bsr.html
Normal file
42
pcre2/doc/html/pcre2_set_bsr.html
Normal file
@ -0,0 +1,42 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_set_bsr specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_set_bsr man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include <pcre2.h></b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_set_bsr(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||
<b> uint32_t <i>value</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function sets the convention for processing \R within a compile context.
|
||||
The second argument must be one of PCRE2_BSR_ANYCRLF or PCRE2_BSR_UNICODE. The
|
||||
result is zero for success or PCRE2_ERROR_BADDATA if the second argument is
|
||||
invalid.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
43
pcre2/doc/html/pcre2_set_callout.html
Normal file
43
pcre2/doc/html/pcre2_set_callout.html
Normal file
@ -0,0 +1,43 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_set_callout specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_set_callout man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include <pcre2.h></b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_set_callout(pcre2_match_context *<i>mcontext</i>,</b>
|
||||
<b> int (*<i>callout_function</i>)(pcre2_callout_block *),</b>
|
||||
<b> void *<i>callout_data</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function sets the callout fields in a match context (the first argument).
|
||||
The second argument specifies a callout function, and the third argument is an
|
||||
opaque data item that is passed to it. The result of this function is always
|
||||
zero.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
42
pcre2/doc/html/pcre2_set_character_tables.html
Normal file
42
pcre2/doc/html/pcre2_set_character_tables.html
Normal file
@ -0,0 +1,42 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_set_character_tables specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_set_character_tables man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include <pcre2.h></b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_set_character_tables(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||
<b> const unsigned char *<i>tables</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function sets a pointer to custom character tables within a compile
|
||||
context. The second argument must be the result of a call to
|
||||
<b>pcre2_maketables()</b> or NULL to request the default tables. The result is
|
||||
always zero.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
46
pcre2/doc/html/pcre2_set_compile_recursion_guard.html
Normal file
46
pcre2/doc/html/pcre2_set_compile_recursion_guard.html
Normal file
@ -0,0 +1,46 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_set_compile_recursion_guard specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_set_compile_recursion_guard man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include <pcre2.h></b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_set_compile_recursion_guard(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||
<b> int (*<i>guard_function</i>)(uint32_t, void *), void *<i>user_data</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function defines, within a compile context, a function that is called
|
||||
whenever <b>pcre2_compile()</b> starts to compile a parenthesized part of a
|
||||
pattern. The first argument to the function gives the current depth of
|
||||
parenthesis nesting, and the second is user data that is supplied when the
|
||||
function is set up. The callout function should return zero if all is well, or
|
||||
non-zero to force an error. This feature is provided so that applications can
|
||||
check the available system stack space, in order to avoid running out. The
|
||||
result of <b>pcre2_set_compile_recursion_guard()</b> is always zero.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
40
pcre2/doc/html/pcre2_set_match_limit.html
Normal file
40
pcre2/doc/html/pcre2_set_match_limit.html
Normal file
@ -0,0 +1,40 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_set_match_limit specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_set_match_limit man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include <pcre2.h></b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
||||
<b> uint32_t <i>value</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function sets the match limit field in a match context. The result is
|
||||
always zero.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
50
pcre2/doc/html/pcre2_set_newline.html
Normal file
50
pcre2/doc/html/pcre2_set_newline.html
Normal file
@ -0,0 +1,50 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_set_newline specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_set_newline man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_set_newline(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||
<b> uint32_t <i>value</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function sets the newline convention within a compile context. This
|
||||
specifies which character(s) are recognized as newlines when compiling and
|
||||
matching patterns. The second argument must be one of:
|
||||
<pre>
|
||||
PCRE2_NEWLINE_CR Carriage return only
|
||||
PCRE2_NEWLINE_LF Linefeed only
|
||||
PCRE2_NEWLINE_CRLF CR followed by LF only
|
||||
PCRE2_NEWLINE_ANYCRLF Any of the above
|
||||
PCRE2_NEWLINE_ANY Any Unicode newline sequence
|
||||
</pre>
|
||||
The result is zero for success or PCRE2_ERROR_BADDATA if the second argument is
|
||||
invalid.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
40
pcre2/doc/html/pcre2_set_parens_nest_limit.html
Normal file
40
pcre2/doc/html/pcre2_set_parens_nest_limit.html
Normal file
@ -0,0 +1,40 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_set_parens_nest_limit specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_set_parens_nest_limit man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_set_parens_nest_limit(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||
<b> uint32_t <i>value</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function sets, in a compile context, the maximum depth of nested
|
||||
parentheses in a pattern. The result is always zero.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
40
pcre2/doc/html/pcre2_set_recursion_limit.html
Normal file
40
pcre2/doc/html/pcre2_set_recursion_limit.html
Normal file
@ -0,0 +1,40 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_set_recursion_limit specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_set_recursion_limit man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_set_recursion_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
||||
<b> uint32_t <i>value</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function sets the recursion limit field in a match context. The result is
|
||||
always zero.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
47
pcre2/doc/html/pcre2_set_recursion_memory_management.html
Normal file
47
pcre2/doc/html/pcre2_set_recursion_memory_management.html
Normal file
@ -0,0 +1,47 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_set_recursion_memory_management specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_set_recursion_memory_management man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_set_recursion_memory_management(</b>
|
||||
<b> pcre2_match_context *<i>mcontext</i>,</b>
|
||||
<b> void *(*<i>private_malloc</i>)(PCRE2_SIZE, void *),</b>
|
||||
<b> void (*<i>private_free</i>)(void *, void *), void *<i>memory_data</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function sets the match context fields for custom memory management when
|
||||
PCRE2 is compiled to use the heap instead of the system stack for recursive
|
||||
function calls while matching. When PCRE2 is compiled to use the stack (the
|
||||
default) this function does nothing. The first argument is a match context, the
|
||||
second and third specify the memory allocation and freeing functions, and the
|
||||
final argument is an opaque value that is passed to them whenever they are
|
||||
called. The result of this function is always zero.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
85
pcre2/doc/html/pcre2_substitute.html
Normal file
85
pcre2/doc/html/pcre2_substitute.html
Normal file
@ -0,0 +1,85 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_substitute specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_substitute man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_substitute(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
||||
<b> uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
|
||||
<b> pcre2_match_context *<i>mcontext</i>, PCRE2_SPTR <i>replacement</i>,</b>
|
||||
<b> PCRE2_SIZE <i>rlength</i>, PCRE2_UCHAR *<i>outputbuffer</i>,</b>
|
||||
<b> PCRE2_SIZE *<i>outlengthptr</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function matches a compiled regular expression against a given subject
|
||||
string, using a matching algorithm that is similar to Perl's. It then makes a
|
||||
copy of the subject, substituting a replacement string for what was matched.
|
||||
Its arguments are:
|
||||
<pre>
|
||||
<i>code</i> Points to the compiled pattern
|
||||
<i>subject</i> Points to the subject string
|
||||
<i>length</i> Length of the subject string
|
||||
<i>startoffset</i> Offset in the subject at which to start matching
|
||||
<i>options</i> Option bits
|
||||
<i>match_data</i> Points to a match data block, or is NULL
|
||||
<i>mcontext</i> Points to a match context, or is NULL
|
||||
<i>replacement</i> Points to the replacement string
|
||||
<i>rlength</i> Length of the replacement string
|
||||
<i>outputbuffer</i> Points to the output buffer
|
||||
<i>outlengthptr</i> Points to the length of the output buffer
|
||||
</pre>
|
||||
A match context is needed only if you want to:
|
||||
<pre>
|
||||
Set up a callout function
|
||||
Change the limit for calling the internal function <i>match()</i>
|
||||
Change the limit for calling <i>match()</i> recursively
|
||||
Set custom memory management when the heap is used for recursion
|
||||
</pre>
|
||||
The <i>length</i>, <i>startoffset</i> and <i>rlength</i> values are code
|
||||
units, not characters, as are the contents of the variable pointed at by
|
||||
<i>outlengthptr</i>, which is updated to the actual length of the new string.
|
||||
The options are:
|
||||
<pre>
|
||||
PCRE2_ANCHORED Match only at the first position
|
||||
PCRE2_NOTBOL Subject string is not the beginning of a line
|
||||
PCRE2_NOTEOL Subject string is not the end of a line
|
||||
PCRE2_NOTEMPTY An empty string is not a valid match
|
||||
PCRE2_NOTEMPTY_ATSTART An empty string at the start of the subject
|
||||
is not a valid match
|
||||
PCRE2_NO_UTF_CHECK Do not check the subject or replacement for
|
||||
UTF validity (only relevant if PCRE2_UTF
|
||||
was set at compile time)
|
||||
PCRE2_SUBSTITUTE_GLOBAL Replace all occurrences in the subject
|
||||
</pre>
|
||||
The function returns the number of substitutions, which may be zero if there
|
||||
were no matches. The result can be greater than one only when
|
||||
PCRE2_SUBSTITUTE_GLOBAL is set.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
58
pcre2/doc/html/pcre2_substring_copy_byname.html
Normal file
58
pcre2/doc/html/pcre2_substring_copy_byname.html
Normal file
@ -0,0 +1,58 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_substring_copy_byname specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_substring_copy_byname man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_substring_copy_byname(pcre2_match_data *<i>match_data</i>,</b>
|
||||
<b> PCRE2_SPTR <i>name</i>, PCRE2_UCHAR *<i>buffer</i>, PCRE2_SIZE *<i>bufflen</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This is a convenience function for extracting a captured substring, identified
|
||||
by name, into a given buffer. The arguments are:
|
||||
<pre>
|
||||
<i>match_data</i> The match data block for the match
|
||||
<i>name</i> Name of the required substring
|
||||
<i>buffer</i> Buffer to receive the string
|
||||
<i>bufflen</i> Length of buffer (code units)
|
||||
</pre>
|
||||
The <i>bufflen</i> variable is updated to contain the length of the extracted
|
||||
string, excluding the trailing zero. The yield of the function is zero for
|
||||
success or one of the following error numbers:
|
||||
<pre>
|
||||
PCRE2_ERROR_NOSUBSTRING there are no groups of that name
|
||||
  PCRE2_ERROR_UNAVAILABLE   the ovector was too small for that group
|
||||
PCRE2_ERROR_UNSET the group did not participate in the match
|
||||
PCRE2_ERROR_NOMEMORY the buffer is not big enough
|
||||
</pre>
|
||||
If there is more than one group with the given name, the first one that is set
|
||||
is returned. In this situation PCRE2_ERROR_UNSET means that no group with the
|
||||
given name was set.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
57
pcre2/doc/html/pcre2_substring_copy_bynumber.html
Normal file
57
pcre2/doc/html/pcre2_substring_copy_bynumber.html
Normal file
@ -0,0 +1,57 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_substring_copy_bynumber specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_substring_copy_bynumber man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_substring_copy_bynumber(pcre2_match_data *<i>match_data</i>,</b>
|
||||
<b> uint32_t <i>number</i>, PCRE2_UCHAR *<i>buffer</i>,</b>
|
||||
<b> PCRE2_SIZE *<i>bufflen</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This is a convenience function for extracting a captured substring into a given
|
||||
buffer. The arguments are:
|
||||
<pre>
|
||||
<i>match_data</i> The match data block for the match
|
||||
<i>number</i> Number of the required substring
|
||||
<i>buffer</i> Buffer to receive the string
|
||||
<i>bufflen</i> Length of buffer
|
||||
</pre>
|
||||
The <i>bufflen</i> variable is updated with the length of the extracted string,
|
||||
excluding the terminating zero. The yield of the function is zero for success
|
||||
or one of the following error numbers:
|
||||
<pre>
|
||||
PCRE2_ERROR_NOSUBSTRING there are no groups of that number
|
||||
  PCRE2_ERROR_UNAVAILABLE   the ovector was too small for that group
|
||||
PCRE2_ERROR_UNSET the group did not participate in the match
|
||||
PCRE2_ERROR_NOMEMORY the buffer is too small
|
||||
|
||||
</PRE>
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
41
pcre2/doc/html/pcre2_substring_free.html
Normal file
41
pcre2/doc/html/pcre2_substring_free.html
Normal file
@ -0,0 +1,41 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_substring_free specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_substring_free man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>void pcre2_substring_free(PCRE2_UCHAR *<i>buffer</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This is a convenience function for freeing the memory obtained by a previous
|
||||
call to <b>pcre2_substring_get_byname()</b> or
|
||||
<b>pcre2_substring_get_bynumber()</b>. Its only argument is a pointer to the
|
||||
string.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
60
pcre2/doc/html/pcre2_substring_get_byname.html
Normal file
60
pcre2/doc/html/pcre2_substring_get_byname.html
Normal file
@ -0,0 +1,60 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_substring_get_byname specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_substring_get_byname man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_substring_get_byname(pcre2_match_data *<i>match_data</i>,</b>
|
||||
<b> PCRE2_SPTR <i>name</i>, PCRE2_UCHAR **<i>bufferptr</i>, PCRE2_SIZE *<i>bufflen</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This is a convenience function for extracting a captured substring by name into
|
||||
newly acquired memory. The arguments are:
|
||||
<pre>
|
||||
<i>match_data</i> The match data for the match
|
||||
<i>name</i> Name of the required substring
|
||||
<i>bufferptr</i> Where to put the string pointer
|
||||
<i>bufflen</i> Where to put the string length
|
||||
</pre>
|
||||
The memory in which the substring is placed is obtained by calling the same
|
||||
memory allocation function that was used for the match data block. The
|
||||
convenience function <b>pcre2_substring_free()</b> can be used to free it when
|
||||
it is no longer needed. The yield of the function is zero for success or one of
|
||||
the following error numbers:
|
||||
<pre>
|
||||
PCRE2_ERROR_NOSUBSTRING there are no groups of that name
|
||||
  PCRE2_ERROR_UNAVAILABLE   the ovector was too small for that group
|
||||
PCRE2_ERROR_UNSET the group did not participate in the match
|
||||
PCRE2_ERROR_NOMEMORY memory could not be obtained
|
||||
</pre>
|
||||
If there is more than one group with the given name, the first one that is set
|
||||
is returned. In this situation PCRE2_ERROR_UNSET means that no group with the
|
||||
given name was set.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
58
pcre2/doc/html/pcre2_substring_get_bynumber.html
Normal file
58
pcre2/doc/html/pcre2_substring_get_bynumber.html
Normal file
@ -0,0 +1,58 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_substring_get_bynumber specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_substring_get_bynumber man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_substring_get_bynumber(pcre2_match_data *<i>match_data</i>,</b>
|
||||
<b> uint32_t <i>number</i>, PCRE2_UCHAR **<i>bufferptr</i>, PCRE2_SIZE *<i>bufflen</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This is a convenience function for extracting a captured substring by number
|
||||
into newly acquired memory. The arguments are:
|
||||
<pre>
|
||||
<i>match_data</i> The match data for the match
|
||||
<i>number</i> Number of the required substring
|
||||
<i>bufferptr</i> Where to put the string pointer
|
||||
<i>bufflen</i> Where to put the string length
|
||||
</pre>
|
||||
The memory in which the substring is placed is obtained by calling the same
|
||||
memory allocation function that was used for the match data block. The
|
||||
convenience function <b>pcre2_substring_free()</b> can be used to free it when
|
||||
it is no longer needed. The yield of the function is zero for success or one of
|
||||
the following error numbers:
|
||||
<pre>
|
||||
PCRE2_ERROR_NOSUBSTRING there are no groups of that number
|
||||
  PCRE2_ERROR_UNAVAILABLE   the ovector was too small for that group
|
||||
PCRE2_ERROR_UNSET the group did not participate in the match
|
||||
PCRE2_ERROR_NOMEMORY memory could not be obtained
|
||||
|
||||
</PRE>
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
46
pcre2/doc/html/pcre2_substring_length_byname.html
Normal file
46
pcre2/doc/html/pcre2_substring_length_byname.html
Normal file
@ -0,0 +1,46 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_substring_length_byname specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_substring_length_byname man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_substring_length_byname(pcre2_match_data *<i>match_data</i>,</b>
|
||||
<b> PCRE2_SPTR <i>name</i>, PCRE2_SIZE *<i>length</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function returns the length of a matched substring, identified by name.
|
||||
The arguments are:
|
||||
<pre>
|
||||
<i>match_data</i> The match data block for the match
|
||||
<i>name</i> The substring name
|
||||
<i>length</i> Where to return the length
|
||||
</pre>
|
||||
The yield is zero on success, or an error code if the substring is not found.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
48
pcre2/doc/html/pcre2_substring_length_bynumber.html
Normal file
48
pcre2/doc/html/pcre2_substring_length_bynumber.html
Normal file
@ -0,0 +1,48 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_substring_length_bynumber specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_substring_length_bynumber man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_substring_length_bynumber(pcre2_match_data *<i>match_data</i>,</b>
|
||||
<b> uint32_t <i>number</i>, PCRE2_SIZE *<i>length</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function returns the length of a matched substring, identified by number.
|
||||
The arguments are:
|
||||
<pre>
|
||||
<i>match_data</i> The match data block for the match
|
||||
<i>number</i> The substring number
|
||||
<i>length</i> Where to return the length, or NULL
|
||||
</pre>
|
||||
The third argument may be NULL if all you want to know is whether or not a
|
||||
substring is set. The yield is zero on success, or a negative error code
|
||||
otherwise. After a partial match, only substring 0 is available.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
40
pcre2/doc/html/pcre2_substring_list_free.html
Normal file
40
pcre2/doc/html/pcre2_substring_list_free.html
Normal file
@ -0,0 +1,40 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_substring_list_free specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_substring_list_free man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>void pcre2_substring_list_free(PCRE2_SPTR *<i>list</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This is a convenience function for freeing the store obtained by a previous
|
||||
call to <b>pcre2_substring_list_get()</b>. Its only argument is a pointer to
|
||||
the list of string pointers.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
56
pcre2/doc/html/pcre2_substring_list_get.html
Normal file
56
pcre2/doc/html/pcre2_substring_list_get.html
Normal file
@ -0,0 +1,56 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_substring_list_get specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_substring_list_get man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_substring_list_get(pcre2_match_data *<i>match_data</i>,</b>
|
||||
<b>  PCRE2_UCHAR ***<i>listptr</i>, PCRE2_SIZE **<i>lengthsptr</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This is a convenience function for extracting all the captured substrings after
|
||||
a pattern match. It builds a list of pointers to the strings, and (optionally)
|
||||
a second list that contains their lengths (in code units), excluding a
|
||||
terminating zero that is added to each of them. All this is done in a single
|
||||
block of memory that is obtained using the same memory allocation function that
|
||||
was used to get the match data block. The convenience function
|
||||
<b>pcre2_substring_list_free()</b> can be used to free it when it is no longer
|
||||
needed. The arguments are:
|
||||
<pre>
|
||||
<i>match_data</i> The match data block
|
||||
<i>listptr</i> Where to put a pointer to the list
|
||||
<i>lengthsptr</i> Where to put a pointer to the lengths, or NULL
|
||||
</pre>
|
||||
A pointer to a list of pointers is put in the variable whose address is in
|
||||
<i>listptr</i>. The list is terminated by a NULL pointer. If <i>lengthsptr</i> is
|
||||
not NULL, a matching list of lengths is created, and its address is placed in
|
||||
<i>lengthsptr</i>. The yield of the function is zero on success or
|
||||
PCRE2_ERROR_NOMEMORY if sufficient memory could not be obtained.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
53
pcre2/doc/html/pcre2_substring_nametable_scan.html
Normal file
53
pcre2/doc/html/pcre2_substring_nametable_scan.html
Normal file
@ -0,0 +1,53 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_substring_nametable_scan specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_substring_nametable_scan man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_substring_nametable_scan(const pcre2_code *<i>code</i>,</b>
|
||||
<b> PCRE2_SPTR <i>name</i>, PCRE2_SPTR *<i>first</i>, PCRE2_SPTR *<i>last</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This convenience function finds, for a compiled pattern, the first and last
|
||||
entries for a given name in the table that translates capturing parenthesis
|
||||
names into numbers.
|
||||
<pre>
|
||||
<i>code</i> Compiled regular expression
|
||||
  <i>name</i>    Name whose entries are required
|
||||
<i>first</i> Where to return a pointer to the first entry
|
||||
<i>last</i> Where to return a pointer to the last entry
|
||||
</pre>
|
||||
When the name is found in the table, if <i>first</i> is NULL, the function
|
||||
returns a group number, but if there is more than one matching entry, it is not
|
||||
defined which one. Otherwise, when both pointers have been set, the yield of
|
||||
the function is the length of each entry in code units. If the name is not
|
||||
found, PCRE2_ERROR_NOSUBSTRING is returned.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API, including the format of
|
||||
the table entries, in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page, and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
50
pcre2/doc/html/pcre2_substring_number_from_name.html
Normal file
50
pcre2/doc/html/pcre2_substring_number_from_name.html
Normal file
@ -0,0 +1,50 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_substring_number_from_name specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_substring_number_from_name man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &#60;pcre2.h&#62;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_substring_number_from_name(const pcre2_code *<i>code</i>,</b>
|
||||
<b> PCRE2_SPTR <i>name</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This convenience function finds the number of a named substring capturing
|
||||
parenthesis in a compiled pattern, provided that it is a unique name. The
|
||||
function arguments are:
|
||||
<pre>
|
||||
<i>code</i> Compiled regular expression
|
||||
<i>name</i> Name whose number is required
|
||||
</pre>
|
||||
The yield of the function is the number of the parenthesis if the name is
|
||||
found, or PCRE2_ERROR_NOSUBSTRING if it is not found. When duplicate names are
|
||||
allowed (PCRE2_DUPNAMES is set), if the name is not unique,
|
||||
PCRE2_ERROR_NOUNIQUESUBSTRING is returned. You can obtain the list of numbers
|
||||
with the same name by calling <b>pcre2_substring_nametable_scan()</b>.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
2892
pcre2/doc/html/pcre2api.html
Normal file
2892
pcre2/doc/html/pcre2api.html
Normal file
File diff suppressed because it is too large
Load Diff
504
pcre2/doc/html/pcre2build.html
Normal file
504
pcre2/doc/html/pcre2build.html
Normal file
@ -0,0 +1,504 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2build specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2build man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<ul>
|
||||
<li><a name="TOC1" href="#SEC1">BUILDING PCRE2</a>
|
||||
<li><a name="TOC2" href="#SEC2">PCRE2 BUILD-TIME OPTIONS</a>
|
||||
<li><a name="TOC3" href="#SEC3">BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a>
|
||||
<li><a name="TOC4" href="#SEC4">BUILDING SHARED AND STATIC LIBRARIES</a>
|
||||
<li><a name="TOC5" href="#SEC5">UNICODE AND UTF SUPPORT</a>
|
||||
<li><a name="TOC6" href="#SEC6">JUST-IN-TIME COMPILER SUPPORT</a>
|
||||
<li><a name="TOC7" href="#SEC7">NEWLINE RECOGNITION</a>
|
||||
<li><a name="TOC8" href="#SEC8">WHAT \R MATCHES</a>
|
||||
<li><a name="TOC9" href="#SEC9">HANDLING VERY LARGE PATTERNS</a>
|
||||
<li><a name="TOC10" href="#SEC10">AVOIDING EXCESSIVE STACK USAGE</a>
|
||||
<li><a name="TOC11" href="#SEC11">LIMITING PCRE2 RESOURCE USAGE</a>
|
||||
<li><a name="TOC12" href="#SEC12">CREATING CHARACTER TABLES AT BUILD TIME</a>
|
||||
<li><a name="TOC13" href="#SEC13">USING EBCDIC CODE</a>
|
||||
<li><a name="TOC14" href="#SEC14">PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT</a>
|
||||
<li><a name="TOC15" href="#SEC15">PCRE2GREP BUFFER SIZE</a>
|
||||
<li><a name="TOC16" href="#SEC16">PCRE2TEST OPTION FOR LIBREADLINE SUPPORT</a>
|
||||
<li><a name="TOC17" href="#SEC17">INCLUDING DEBUGGING CODE</a>
|
||||
<li><a name="TOC18" href="#SEC18">DEBUGGING WITH VALGRIND SUPPORT</a>
|
||||
<li><a name="TOC19" href="#SEC19">CODE COVERAGE REPORTING</a>
|
||||
<li><a name="TOC20" href="#SEC20">SEE ALSO</a>
|
||||
<li><a name="TOC21" href="#SEC21">AUTHOR</a>
|
||||
<li><a name="TOC22" href="#SEC22">REVISION</a>
|
||||
</ul>
|
||||
<br><a name="SEC1" href="#TOC1">BUILDING PCRE2</a><br>
|
||||
<P>
|
||||
PCRE2 is distributed with a <b>configure</b> script that can be used to build
|
||||
the library in Unix-like environments using the applications known as
|
||||
Autotools. Also in the distribution are files to support building using
|
||||
<b>CMake</b> instead of <b>configure</b>. The text file
|
||||
<a href="README.txt"><b>README</b></a>
|
||||
contains general information about building with Autotools (some of which is
|
||||
repeated below), and also has some comments about building on various operating
|
||||
systems. There is a lot more information about building PCRE2 without using
|
||||
Autotools (including information about using <b>CMake</b> and building "by
|
||||
hand") in the text file called
|
||||
<a href="NON-AUTOTOOLS-BUILD.txt"><b>NON-AUTOTOOLS-BUILD</b>.</a>
|
||||
You should consult this file as well as the
|
||||
<a href="README.txt"><b>README</b></a>
|
||||
file if you are building in a non-Unix-like environment.
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">PCRE2 BUILD-TIME OPTIONS</a><br>
|
||||
<P>
|
||||
The rest of this document describes the optional features of PCRE2 that can be
|
||||
selected when the library is compiled. It assumes use of the <b>configure</b>
|
||||
script, where the optional features are selected or deselected by providing
|
||||
options to <b>configure</b> before running the <b>make</b> command. However, the
|
||||
same options can be selected in both Unix-like and non-Unix-like environments
|
||||
if you are using <b>CMake</b> instead of <b>configure</b> to build PCRE2.
|
||||
</P>
|
||||
<P>
|
||||
If you are not using Autotools or <b>CMake</b>, option selection can be done by
|
||||
editing the <b>config.h</b> file, or by passing parameter settings to the
|
||||
compiler, as described in
|
||||
<a href="NON-AUTOTOOLS-BUILD.txt"><b>NON-AUTOTOOLS-BUILD</b>.</a>
|
||||
</P>
|
||||
<P>
|
||||
The complete list of options for <b>configure</b> (which includes the standard
|
||||
ones such as the selection of the installation directory) can be obtained by
|
||||
running
|
||||
<pre>
|
||||
./configure --help
|
||||
</pre>
|
||||
The following sections include descriptions of options whose names begin with
|
||||
--enable or --disable. These settings specify changes to the defaults for the
|
||||
<b>configure</b> command. Because of the way that <b>configure</b> works,
|
||||
--enable and --disable always come in pairs, so the complementary option always
|
||||
exists as well, but as it specifies the default, it is not described.
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a><br>
|
||||
<P>
|
||||
By default, a library called <b>libpcre2-8</b> is built, containing functions
|
||||
that take string arguments contained in vectors of bytes, interpreted either as
|
||||
single-byte characters, or UTF-8 strings. You can also build two other
|
||||
libraries, called <b>libpcre2-16</b> and <b>libpcre2-32</b>, which process
|
||||
strings that are contained in vectors of 16-bit and 32-bit code units,
|
||||
respectively. These can be interpreted either as single-unit characters or
|
||||
UTF-16/UTF-32 strings. To build these additional libraries, add one or both of
|
||||
the following to the <b>configure</b> command:
|
||||
<pre>
|
||||
--enable-pcre2-16
|
||||
--enable-pcre2-32
|
||||
</pre>
|
||||
If you do not want the 8-bit library, add
|
||||
<pre>
|
||||
--disable-pcre2-8
|
||||
</pre>
|
||||
as well. At least one of the three libraries must be built. Note that the POSIX
|
||||
wrapper is for the 8-bit library only, and that <b>pcre2grep</b> is an 8-bit
|
||||
program. Neither of these is built if you select only the 16-bit or 32-bit
|
||||
libraries.
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">BUILDING SHARED AND STATIC LIBRARIES</a><br>
|
||||
<P>
|
||||
The Autotools PCRE2 building process uses <b>libtool</b> to build both shared
|
||||
and static libraries by default. You can suppress an unwanted library by adding
|
||||
one of
|
||||
<pre>
|
||||
--disable-shared
|
||||
--disable-static
|
||||
</pre>
|
||||
to the <b>configure</b> command.
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">UNICODE AND UTF SUPPORT</a><br>
|
||||
<P>
|
||||
By default, PCRE2 is built with support for Unicode and UTF character strings.
|
||||
To build it without Unicode support, add
|
||||
<pre>
|
||||
--disable-unicode
|
||||
</pre>
|
||||
to the <b>configure</b> command. This setting applies to all three libraries. It
|
||||
is not possible to build one library with Unicode support, and another without,
|
||||
in the same configuration.
|
||||
</P>
|
||||
<P>
|
||||
Of itself, Unicode support does not make PCRE2 treat strings as UTF-8, UTF-16
|
||||
or UTF-32. To do that, applications that use the library can set the PCRE2_UTF
|
||||
option when they call <b>pcre2_compile()</b> to compile a pattern.
|
||||
Alternatively, patterns may be started with (*UTF) unless the application has
|
||||
locked this out by setting PCRE2_NEVER_UTF.
|
||||
</P>
|
||||
<P>
|
||||
UTF support allows the libraries to process character code points up to
|
||||
0x10ffff in the strings that they handle. It also provides support for
|
||||
accessing the Unicode properties of such characters, using pattern escapes such
|
||||
as \P, \p, and \X. Only the general category properties such as <i>Lu</i> and
|
||||
<i>Nd</i> are supported. Details are given in the
|
||||
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
|
||||
documentation.
|
||||
</P>
|
||||
<P>
|
||||
Pattern escapes such as \d and \w do not by default make use of Unicode
|
||||
properties. The application can request that they do by setting the PCRE2_UCP
|
||||
option. Unless the application has set PCRE2_NEVER_UCP, a pattern may also
|
||||
request this by starting with (*UCP).
|
||||
</P>
|
||||
<P>
|
||||
The \C escape sequence, which matches a single code unit, even in a UTF mode,
|
||||
can cause unpredictable behaviour because it may leave the current matching
|
||||
point in the middle of a multi-code-unit character. It can be locked out by
|
||||
setting the PCRE2_NEVER_BACKSLASH_C option.
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">JUST-IN-TIME COMPILER SUPPORT</a><br>
|
||||
<P>
|
||||
Just-in-time compiler support is included in the build by specifying
|
||||
<pre>
|
||||
--enable-jit
|
||||
</pre>
|
||||
This support is available only for certain hardware architectures. If this
|
||||
option is set for an unsupported architecture, a building error occurs.
|
||||
See the
|
||||
<a href="pcre2jit.html"><b>pcre2jit</b></a>
|
||||
documentation for a discussion of JIT usage. When JIT support is enabled,
|
||||
pcre2grep automatically makes use of it, unless you add
|
||||
<pre>
|
||||
--disable-pcre2grep-jit
|
||||
</pre>
|
||||
to the "configure" command.
|
||||
</P>
|
||||
<br><a name="SEC7" href="#TOC1">NEWLINE RECOGNITION</a><br>
|
||||
<P>
|
||||
By default, PCRE2 interprets the linefeed (LF) character as indicating the end
|
||||
of a line. This is the normal newline character on Unix-like systems. You can
|
||||
compile PCRE2 to use carriage return (CR) instead, by adding
|
||||
<pre>
|
||||
--enable-newline-is-cr
|
||||
</pre>
|
||||
to the <b>configure</b> command. There is also an --enable-newline-is-lf option,
|
||||
which explicitly specifies linefeed as the newline character.
|
||||
</P>
|
||||
<P>
|
||||
Alternatively, you can specify that line endings are to be indicated by the
|
||||
two-character sequence CRLF (CR immediately followed by LF). If you want this,
|
||||
add
|
||||
<pre>
|
||||
--enable-newline-is-crlf
|
||||
</pre>
|
||||
to the <b>configure</b> command. There is a fourth option, specified by
|
||||
<pre>
|
||||
--enable-newline-is-anycrlf
|
||||
</pre>
|
||||
which causes PCRE2 to recognize any of the three sequences CR, LF, or CRLF as
|
||||
indicating a line ending. Finally, a fifth option, specified by
|
||||
<pre>
|
||||
--enable-newline-is-any
|
||||
</pre>
|
||||
causes PCRE2 to recognize any Unicode newline sequence. The Unicode newline
|
||||
sequences are the three just mentioned, plus the single characters VT (vertical
|
||||
tab, U+000B), FF (form feed, U+000C), NEL (next line, U+0085), LS (line
|
||||
separator, U+2028), and PS (paragraph separator, U+2029).
|
||||
</P>
|
||||
<P>
|
||||
Whatever default line ending convention is selected when PCRE2 is built can be
|
||||
overridden by applications that use the library. At build time it is
|
||||
conventional to use the standard for your operating system.
|
||||
</P>
|
||||
<br><a name="SEC8" href="#TOC1">WHAT \R MATCHES</a><br>
|
||||
<P>
|
||||
By default, the sequence \R in a pattern matches any Unicode newline sequence,
|
||||
independently of what has been selected as the line ending sequence. If you
|
||||
specify
|
||||
<pre>
|
||||
--enable-bsr-anycrlf
|
||||
</pre>
|
||||
the default is changed so that \R matches only CR, LF, or CRLF. Whatever is
|
||||
selected when PCRE2 is built can be overridden by applications that use the
|
||||
library.
|
||||
</P>
|
||||
<br><a name="SEC9" href="#TOC1">HANDLING VERY LARGE PATTERNS</a><br>
|
||||
<P>
|
||||
Within a compiled pattern, offset values are used to point from one part to
|
||||
another (for example, from an opening parenthesis to an alternation
|
||||
metacharacter). By default, in the 8-bit and 16-bit libraries, two-byte values
|
||||
are used for these offsets, leading to a maximum size for a compiled pattern of
|
||||
around 64K code units. This is sufficient to handle all but the most gigantic
|
||||
patterns. Nevertheless, some people do want to process truly enormous patterns,
|
||||
so it is possible to compile PCRE2 to use three-byte or four-byte offsets by
|
||||
adding a setting such as
|
||||
<pre>
|
||||
--with-link-size=3
|
||||
</pre>
|
||||
to the <b>configure</b> command. The value given must be 2, 3, or 4. For the
|
||||
16-bit library, a value of 3 is rounded up to 4. In these libraries, using
|
||||
longer offsets slows down the operation of PCRE2 because it has to load
|
||||
additional data when handling them. For the 32-bit library the value is always
|
||||
4 and cannot be overridden; the value of --with-link-size is ignored.
|
||||
</P>
|
||||
<br><a name="SEC10" href="#TOC1">AVOIDING EXCESSIVE STACK USAGE</a><br>
|
||||
<P>
|
||||
When matching with the <b>pcre2_match()</b> function, PCRE2 implements
|
||||
backtracking by making recursive calls to an internal function called
|
||||
<b>match()</b>. In environments where the size of the stack is limited, this can
|
||||
severely limit PCRE2's operation. (The Unix environment does not usually suffer
|
||||
from this problem, but it may sometimes be necessary to increase the maximum
|
||||
stack size. There is a discussion in the
|
||||
<a href="pcre2stack.html"><b>pcre2stack</b></a>
|
||||
documentation.) An alternative approach to recursion that uses memory from the
|
||||
heap to remember data, instead of using recursive function calls, has been
|
||||
implemented to work round the problem of limited stack size. If you want to
|
||||
build a version of PCRE2 that works this way, add
|
||||
<pre>
|
||||
--disable-stack-for-recursion
|
||||
</pre>
|
||||
to the <b>configure</b> command. By default, the system functions <b>malloc()</b>
|
||||
and <b>free()</b> are called to manage the heap memory that is required, but
|
||||
custom memory management functions can be called instead. PCRE2 runs noticeably
|
||||
more slowly when built in this way. This option affects only the
|
||||
<b>pcre2_match()</b> function; it is not relevant for <b>pcre2_dfa_match()</b>.
|
||||
</P>
|
||||
<br><a name="SEC11" href="#TOC1">LIMITING PCRE2 RESOURCE USAGE</a><br>
|
||||
<P>
|
||||
Internally, PCRE2 has a function called <b>match()</b>, which it calls
|
||||
repeatedly (sometimes recursively) when matching a pattern with the
|
||||
<b>pcre2_match()</b> function. By controlling the maximum number of times this
|
||||
function may be called during a single matching operation, a limit can be
|
||||
placed on the resources used by a single call to <b>pcre2_match()</b>. The limit
|
||||
can be changed at run time, as described in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
documentation. The default is 10 million, but this can be changed by adding a
|
||||
setting such as
|
||||
<pre>
|
||||
--with-match-limit=500000
|
||||
</pre>
|
||||
to the <b>configure</b> command. This setting has no effect on the
|
||||
<b>pcre2_dfa_match()</b> matching function.
|
||||
</P>
|
||||
<P>
|
||||
In some environments it is desirable to limit the depth of recursive calls of
|
||||
<b>match()</b> more strictly than the total number of calls, in order to
|
||||
restrict the maximum amount of stack (or heap, if --disable-stack-for-recursion
|
||||
is specified) that is used. A second limit controls this; it defaults to the
|
||||
value that is set for --with-match-limit, which imposes no additional
|
||||
constraints. However, you can set a lower limit by adding, for example,
|
||||
<pre>
|
||||
--with-match-limit-recursion=10000
|
||||
</pre>
|
||||
to the <b>configure</b> command. This value can also be overridden at run time.
|
||||
</P>
|
||||
<br><a name="SEC12" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
|
||||
<P>
|
||||
PCRE2 uses fixed tables for processing characters whose code points are less
|
||||
than 256. By default, PCRE2 is built with a set of tables that are distributed
|
||||
in the file <i>src/pcre2_chartables.c.dist</i>. These tables are for ASCII codes
|
||||
only. If you add
|
||||
<pre>
|
||||
--enable-rebuild-chartables
|
||||
</pre>
|
||||
to the <b>configure</b> command, the distributed tables are no longer used.
|
||||
Instead, a program called <b>dftables</b> is compiled and run. This outputs the
|
||||
source for a new set of tables, created in the default locale of your C run-time
|
||||
system. (This method of replacing the tables does not work if you are cross
|
||||
compiling, because <b>dftables</b> is run on the local host. If you need to
|
||||
create alternative tables when cross compiling, you will have to do so "by
|
||||
hand".)
|
||||
</P>
|
||||
<br><a name="SEC13" href="#TOC1">USING EBCDIC CODE</a><br>
|
||||
<P>
|
||||
PCRE2 assumes by default that it will run in an environment where the character
|
||||
code is ASCII or Unicode, which is a superset of ASCII. This is the case for
|
||||
most computer operating systems. PCRE2 can, however, be compiled to run in an
|
||||
8-bit EBCDIC environment by adding
|
||||
<pre>
|
||||
--enable-ebcdic --disable-unicode
|
||||
</pre>
|
||||
to the <b>configure</b> command. This setting implies
|
||||
--enable-rebuild-chartables. You should only use it if you know that you are in
|
||||
an EBCDIC environment (for example, an IBM mainframe operating system).
|
||||
</P>
|
||||
<P>
|
||||
It is not possible to support both EBCDIC and UTF-8 codes in the same version
|
||||
of the library. Consequently, --enable-unicode and --enable-ebcdic are mutually
|
||||
exclusive.
|
||||
</P>
|
||||
<P>
|
||||
The EBCDIC character that corresponds to an ASCII LF is assumed to have the
|
||||
value 0x15 by default. However, in some EBCDIC environments, 0x25 is used. In
|
||||
such an environment you should use
|
||||
<pre>
|
||||
--enable-ebcdic-nl25
|
||||
</pre>
|
||||
as well as, or instead of, --enable-ebcdic. The EBCDIC character for CR has the
|
||||
same value as in ASCII, namely, 0x0d. Whichever of 0x15 and 0x25 is <i>not</i>
|
||||
chosen as LF is made to correspond to the Unicode NEL character (which, in
|
||||
Unicode, is 0x85).
|
||||
</P>
|
||||
<P>
|
||||
The options that select newline behaviour, such as --enable-newline-is-cr,
|
||||
and equivalent run-time options, refer to these character values in an EBCDIC
|
||||
environment.
|
||||
</P>
|
||||
<br><a name="SEC14" href="#TOC1">PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT</a><br>
|
||||
<P>
|
||||
By default, <b>pcre2grep</b> reads all files as plain text. You can build it so
|
||||
that it recognizes files whose names end in <b>.gz</b> or <b>.bz2</b>, and reads
|
||||
them with <b>libz</b> or <b>libbz2</b>, respectively, by adding one or both of
|
||||
<pre>
|
||||
--enable-pcre2grep-libz
|
||||
--enable-pcre2grep-libbz2
|
||||
</pre>
|
||||
to the <b>configure</b> command. These options naturally require that the
|
||||
relevant libraries are installed on your system. Configuration will fail if
|
||||
they are not.
|
||||
</P>
|
||||
<br><a name="SEC15" href="#TOC1">PCRE2GREP BUFFER SIZE</a><br>
|
||||
<P>
|
||||
<b>pcre2grep</b> uses an internal buffer to hold a "window" on the file it is
|
||||
scanning, in order to be able to output "before" and "after" lines when it
|
||||
finds a match. The size of the buffer is controlled by a parameter whose
|
||||
default value is 20K. The buffer itself is three times this size, but because
|
||||
of the way it is used for holding "before" lines, the longest line that is
|
||||
guaranteed to be processable is the parameter size. You can change the default
|
||||
parameter value by adding, for example,
|
||||
<pre>
|
||||
--with-pcre2grep-bufsize=50K
|
||||
</pre>
|
||||
to the <b>configure</b> command. The caller of <b>pcre2grep</b> can override this
|
||||
value by using --buffer-size on the command line.
|
||||
</P>
|
||||
<br><a name="SEC16" href="#TOC1">PCRE2TEST OPTION FOR LIBREADLINE SUPPORT</a><br>
|
||||
<P>
|
||||
If you add one of
|
||||
<pre>
|
||||
--enable-pcre2test-libreadline
|
||||
--enable-pcre2test-libedit
|
||||
</pre>
|
||||
to the <b>configure</b> command, <b>pcre2test</b> is linked with the
|
||||
<b>libreadline</b> or <b>libedit</b> library, respectively, and when its input is
|
||||
from a terminal, it reads it using the <b>readline()</b> function. This provides
|
||||
line-editing and history facilities. Note that <b>libreadline</b> is
|
||||
GPL-licensed, so if you distribute a binary of <b>pcre2test</b> linked in this
|
||||
way, there may be licensing issues. These can be avoided by linking instead
|
||||
with <b>libedit</b>, which has a BSD licence.
|
||||
</P>
|
||||
<P>
|
||||
Setting --enable-pcre2test-libreadline causes the <b>-lreadline</b> option to be
|
||||
added to the <b>pcre2test</b> build. In many operating environments with a
|
||||
system-installed readline library this is sufficient. However, in some
|
||||
environments (e.g. if an unmodified distribution version of readline is in
|
||||
use), some extra configuration may be necessary. The INSTALL file for
|
||||
<b>libreadline</b> says this:
|
||||
<pre>
|
||||
"Readline uses the termcap functions, but does not link with
|
||||
the termcap or curses library itself, allowing applications
|
||||
which link with readline the to choose an appropriate library."
|
||||
</pre>
|
||||
If your environment has not been set up so that an appropriate library is
|
||||
automatically included, you may need to add something like
|
||||
<pre>
|
||||
  LIBS="-lncurses"
|
||||
</pre>
|
||||
immediately before the <b>configure</b> command.
|
||||
</P>
|
||||
<br><a name="SEC17" href="#TOC1">INCLUDING DEBUGGING CODE</a><br>
|
||||
<P>
|
||||
If you add
|
||||
<pre>
|
||||
--enable-debug
|
||||
</pre>
|
||||
to the <b>configure</b> command, additional debugging code is included in the
|
||||
build. This feature is intended for use by the PCRE2 maintainers.
|
||||
</P>
|
||||
<br><a name="SEC18" href="#TOC1">DEBUGGING WITH VALGRIND SUPPORT</a><br>
|
||||
<P>
|
||||
If you add
|
||||
<pre>
|
||||
--enable-valgrind
|
||||
</pre>
|
||||
to the <b>configure</b> command, PCRE2 will use valgrind annotations to mark
|
||||
certain memory regions as unaddressable. This allows it to detect invalid
|
||||
memory accesses, and is mostly useful for debugging PCRE2 itself.
|
||||
</P>
|
||||
<br><a name="SEC19" href="#TOC1">CODE COVERAGE REPORTING</a><br>
|
||||
<P>
|
||||
If your C compiler is gcc, you can build a version of PCRE2 that can generate a
|
||||
code coverage report for its test suite. To enable this, you must install
|
||||
<b>lcov</b> version 1.6 or above. Then specify
|
||||
<pre>
|
||||
--enable-coverage
|
||||
</pre>
|
||||
to the <b>configure</b> command and build PCRE2 in the usual way.
|
||||
</P>
|
||||
<P>
|
||||
Note that using <b>ccache</b> (a caching C compiler) is incompatible with code
|
||||
coverage reporting. If you have configured <b>ccache</b> to run automatically
|
||||
on your system, you must set the environment variable
|
||||
<pre>
|
||||
CCACHE_DISABLE=1
|
||||
</pre>
|
||||
before running <b>make</b> to build PCRE2, so that <b>ccache</b> is not used.
|
||||
</P>
|
||||
<P>
|
||||
When --enable-coverage is used, the following additional targets are added to the
|
||||
<i>Makefile</i>:
|
||||
<pre>
|
||||
make coverage
|
||||
</pre>
|
||||
This creates a fresh coverage report for the PCRE2 test suite. It is equivalent
|
||||
to running "make coverage-reset", "make coverage-baseline", "make check", and
|
||||
then "make coverage-report".
|
||||
<pre>
|
||||
make coverage-reset
|
||||
</pre>
|
||||
This zeroes the coverage counters, but does nothing else.
|
||||
<pre>
|
||||
make coverage-baseline
|
||||
</pre>
|
||||
This captures baseline coverage information.
|
||||
<pre>
|
||||
make coverage-report
|
||||
</pre>
|
||||
This creates the coverage report.
|
||||
<pre>
|
||||
make coverage-clean-report
|
||||
</pre>
|
||||
This removes the generated coverage report without cleaning the coverage data
|
||||
itself.
|
||||
<pre>
|
||||
make coverage-clean-data
|
||||
</pre>
|
||||
This removes the captured coverage data without removing the coverage files
|
||||
created at compile time (*.gcno).
|
||||
<pre>
|
||||
make coverage-clean
|
||||
</pre>
|
||||
This cleans all coverage data including the generated coverage report. For more
|
||||
information about code coverage, see the <b>gcov</b> and <b>lcov</b>
|
||||
documentation.
|
||||
</P>
|
||||
<br><a name="SEC20" href="#TOC1">SEE ALSO</a><br>
|
||||
<P>
|
||||
<b>pcre2api</b>(3), <b>pcre2-config</b>(3).
|
||||
</P>
|
||||
<br><a name="SEC21" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
University Computing Service
|
||||
<br>
|
||||
Cambridge, England.
|
||||
<br>
|
||||
</P>
|
||||
<br><a name="SEC22" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 24 April 2015
|
||||
<br>
|
||||
Copyright © 1997-2015 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
408
pcre2/doc/html/pcre2callout.html
Normal file
408
pcre2/doc/html/pcre2callout.html
Normal file
@ -0,0 +1,408 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2callout specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2callout man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<ul>
|
||||
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
|
||||
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
|
||||
<li><a name="TOC3" href="#SEC3">MISSING CALLOUTS</a>
|
||||
<li><a name="TOC4" href="#SEC4">THE CALLOUT INTERFACE</a>
|
||||
<li><a name="TOC5" href="#SEC5">RETURN VALUES FROM CALLOUTS</a>
|
||||
<li><a name="TOC6" href="#SEC6">CALLOUT ENUMERATION</a>
|
||||
<li><a name="TOC7" href="#SEC7">AUTHOR</a>
|
||||
<li><a name="TOC8" href="#SEC8">REVISION</a>
|
||||
</ul>
|
||||
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
|
||||
<P>
|
||||
<b>#include &#60;pcre2.h&#62;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int (*pcre2_callout)(pcre2_callout_block *, void *);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre2_callout_enumerate(const pcre2_code *<i>code</i>,</b>
|
||||
<b> int (*<i>callback</i>)(pcre2_callout_enumerate_block *, void *),</b>
|
||||
<b> void *<i>user_data</i>);</b>
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
|
||||
<P>
|
||||
PCRE2 provides a feature called "callout", which is a means of temporarily
|
||||
passing control to the caller of PCRE2 in the middle of pattern matching. The
|
||||
caller of PCRE2 provides an external function by putting its entry point in
|
||||
a match context (see <b>pcre2_set_callout()</b> in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
documentation).
|
||||
</P>
|
||||
<P>
|
||||
Within a regular expression, (?C&#60;arg&#62;) indicates a point at which the external
|
||||
function is to be called. Different callout points can be identified by putting
|
||||
a number less than 256 after the letter C. The default value is zero.
|
||||
Alternatively, the argument may be a delimited string. The starting delimiter
|
||||
must be one of ` ' " ^ % # $ { and the ending delimiter is the same as the
|
||||
start, except for {, where the ending delimiter is }. If the ending delimiter
|
||||
is needed within the string, it must be doubled. For example, this pattern has
|
||||
two callout points:
|
||||
<pre>
|
||||
(?C1)abc(?C"some ""arbitrary"" text")def
|
||||
</pre>
|
||||
If the PCRE2_AUTO_CALLOUT option bit is set when a pattern is compiled, PCRE2
|
||||
automatically inserts callouts, all with number 255, before each item in the
|
||||
pattern. For example, if PCRE2_AUTO_CALLOUT is used with the pattern
|
||||
<pre>
|
||||
A(\d{2}|--)
|
||||
</pre>
|
||||
it is processed as if it were
|
||||
<br>
|
||||
<br>
|
||||
(?C255)A(?C255)((?C255)\d{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255)
|
||||
<br>
|
||||
<br>
|
||||
Notice that there is a callout before and after each parenthesis and
|
||||
alternation bar. If the pattern contains a conditional group whose condition is
|
||||
an assertion, an automatic callout is inserted immediately before the
|
||||
condition. Such a callout may also be inserted explicitly, for example:
|
||||
<pre>
|
||||
(?(?C9)(?=a)ab|de) (?(?C%text%)(?!=d)ab|de)
|
||||
</pre>
|
||||
This applies only to assertion conditions (because they are themselves
|
||||
independent groups).
|
||||
</P>
|
||||
<P>
|
||||
Callouts can be useful for tracking the progress of pattern matching. The
|
||||
<a href="pcre2test.html"><b>pcre2test</b></a>
|
||||
program has a pattern qualifier (/auto_callout) that sets automatic callouts.
|
||||
When any callouts are present, the output from <b>pcre2test</b> indicates how
|
||||
the pattern is being matched. This is useful information when you are trying to
|
||||
optimize the performance of a particular pattern.
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">MISSING CALLOUTS</a><br>
|
||||
<P>
|
||||
You should be aware that, because of optimizations in the way PCRE2 compiles
|
||||
and matches patterns, callouts sometimes do not happen exactly as you might
|
||||
expect.
|
||||
</P>
|
||||
<br><b>
|
||||
Auto-possessification
|
||||
</b><br>
|
||||
<P>
|
||||
At compile time, PCRE2 "auto-possessifies" repeated items when it knows that
|
||||
what follows cannot be part of the repeat. For example, a+[bc] is compiled as
|
||||
if it were a++[bc]. The <b>pcre2test</b> output when this pattern is compiled
|
||||
with PCRE2_ANCHORED and PCRE2_AUTO_CALLOUT and then applied to the string
|
||||
"aaaa" is:
|
||||
<pre>
|
||||
--->aaaa
|
||||
+0 ^ a+
|
||||
+2 ^ ^ [bc]
|
||||
No match
|
||||
</pre>
|
||||
This indicates that when matching [bc] fails, there is no backtracking into a+
|
||||
and therefore the callouts that would be taken for the backtracks do not occur.
|
||||
You can disable the auto-possessify feature by passing PCRE2_NO_AUTO_POSSESS to
|
||||
<b>pcre2_compile()</b>, or starting the pattern with (*NO_AUTO_POSSESS). In this
|
||||
case, the output changes to this:
|
||||
<pre>
|
||||
--->aaaa
|
||||
+0 ^ a+
|
||||
+2 ^ ^ [bc]
|
||||
+2 ^ ^ [bc]
|
||||
+2 ^ ^ [bc]
|
||||
+2 ^^ [bc]
|
||||
No match
|
||||
</pre>
|
||||
This time, when matching [bc] fails, the matcher backtracks into a+ and tries
|
||||
again, repeatedly, until a+ itself fails.
|
||||
</P>
|
||||
<br><b>
|
||||
Automatic .* anchoring
|
||||
</b><br>
|
||||
<P>
|
||||
By default, an optimization is applied when .* is the first significant item in
|
||||
a pattern. If PCRE2_DOTALL is set, so that the dot can match any character, the
|
||||
pattern is automatically anchored. If PCRE2_DOTALL is not set, a match can
|
||||
start only after an internal newline or at the beginning of the subject, and
|
||||
<b>pcre2_compile()</b> remembers this. This optimization is disabled, however,
|
||||
if .* is in an atomic group or if there is a back reference to the capturing
|
||||
group in which it appears. It is also disabled if the pattern contains (*PRUNE)
|
||||
or (*SKIP). However, the presence of callouts does not affect it.
|
||||
</P>
|
||||
<P>
|
||||
For example, if the pattern .*\d is compiled with PCRE2_AUTO_CALLOUT and
|
||||
applied to the string "aa", the <b>pcre2test</b> output is:
|
||||
<pre>
|
||||
--->aa
|
||||
+0 ^ .*
|
||||
+2 ^ ^ \d
|
||||
+2 ^^ \d
|
||||
+2 ^ \d
|
||||
No match
|
||||
</pre>
|
||||
This shows that all match attempts start at the beginning of the subject. In
|
||||
other words, the pattern is anchored. You can disable this optimization by
|
||||
passing PCRE2_NO_DOTSTAR_ANCHOR to <b>pcre2_compile()</b>, or starting the
|
||||
pattern with (*NO_DOTSTAR_ANCHOR). In this case, the output changes to:
|
||||
<pre>
|
||||
--->aa
|
||||
+0 ^ .*
|
||||
+2 ^ ^ \d
|
||||
+2 ^^ \d
|
||||
+2 ^ \d
|
||||
+0 ^ .*
|
||||
+2 ^^ \d
|
||||
+2 ^ \d
|
||||
No match
|
||||
</pre>
|
||||
This shows more match attempts, starting at the second subject character.
|
||||
Another optimization, described in the next section, means that there is no
|
||||
subsequent attempt to match with an empty subject.
|
||||
</P>
|
||||
<P>
|
||||
If a pattern has more than one top-level branch, automatic anchoring occurs if
|
||||
all branches are anchorable.
|
||||
</P>
|
||||
<br><b>
|
||||
Other optimizations
|
||||
</b><br>
|
||||
<P>
|
||||
Other optimizations that provide fast "no match" results also affect callouts.
|
||||
For example, if the pattern is
|
||||
<pre>
|
||||
ab(?C4)cd
|
||||
</pre>
|
||||
PCRE2 knows that any matching string must contain the letter "d". If the
|
||||
subject string is "abyz", the lack of "d" means that matching doesn't ever
|
||||
start, and the callout is never reached. However, with "abyd", though the
|
||||
result is still no match, the callout is obeyed.
|
||||
</P>
|
||||
<P>
|
||||
PCRE2 also knows the minimum length of a matching string, and will immediately
|
||||
give a "no match" return without actually running a match if the subject is not
|
||||
long enough, or, for unanchored patterns, if it has been scanned far enough.
|
||||
</P>
|
||||
<P>
|
||||
You can disable these optimizations by passing the PCRE2_NO_START_OPTIMIZE
|
||||
option to <b>pcre2_compile()</b>, or by starting the pattern with
|
||||
(*NO_START_OPT). This slows down the matching process, but does ensure that
|
||||
callouts such as the example above are obeyed.
|
||||
<a name="calloutinterface"></a></P>
|
||||
<br><a name="SEC4" href="#TOC1">THE CALLOUT INTERFACE</a><br>
|
||||
<P>
|
||||
During matching, when PCRE2 reaches a callout point, if an external function is
|
||||
set in the match context, it is called. This applies to both normal and DFA
|
||||
matching. The first argument to the callout function is a pointer to a
|
||||
<b>pcre2_callout</b> block. The second argument is the void * callout data that
|
||||
was supplied when the callout was set up by calling <b>pcre2_set_callout()</b>
|
||||
(see the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
documentation). The callout block structure contains the following fields:
|
||||
<pre>
|
||||
uint32_t <i>version</i>;
|
||||
uint32_t <i>callout_number</i>;
|
||||
uint32_t <i>capture_top</i>;
|
||||
uint32_t <i>capture_last</i>;
|
||||
PCRE2_SIZE *<i>offset_vector</i>;
|
||||
PCRE2_SPTR <i>mark</i>;
|
||||
PCRE2_SPTR <i>subject</i>;
|
||||
PCRE2_SIZE <i>subject_length</i>;
|
||||
PCRE2_SIZE <i>start_match</i>;
|
||||
PCRE2_SIZE <i>current_position</i>;
|
||||
PCRE2_SIZE <i>pattern_position</i>;
|
||||
PCRE2_SIZE <i>next_item_length</i>;
|
||||
PCRE2_SIZE <i>callout_string_offset</i>;
|
||||
PCRE2_SIZE <i>callout_string_length</i>;
|
||||
PCRE2_SPTR <i>callout_string</i>;
|
||||
</pre>
|
||||
The <i>version</i> field contains the version number of the block format. The
|
||||
current version is 1; the three callout string fields were added for this
|
||||
version. If you are writing an application that might use an earlier release of
|
||||
PCRE2, you should check the version number before accessing any of these
|
||||
fields. The version number will increase in future if more fields are added,
|
||||
but the intention is never to remove any of the existing fields.
|
||||
</P>
|
||||
<br><b>
|
||||
Fields for numerical callouts
|
||||
</b><br>
|
||||
<P>
|
||||
For a numerical callout, <i>callout_string</i> is NULL, and <i>callout_number</i>
|
||||
contains the number of the callout, in the range 0-255. This is the number
|
||||
that follows (?C for manual callouts; it is 255 for automatically generated
|
||||
callouts.
|
||||
</P>
|
||||
<br><b>
|
||||
Fields for string callouts
|
||||
</b><br>
|
||||
<P>
|
||||
For callouts with string arguments, <i>callout_number</i> is always zero, and
|
||||
<i>callout_string</i> points to the string that is contained within the compiled
|
||||
pattern. Its length is given by <i>callout_string_length</i>. Duplicated ending
|
||||
delimiters that were present in the original pattern string have been turned
|
||||
into single characters, but there is no other processing of the callout string
|
||||
argument. An additional code unit containing binary zero is present after the
|
||||
string, but is not included in the length. The delimiter that was used to start
|
||||
the string is also stored within the pattern, immediately before the string
|
||||
itself. You can access this delimiter as <i>callout_string</i>[-1] if you need
|
||||
it.
|
||||
</P>
|
||||
<P>
|
||||
The <i>callout_string_offset</i> field is the code unit offset to the start of
|
||||
the callout argument string within the original pattern string. This is
|
||||
provided for the benefit of applications such as script languages that might
|
||||
need to report errors in the callout string within the pattern.
|
||||
</P>
|
||||
<br><b>
|
||||
Fields for all callouts
|
||||
</b><br>
|
||||
<P>
|
||||
The remaining fields in the callout block are the same for both kinds of
|
||||
callout.
|
||||
</P>
|
||||
<P>
|
||||
The <i>offset_vector</i> field is a pointer to the vector of capturing offsets
|
||||
(the "ovector") that was passed to the matching function in the match data
|
||||
block. When <b>pcre2_match()</b> is used, the contents can be inspected in
|
||||
order to extract substrings that have been matched so far, in the same way as
|
||||
for extracting substrings after a match has completed. For the DFA matching
|
||||
function, this field is not useful.
|
||||
</P>
|
||||
<P>
|
||||
The <i>subject</i> and <i>subject_length</i> fields contain copies of the values
|
||||
that were passed to the matching function.
|
||||
</P>
|
||||
<P>
|
||||
The <i>start_match</i> field normally contains the offset within the subject at
|
||||
which the current match attempt started. However, if the escape sequence \K
|
||||
has been encountered, this value is changed to reflect the modified starting
|
||||
point. If the pattern is not anchored, the callout function may be called
|
||||
several times from the same point in the pattern for different starting points
|
||||
in the subject.
|
||||
</P>
|
||||
<P>
|
||||
The <i>current_position</i> field contains the offset within the subject of the
|
||||
current match pointer.
|
||||
</P>
|
||||
<P>
|
||||
When <b>pcre2_match()</b> is used, the <i>capture_top</i> field contains one
|
||||
more than the number of the highest numbered captured substring so far. If no
|
||||
substrings have been captured, the value of <i>capture_top</i> is one. This is
|
||||
always the case when the DFA functions are used, because they do not support
|
||||
captured substrings.
|
||||
</P>
|
||||
<P>
|
||||
The <i>capture_last</i> field contains the number of the most recently captured
|
||||
substring. However, when a recursion exits, the value reverts to what it was
|
||||
outside the recursion, as do the values of all captured substrings. If no
|
||||
substrings have been captured, the value of <i>capture_last</i> is 0. This is
|
||||
always the case for the DFA matching functions.
|
||||
</P>
|
||||
<P>
|
||||
The <i>pattern_position</i> field contains the offset in the pattern string to
|
||||
the next item to be matched.
|
||||
</P>
|
||||
<P>
|
||||
The <i>next_item_length</i> field contains the length of the next item to be
|
||||
matched in the pattern string. When the callout immediately precedes an
|
||||
alternation bar, a closing parenthesis, or the end of the pattern, the length
|
||||
is zero. When the callout precedes an opening parenthesis, the length is that
|
||||
of the entire subpattern.
|
||||
</P>
|
||||
<P>
|
||||
The <i>pattern_position</i> and <i>next_item_length</i> fields are intended to
|
||||
help in distinguishing between different automatic callouts, which all have the
|
||||
same callout number. However, they are set for all callouts, and are used by
|
||||
<b>pcre2test</b> to show the next item to be matched when displaying callout
|
||||
information.
|
||||
</P>
|
||||
<P>
|
||||
In callouts from <b>pcre2_match()</b> the <i>mark</i> field contains a pointer to
|
||||
the zero-terminated name of the most recently passed (*MARK), (*PRUNE), or
|
||||
(*THEN) item in the match, or NULL if no such items have been passed. Instances
|
||||
of (*PRUNE) or (*THEN) without a name do not obliterate a previous (*MARK). In
|
||||
callouts from the DFA matching function this field always contains NULL.
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">RETURN VALUES FROM CALLOUTS</a><br>
|
||||
<P>
|
||||
The external callout function returns an integer to PCRE2. If the value is
|
||||
zero, matching proceeds as normal. If the value is greater than zero, matching
|
||||
fails at the current point, but the testing of other matching possibilities
|
||||
goes ahead, just as if a lookahead assertion had failed. If the value is less
|
||||
than zero, the match is abandoned, and the matching function returns the
|
||||
negative value.
|
||||
</P>
|
||||
<P>
|
||||
Negative values should normally be chosen from the set of PCRE2_ERROR_xxx
|
||||
values. In particular, PCRE2_ERROR_NOMATCH forces a standard "no match"
|
||||
failure. The error number PCRE2_ERROR_CALLOUT is reserved for use by callout
|
||||
functions; it will never be used by PCRE2 itself.
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">CALLOUT ENUMERATION</a><br>
|
||||
<P>
|
||||
<b>int pcre2_callout_enumerate(const pcre2_code *<i>code</i>,</b>
|
||||
<b> int (*<i>callback</i>)(pcre2_callout_enumerate_block *, void *),</b>
|
||||
<b> void *<i>user_data</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
A script language that supports the use of string arguments in callouts might
|
||||
like to scan all the callouts in a pattern before running the match. This can
|
||||
be done by calling <b>pcre2_callout_enumerate()</b>. The first argument is a
|
||||
pointer to a compiled pattern, the second points to a callback function, and
|
||||
the third is arbitrary user data. The callback function is called for every
|
||||
callout in the pattern in the order in which they appear. Its first argument is
|
||||
a pointer to a callout enumeration block, and its second argument is the
|
||||
<i>user_data</i> value that was passed to <b>pcre2_callout_enumerate()</b>. The
|
||||
data block contains the following fields:
|
||||
<pre>
|
||||
<i>version</i> Block version number
|
||||
<i>pattern_position</i> Offset to next item in pattern
|
||||
<i>next_item_length</i> Length of next item in pattern
|
||||
<i>callout_number</i> Number for numbered callouts
|
||||
<i>callout_string_offset</i> Offset to string within pattern
|
||||
<i>callout_string_length</i> Length of callout string
|
||||
<i>callout_string</i> Points to callout string or is NULL
|
||||
</pre>
|
||||
The version number is currently 0. It will increase if new fields are ever
|
||||
added to the block. The remaining fields are the same as their namesakes in the
|
||||
<b>pcre2_callout</b> block that is used for callouts during matching, as
|
||||
described
|
||||
<a href="#calloutinterface">above.</a>
|
||||
</P>
|
||||
<P>
|
||||
Note that the value of <i>pattern_position</i> is unique for each callout.
|
||||
However, if a callout occurs inside a group that is quantified with a non-zero
|
||||
minimum or a fixed maximum, the group is replicated inside the compiled
|
||||
pattern. For example, a pattern such as /(a){2}/ is compiled as if it were
|
||||
/(a)(a)/. This means that the callout will be enumerated more than once, but
|
||||
with the same value for <i>pattern_position</i> in each case.
|
||||
</P>
|
||||
<P>
|
||||
The callback function should normally return zero. If it returns a non-zero
|
||||
value, scanning the pattern stops, and that value is returned from
|
||||
<b>pcre2_callout_enumerate()</b>.
|
||||
</P>
|
||||
<br><a name="SEC7" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
University Computing Service
|
||||
<br>
|
||||
Cambridge, England.
|
||||
<br>
|
||||
</P>
|
||||
<br><a name="SEC8" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 23 March 2015
|
||||
<br>
|
||||
Copyright © 1997-2015 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
223
pcre2/doc/html/pcre2compat.html
Normal file
223
pcre2/doc/html/pcre2compat.html
Normal file
@ -0,0 +1,223 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2compat specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2compat man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
DIFFERENCES BETWEEN PCRE2 AND PERL
|
||||
</b><br>
|
||||
<P>
|
||||
This document describes the differences in the ways that PCRE2 and Perl handle
|
||||
regular expressions. The differences described here are with respect to Perl
|
||||
versions 5.10 and above.
|
||||
</P>
|
||||
<P>
|
||||
1. PCRE2 has only a subset of Perl's Unicode support. Details of what it does
|
||||
have are given in the
|
||||
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
|
||||
page.
|
||||
</P>
|
||||
<P>
|
||||
2. PCRE2 allows repeat quantifiers only on parenthesized assertions, but they
|
||||
do not mean what you might think. For example, (?!a){3} does not assert that
|
||||
the next three characters are not "a". It just asserts that the next character
|
||||
is not "a" three times (in principle: PCRE2 optimizes this to run the assertion
|
||||
just once). Perl allows repeat quantifiers on other assertions such as \b, but
|
||||
these do not seem to have any use.
|
||||
</P>
|
||||
<P>
|
||||
3. Capturing subpatterns that occur inside negative lookahead assertions are
|
||||
counted, but their entries in the offsets vector are never set. Perl sometimes
|
||||
(but not always) sets its numerical variables from inside negative assertions.
|
||||
</P>
|
||||
<P>
|
||||
4. The following Perl escape sequences are not supported: \l, \u, \L,
|
||||
\U, and \N when followed by a character name or Unicode value. (\N on its
|
||||
own, matching a non-newline character, is supported.) In fact these are
|
||||
implemented by Perl's general string-handling and are not part of its pattern
|
||||
matching engine. If any of these are encountered by PCRE2, an error is
|
||||
generated by default. However, if the PCRE2_ALT_BSUX option is set,
|
||||
\U and \u are interpreted as ECMAScript interprets them.
|
||||
</P>
|
||||
<P>
|
||||
5. The Perl escape sequences \p, \P, and \X are supported only if PCRE2 is
|
||||
built with Unicode support. The properties that can be tested with \p and \P
|
||||
are limited to the general category properties such as Lu and Nd, script names
|
||||
such as Greek or Han, and the derived properties Any and L&. PCRE2 does support
|
||||
the Cs (surrogate) property, which Perl does not; the Perl documentation says
|
||||
"Because Perl hides the need for the user to understand the internal
|
||||
representation of Unicode characters, there is no need to implement the
|
||||
somewhat messy concept of surrogates."
|
||||
</P>
|
||||
<P>
|
||||
6. PCRE2 does support the \Q...\E escape for quoting substrings. Characters
|
||||
in between are treated as literals. This is slightly different from Perl in
|
||||
that $ and @ are also handled as literals inside the quotes. In Perl, they
|
||||
cause variable interpolation (but of course PCRE2 does not have variables).
|
||||
Note the following examples:
|
||||
<pre>
|
||||
Pattern PCRE2 matches Perl matches
|
||||
|
||||
\Qabc$xyz\E abc$xyz abc followed by the contents of $xyz
|
||||
\Qabc\$xyz\E abc\$xyz abc\$xyz
|
||||
\Qabc\E\$\Qxyz\E abc$xyz abc$xyz
|
||||
</pre>
|
||||
The \Q...\E sequence is recognized both inside and outside character classes.
|
||||
</P>
|
||||
<P>
|
||||
7. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code})
|
||||
constructions. However, there is support for recursive patterns. This is not
|
||||
available in Perl 5.8, but it is in Perl 5.10. Also, the PCRE2 "callout"
|
||||
feature allows an external function to be called during pattern matching. See
|
||||
the
|
||||
<a href="pcre2callout.html"><b>pcre2callout</b></a>
|
||||
documentation for details.
|
||||
</P>
|
||||
<P>
|
||||
8. Subroutine calls (whether recursive or not) are treated as atomic groups.
|
||||
Atomic recursion is like Python, but unlike Perl. Captured values that are set
|
||||
outside a subroutine call can be referenced from inside in PCRE2, but not in
|
||||
Perl. There is a discussion that explains these differences in more detail in
|
||||
the
|
||||
<a href="pcre2pattern.html#recursiondifference">section on recursion differences from Perl</a>
|
||||
in the
|
||||
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
|
||||
page.
|
||||
</P>
|
||||
<P>
|
||||
9. If any of the backtracking control verbs are used in a subpattern that is
|
||||
called as a subroutine (whether or not recursively), their effect is confined
|
||||
to that subpattern; it does not extend to the surrounding pattern. This is not
|
||||
always the case in Perl. In particular, if (*THEN) is present in a group that
|
||||
is called as a subroutine, its action is limited to that group, even if the
|
||||
group does not contain any | characters. Note that such subpatterns are
|
||||
processed as anchored at the point where they are tested.
|
||||
</P>
|
||||
<P>
|
||||
10. If a pattern contains more than one backtracking control verb, the first
|
||||
one that is backtracked onto acts. For example, in the pattern
|
||||
A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C
|
||||
triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the
|
||||
same as PCRE2, but there are examples where it differs.
|
||||
</P>
|
||||
<P>
|
||||
11. Most backtracking verbs in assertions have their normal actions. They are
|
||||
not confined to the assertion.
|
||||
</P>
|
||||
<P>
|
||||
12. There are some differences that are concerned with the settings of captured
|
||||
strings when part of a pattern is repeated. For example, matching "aba" against
|
||||
the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE2 it is set to
|
||||
"b".
|
||||
</P>
|
||||
<P>
|
||||
13. PCRE2's handling of duplicate subpattern numbers and duplicate subpattern
|
||||
names is not as general as Perl's. This is a consequence of the fact that PCRE2
|
||||
works internally just with numbers, using an external table to translate
|
||||
between numbers and names. In particular, a pattern such as (?|(?<a>A)|(?<b>B)),
|
||||
where the two capturing parentheses have the same number but different names,
|
||||
is not supported, and causes an error at compile time. If it were allowed, it
|
||||
would not be possible to distinguish which parentheses matched, because both
|
||||
names map to capturing subpattern number 1. To avoid this confusing situation,
|
||||
an error is given at compile time.
|
||||
</P>
|
||||
<P>
|
||||
14. Perl recognizes comments in some places that PCRE2 does not, for example,
|
||||
between the ( and ? at the start of a subpattern. If the /x modifier is set,
|
||||
Perl allows white space between ( and ? (though current Perls warn that this is
|
||||
deprecated) but PCRE2 never does, even if the PCRE2_EXTENDED option is set.
|
||||
</P>
|
||||
<P>
|
||||
15. Perl, when in warning mode, gives warnings for character classes such as
|
||||
[A-\d] or [a-[:digit:]]. It then treats the hyphens as literals. PCRE2 has no
|
||||
warning features, so it gives an error in these cases because they are almost
|
||||
certainly user mistakes.
|
||||
</P>
|
||||
<P>
|
||||
16. In PCRE2, the upper/lower case character properties Lu and Ll are not
|
||||
affected when case-independent matching is specified. For example, \p{Lu}
|
||||
always matches an upper case letter. I think Perl has changed in this respect;
|
||||
in the release at the time of writing (5.16), \p{Lu} and \p{Ll} match all
|
||||
letters, regardless of case, when case independence is specified.
|
||||
</P>
|
||||
<P>
|
||||
17. PCRE2 provides some extensions to the Perl regular expression facilities.
|
||||
Perl 5.10 includes new features that are not in earlier versions of Perl, some
|
||||
of which (such as named parentheses) have been in PCRE2 for some time. This
|
||||
list is with respect to Perl 5.10:
|
||||
<br>
|
||||
<br>
|
||||
(a) Although lookbehind assertions in PCRE2 must match fixed length strings,
|
||||
each alternative branch of a lookbehind assertion can match a different length
|
||||
of string. Perl requires them all to have the same length.
|
||||
<br>
|
||||
<br>
|
||||
(b) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the $
|
||||
meta-character matches only at the very end of the string.
|
||||
<br>
|
||||
<br>
|
||||
(c) A backslash followed by a letter with no special meaning is faulted. (Perl
|
||||
can be made to issue a warning.)
|
||||
<br>
|
||||
<br>
|
||||
(d) If PCRE2_UNGREEDY is set, the greediness of the repetition quantifiers is
|
||||
inverted, that is, by default they are not greedy, but if followed by a
|
||||
question mark they are.
|
||||
<br>
|
||||
<br>
|
||||
(e) PCRE2_ANCHORED can be used at matching time to force a pattern to be tried
|
||||
only at the first matching position in the subject string.
|
||||
<br>
|
||||
<br>
|
||||
(f) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, and
|
||||
PCRE2_NO_AUTO_CAPTURE options have no Perl equivalents.
|
||||
<br>
|
||||
<br>
|
||||
(g) The \R escape sequence can be restricted to match only CR, LF, or CRLF
|
||||
by the PCRE2_BSR_ANYCRLF option.
|
||||
<br>
|
||||
<br>
|
||||
(h) The callout facility is PCRE2-specific.
|
||||
<br>
|
||||
<br>
|
||||
(i) The partial matching facility is PCRE2-specific.
|
||||
<br>
|
||||
<br>
|
||||
(j) The alternative matching function (<b>pcre2_dfa_match()</b>) matches in a
|
||||
different way and is not Perl-compatible.
|
||||
<br>
|
||||
<br>
|
||||
(k) PCRE2 recognizes some special sequences such as (*CR) at the start of
|
||||
a pattern that set overall options that cannot be changed within the pattern.
|
||||
</P>
|
||||
<br><b>
|
||||
AUTHOR
|
||||
</b><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
University Computing Service
|
||||
<br>
|
||||
Cambridge, England.
|
||||
<br>
|
||||
</P>
|
||||
<br><b>
|
||||
REVISION
|
||||
</b><br>
|
||||
<P>
|
||||
Last updated: 15 March 2015
|
||||
<br>
|
||||
Copyright © 1997-2015 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
443
pcre2/doc/html/pcre2demo.html
Normal file
443
pcre2/doc/html/pcre2demo.html
Normal file
@ -0,0 +1,443 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2demo specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2demo man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<ul>
|
||||
</ul>
|
||||
<PRE>
|
||||
/*************************************************
|
||||
* PCRE2 DEMONSTRATION PROGRAM *
|
||||
*************************************************/
|
||||
|
||||
/* This is a demonstration program to illustrate a straightforward way of
|
||||
calling the PCRE2 regular expression library from a C program. See the
|
||||
pcre2sample documentation for a short discussion ("man pcre2sample" if you have
|
||||
the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
|
||||
incompatible with the original PCRE API.
|
||||
|
||||
There are actually three libraries, each supporting a different code unit
|
||||
width. This demonstration program uses the 8-bit library.
|
||||
|
||||
In Unix-like environments, if PCRE2 is installed in your standard system
|
||||
libraries, you should be able to compile this program using this command:
|
||||
|
||||
gcc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo
|
||||
|
||||
If PCRE2 is not installed in a standard place, it is likely to be installed
|
||||
with support for the pkg-config mechanism. If you have pkg-config, you can
|
||||
compile this program using this command:
|
||||
|
||||
gcc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo
|
||||
|
||||
If you do not have pkg-config, you may have to use this:
|
||||
|
||||
gcc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \
|
||||
-R/usr/local/lib -lpcre2-8 -o pcre2demo
|
||||
|
||||
Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
|
||||
library files for PCRE2 are installed on your system. Only some operating
|
||||
systems (Solaris is one) use the -R option.
|
||||
|
||||
Building under Windows:
|
||||
|
||||
If you want to statically link this program against a non-dll .a file, you must
|
||||
define PCRE2_STATIC before including pcre2.h, so in this environment, uncomment
|
||||
the following line. */
|
||||
|
||||
/* #define PCRE2_STATIC */
|
||||
|
||||
/* This macro must be defined before including pcre2.h. For a program that uses
|
||||
only one code unit width, it makes it possible to use generic function names
|
||||
such as pcre2_compile(). */
|
||||
|
||||
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <pcre2.h>
|
||||
|
||||
|
||||
/**************************************************************************
|
||||
* Here is the program. The API includes the concept of "contexts" for *
|
||||
* setting up unusual interface requirements for compiling and matching, *
|
||||
* such as custom memory managers and non-standard newline definitions. *
|
||||
* This program does not do any of this, so it makes no use of contexts, *
|
||||
* always passing NULL where a context could be given. *
|
||||
**************************************************************************/
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
pcre2_code *re;
|
||||
PCRE2_SPTR pattern; /* PCRE2_SPTR is a pointer to unsigned code units of */
|
||||
PCRE2_SPTR subject; /* the appropriate width (8, 16, or 32 bits). */
|
||||
PCRE2_SPTR name_table;
|
||||
|
||||
int crlf_is_newline;
|
||||
int errornumber;
|
||||
int find_all;
|
||||
int i;
|
||||
int namecount;
|
||||
int name_entry_size;
|
||||
int rc;
|
||||
int utf8;
|
||||
|
||||
uint32_t option_bits;
|
||||
uint32_t newline;
|
||||
|
||||
PCRE2_SIZE erroroffset;
|
||||
PCRE2_SIZE *ovector;
|
||||
|
||||
size_t subject_length;
|
||||
pcre2_match_data *match_data;
|
||||
|
||||
|
||||
|
||||
/**************************************************************************
|
||||
* First, sort out the command line. There is only one possible option at *
|
||||
* the moment, "-g" to request repeated matching to find all occurrences, *
|
||||
* like Perl's /g option. We set the variable find_all to a non-zero value *
|
||||
* if the -g option is present. Apart from that, there must be exactly two *
|
||||
* arguments. *
|
||||
**************************************************************************/
|
||||
|
||||
find_all = 0;
|
||||
for (i = 1; i < argc; i++)
|
||||
{
|
||||
if (strcmp(argv[i], "-g") == 0) find_all = 1;
|
||||
else break;
|
||||
}
|
||||
|
||||
/* After the options, we require exactly two arguments, which are the pattern,
|
||||
and the subject string. */
|
||||
|
||||
if (argc - i != 2)
|
||||
{
|
||||
printf("Two arguments required: a regex and a subject string\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* As pattern and subject are char arguments, they can be straightforwardly
|
||||
cast to PCRE2_SPTR as we are working in 8-bit code units. */
|
||||
|
||||
pattern = (PCRE2_SPTR)argv[i];
|
||||
subject = (PCRE2_SPTR)argv[i+1];
|
||||
subject_length = strlen((char *)subject);
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
* Now we are going to compile the regular expression pattern, and handle *
|
||||
* any errors that are detected. *
|
||||
*************************************************************************/
|
||||
|
||||
re = pcre2_compile(
|
||||
pattern, /* the pattern */
|
||||
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
|
||||
0, /* default options */
|
||||
&errornumber, /* for error number */
|
||||
&erroroffset, /* for error offset */
|
||||
NULL); /* use default compile context */
|
||||
|
||||
/* Compilation failed: print the error message and exit. */
|
||||
|
||||
if (re == NULL)
|
||||
{
|
||||
PCRE2_UCHAR buffer[256];
|
||||
pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
|
||||
printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset,
|
||||
buffer);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
* If the compilation succeeded, we call PCRE again, in order to do a *
|
||||
* pattern match against the subject string. This does just ONE match. If *
|
||||
* further matching is needed, it will be done below. Before running the *
|
||||
* match we must set up a match_data block for holding the result. *
|
||||
*************************************************************************/
|
||||
|
||||
/* Using this function ensures that the block is exactly the right size for
|
||||
the number of capturing parentheses in the pattern. */
|
||||
|
||||
match_data = pcre2_match_data_create_from_pattern(re, NULL);
|
||||
|
||||
rc = pcre2_match(
|
||||
re, /* the compiled pattern */
|
||||
subject, /* the subject string */
|
||||
subject_length, /* the length of the subject */
|
||||
0, /* start at offset 0 in the subject */
|
||||
0, /* default options */
|
||||
match_data, /* block for storing the result */
|
||||
NULL); /* use default match context */
|
||||
|
||||
/* Matching failed: handle error cases */
|
||||
|
||||
if (rc < 0)
|
||||
{
|
||||
switch(rc)
|
||||
{
|
||||
case PCRE2_ERROR_NOMATCH: printf("No match\n"); break;
|
||||
/*
|
||||
Handle other special cases if you like
|
||||
*/
|
||||
default: printf("Matching error %d\n", rc); break;
|
||||
}
|
||||
pcre2_match_data_free(match_data); /* Release memory used for the match */
|
||||
pcre2_code_free(re); /* data and the compiled pattern. */
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Match succeded. Get a pointer to the output vector, where string offsets are
|
||||
stored. */
|
||||
|
||||
ovector = pcre2_get_ovector_pointer(match_data);
|
||||
printf("\nMatch succeeded at offset %d\n", (int)ovector[0]);
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
* We have found the first match within the subject string. If the output *
|
||||
* vector wasn't big enough, say so. Then output any substrings that were *
|
||||
* captured. *
|
||||
*************************************************************************/
|
||||
|
||||
/* The output vector wasn't big enough. This should not happen, because we used
|
||||
pcre2_match_data_create_from_pattern() above. */
|
||||
|
||||
if (rc == 0)
|
||||
printf("ovector was not big enough for all the captured substrings\n");
|
||||
|
||||
/* Show substrings stored in the output vector by number. Obviously, in a real
|
||||
application you might want to do things other than print them. */
|
||||
|
||||
for (i = 0; i < rc; i++)
|
||||
{
|
||||
PCRE2_SPTR substring_start = subject + ovector[2*i];
|
||||
size_t substring_length = ovector[2*i+1] - ovector[2*i];
|
||||
printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start);
|
||||
}
|
||||
|
||||
|
||||
/**************************************************************************
|
||||
* That concludes the basic part of this demonstration program. We have *
|
||||
* compiled a pattern, and performed a single match. The code that follows *
|
||||
* shows first how to access named substrings, and then how to code for *
|
||||
* repeated matches on the same subject. *
|
||||
**************************************************************************/
|
||||
|
||||
/* See if there are any named substrings, and if so, show them by name. First
|
||||
we have to extract the count of named parentheses from the pattern. */
|
||||
|
||||
(void)pcre2_pattern_info(
|
||||
re, /* the compiled pattern */
|
||||
PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */
|
||||
&namecount); /* where to put the answer */
|
||||
|
||||
if (namecount <= 0) printf("No named substrings\n"); else
|
||||
{
|
||||
PCRE2_SPTR tabptr;
|
||||
printf("Named substrings\n");
|
||||
|
||||
/* Before we can access the substrings, we must extract the table for
|
||||
translating names to numbers, and the size of each entry in the table. */
|
||||
|
||||
(void)pcre2_pattern_info(
|
||||
re, /* the compiled pattern */
|
||||
PCRE2_INFO_NAMETABLE, /* address of the table */
|
||||
&name_table); /* where to put the answer */
|
||||
|
||||
(void)pcre2_pattern_info(
|
||||
re, /* the compiled pattern */
|
||||
PCRE2_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
|
||||
&name_entry_size); /* where to put the answer */
|
||||
|
||||
/* Now we can scan the table and, for each entry, print the number, the name,
|
||||
and the substring itself. In the 8-bit library the number is held in two
|
||||
bytes, most significant first. */
|
||||
|
||||
tabptr = name_table;
|
||||
for (i = 0; i < namecount; i++)
|
||||
{
|
||||
int n = (tabptr[0] << 8) | tabptr[1];
|
||||
printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
|
||||
(int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]);
|
||||
tabptr += name_entry_size;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
* If the "-g" option was given on the command line, we want to continue *
|
||||
* to search for additional matches in the subject string, in a similar *
|
||||
* way to the /g option in Perl. This turns out to be trickier than you *
|
||||
* might think because of the possibility of matching an empty string. *
|
||||
* What happens is as follows: *
|
||||
* *
|
||||
* If the previous match was NOT for an empty string, we can just start *
|
||||
* the next match at the end of the previous one. *
|
||||
* *
|
||||
* If the previous match WAS for an empty string, we can't do that, as it *
|
||||
* would lead to an infinite loop. Instead, a call of pcre2_match() is *
|
||||
* made with the PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set. The *
|
||||
* first of these tells PCRE2 that an empty string at the start of the *
|
||||
* subject is not a valid match; other possibilities must be tried. The *
|
||||
* second flag restricts PCRE2 to one match attempt at the initial string *
|
||||
* position. If this match succeeds, an alternative to the empty string *
|
||||
* match has been found, and we can print it and proceed round the loop, *
|
||||
* advancing by the length of whatever was found. If this match does not *
|
||||
* succeed, we still stay in the loop, advancing by just one character. *
|
||||
* In UTF-8 mode, which can be set by (*UTF) in the pattern, this may be *
|
||||
* more than one byte. *
|
||||
* *
|
||||
* However, there is a complication concerned with newlines. When the *
|
||||
* newline convention is such that CRLF is a valid newline, we must *
|
||||
* advance by two characters rather than one. The newline convention can *
|
||||
* be set in the regex by (*CR), etc.; if not, we must find the default. *
|
||||
*************************************************************************/
|
||||
|
||||
if (!find_all) /* Check for -g */
|
||||
{
|
||||
pcre2_match_data_free(match_data); /* Release the memory that was used */
|
||||
pcre2_code_free(re); /* for the match data and the pattern. */
|
||||
return 0; /* Exit the program. */
|
||||
}
|
||||
|
||||
/* Before running the loop, check for UTF-8 and whether CRLF is a valid newline
|
||||
sequence. First, find the options with which the regex was compiled and extract
|
||||
the UTF state. */
|
||||
|
||||
(void)pcre2_pattern_info(re, PCRE2_INFO_ALLOPTIONS, &option_bits);
|
||||
utf8 = (option_bits & PCRE2_UTF) != 0;
|
||||
|
||||
/* Now find the newline convention and see whether CRLF is a valid newline
|
||||
sequence. */
|
||||
|
||||
(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline);
|
||||
crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
|
||||
newline == PCRE2_NEWLINE_CRLF ||
|
||||
newline == PCRE2_NEWLINE_ANYCRLF;
|
||||
|
||||
/* Loop for second and subsequent matches */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
uint32_t options = 0; /* Normally no options */
|
||||
PCRE2_SIZE start_offset = ovector[1]; /* Start at end of previous match */
|
||||
|
||||
/* If the previous match was for an empty string, we are finished if we are
|
||||
at the end of the subject. Otherwise, arrange to run another match at the
|
||||
same point to see if a non-empty match can be found. */
|
||||
|
||||
if (ovector[0] == ovector[1])
|
||||
{
|
||||
if (ovector[0] == subject_length) break;
|
||||
options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
|
||||
}
|
||||
|
||||
/* Run the next matching operation */
|
||||
|
||||
rc = pcre2_match(
|
||||
re, /* the compiled pattern */
|
||||
subject, /* the subject string */
|
||||
subject_length, /* the length of the subject */
|
||||
start_offset, /* starting offset in the subject */
|
||||
options, /* options */
|
||||
match_data, /* block for storing the result */
|
||||
NULL); /* use default match context */
|
||||
|
||||
/* This time, a result of NOMATCH isn't an error. If the value in "options"
|
||||
is zero, it just means we have found all possible matches, so the loop ends.
|
||||
Otherwise, it means we have failed to find a non-empty-string match at a
|
||||
point where there was a previous empty-string match. In this case, we do what
|
||||
Perl does: advance the matching position by one character, and continue. We
|
||||
do this by setting the "end of previous match" offset, because that is picked
|
||||
up at the top of the loop as the point at which to start again.
|
||||
|
||||
There are two complications: (a) When CRLF is a valid newline sequence, and
|
||||
the current position is just before it, advance by an extra byte. (b)
|
||||
Otherwise we must ensure that we skip an entire UTF character if we are in
|
||||
UTF mode. */
|
||||
|
||||
if (rc == PCRE2_ERROR_NOMATCH)
|
||||
{
|
||||
if (options == 0) break; /* All matches found */
|
||||
ovector[1] = start_offset + 1; /* Advance one code unit */
|
||||
if (crlf_is_newline && /* If CRLF is newline & */
|
||||
start_offset < subject_length - 1 && /* we are at CRLF, */
|
||||
subject[start_offset] == '\r' &&
|
||||
subject[start_offset + 1] == '\n')
|
||||
ovector[1] += 1; /* Advance by one more. */
|
||||
else if (utf8) /* Otherwise, ensure we */
|
||||
{ /* advance a whole UTF-8 */
|
||||
while (ovector[1] < subject_length) /* character. */
|
||||
{
|
||||
if ((subject[ovector[1]] & 0xc0) != 0x80) break;
|
||||
ovector[1] += 1;
|
||||
}
|
||||
}
|
||||
continue; /* Go round the loop again */
|
||||
}
|
||||
|
||||
/* Other matching errors are not recoverable. */
|
||||
|
||||
if (rc < 0)
|
||||
{
|
||||
printf("Matching error %d\n", rc);
|
||||
pcre2_match_data_free(match_data);
|
||||
pcre2_code_free(re);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Match succeded */
|
||||
|
||||
printf("\nMatch succeeded again at offset %d\n", (int)ovector[0]);
|
||||
|
||||
/* The match succeeded, but the output vector wasn't big enough. This
|
||||
should not happen. */
|
||||
|
||||
if (rc == 0)
|
||||
printf("ovector was not big enough for all the captured substrings\n");
|
||||
|
||||
/* As before, show substrings stored in the output vector by number, and then
|
||||
also any named substrings. */
|
||||
|
||||
for (i = 0; i < rc; i++)
|
||||
{
|
||||
PCRE2_SPTR substring_start = subject + ovector[2*i];
|
||||
size_t substring_length = ovector[2*i+1] - ovector[2*i];
|
||||
printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start);
|
||||
}
|
||||
|
||||
if (namecount <= 0) printf("No named substrings\n"); else
|
||||
{
|
||||
PCRE2_SPTR tabptr = name_table;
|
||||
printf("Named substrings\n");
|
||||
for (i = 0; i < namecount; i++)
|
||||
{
|
||||
int n = (tabptr[0] << 8) | tabptr[1];
|
||||
printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
|
||||
(int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]);
|
||||
tabptr += name_entry_size;
|
||||
}
|
||||
}
|
||||
} /* End of loop to find second and subsequent matches */
|
||||
|
||||
printf("\n");
|
||||
pcre2_match_data_free(match_data);
|
||||
pcre2_code_free(re);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre2demo.c */
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
783
pcre2/doc/html/pcre2grep.html
Normal file
783
pcre2/doc/html/pcre2grep.html
Normal file
@ -0,0 +1,783 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2grep specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2grep man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<ul>
|
||||
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
|
||||
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
|
||||
<li><a name="TOC3" href="#SEC3">SUPPORT FOR COMPRESSED FILES</a>
|
||||
<li><a name="TOC4" href="#SEC4">BINARY FILES</a>
|
||||
<li><a name="TOC5" href="#SEC5">OPTIONS</a>
|
||||
<li><a name="TOC6" href="#SEC6">ENVIRONMENT VARIABLES</a>
|
||||
<li><a name="TOC7" href="#SEC7">NEWLINES</a>
|
||||
<li><a name="TOC8" href="#SEC8">OPTIONS COMPATIBILITY</a>
|
||||
<li><a name="TOC9" href="#SEC9">OPTIONS WITH DATA</a>
|
||||
<li><a name="TOC10" href="#SEC10">MATCHING ERRORS</a>
|
||||
<li><a name="TOC11" href="#SEC11">DIAGNOSTICS</a>
|
||||
<li><a name="TOC12" href="#SEC12">SEE ALSO</a>
|
||||
<li><a name="TOC13" href="#SEC13">AUTHOR</a>
|
||||
<li><a name="TOC14" href="#SEC14">REVISION</a>
|
||||
</ul>
|
||||
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
|
||||
<P>
|
||||
<b>pcre2grep [options] [long options] [pattern] [path1 path2 ...]</b>
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
|
||||
<P>
|
||||
<b>pcre2grep</b> searches files for character patterns, in the same way as other
|
||||
grep commands do, but it uses the PCRE2 regular expression library to support
|
||||
patterns that are compatible with the regular expressions of Perl 5. See
|
||||
<a href="pcre2syntax.html"><b>pcre2syntax</b>(3)</a>
|
||||
for a quick-reference summary of pattern syntax, or
|
||||
<a href="pcre2pattern.html"><b>pcre2pattern</b>(3)</a>
|
||||
for a full description of the syntax and semantics of the regular expressions
|
||||
that PCRE2 supports.
|
||||
</P>
|
||||
<P>
|
||||
Patterns, whether supplied on the command line or in a separate file, are given
|
||||
without delimiters. For example:
|
||||
<pre>
|
||||
pcre2grep Thursday /etc/motd
|
||||
</pre>
|
||||
If you attempt to use delimiters (for example, by surrounding a pattern with
|
||||
slashes, as is common in Perl scripts), they are interpreted as part of the
|
||||
pattern. Quotes can of course be used to delimit patterns on the command line
|
||||
because they are interpreted by the shell, and indeed quotes are required if a
|
||||
pattern contains white space or shell metacharacters.
|
||||
</P>
|
||||
<P>
|
||||
The first argument that follows any option settings is treated as the single
|
||||
pattern to be matched when neither <b>-e</b> nor <b>-f</b> is present.
|
||||
Conversely, when one or both of these options are used to specify patterns, all
|
||||
arguments are treated as path names. At least one of <b>-e</b>, <b>-f</b>, or an
|
||||
argument pattern must be provided.
|
||||
</P>
|
||||
<P>
|
||||
If no files are specified, <b>pcre2grep</b> reads the standard input. The
|
||||
standard input can also be referenced by a name consisting of a single hyphen.
|
||||
For example:
|
||||
<pre>
|
||||
pcre2grep some-pattern file1 - file3
|
||||
</pre>
|
||||
Input files are searched line by line. By default, each line that matches a
|
||||
pattern is copied to the standard output, and if there is more than one file,
|
||||
the file name is output at the start of each line, followed by a colon.
|
||||
However, there are options that can change how <b>pcre2grep</b> behaves. In
|
||||
particular, the <b>-M</b> option makes it possible to search for strings that
|
||||
span line boundaries. What defines a line boundary is controlled by the
|
||||
<b>-N</b> (<b>--newline</b>) option.
|
||||
</P>
|
||||
<P>
|
||||
The amount of memory used for buffering files that are being scanned is
|
||||
controlled by a parameter that can be set by the <b>--buffer-size</b> option.
|
||||
The default value for this parameter is specified when <b>pcre2grep</b> is
|
||||
built; unless overridden at build time, it is 20K. A block of memory three times this
|
||||
size is used (to allow for buffering "before" and "after" lines). An error
|
||||
occurs if a line overflows the buffer.
|
||||
</P>
|
||||
<P>
|
||||
Patterns can be no longer than 8K or BUFSIZ bytes, whichever is the greater.
|
||||
BUFSIZ is defined in <b>&lt;stdio.h&gt;</b>. When there is more than one pattern
|
||||
(specified by the use of <b>-e</b> and/or <b>-f</b>), each pattern is applied to
|
||||
each line in the order in which they are defined, except that all the <b>-e</b>
|
||||
patterns are tried before the <b>-f</b> patterns.
|
||||
</P>
|
||||
<P>
|
||||
By default, as soon as one pattern matches a line, no further patterns are
|
||||
considered. However, if <b>--colour</b> (or <b>--color</b>) is used to colour the
|
||||
matching substrings, or if <b>--only-matching</b>, <b>--file-offsets</b>, or
|
||||
<b>--line-offsets</b> is used to output only the part of the line that matched
|
||||
(either shown literally, or as an offset), scanning resumes immediately
|
||||
following the match, so that further matches on the same line can be found. If
|
||||
there are multiple patterns, they are all tried on the remainder of the line,
|
||||
but patterns that follow the one that matched are not tried on the earlier part
|
||||
of the line.
|
||||
</P>
|
||||
<P>
|
||||
This behaviour means that the order in which multiple patterns are specified
|
||||
can affect the output when one of the above options is used. This is no longer
|
||||
the same behaviour as GNU grep, which now manages to display earlier matches
|
||||
for later patterns (as long as there is no overlap).
|
||||
</P>
|
||||
<P>
|
||||
Patterns that can match an empty string are accepted, but empty string
|
||||
matches are never recognized. An example is the pattern "(super)?(man)?", in
|
||||
which all components are optional. This pattern finds all occurrences of both
|
||||
"super" and "man"; the output differs from matching with "super|man" when only
|
||||
the matching substrings are being shown.
|
||||
</P>
|
||||
<P>
|
||||
If the <b>LC_ALL</b> or <b>LC_CTYPE</b> environment variable is set,
|
||||
<b>pcre2grep</b> uses the value to set a locale when calling the PCRE2 library.
|
||||
The <b>--locale</b> option can be used to override this.
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">SUPPORT FOR COMPRESSED FILES</a><br>
|
||||
<P>
|
||||
It is possible to compile <b>pcre2grep</b> so that it uses <b>libz</b> or
|
||||
<b>libbz2</b> to read files whose names end in <b>.gz</b> or <b>.bz2</b>,
|
||||
respectively. You can find out whether your binary has support for one or both
|
||||
of these file types by running it with the <b>--help</b> option. If the
|
||||
appropriate support is not present, files are treated as plain text. The
|
||||
standard input is always so treated.
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">BINARY FILES</a><br>
|
||||
<P>
|
||||
By default, a file that contains a binary zero byte within the first 1024 bytes
|
||||
is identified as a binary file, and is processed specially. (GNU grep also
|
||||
identifies binary files in this manner.) See the <b>--binary-files</b> option
|
||||
for a means of changing the way binary files are handled.
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">OPTIONS</a><br>
|
||||
<P>
|
||||
The order in which some of the options appear can affect the output. For
|
||||
example, both the <b>-h</b> and <b>-l</b> options affect the printing of file
|
||||
names. Whichever comes later in the command line will be the one that takes
|
||||
effect. Similarly, except where noted below, if an option is given twice, the
|
||||
later setting is used. Numerical values for options may be followed by K or M,
|
||||
to signify multiplication by 1024 or 1024*1024 respectively.
|
||||
</P>
|
||||
<P>
|
||||
<b>--</b>
|
||||
This terminates the list of options. It is useful if the next item on the
|
||||
command line starts with a hyphen but is not an option. This allows for the
|
||||
processing of patterns and file names that start with hyphens.
|
||||
</P>
|
||||
<P>
|
||||
<b>-A</b> <i>number</i>, <b>--after-context=</b><i>number</i>
|
||||
Output <i>number</i> lines of context after each matching line. If file names
|
||||
and/or line numbers are being output, a hyphen separator is used instead of a
|
||||
colon for the context lines. A line containing "--" is output between each
|
||||
group of lines, unless they are in fact contiguous in the input file. The value
|
||||
of <i>number</i> is expected to be relatively small. However, <b>pcre2grep</b>
|
||||
guarantees to have up to 8K of following text available for context output.
|
||||
</P>
|
||||
<P>
|
||||
<b>-a</b>, <b>--text</b>
|
||||
Treat binary files as text. This is equivalent to
|
||||
<b>--binary-files</b>=<i>text</i>.
|
||||
</P>
|
||||
<P>
|
||||
<b>-B</b> <i>number</i>, <b>--before-context=</b><i>number</i>
|
||||
Output <i>number</i> lines of context before each matching line. If file names
|
||||
and/or line numbers are being output, a hyphen separator is used instead of a
|
||||
colon for the context lines. A line containing "--" is output between each
|
||||
group of lines, unless they are in fact contiguous in the input file. The value
|
||||
of <i>number</i> is expected to be relatively small. However, <b>pcre2grep</b>
|
||||
guarantees to have up to 8K of preceding text available for context output.
|
||||
</P>
|
||||
<P>
|
||||
<b>--binary-files=</b><i>word</i>
|
||||
Specify how binary files are to be processed. If the word is "binary" (the
|
||||
default), pattern matching is performed on binary files, but the only output is
|
||||
"Binary file &lt;name&gt; matches" when a match succeeds. If the word is "text",
|
||||
which is equivalent to the <b>-a</b> or <b>--text</b> option, binary files are
|
||||
processed in the same way as any other file. In this case, when a match
|
||||
succeeds, the output may be binary garbage, which can have nasty effects if
|
||||
sent to a terminal. If the word is "without-match", which is equivalent to the
|
||||
<b>-I</b> option, binary files are not processed at all; they are assumed not to
|
||||
be of interest and are skipped without causing any output or affecting the
|
||||
return code.
|
||||
</P>
|
||||
<P>
|
||||
<b>--buffer-size=</b><i>number</i>
|
||||
Set the parameter that controls how much memory is used for buffering files
|
||||
that are being scanned.
|
||||
</P>
|
||||
<P>
|
||||
<b>-C</b> <i>number</i>, <b>--context=</b><i>number</i>
|
||||
Output <i>number</i> lines of context both before and after each matching line.
|
||||
This is equivalent to setting both <b>-A</b> and <b>-B</b> to the same value.
|
||||
</P>
|
||||
<P>
|
||||
<b>-c</b>, <b>--count</b>
|
||||
Do not output lines from the files that are being scanned; instead output the
|
||||
number of matches (or non-matches if <b>-v</b> is used) that would otherwise
|
||||
have caused lines to be shown. By default, this count is the same as the number
|
||||
of suppressed lines, but if the <b>-M</b> (multiline) option is used (without
|
||||
<b>-v</b>), there may be more suppressed lines than the number of matches.
|
||||
<br>
|
||||
<br>
|
||||
If no lines are selected, the number zero is output. If several files are
|
||||
being scanned, a count is output for each of them. However, if the
|
||||
<b>--files-with-matches</b> option is also used, only those files whose counts
|
||||
are greater than zero are listed. When <b>-c</b> is used, the <b>-A</b>,
|
||||
<b>-B</b>, and <b>-C</b> options are ignored.
|
||||
</P>
|
||||
<P>
|
||||
<b>--colour</b>, <b>--color</b>
|
||||
If this option is given without any data, it is equivalent to "--colour=auto".
|
||||
If data is required, it must be given in the same shell item, separated by an
|
||||
equals sign.
|
||||
</P>
|
||||
<P>
|
||||
<b>--colour=</b><i>value</i>, <b>--color=</b><i>value</i>
|
||||
This option specifies under what circumstances the parts of a line that matched
|
||||
a pattern should be coloured in the output. By default, the output is not
|
||||
coloured. The value (which is optional, see above) may be "never", "always", or
|
||||
"auto". In the latter case, colouring happens only if the standard output is
|
||||
connected to a terminal. More resources are used when colouring is enabled,
|
||||
because <b>pcre2grep</b> has to search for all possible matches in a line, not
|
||||
just one, in order to colour them all.
|
||||
<br>
|
||||
<br>
|
||||
The colour that is used can be specified by setting the environment variable
|
||||
PCRE2GREP_COLOUR or PCRE2GREP_COLOR. The value of this variable should be a
|
||||
string of two numbers, separated by a semicolon. They are copied directly into
|
||||
the control string for setting colour on a terminal, so it is your
|
||||
responsibility to ensure that they make sense. If neither of the environment
|
||||
variables is set, the default is "1;31", which gives red.
|
||||
</P>
|
||||
<P>
|
||||
<b>-D</b> <i>action</i>, <b>--devices=</b><i>action</i>
|
||||
If an input path is not a regular file or a directory, "action" specifies how
|
||||
it is to be processed. Valid values are "read" (the default) or "skip"
|
||||
(silently skip the path).
|
||||
</P>
|
||||
<P>
|
||||
<b>-d</b> <i>action</i>, <b>--directories=</b><i>action</i>
|
||||
If an input path is a directory, "action" specifies how it is to be processed.
|
||||
Valid values are "read" (the default in non-Windows environments, for
|
||||
compatibility with GNU grep), "recurse" (equivalent to the <b>-r</b> option), or
|
||||
"skip" (silently skip the path, the default in Windows environments). In the
|
||||
"read" case, directories are read as if they were ordinary files. In some
|
||||
operating systems the effect of reading a directory like this is an immediate
|
||||
end-of-file; in others it may provoke an error.
|
||||
</P>
|
||||
<P>
|
||||
<b>-e</b> <i>pattern</i>, <b>--regex=</b><i>pattern</i>, <b>--regexp=</b><i>pattern</i>
|
||||
Specify a pattern to be matched. This option can be used multiple times in
|
||||
order to specify several patterns. It can also be used as a way of specifying a
|
||||
single pattern that starts with a hyphen. When <b>-e</b> is used, no argument
|
||||
pattern is taken from the command line; all arguments are treated as file
|
||||
names. There is no limit to the number of patterns. They are applied to each
|
||||
line in the order in which they are defined until one matches.
|
||||
<br>
|
||||
<br>
|
||||
If <b>-f</b> is used with <b>-e</b>, the command line patterns are matched first,
|
||||
followed by the patterns from the file(s), independent of the order in which
|
||||
these options are specified. Note that multiple use of <b>-e</b> is not the same
|
||||
as a single pattern with alternatives. For example, X|Y finds the first
|
||||
character in a line that is X or Y, whereas if the two patterns are given
|
||||
separately, with X first, <b>pcre2grep</b> finds X if it is present, even if it
|
||||
follows Y in the line. It finds Y only if there is no X in the line. This
|
||||
matters only if you are using <b>-o</b> or <b>--colo(u)r</b> to show the part(s)
|
||||
of the line that matched.
|
||||
</P>
|
||||
<P>
|
||||
<b>--exclude</b>=<i>pattern</i>
|
||||
Files (but not directories) whose names match the pattern are skipped without
|
||||
being processed. This applies to all files, whether listed on the command line,
|
||||
obtained from <b>--file-list</b>, or by scanning a directory. The pattern is a
|
||||
PCRE2 regular expression, and is matched against the final component of the
|
||||
file name, not the entire path. The <b>-F</b>, <b>-w</b>, and <b>-x</b> options do
|
||||
not apply to this pattern. The option may be given any number of times in order
|
||||
to specify multiple patterns. If a file name matches both an <b>--include</b>
|
||||
and an <b>--exclude</b> pattern, it is excluded. There is no short form for this
|
||||
option.
|
||||
</P>
|
||||
<P>
|
||||
<b>--exclude-from=</b><i>filename</i>
|
||||
Treat each non-empty line of the file as the data for an <b>--exclude</b>
|
||||
option. What constitutes a newline when reading the file is the operating
|
||||
system's default. The <b>--newline</b> option has no effect on this option. This
|
||||
option may be given more than once in order to specify a number of files to
|
||||
read.
|
||||
</P>
|
||||
<P>
|
||||
<b>--exclude-dir</b>=<i>pattern</i>
|
||||
Directories whose names match the pattern are skipped without being processed,
|
||||
whatever the setting of the <b>--recursive</b> option. This applies to all
|
||||
directories, whether listed on the command line, obtained from
|
||||
<b>--file-list</b>, or by scanning a parent directory. The pattern is a PCRE2
|
||||
regular expression, and is matched against the final component of the directory
|
||||
name, not the entire path. The <b>-F</b>, <b>-w</b>, and <b>-x</b> options do not
|
||||
apply to this pattern. The option may be given any number of times in order to
|
||||
specify more than one pattern. If a directory matches both <b>--include-dir</b>
|
||||
and <b>--exclude-dir</b>, it is excluded. There is no short form for this
|
||||
option.
|
||||
</P>
|
||||
<P>
|
||||
<b>-F</b>, <b>--fixed-strings</b>
|
||||
Interpret each data-matching pattern as a list of fixed strings, separated by
|
||||
newlines, instead of as a regular expression. What constitutes a newline for
|
||||
this purpose is controlled by the <b>--newline</b> option. The <b>-w</b> (match
|
||||
as a word) and <b>-x</b> (match whole line) options can be used with <b>-F</b>.
|
||||
They apply to each of the fixed strings. A line is selected if any of the fixed
|
||||
strings are found in it (subject to <b>-w</b> or <b>-x</b>, if present). This
|
||||
option applies only to the patterns that are matched against the contents of
|
||||
files; it does not apply to patterns specified by any of the <b>--include</b> or
|
||||
<b>--exclude</b> options.
|
||||
</P>
|
||||
<P>
|
||||
<b>-f</b> <i>filename</i>, <b>--file=</b><i>filename</i>
|
||||
Read patterns from the file, one per line, and match them against
|
||||
each line of input. What constitutes a newline when reading the file is the
|
||||
operating system's default. The <b>--newline</b> option has no effect on this
|
||||
option. Trailing white space is removed from each line, and blank lines are
|
||||
ignored. An empty file contains no patterns and therefore matches nothing. See
|
||||
also the comments about multiple patterns versus a single pattern with
|
||||
alternatives in the description of <b>-e</b> above.
|
||||
<br>
|
||||
<br>
|
||||
If this option is given more than once, all the specified files are
|
||||
read. A data line is output if any of the patterns match it. A file name can
|
||||
be given as "-" to refer to the standard input. When <b>-f</b> is used, patterns
|
||||
specified on the command line using <b>-e</b> may also be present; they are
|
||||
tested before the file's patterns. However, no other pattern is taken from the
|
||||
command line; all arguments are treated as the names of paths to be searched.
|
||||
</P>
|
||||
<P>
|
||||
<b>--file-list</b>=<i>filename</i>
|
||||
Read a list of files and/or directories that are to be scanned from the given
|
||||
file, one per line. Trailing white space is removed from each line, and blank
|
||||
lines are ignored. These paths are processed before any that are listed on the
|
||||
command line. The file name can be given as "-" to refer to the standard input.
|
||||
If <b>--file</b> and <b>--file-list</b> are both specified as "-", patterns are
|
||||
read first. This is useful only when the standard input is a terminal, from
|
||||
which further lines (the list of files) can be read after an end-of-file
|
||||
indication. If this option is given more than once, all the specified files are
|
||||
read.
|
||||
</P>
|
||||
<P>
|
||||
<b>--file-offsets</b>
|
||||
Instead of showing lines or parts of lines that match, show each match as an
|
||||
offset from the start of the file and a length, separated by a comma. In this
|
||||
mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b>
|
||||
options are ignored. If there is more than one match in a line, each of them is
|
||||
shown separately. This option is mutually exclusive with <b>--line-offsets</b>
|
||||
and <b>--only-matching</b>.
|
||||
</P>
|
||||
<P>
|
||||
<b>-H</b>, <b>--with-filename</b>
|
||||
Force the inclusion of the file name at the start of output lines when
|
||||
searching a single file. By default, the file name is not shown in this case.
|
||||
For matching lines, the file name is followed by a colon; for context lines, a
|
||||
hyphen separator is used. If a line number is also being output, it follows the
|
||||
file name. When the <b>-M</b> option causes a pattern to match more than one
|
||||
line, only the first is preceded by the file name.
|
||||
</P>
|
||||
<P>
|
||||
<b>-h</b>, <b>--no-filename</b>
|
||||
Suppress the output of file names when searching multiple files. By default,
|
||||
file names are shown when multiple files are searched. For matching lines, the
|
||||
file name is followed by a colon; for context lines, a hyphen separator is used.
|
||||
If a line number is also being output, it follows the file name.
|
||||
</P>
|
||||
<P>
|
||||
<b>--help</b>
|
||||
Output a help message, giving brief details of the command options and file
|
||||
type support, and then exit. Anything else on the command line is
|
||||
ignored.
|
||||
</P>
|
||||
<P>
|
||||
<b>-I</b>
|
||||
Ignore binary files. This is equivalent to
|
||||
<b>--binary-files</b>=<i>without-match</i>.
|
||||
</P>
|
||||
<P>
|
||||
<b>-i</b>, <b>--ignore-case</b>
|
||||
Ignore upper/lower case distinctions during comparisons.
|
||||
</P>
|
||||
<P>
|
||||
<b>--include</b>=<i>pattern</i>
|
||||
If any <b>--include</b> patterns are specified, the only files that are
|
||||
processed are those that match one of the patterns (and do not match an
|
||||
<b>--exclude</b> pattern). This option does not affect directories, but it
|
||||
applies to all files, whether listed on the command line, obtained from
|
||||
<b>--file-list</b>, or by scanning a directory. The pattern is a PCRE2 regular
|
||||
expression, and is matched against the final component of the file name, not
|
||||
the entire path. The <b>-F</b>, <b>-w</b>, and <b>-x</b> options do not apply to
|
||||
this pattern. The option may be given any number of times. If a file name
|
||||
matches both an <b>--include</b> and an <b>--exclude</b> pattern, it is excluded.
|
||||
There is no short form for this option.
|
||||
</P>
|
||||
<P>
|
||||
<b>--include-from=</b><i>filename</i>
|
||||
Treat each non-empty line of the file as the data for an <b>--include</b>
|
||||
option. What constitutes a newline for this purpose is the operating system's
|
||||
default. The <b>--newline</b> option has no effect on this option. This option
|
||||
may be given any number of times; all the files are read.
|
||||
</P>
|
||||
<P>
|
||||
<b>--include-dir</b>=<i>pattern</i>
|
||||
If any <b>--include-dir</b> patterns are specified, the only directories that
|
||||
are processed are those that match one of the patterns (and do not match an
|
||||
<b>--exclude-dir</b> pattern). This applies to all directories, whether listed
|
||||
on the command line, obtained from <b>--file-list</b>, or by scanning a parent
|
||||
directory. The pattern is a PCRE2 regular expression, and is matched against
|
||||
the final component of the directory name, not the entire path. The <b>-F</b>,
|
||||
<b>-w</b>, and <b>-x</b> options do not apply to this pattern. The option may be
|
||||
given any number of times. If a directory matches both <b>--include-dir</b> and
|
||||
<b>--exclude-dir</b>, it is excluded. There is no short form for this option.
|
||||
</P>
|
||||
<P>
|
||||
<b>-L</b>, <b>--files-without-match</b>
|
||||
Instead of outputting lines from the files, just output the names of the files
|
||||
that do not contain any lines that would have been output. Each file name is
|
||||
output once, on a separate line.
|
||||
</P>
|
||||
<P>
|
||||
<b>-l</b>, <b>--files-with-matches</b>
|
||||
Instead of outputting lines from the files, just output the names of the files
|
||||
containing lines that would have been output. Each file name is output
|
||||
once, on a separate line. Searching normally stops as soon as a matching line
|
||||
is found in a file. However, if the <b>-c</b> (count) option is also used,
|
||||
matching continues in order to obtain the correct count, and those files that
|
||||
have at least one match are listed along with their counts. Using this option
|
||||
with <b>-c</b> is a way of suppressing the listing of files with no matches.
|
||||
</P>
|
||||
<P>
|
||||
<b>--label</b>=<i>name</i>
|
||||
This option supplies a name to be used for the standard input when file names
|
||||
are being output. If not supplied, "(standard input)" is used. There is no
|
||||
short form for this option.
|
||||
</P>
|
||||
<P>
|
||||
<b>--line-buffered</b>
|
||||
When this option is given, input is read and processed line by line, and the
|
||||
output is flushed after each write. By default, input is read in large chunks,
|
||||
unless <b>pcre2grep</b> can determine that it is reading from a terminal (which
|
||||
is currently possible only in Unix-like environments). Output to a terminal is
|
||||
normally automatically flushed by the operating system. This option can be
|
||||
useful when the input or output is attached to a pipe and you do not want
|
||||
<b>pcre2grep</b> to buffer up large amounts of data. However, its use will
|
||||
affect performance, and the <b>-M</b> (multiline) option ceases to work.
|
||||
</P>
|
||||
<P>
|
||||
<b>--line-offsets</b>
|
||||
Instead of showing lines or parts of lines that match, show each match as a
|
||||
line number, the offset from the start of the line, and a length. The line
|
||||
number is terminated by a colon (as usual; see the <b>-n</b> option), and the
|
||||
offset and length are separated by a comma. In this mode, no context is shown.
|
||||
That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b> options are ignored. If there is
|
||||
more than one match in a line, each of them is shown separately. This option is
|
||||
mutually exclusive with <b>--file-offsets</b> and <b>--only-matching</b>.
|
||||
</P>
|
||||
<P>
|
||||
<b>--locale</b>=<i>locale-name</i>
|
||||
This option specifies a locale to be used for pattern matching. It overrides
|
||||
the value in the <b>LC_ALL</b> or <b>LC_CTYPE</b> environment variables. If no
|
||||
locale is specified, the PCRE2 library's default (usually the "C" locale) is
|
||||
used. There is no short form for this option.
|
||||
</P>
|
||||
<P>
|
||||
<b>--match-limit</b>=<i>number</i>
|
||||
Processing some regular expression patterns can require a very large amount of
|
||||
memory, leading in some cases to a program crash if not enough is available.
|
||||
Other patterns may take a very long time to search for all possible matching
|
||||
strings. The <b>pcre2_match()</b> function that is called by <b>pcre2grep</b> to
|
||||
do the matching has two parameters that can limit the resources that it uses.
|
||||
<br>
|
||||
<br>
|
||||
The <b>--match-limit</b> option provides a means of limiting resource usage
|
||||
when processing patterns that are not going to match, but which have a very
|
||||
large number of possibilities in their search trees. The classic example is a
|
||||
pattern that uses nested unlimited repeats. Internally, PCRE2 uses a function
|
||||
called <b>match()</b> which it calls repeatedly (sometimes recursively). The
|
||||
limit set by <b>--match-limit</b> is imposed on the number of times this
|
||||
function is called during a match, which has the effect of limiting the amount
|
||||
of backtracking that can take place.
|
||||
<br>
|
||||
<br>
|
||||
The <b>--recursion-limit</b> option is similar to <b>--match-limit</b>, but
|
||||
instead of limiting the total number of times that <b>match()</b> is called, it
|
||||
limits the depth of recursive calls, which in turn limits the amount of memory
|
||||
that can be used. The recursion depth is a smaller number than the total number
|
||||
of calls, because not all calls to <b>match()</b> are recursive. This limit is
|
||||
of use only if it is set smaller than <b>--match-limit</b>.
|
||||
<br>
|
||||
<br>
|
||||
There are no short forms for these options. The default settings are specified
|
||||
when the PCRE2 library is compiled; if no value is given at build time, 10 million is used.
|
||||
</P>
|
||||
<P>
|
||||
<b>-M</b>, <b>--multiline</b>
|
||||
Allow patterns to match more than one line. When this option is given, patterns
|
||||
may usefully contain literal newline characters and internal occurrences of ^
|
||||
and $ characters. The output for a successful match may consist of more than
|
||||
one line. The first is the line in which the match started, and the last is the
|
||||
line in which the match ended. If the matched string ends with a newline
|
||||
sequence the output ends at the end of that line.
|
||||
<br>
|
||||
<br>
|
||||
When this option is set, the PCRE2 library is called in "multiline" mode.
|
||||
However, <b>pcre2grep</b> still processes the input line by line. The difference
|
||||
is that a matched string may extend past the end of a line and continue on
|
||||
one or more subsequent lines. The newline sequence must be matched as part of
|
||||
the pattern. For example, to find the phrase "regular expression" in a file
|
||||
where "regular" might be at the end of a line and "expression" at the start of
|
||||
the next line, you could use this command:
|
||||
<pre>
|
||||
pcre2grep -M 'regular\s+expression' &lt;file&gt;
|
||||
</pre>
|
||||
The \s escape sequence matches any white space character, including newlines,
|
||||
and is followed by + so as to match trailing white space on the first line as
|
||||
well as possibly handling a two-character newline sequence.
|
||||
<br>
|
||||
<br>
|
||||
There is a limit to the number of lines that can be matched, imposed by the way
|
||||
that <b>pcre2grep</b> buffers the input file as it scans it. However,
|
||||
<b>pcre2grep</b> ensures that at least 8K characters or the rest of the file
|
||||
(whichever is the shorter) are available for forward matching, and similarly
|
||||
the previous 8K characters (or all the previous characters, if fewer than 8K)
|
||||
are guaranteed to be available for lookbehind assertions. The <b>-M</b> option
|
||||
does not work when input is read line by line (see <b>--line-buffered</b>).
|
||||
</P>
|
||||
<P>
|
||||
<b>-N</b> <i>newline-type</i>, <b>--newline</b>=<i>newline-type</i>
|
||||
The PCRE2 library supports five different conventions for indicating
|
||||
the ends of lines. They are the single-character sequences CR (carriage return)
|
||||
and LF (linefeed), the two-character sequence CRLF, an "anycrlf" convention,
|
||||
which recognizes any of the preceding three types, and an "any" convention, in
|
||||
which any Unicode line ending sequence is assumed to end a line. The Unicode
|
||||
sequences are the three just mentioned, plus VT (vertical tab, U+000B), FF
|
||||
(form feed, U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and
|
||||
PS (paragraph separator, U+2029).
|
||||
<br>
|
||||
<br>
|
||||
When the PCRE2 library is built, a default line-ending sequence is specified.
|
||||
This is normally the standard sequence for the operating system. Unless
|
||||
otherwise specified by this option, <b>pcre2grep</b> uses the library's default.
|
||||
The possible values for this option are CR, LF, CRLF, ANYCRLF, or ANY. This
|
||||
makes it possible to use <b>pcre2grep</b> to scan files that have come from
|
||||
other environments without having to modify their line endings. If the data
|
||||
that is being scanned does not agree with the convention set by this option,
|
||||
<b>pcre2grep</b> may behave in strange ways. Note that this option does not
|
||||
apply to files specified by the <b>-f</b>, <b>--exclude-from</b>, or
|
||||
<b>--include-from</b> options, which are expected to use the operating system's
|
||||
standard newline sequence.
|
||||
</P>
|
||||
<P>
|
||||
<b>-n</b>, <b>--line-number</b>
|
||||
Precede each output line by its line number in the file, followed by a colon
|
||||
for matching lines or a hyphen for context lines. If the file name is also
|
||||
being output, it precedes the line number. When the <b>-M</b> option causes a
|
||||
pattern to match more than one line, only the first is preceded by its line
|
||||
number. This option is forced if <b>--line-offsets</b> is used.
|
||||
</P>
|
||||
<P>
|
||||
<b>--no-jit</b>
|
||||
If the PCRE2 library is built with support for just-in-time compiling (which
|
||||
speeds up matching), <b>pcre2grep</b> automatically makes use of this, unless it
|
||||
was explicitly disabled at build time. This option can be used to disable the
|
||||
use of JIT at run time. It is provided for testing and working round problems.
|
||||
It should never be needed in normal use.
|
||||
</P>
|
||||
<P>
|
||||
<b>-o</b>, <b>--only-matching</b>
|
||||
Show only the part of the line that matched a pattern instead of the whole
|
||||
line. In this mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and
|
||||
<b>-C</b> options are ignored. If there is more than one match in a line, each
|
||||
of them is shown separately. If <b>-o</b> is combined with <b>-v</b> (invert the
|
||||
sense of the match to find non-matching lines), no output is generated, but the
|
||||
return code is set appropriately. If the matched portion of the line is empty,
|
||||
nothing is output unless the file name or line number are being printed, in
|
||||
which case they are shown on an otherwise empty line. This option is mutually
|
||||
exclusive with <b>--file-offsets</b> and <b>--line-offsets</b>.
|
||||
</P>
|
||||
<P>
|
||||
<b>-o</b><i>number</i>, <b>--only-matching</b>=<i>number</i>
|
||||
Show only the part of the line that matched the capturing parentheses of the
|
||||
given number. Up to 32 capturing parentheses are supported, and -o0 is
|
||||
equivalent to <b>-o</b> without a number. Because these options can be given
|
||||
without an argument (see above), if an argument is present, it must be given in
|
||||
the same shell item, for example, -o3 or --only-matching=2. The comments given
|
||||
for the non-argument case above also apply to this case. If the specified
|
||||
capturing parentheses do not exist in the pattern, or were not set in the
|
||||
match, nothing is output unless the file name or line number are being output.
|
||||
<br>
|
||||
<br>
|
||||
If this option is given multiple times, multiple substrings are output, in the
|
||||
order the options are given. For example, -o3 -o1 -o3 causes the substrings
|
||||
matched by capturing parentheses 3 and 1 and then 3 again to be output. By
|
||||
default, there is no separator (but see the next option).
|
||||
</P>
|
||||
<P>
|
||||
<b>--om-separator</b>=<i>text</i>
|
||||
Specify a separating string for multiple occurrences of <b>-o</b>. The default
|
||||
is an empty string. Separating strings are never coloured.
|
||||
</P>
|
||||
<P>
|
||||
<b>-q</b>, <b>--quiet</b>
|
||||
Work quietly, that is, display nothing except error messages. The exit
|
||||
status indicates whether or not any matches were found.
|
||||
</P>
|
||||
<P>
|
||||
<b>-r</b>, <b>--recursive</b>
|
||||
If any given path is a directory, recursively scan the files it contains,
|
||||
taking note of any <b>--include</b> and <b>--exclude</b> settings. By default, a
|
||||
directory is read as a normal file; in some operating systems this gives an
|
||||
immediate end-of-file. This option is a shorthand for setting the <b>-d</b>
|
||||
option to "recurse".
|
||||
</P>
|
||||
<P>
|
||||
<b>--recursion-limit</b>=<i>number</i>
|
||||
See <b>--match-limit</b> above.
|
||||
</P>
|
||||
<P>
|
||||
<b>-s</b>, <b>--no-messages</b>
|
||||
Suppress error messages about non-existent or unreadable files. Such files are
|
||||
quietly skipped. However, the return code is still 2, even if matches were
|
||||
found in other files.
|
||||
</P>
|
||||
<P>
|
||||
<b>-u</b>, <b>--utf-8</b>
|
||||
Operate in UTF-8 mode. This option is available only if PCRE2 has been compiled
|
||||
with UTF-8 support. All patterns (including those for any <b>--exclude</b> and
|
||||
<b>--include</b> options) and all subject lines that are scanned must be valid
|
||||
strings of UTF-8 characters.
|
||||
</P>
|
||||
<P>
|
||||
<b>-V</b>, <b>--version</b>
|
||||
Write the version numbers of <b>pcre2grep</b> and the PCRE2 library to the
|
||||
standard output and then exit. Anything else on the command line is
|
||||
ignored.
|
||||
</P>
|
||||
<P>
|
||||
<b>-v</b>, <b>--invert-match</b>
|
||||
Invert the sense of the match, so that lines which do <i>not</i> match any of
|
||||
the patterns are the ones that are found.
|
||||
</P>
|
||||
<P>
|
||||
<b>-w</b>, <b>--word-regex</b>, <b>--word-regexp</b>
|
||||
Force the patterns to match only whole words. This is equivalent to having \b
|
||||
at the start and end of the pattern. This option applies only to the patterns
|
||||
that are matched against the contents of files; it does not apply to patterns
|
||||
specified by any of the <b>--include</b> or <b>--exclude</b> options.
|
||||
</P>
|
||||
<P>
|
||||
<b>-x</b>, <b>--line-regex</b>, <b>--line-regexp</b>
|
||||
Force the patterns to be anchored (each must start matching at the beginning of
|
||||
a line) and in addition, require them to match entire lines. This is equivalent
|
||||
to having ^ and $ characters at the start and end of each alternative top-level
|
||||
branch in every pattern. This option applies only to the patterns that are
|
||||
matched against the contents of files; it does not apply to patterns specified
|
||||
by any of the <b>--include</b> or <b>--exclude</b> options.
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">ENVIRONMENT VARIABLES</a><br>
|
||||
<P>
|
||||
The environment variables <b>LC_ALL</b> and <b>LC_CTYPE</b> are examined, in that
|
||||
order, for a locale. The first one that is set is used. This can be overridden
|
||||
by the <b>--locale</b> option. If no locale is set, the PCRE2 library's default
|
||||
(usually the "C" locale) is used.
|
||||
</P>
|
||||
<br><a name="SEC7" href="#TOC1">NEWLINES</a><br>
|
||||
<P>
|
||||
The <b>-N</b> (<b>--newline</b>) option allows <b>pcre2grep</b> to scan files with
|
||||
different newline conventions from the default. Any parts of the input files
|
||||
that are written to the standard output are copied identically, with whatever
|
||||
newline sequences they have in the input. However, the setting of this option
|
||||
does not affect the interpretation of files specified by the <b>-f</b>,
|
||||
<b>--exclude-from</b>, or <b>--include-from</b> options, which are assumed to use
|
||||
the operating system's standard newline sequence, nor does it affect the way in
|
||||
which <b>pcre2grep</b> writes informational messages to the standard error and
|
||||
output streams. For these it uses the string "\n" to indicate newlines,
|
||||
relying on the C I/O library to convert this to an appropriate sequence.
|
||||
</P>
|
||||
<br><a name="SEC8" href="#TOC1">OPTIONS COMPATIBILITY</a><br>
|
||||
<P>
|
||||
Many of the short and long forms of <b>pcre2grep</b>'s options are the same
|
||||
as in the GNU <b>grep</b> program. Any long option of the form
|
||||
<b>--xxx-regexp</b> (GNU terminology) is also available as <b>--xxx-regex</b>
|
||||
(PCRE2 terminology). However, the <b>--file-list</b>, <b>--file-offsets</b>,
|
||||
<b>--include-dir</b>, <b>--line-offsets</b>, <b>--locale</b>, <b>--match-limit</b>,
|
||||
<b>-M</b>, <b>--multiline</b>, <b>-N</b>, <b>--newline</b>, <b>--om-separator</b>,
|
||||
<b>--recursion-limit</b>, <b>-u</b>, and <b>--utf-8</b> options are specific to
|
||||
<b>pcre2grep</b>, as is the use of the <b>--only-matching</b> option with a
|
||||
capturing parentheses number.
|
||||
</P>
|
||||
<P>
|
||||
Although most of the common options work the same way, a few are different in
|
||||
<b>pcre2grep</b>. For example, the <b>--include</b> option's argument is a glob
|
||||
for GNU <b>grep</b>, but a regular expression for <b>pcre2grep</b>. If both the
|
||||
<b>-c</b> and <b>-l</b> options are given, GNU grep lists only file names,
|
||||
without counts, but <b>pcre2grep</b> gives the counts as well.
|
||||
</P>
|
||||
<br><a name="SEC9" href="#TOC1">OPTIONS WITH DATA</a><br>
|
||||
<P>
|
||||
There are four different ways in which an option with data can be specified.
|
||||
If a short form option is used, the data may follow immediately, or (with one
|
||||
exception) in the next command line item. For example:
|
||||
<pre>
|
||||
-f/some/file
|
||||
-f /some/file
|
||||
</pre>
|
||||
The exception is the <b>-o</b> option, which may appear with or without data.
|
||||
Because of this, if data is present, it must follow immediately in the same
|
||||
item, for example -o3.
|
||||
</P>
|
||||
<P>
|
||||
If a long form option is used, the data may appear in the same command line
|
||||
item, separated by an equals character, or (with two exceptions) it may appear
|
||||
in the next command line item. For example:
|
||||
<pre>
|
||||
--file=/some/file
|
||||
--file /some/file
|
||||
</pre>
|
||||
Note, however, that if you want to supply a file name beginning with ~ as data
|
||||
in a shell command, and have the shell expand ~ to a home directory, you must
|
||||
separate the file name from the option, because the shell does not treat ~
|
||||
specially unless it is at the start of an item.
|
||||
</P>
|
||||
<P>
|
||||
The exceptions to the above are the <b>--colour</b> (or <b>--color</b>) and
|
||||
<b>--only-matching</b> options, for which the data is optional. If one of these
|
||||
options does have data, it must be given in the first form, using an equals
|
||||
character. Otherwise <b>pcre2grep</b> will assume that it has no data.
|
||||
</P>
|
||||
<br><a name="SEC10" href="#TOC1">MATCHING ERRORS</a><br>
|
||||
<P>
|
||||
It is possible to supply a regular expression that takes a very long time to
|
||||
fail to match certain lines. Such patterns normally involve nested indefinite
|
||||
repeats, for example: (a+)*\d when matched against a line of a's with no final
|
||||
digit. The PCRE2 matching function has a resource limit that causes it to abort
|
||||
in these circumstances. If this happens, <b>pcre2grep</b> outputs an error
|
||||
message and the line that caused the problem to the standard error stream. If
|
||||
there are more than 20 such errors, <b>pcre2grep</b> gives up.
|
||||
</P>
|
||||
<P>
|
||||
The <b>--match-limit</b> option of <b>pcre2grep</b> can be used to set the
|
||||
overall resource limit; there is a second option called <b>--recursion-limit</b>
|
||||
that sets a limit on the amount of memory (usually stack) that is used (see the
|
||||
discussion of these options above).
|
||||
</P>
|
||||
<br><a name="SEC11" href="#TOC1">DIAGNOSTICS</a><br>
|
||||
<P>
|
||||
Exit status is 0 if any matches were found, 1 if no matches were found, and 2
|
||||
for syntax errors, overlong lines, non-existent or inaccessible files (even if
|
||||
matches were found in other files) or too many matching errors. Using the
|
||||
<b>-s</b> option to suppress error messages about inaccessible files does not
|
||||
affect the return code.
|
||||
</P>
|
||||
<br><a name="SEC12" href="#TOC1">SEE ALSO</a><br>
|
||||
<P>
|
||||
<b>pcre2pattern</b>(3), <b>pcre2syntax</b>(3).
|
||||
</P>
|
||||
<br><a name="SEC13" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
University Computing Service
|
||||
<br>
|
||||
Cambridge, England.
|
||||
<br>
|
||||
</P>
|
||||
<br><a name="SEC14" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 03 January 2015
|
||||
<br>
|
||||
Copyright © 1997-2015 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
428
pcre2/doc/html/pcre2jit.html
Normal file
428
pcre2/doc/html/pcre2jit.html
Normal file
@ -0,0 +1,428 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2jit specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2jit man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<ul>
|
||||
<li><a name="TOC1" href="#SEC1">PCRE2 JUST-IN-TIME COMPILER SUPPORT</a>
|
||||
<li><a name="TOC2" href="#SEC2">AVAILABILITY OF JIT SUPPORT</a>
|
||||
<li><a name="TOC3" href="#SEC3">SIMPLE USE OF JIT</a>
|
||||
<li><a name="TOC4" href="#SEC4">UNSUPPORTED OPTIONS AND PATTERN ITEMS</a>
|
||||
<li><a name="TOC5" href="#SEC5">RETURN VALUES FROM JIT MATCHING</a>
|
||||
<li><a name="TOC6" href="#SEC6">CONTROLLING THE JIT STACK</a>
|
||||
<li><a name="TOC7" href="#SEC7">JIT STACK FAQ</a>
|
||||
<li><a name="TOC8" href="#SEC8">FREEING JIT SPECULATIVE MEMORY</a>
|
||||
<li><a name="TOC9" href="#SEC9">EXAMPLE CODE</a>
|
||||
<li><a name="TOC10" href="#SEC10">JIT FAST PATH API</a>
|
||||
<li><a name="TOC11" href="#SEC11">SEE ALSO</a>
|
||||
<li><a name="TOC12" href="#SEC12">AUTHOR</a>
|
||||
<li><a name="TOC13" href="#SEC13">REVISION</a>
|
||||
</ul>
|
||||
<br><a name="SEC1" href="#TOC1">PCRE2 JUST-IN-TIME COMPILER SUPPORT</a><br>
|
||||
<P>
|
||||
Just-in-time compiling is a heavyweight optimization that can greatly speed up
|
||||
pattern matching. However, it comes at the cost of extra processing before the
|
||||
match is performed, so it is of most benefit when the same pattern is going to
|
||||
be matched many times. This does not necessarily mean many calls of a matching
|
||||
function; if the pattern is not anchored, matching attempts may take place many
|
||||
times at various positions in the subject, even for a single call. Therefore,
|
||||
if the subject string is very long, it may still pay to use JIT even for
|
||||
one-off matches. JIT support is available for all of the 8-bit, 16-bit and
|
||||
32-bit PCRE2 libraries.
|
||||
</P>
|
||||
<P>
|
||||
JIT support applies only to the traditional Perl-compatible matching function.
|
||||
It does not apply when the DFA matching function is being used. The code for
|
||||
this support was written by Zoltan Herczeg.
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">AVAILABILITY OF JIT SUPPORT</a><br>
|
||||
<P>
|
||||
JIT support is an optional feature of PCRE2. The "configure" option
|
||||
--enable-jit (or equivalent CMake option) must be set when PCRE2 is built if
|
||||
you want to use JIT. The support is limited to the following hardware
|
||||
platforms:
|
||||
<pre>
|
||||
ARM 32-bit (v5, v7, and Thumb2)
|
||||
ARM 64-bit
|
||||
Intel x86 32-bit and 64-bit
|
||||
MIPS 32-bit and 64-bit
|
||||
Power PC 32-bit and 64-bit
|
||||
SPARC 32-bit
|
||||
</pre>
|
||||
If --enable-jit is set on an unsupported platform, compilation fails.
|
||||
</P>
|
||||
<P>
|
||||
A program can tell if JIT support is available by calling <b>pcre2_config()</b>
|
||||
with the PCRE2_CONFIG_JIT option. The result is 1 when JIT is available, and 0
|
||||
otherwise. However, a simple program does not need to check this in order to
|
||||
use JIT. The API is implemented in a way that falls back to the interpretive
|
||||
code if JIT is not available. For programs that need the best possible
|
||||
performance, there is also a "fast path" API that is JIT-specific.
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">SIMPLE USE OF JIT</a><br>
|
||||
<P>
|
||||
To make use of the JIT support in the simplest way, all you have to do is to
|
||||
call <b>pcre2_jit_compile()</b> after successfully compiling a pattern with
|
||||
<b>pcre2_compile()</b>. This function has two arguments: the first is the
|
||||
compiled pattern pointer that was returned by <b>pcre2_compile()</b>, and the
|
||||
second is zero or more of the following option bits: PCRE2_JIT_COMPLETE,
|
||||
PCRE2_JIT_PARTIAL_HARD, or PCRE2_JIT_PARTIAL_SOFT.
|
||||
</P>
|
||||
<P>
|
||||
If JIT support is not available, a call to <b>pcre2_jit_compile()</b> does
|
||||
nothing and returns PCRE2_ERROR_JIT_BADOPTION. Otherwise, the compiled pattern
|
||||
is passed to the JIT compiler, which turns it into machine code that executes
|
||||
much faster than the normal interpretive code, but yields exactly the same
|
||||
results. The returned value from <b>pcre2_jit_compile()</b> is zero on success,
|
||||
or a negative error code.
|
||||
</P>
|
||||
<P>
|
||||
PCRE2_JIT_COMPLETE requests the JIT compiler to generate code for complete
|
||||
matches. If you want to run partial matches using the PCRE2_PARTIAL_HARD or
|
||||
PCRE2_PARTIAL_SOFT options of <b>pcre2_match()</b>, you should set one or both
|
||||
of the other options as well as, or instead of, PCRE2_JIT_COMPLETE. The JIT
|
||||
compiler generates different optimized code for each of the three modes
|
||||
(normal, soft partial, hard partial). When <b>pcre2_match()</b> is called, the
|
||||
appropriate code is run if it is available. Otherwise, the pattern is matched
|
||||
using interpretive code.
|
||||
</P>
|
||||
<P>
|
||||
You can call <b>pcre2_jit_compile()</b> multiple times for the same compiled
|
||||
pattern. It does nothing if it has previously compiled code for any of the
|
||||
option bits. For example, you can call it once with PCRE2_JIT_COMPLETE and
|
||||
(perhaps later, when you find you need partial matching) again with
|
||||
PCRE2_JIT_COMPLETE and PCRE2_JIT_PARTIAL_HARD. This time it will ignore
|
||||
PCRE2_JIT_COMPLETE and just compile code for partial matching. If
|
||||
<b>pcre2_jit_compile()</b> is called with no option bits set, it immediately
|
||||
returns zero. This is an alternative way of testing whether JIT is available.
|
||||
</P>
|
||||
<P>
|
||||
At present, it is not possible to free JIT compiled code except when the entire
|
||||
compiled pattern is freed by calling <b>pcre2_code_free()</b>.
|
||||
</P>
|
||||
<P>
|
||||
In some circumstances you may need to call additional functions. These are
|
||||
described in the section entitled
|
||||
<a href="#stackcontrol">"Controlling the JIT stack"</a>
|
||||
below.
|
||||
</P>
|
||||
<P>
|
||||
There are some <b>pcre2_match()</b> options that are not supported by JIT, and
|
||||
there are also some pattern items that JIT cannot handle. Details are given
|
||||
below. In both cases, matching automatically falls back to the interpretive
|
||||
code. If you want to know whether JIT was actually used for a particular match,
|
||||
you should arrange for a JIT callback function to be set up as described in the
|
||||
section entitled
|
||||
<a href="#stackcontrol">"Controlling the JIT stack"</a>
|
||||
below, even if you do not need to supply a non-default JIT stack. Such a
|
||||
callback function is called whenever JIT code is about to be obeyed. If the
|
||||
match-time options are not right for JIT execution, the callback function is
|
||||
not obeyed.
|
||||
</P>
|
||||
<P>
|
||||
If the JIT compiler finds an unsupported item, no JIT data is generated. You
|
||||
can find out if JIT matching is available after compiling a pattern by calling
|
||||
<b>pcre2_pattern_info()</b> with the PCRE2_INFO_JITSIZE option. A non-zero
|
||||
result means that JIT compilation was successful. A result of 0 means that JIT
|
||||
support is not available, or the pattern was not processed by
|
||||
<b>pcre2_jit_compile()</b>, or the JIT compiler was not able to handle the
|
||||
pattern.
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">UNSUPPORTED OPTIONS AND PATTERN ITEMS</a><br>
|
||||
<P>
|
||||
The <b>pcre2_match()</b> options that are supported for JIT matching are
|
||||
PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
|
||||
PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. The
|
||||
PCRE2_ANCHORED option is not supported at match time.
|
||||
</P>
|
||||
<P>
|
||||
The only unsupported pattern items are \C (match a single data unit) when
|
||||
running in a UTF mode, and a callout immediately before an assertion condition
|
||||
in a conditional group.
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">RETURN VALUES FROM JIT MATCHING</a><br>
|
||||
<P>
|
||||
When a pattern is matched using JIT matching, the return values are the same
|
||||
as those given by the interpretive <b>pcre2_match()</b> code, with the addition
|
||||
of one new error code: PCRE2_ERROR_JIT_STACKLIMIT. This means that the memory
|
||||
used for the JIT stack was insufficient. See
|
||||
<a href="#stackcontrol">"Controlling the JIT stack"</a>
|
||||
below for a discussion of JIT stack usage.
|
||||
</P>
|
||||
<P>
|
||||
The error code PCRE2_ERROR_MATCHLIMIT is returned by the JIT code if searching
|
||||
a very large pattern tree goes on for too long, as it is in the same
|
||||
circumstance when JIT is not used, but the details of exactly what is counted
|
||||
are not the same. The PCRE2_ERROR_RECURSIONLIMIT error code is never returned
|
||||
when JIT matching is used.
|
||||
<a name="stackcontrol"></a></P>
|
||||
<br><a name="SEC6" href="#TOC1">CONTROLLING THE JIT STACK</a><br>
|
||||
<P>
|
||||
When the compiled JIT code runs, it needs a block of memory to use as a stack.
|
||||
By default, it uses 32K on the machine stack. However, some large or
|
||||
complicated patterns need more than this. The error PCRE2_ERROR_JIT_STACKLIMIT
|
||||
is given when there is not enough stack. Three functions are provided for
|
||||
managing blocks of memory for use as JIT stacks. There is further discussion
|
||||
about the use of JIT stacks in the section entitled
|
||||
<a href="#stackfaq">"JIT stack FAQ"</a>
|
||||
below.
|
||||
</P>
|
||||
<P>
|
||||
The <b>pcre2_jit_stack_create()</b> function creates a JIT stack. Its arguments
|
||||
are a starting size, a maximum size, and a general context (for memory
|
||||
allocation functions, or NULL for standard memory allocation). It returns a
|
||||
pointer to an opaque structure of type <b>pcre2_jit_stack</b>, or NULL if there
|
||||
is an error. The <b>pcre2_jit_stack_free()</b> function is used to free a stack
|
||||
that is no longer needed. (For the technically minded: the address space is
|
||||
allocated by mmap or VirtualAlloc.)
|
||||
</P>
|
||||
<P>
|
||||
JIT uses far less memory for recursion than the interpretive code,
|
||||
and a maximum stack size of 512K to 1M should be more than enough for any
|
||||
pattern.
|
||||
</P>
|
||||
<P>
|
||||
The <b>pcre2_jit_stack_assign()</b> function specifies which stack JIT code
|
||||
should use. Its arguments are as follows:
|
||||
<pre>
|
||||
pcre2_match_context *mcontext
|
||||
pcre2_jit_callback callback
|
||||
void *data
|
||||
</pre>
|
||||
The first argument is a pointer to a match context. When this is subsequently
|
||||
passed to a matching function, its information determines which JIT stack is
|
||||
used. There are three cases for the values of the other two arguments:
|
||||
<pre>
|
||||
(1) If <i>callback</i> is NULL and <i>data</i> is NULL, an internal 32K block
|
||||
on the machine stack is used. This is the default when a match
|
||||
context is created.
|
||||
|
||||
(2) If <i>callback</i> is NULL and <i>data</i> is not NULL, <i>data</i> must be
|
||||
a pointer to a valid JIT stack, the result of calling
|
||||
<b>pcre2_jit_stack_create()</b>.
|
||||
|
||||
(3) If <i>callback</i> is not NULL, it must point to a function that is
|
||||
called with <i>data</i> as an argument at the start of matching, in
|
||||
order to set up a JIT stack. If the return from the callback
|
||||
function is NULL, the internal 32K stack is used; otherwise the
|
||||
return value must be a valid JIT stack, the result of calling
|
||||
<b>pcre2_jit_stack_create()</b>.
|
||||
</pre>
|
||||
A callback function is obeyed whenever JIT code is about to be run; it is not
|
||||
obeyed when <b>pcre2_match()</b> is called with options that are incompatible
|
||||
for JIT matching. A callback function can therefore be used to determine
|
||||
whether a match operation was executed by JIT or by the interpreter.
|
||||
</P>
|
||||
<P>
|
||||
You may safely use the same JIT stack for more than one pattern (either by
|
||||
assigning directly or by callback), as long as the patterns are all matched
|
||||
sequentially in the same thread. In a multithread application, if you do not
|
||||
specify a JIT stack, or if you assign or pass back NULL from a callback, that
|
||||
is thread-safe, because each thread has its own machine stack. However, if you
|
||||
assign or pass back a non-NULL JIT stack, this must be a different stack for
|
||||
each thread so that the application is thread-safe.
|
||||
</P>
|
||||
<P>
|
||||
Strictly speaking, even more is allowed. You can assign the same non-NULL stack
|
||||
to a match context that is used by any number of patterns, as long as they are
|
||||
not used for matching by multiple threads at the same time. For example, you
|
||||
could use the same stack in all compiled patterns, with a global mutex in the
|
||||
callback to wait until the stack is available for use. However, this is an
|
||||
inefficient solution, and not recommended.
|
||||
</P>
|
||||
<P>
|
||||
This is a suggestion for how a multithreaded program that needs to set up
|
||||
non-default JIT stacks might operate:
|
||||
<pre>
|
||||
During thread initialization
|
||||
thread_local_var = pcre2_jit_stack_create(...)
|
||||
|
||||
During thread exit
|
||||
pcre2_jit_stack_free(thread_local_var)
|
||||
|
||||
Use a one-line callback function
|
||||
return thread_local_var
|
||||
</pre>
|
||||
All the functions described in this section do nothing if JIT is not available.
|
||||
<a name="stackfaq"></a></P>
|
||||
<br><a name="SEC7" href="#TOC1">JIT STACK FAQ</a><br>
|
||||
<P>
|
||||
(1) Why do we need JIT stacks?
|
||||
<br>
|
||||
<br>
|
||||
PCRE2 (and JIT) is a recursive, depth-first engine, so it needs a stack where
|
||||
the local data of the current node is pushed before checking its child nodes.
|
||||
Allocating real machine stack on some platforms is difficult. For example, the
|
||||
stack chain needs to be updated every time if we extend the stack on PowerPC.
|
||||
Although it is possible, its updating time overhead decreases performance. So
|
||||
we do the recursion in memory.
|
||||
</P>
|
||||
<P>
|
||||
(2) Why don't we simply allocate blocks of memory with <b>malloc()</b>?
|
||||
<br>
|
||||
<br>
|
||||
Modern operating systems have a nice feature: they can reserve an address space
|
||||
instead of allocating memory. We can safely allocate memory pages inside this
|
||||
address space, so the stack could grow without moving memory data (this is
|
||||
important because of pointers). Thus we can allocate 1M address space, and use
|
||||
only a single memory page (usually 4K) if that is enough. However, we can still
|
||||
grow up to 1M anytime if needed.
|
||||
</P>
|
||||
<P>
|
||||
(3) Who "owns" a JIT stack?
|
||||
<br>
|
||||
<br>
|
||||
The owner of the stack is the user program, not the JIT studied pattern or
|
||||
anything else. The user program must ensure that if a stack is being used by
|
||||
<b>pcre2_match()</b>, (that is, it is assigned to a match context that is passed
|
||||
to the pattern currently running), that stack must not be used by any other
|
||||
threads (to avoid overwriting the same memory area). The best practice for
|
||||
multithreaded programs is to allocate a stack for each thread, and return this
|
||||
stack through the JIT callback function.
|
||||
</P>
|
||||
<P>
|
||||
(4) When should a JIT stack be freed?
|
||||
<br>
|
||||
<br>
|
||||
You can free a JIT stack at any time, as long as it will not be used by
|
||||
<b>pcre2_match()</b> again. When you assign the stack to a match context, only a
|
||||
pointer is set. There is no reference counting or any other magic. You can free
|
||||
compiled patterns, contexts, and stacks in any order, anytime. Just <i>do
|
||||
not</i> call <b>pcre2_match()</b> with a match context pointing to an already
|
||||
freed stack, as that will cause SEGFAULT. (Also, do not free a stack currently
|
||||
used by <b>pcre2_match()</b> in another thread). You can also replace the stack
|
||||
in a context at any time when it is not in use. You should free the previous
|
||||
stack before assigning a replacement.
|
||||
</P>
|
||||
<P>
|
||||
(5) Should I allocate/free a stack every time before/after calling
|
||||
<b>pcre2_match()</b>?
|
||||
<br>
|
||||
<br>
|
||||
No, because this is too costly in terms of resources. However, you could
|
||||
implement some clever idea which releases the stack if it is not used in let's
|
||||
say two minutes. The JIT callback can help to achieve this without keeping a
|
||||
list of patterns.
|
||||
</P>
|
||||
<P>
|
||||
(6) OK, the stack is for long term memory allocation. But what happens if a
|
||||
pattern causes stack overflow with a stack of 1M? Is that 1M kept until the
|
||||
stack is freed?
|
||||
<br>
|
||||
<br>
|
||||
Especially on embedded systems, it might be a good idea to release memory
|
||||
sometimes without freeing the stack. There is no API for this at the moment.
|
||||
Probably a function call which returns with the currently allocated memory for
|
||||
any stack and another which allows releasing memory (shrinking the stack) would
|
||||
be a good idea if someone needs this.
|
||||
</P>
|
||||
<P>
|
||||
(7) This is too much of a headache. Isn't there any better solution for JIT
|
||||
stack handling?
|
||||
<br>
|
||||
<br>
|
||||
No, thanks to Windows. If POSIX threads were used everywhere, we could throw
|
||||
out this complicated API.
|
||||
</P>
|
||||
<br><a name="SEC8" href="#TOC1">FREEING JIT SPECULATIVE MEMORY</a><br>
|
||||
<P>
|
||||
<b>void pcre2_jit_free_unused_memory(pcre2_general_context *<i>gcontext</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
The JIT executable allocator does not free all memory when it is possible.
|
||||
It expects new allocations, and keeps some free memory around to improve
|
||||
allocation speed. However, in low memory conditions, it might be better to free
|
||||
all possible memory. You can cause this to happen by calling
|
||||
pcre2_jit_free_unused_memory(). Its argument is a general context, for custom
|
||||
memory management, or NULL for standard memory management.
|
||||
</P>
|
||||
<br><a name="SEC9" href="#TOC1">EXAMPLE CODE</a><br>
|
||||
<P>
|
||||
This is a single-threaded example that specifies a JIT stack without using a
|
||||
callback. A real program should include error checking after all the function
|
||||
calls.
|
||||
<pre>
|
||||
int rc;
|
||||
pcre2_code *re;
|
||||
pcre2_match_data *match_data;
|
||||
pcre2_match_context *mcontext;
|
||||
pcre2_jit_stack *jit_stack;
|
||||
|
||||
re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0,
|
||||
&errornumber, &erroffset, NULL);
|
||||
rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
|
||||
mcontext = pcre2_match_context_create(NULL);
|
||||
jit_stack = pcre2_jit_stack_create(32*1024, 512*1024, NULL);
|
||||
pcre2_jit_stack_assign(mcontext, NULL, jit_stack);
|
||||
match_data = pcre2_match_data_create(re, 10);
|
||||
rc = pcre2_match(re, subject, length, 0, 0, match_data, mcontext);
|
||||
/* Process result */
|
||||
|
||||
pcre2_code_free(re);
|
||||
pcre2_match_data_free(match_data);
|
||||
pcre2_match_context_free(mcontext);
|
||||
pcre2_jit_stack_free(jit_stack);
|
||||
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC10" href="#TOC1">JIT FAST PATH API</a><br>
|
||||
<P>
|
||||
Because the API described above falls back to interpreted matching when JIT is
|
||||
not available, it is convenient for programs that are written for general use
|
||||
in many environments. However, calling JIT via <b>pcre2_match()</b> does have a
|
||||
performance impact. Programs that are written for use where JIT is known to be
|
||||
available, and which need the best possible performance, can instead use a
|
||||
"fast path" API to call JIT matching directly instead of calling
|
||||
<b>pcre2_match()</b> (obviously only for patterns that have been successfully
|
||||
processed by <b>pcre2_jit_compile()</b>).
|
||||
</P>
|
||||
<P>
|
||||
The fast path function is called <b>pcre2_jit_match()</b>, and it takes exactly
|
||||
the same arguments as <b>pcre2_match()</b>. The return values are also the same,
|
||||
plus PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or complete) is
|
||||
requested that was not compiled. Unsupported option bits (for example,
|
||||
PCRE2_ANCHORED) are ignored.
|
||||
</P>
|
||||
<P>
|
||||
When you call <b>pcre2_match()</b>, as well as testing for invalid options, a
|
||||
number of other sanity checks are performed on the arguments. For example, if
|
||||
the subject pointer is NULL, an immediate error is given. Also, unless
|
||||
PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested for validity. In the
|
||||
interests of speed, these checks do not happen on the JIT fast path, and if
|
||||
invalid data is passed, the result is undefined.
|
||||
</P>
|
||||
<P>
|
||||
Bypassing the sanity checks and the <b>pcre2_match()</b> wrapping can give
|
||||
speedups of more than 10%.
|
||||
</P>
|
||||
<br><a name="SEC11" href="#TOC1">SEE ALSO</a><br>
|
||||
<P>
|
||||
<b>pcre2api</b>(3)
|
||||
</P>
|
||||
<br><a name="SEC12" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
Philip Hazel (FAQ by Zoltan Herczeg)
|
||||
<br>
|
||||
University Computing Service
|
||||
<br>
|
||||
Cambridge, England.
|
||||
<br>
|
||||
</P>
|
||||
<br><a name="SEC13" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 27 November 2014
|
||||
<br>
|
||||
Copyright © 1997-2014 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user