Added bundled PCRE2 library.

This commit is contained in:
Markus Makela
2015-10-20 14:06:04 +03:00
parent 91bb3b288c
commit ee29e85016
333 changed files with 274569 additions and 0 deletions

View File

@ -40,6 +40,10 @@ find_package(TCMalloc)
find_package(Jemalloc)
find_package(Git)
find_package(CURL)
# Build PCRE2
include(cmake/BuildPCRE2.cmake)
# You can find the variables set by this in the FindCURL.cmake file
# which is a default module in CMake.

16
cmake/BuildPCRE2.cmake Normal file
View File

@ -0,0 +1,16 @@
# Build the PCRE2 library from source
set(PCRE_ROOT_DIR ${CMAKE_SOURCE_DIR}/pcre2/)
set(PCRE_BUILD_DIR ${CMAKE_BINARY_DIR}/pcre2/)
execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory ${PCRE_BUILD_DIR})
execute_process(COMMAND ${CMAKE_COMMAND} -E copy_directory ${PCRE_ROOT_DIR} ${PCRE_BUILD_DIR})
execute_process(COMMAND ${CMAKE_COMMAND} ${PCRE_BUILD_DIR}
-DBUILD_SHARED_LIBS=Y
-DPCRE2_BUILD_PCRE2GREP=N
-DPCRE2_BUILD_TESTS=N
WORKING_DIRECTORY ${PCRE_BUILD_DIR})
execute_process(COMMAND make WORKING_DIRECTORY ${PCRE_BUILD_DIR})
set(PCRE2_LIBRARIES ${CMAKE_BINARY_DIR}/pcre2/libpcre2-8.so CACHE STRING "PCRE2 dynamic libraries" FORCE)
include_directories(${CMAKE_BINARY_DIR}/pcre2/)
install(PROGRAMS ${PCRE2_LIBRARIES} DESTINATION ${MAXSCALE_LIBDIR})

313
pcre2/132html Executable file
View File

@ -0,0 +1,313 @@
#! /usr/bin/perl -w
# Script to turn PCRE2 man pages into HTML
# Subroutine to handle font changes and other escapes
sub do_line {
my($s) = $_[0];
$s =~ s/</&#60;/g; # Deal with < and >
$s =~ s/>/&#62;/g;
$s =~ s"\\fI(.*?)\\f[RP]"<i>$1</i>"g;
$s =~ s"\\fB(.*?)\\f[RP]"<b>$1</b>"g;
$s =~ s"\\e"\\"g;
$s =~ s/(?<=Copyright )\(c\)/&copy;/g;
$s;
}
# Subroutine to ensure not in a paragraph
sub end_para {
if ($inpara)
{
print TEMP "</PRE>\n" if ($inpre);
print TEMP "</P>\n";
}
$inpara = $inpre = 0;
$wrotetext = 0;
}
# Subroutine to start a new paragraph
sub new_para {
&end_para();
print TEMP "<P>\n";
$inpara = 1;
}
# Main program
$innf = 0;
$inpara = 0;
$inpre = 0;
$wrotetext = 0;
$toc = 0;
$ref = 1;
while ($#ARGV >= 0 && $ARGV[0] =~ /^-/)
{
$toc = 1 if $ARGV[0] eq "-toc";
shift;
}
# Initial output to STDOUT
print <<End ;
<html>
<head>
<title>$ARGV[0] specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>$ARGV[0] man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
End
print "<ul>\n" if ($toc);
open(TEMP, ">/tmp/$$") || die "Can't open /tmp/$$ for output\n";
while (<STDIN>)
{
# Handle lines beginning with a dot
if (/^\./)
{
# Some of the PCRE2 man pages used to contain instances of .br. However,
# they should have all been removed because they cause trouble in some
# (other) automated systems that translate man pages to HTML. Complain if
# we find .br or .in (another macro that is deprecated).
if (/^\.br/ || /^\.in/)
{
print STDERR "\n*** Deprecated macro encountered - rewrite needed\n";
print STDERR "*** $_\n";
die "*** Processing abandoned\n";
}
# Instead of .br, relevent "literal" sections are enclosed in .nf/.fi.
elsif (/^\.nf/)
{
$innf = 1;
}
elsif (/^\.fi/)
{
$innf = 0;
}
# Handling .sp is subtle. If it is inside a literal section, do nothing if
# the next line is a non literal text line; similarly, if not inside a
# literal section, do nothing if a literal follows, unless we are inside
# a .nf/.ne section. The point being that the <pre> and </pre> that delimit
# literal sections will do the spacing. Always skip if no previous output.
elsif (/^\.sp/)
{
if ($wrotetext)
{
$_ = <STDIN>;
if ($inpre)
{
print TEMP "\n" if (/^[\s.]/);
}
else
{
print TEMP "<br>\n<br>\n" if ($innf || !/^[\s.]/);
}
redo; # Now process the lookahead line we just read
}
}
elsif (/^\.TP/ || /^\.PP/ || /^\.P/)
{
&new_para();
}
elsif (/^\.SH\s*("?)(.*)\1/)
{
# Ignore the NAME section
if ($2 =~ /^NAME\b/)
{
<STDIN>;
next;
}
&end_para();
my($title) = &do_line($2);
if ($toc)
{
printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">$title</a>\n",
$ref, $ref);
printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">$title</a><br>\n",
$ref, $ref);
$ref++;
}
else
{
print TEMP "<br><b>\n$title\n</b><br>\n";
}
}
elsif (/^\.SS\s*("?)(.*)\1/)
{
&end_para();
my($title) = &do_line($2);
print TEMP "<br><b>\n$title\n</b><br>\n";
}
elsif (/^\.B\s*(.*)/)
{
&new_para() if (!$inpara);
$_ = &do_line($1);
s/"(.*?)"/$1/g;
print TEMP "<b>$_</b>\n";
$wrotetext = 1;
}
elsif (/^\.I\s*(.*)/)
{
&new_para() if (!$inpara);
$_ = &do_line($1);
s/"(.*?)"/$1/g;
print TEMP "<i>$_</i>\n";
$wrotetext = 1;
}
# A comment that starts "HREF" takes the next line as a name that
# is turned into a hyperlink, using the text given, which might be
# in a special font. If it ends in () or (digits) or punctuation, they
# aren't part of the link.
elsif (/^\.\\"\s*HREF/)
{
$_=<STDIN>;
chomp;
$_ = &do_line($_);
$_ =~ s/\s+$//;
$_ =~ /^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?(?:\(\d+\))?[.,;:]?$/;
print TEMP "<a href=\"$1.html\">$_</a>\n";
}
# A comment that starts "HTML" inserts literal HTML
elsif (/^\.\\"\s*HTML\s*(.*)/)
{
print TEMP $1;
}
# A comment that starts < inserts that HTML at the end of the
# *next* input line - so as not to get a newline between them.
elsif (/^\.\\"\s*(<.*>)/)
{
my($markup) = $1;
$_=<STDIN>;
chomp;
$_ = &do_line($_);
$_ =~ s/\s+$//;
print TEMP "$_$markup\n";
}
# A comment that starts JOIN joins the next two lines together, with one
# space between them. Then that line is processed. This is used in some
# displays where two lines are needed for the "man" version. JOINSH works
# the same, except that it assumes this is a shell command, so removes
# continuation backslashes.
elsif (/^\.\\"\s*JOIN(SH)?/)
{
my($one,$two);
$one = <STDIN>;
$two = <STDIN>;
$one =~ s/\s*\\e\s*$// if (defined($1));
chomp($one);
$two =~ s/^\s+//;
$_ = "$one $two";
redo; # Process the joined lines
}
# .EX/.EE are used in the pcre2demo page to bracket the entire program,
# which is unmodified except for turning backslash into "\e".
elsif (/^\.EX\s*$/)
{
print TEMP "<PRE>\n";
while (<STDIN>)
{
last if /^\.EE\s*$/;
s/\\e/\\/g;
s/&/&amp;/g;
s/</&lt;/g;
s/>/&gt;/g;
print TEMP;
}
}
# Ignore anything not recognized
next;
}
# Line does not begin with a dot. Replace blank lines with new paragraphs
if (/^\s*$/)
{
&end_para() if ($wrotetext);
next;
}
# Convert fonts changes and output an ordinary line. Ensure that indented
# lines are marked as literal.
$_ = &do_line($_);
&new_para() if (!$inpara);
if (/^\s/)
{
if (!$inpre)
{
print TEMP "<pre>\n";
$inpre = 1;
}
}
elsif ($inpre)
{
print TEMP "</pre>\n";
$inpre = 0;
}
# Add <br> to the end of a non-literal line if we are within .nf/.fi
$_ .= "<br>\n" if (!$inpre && $innf);
print TEMP;
$wrotetext = 1;
}
# The TOC, if present, will have been written - terminate it
print "</ul>\n" if ($toc);
# Copy the remainder to the standard output
close(TEMP);
open(TEMP, "/tmp/$$") || die "Can't open /tmp/$$ for input\n";
print while (<TEMP>);
print <<End ;
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
End
close(TEMP);
unlink("/tmp/$$");
# End

36
pcre2/AUTHORS Normal file
View File

@ -0,0 +1,36 @@
THE MAIN PCRE2 LIBRARY CODE
---------------------------
Written by: Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
University of Cambridge Computing Service,
Cambridge, England.
Copyright (c) 1997-2015 University of Cambridge
All rights reserved
PCRE2 JUST-IN-TIME COMPILATION SUPPORT
--------------------------------------
Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
Copyright(c) 2010-2015 Zoltan Herczeg
All rights reserved.
STACK-LESS JUST-IN-TIME COMPILER
--------------------------------
Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
Copyright(c) 2009-2015 Zoltan Herczeg
All rights reserved.
####

768
pcre2/CMakeLists.txt Normal file
View File

@ -0,0 +1,768 @@
# CMakeLists.txt
#
#
# This file enables PCRE2 to be built with the CMake configuration and build
# tool. Download CMake in source or binary form from http://www.cmake.org/
# Converted to support PCRE2 from the original PCRE file, August 2014.
#
# Original listfile by Christian Ehrlicher <Ch.Ehrlicher@gmx.de>
# Refined and expanded by Daniel Richard G. <skunk@iSKUNK.ORG>
# 2007-09-14 mod by Sheri so 7.4 supported configuration options can be entered
# 2007-09-19 Adjusted by PH to retain previous default settings
# 2007-12-26 (a) On UNIX, use names libpcre instead of just pcre
# (b) Ensure pcretest and pcregrep link with the local library,
# not a previously-installed one.
# (c) Add PCRE_SUPPORT_LIBREADLINE, PCRE_SUPPORT_LIBZ, and
# PCRE_SUPPORT_LIBBZ2.
# 2008-01-20 Brought up to date to include several new features by Christian
# Ehrlicher.
# 2008-01-22 Sheri added options for backward compatibility of library names
# when building with minGW:
# if "ON", NON_STANDARD_LIB_PREFIX causes shared libraries to
# be built without "lib" as prefix. (The libraries will be named
# pcre.dll, pcreposix.dll and pcrecpp.dll).
# if "ON", NON_STANDARD_LIB_SUFFIX causes shared libraries to
# be built with suffix of "-0.dll". (The libraries will be named
# libpcre-0.dll, libpcreposix-0.dll and libpcrecpp-0.dll - same names
# built by default with Configure and Make.
# 2008-01-23 PH removed the automatic build of pcredemo.
# 2008-04-22 PH modified READLINE support so it finds NCURSES when needed.
# 2008-07-03 PH updated for revised UCP property support (change of files)
# 2009-03-23 PH applied Steven Van Ingelgem's patch to change the name
# CMAKE_BINARY_DIR to PROJECT_BINARY_DIR so that it works when PCRE
# is included within another project.
# 2009-03-23 PH applied a modified version of Steven Van Ingelgem's patches to
# add options to stop the building of pcregrep and the tests, and
# to disable the final configuration report.
# 2009-04-11 PH applied Christian Ehrlicher's patch to show compiler flags that
# are set by specifying a release type.
# 2010-01-02 PH added test for stdint.h
# 2010-03-02 PH added test for inttypes.h
# 2011-08-01 PH added PCREGREP_BUFSIZE
# 2011-08-22 PH added PCRE_SUPPORT_JIT
# 2011-09-06 PH modified WIN32 ADD_TEST line as suggested by Sergey Cherepanov
# 2011-09-06 PH added PCRE_SUPPORT_PCREGREP_JIT
# 2011-10-04 Sheri added support for including coff data in windows shared libraries
# compiled with MINGW if pcre.rc and/or pcreposix.rc are placed in
# the source dir by the user prior to building
# 2011-10-04 Sheri changed various add_test's to use exes' location built instead
# of DEBUG location only (likely only matters in MSVC)
# 2011-10-04 Sheri added scripts to provide needed variables to RunTest and
# RunGrepTest (used for UNIX and Msys)
# 2011-10-04 Sheri added scripts to provide needed variables and to execute
# RunTest.bat in Win32 (for effortless testing with "make test")
# 2011-10-04 Sheri Increased minimum required cmake version
# 2012-01-06 PH removed pcre_info.c and added pcre_string_utils.c
# 2012-01-10 Zoltan Herczeg added libpcre16 support
# 2012-01-13 Stephen Kelly added out of source build support
# 2012-01-17 PH applied Stephen Kelly's patch to parse the version data out
# of the configure.ac file
# 2012-02-26 PH added support for libedit
# 2012-09-06 PH added support for PCRE_EBCDIC_NL25
# 2012-09-08 ChPe added PCRE32 support
# 2012-10-23 PH added support for VALGRIND and GCOV
# 2012-12-08 PH added patch from Daniel Richard G to quash some MSVC warnings
# 2013-07-01 PH realized that the "support" for GCOV was a total nonsense and
# so it has been removed.
# 2013-10-08 PH got rid of the "source" command, which is a bash-ism (use ".")
# 2013-11-05 PH added support for PARENS_NEST_LIMIT
# 2014-08-29 PH converted the file for PCRE2 (which has no C++).
# 2015-04024 PH added support for PCRE2_DEBUG
PROJECT(PCRE2 C)
# Increased minimum to 2.8.0 to support newer add_test features. Set policy
# CMP0026 to avoid warnings for the use of LOCATION in GET_TARGET_PROPERTY.
CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0)
CMAKE_POLICY(SET CMP0026 OLD)
SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) # for FindReadline.cmake
SET(CMAKE_C_FLAGS -I${PROJECT_SOURCE_DIR}/src)
# external packages
FIND_PACKAGE( BZip2 )
FIND_PACKAGE( ZLIB )
FIND_PACKAGE( Readline )
FIND_PACKAGE( Editline )
# Configuration checks
INCLUDE(CheckIncludeFile)
INCLUDE(CheckFunctionExists)
INCLUDE(CheckTypeSize)
CHECK_INCLUDE_FILE(dirent.h HAVE_DIRENT_H)
CHECK_INCLUDE_FILE(stdint.h HAVE_STDINT_H)
CHECK_INCLUDE_FILE(inttypes.h HAVE_INTTYPES_H)
CHECK_INCLUDE_FILE(sys/stat.h HAVE_SYS_STAT_H)
CHECK_INCLUDE_FILE(sys/types.h HAVE_SYS_TYPES_H)
CHECK_INCLUDE_FILE(unistd.h HAVE_UNISTD_H)
CHECK_INCLUDE_FILE(windows.h HAVE_WINDOWS_H)
CHECK_FUNCTION_EXISTS(bcopy HAVE_BCOPY)
CHECK_FUNCTION_EXISTS(memmove HAVE_MEMMOVE)
CHECK_FUNCTION_EXISTS(strerror HAVE_STRERROR)
# User-configurable options
#
# Note: CMakeSetup displays these in alphabetical order, regardless of
# the order we use here.
SET(BUILD_SHARED_LIBS OFF CACHE BOOL
"Build shared libraries instead of static ones.")
OPTION(PCRE2_BUILD_PCRE2_8 "Build 8 bit PCRE2 library" ON)
OPTION(PCRE2_BUILD_PCRE2_16 "Build 16 bit PCRE2 library" OFF)
OPTION(PCRE2_BUILD_PCRE2_32 "Build 32 bit PCRE2 library" OFF)
OPTION(PCRE2_DEBUG "Include debugging code" OFF)
SET(PCRE2_EBCDIC OFF CACHE BOOL
"Use EBCDIC coding instead of ASCII. (This is rarely used outside of mainframe systems.)")
SET(PCRE2_EBCDIC_NL25 OFF CACHE BOOL
"Use 0x25 as EBCDIC NL character instead of 0x15; implies EBCDIC.")
SET(PCRE2_LINK_SIZE "2" CACHE STRING
"Internal link size (2, 3 or 4 allowed). See LINK_SIZE in config.h.in for details.")
SET(PCRE2_PARENS_NEST_LIMIT "250" CACHE STRING
"Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details.")
SET(PCRE2_MATCH_LIMIT "10000000" CACHE STRING
"Default limit on internal looping. See MATCH_LIMIT in config.h.in for details.")
SET(PCRE2_MATCH_LIMIT_RECURSION "MATCH_LIMIT" CACHE STRING
"Default limit on internal recursion. See MATCH_LIMIT_RECURSION in config.h.in for details.")
SET(PCRE2GREP_BUFSIZE "20480" CACHE STRING
"Buffer size parameter for pcre2grep. See PCRE2GREP_BUFSIZE in config.h.in for details.")
SET(PCRE2_NEWLINE "LF" CACHE STRING
"What to recognize as a newline (one of CR, LF, CRLF, ANY, ANYCRLF).")
SET(PCRE2_HEAP_MATCH_RECURSE OFF CACHE BOOL
"If ON, then don't use stack recursion when matching. See HEAP_MATCH_RECURSE in config.h.in for details.")
SET(PCRE2_SUPPORT_JIT OFF CACHE BOOL
"Enable support for Just-in-time compiling.")
SET(PCRE2_SUPPORT_PCRE2GREP_JIT ON CACHE BOOL
"Enable use of Just-in-time compiling in pcre2grep.")
SET(PCRE2_SUPPORT_UNICODE ON CACHE BOOL
"Enable support for Unicode and UTF-8/UTF-16/UTF-32 encoding.")
SET(PCRE2_SUPPORT_BSR_ANYCRLF OFF CACHE BOOL
"ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks")
SET(PCRE2_SUPPORT_VALGRIND OFF CACHE BOOL
"Enable Valgrind support.")
OPTION(PCRE2_SHOW_REPORT "Show the final configuration report" ON)
OPTION(PCRE2_BUILD_PCRE2GREP "Build pcre2grep" ON)
OPTION(PCRE2_BUILD_TESTS "Build the tests" ON)
IF (MINGW)
OPTION(NON_STANDARD_LIB_PREFIX
"ON=Shared libraries built in mingw will be named pcre2.dll, etc., instead of libpcre2.dll, etc."
OFF)
OPTION(NON_STANDARD_LIB_SUFFIX
"ON=Shared libraries built in mingw will be named libpcre2-0.dll, etc., instead of libpcre2.dll, etc."
OFF)
ENDIF(MINGW)
IF(MSVC)
OPTION(INSTALL_MSVC_PDB
"ON=Install .pdb files built by MSVC, if generated"
OFF)
ENDIF(MSVC)
# bzip2 lib
IF(BZIP2_FOUND)
OPTION (PCRE2_SUPPORT_LIBBZ2 "Enable support for linking pcre2grep with libbz2." ON)
ENDIF(BZIP2_FOUND)
IF(PCRE2_SUPPORT_LIBBZ2)
INCLUDE_DIRECTORIES(${BZIP2_INCLUDE_DIR})
ENDIF(PCRE2_SUPPORT_LIBBZ2)
# zlib
IF(ZLIB_FOUND)
OPTION (PCRE2_SUPPORT_LIBZ "Enable support for linking pcre2grep with libz." ON)
ENDIF(ZLIB_FOUND)
IF(PCRE2_SUPPORT_LIBZ)
INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR})
ENDIF(PCRE2_SUPPORT_LIBZ)
# editline lib
IF(EDITLINE_FOUND)
OPTION (PCRE2_SUPPORT_LIBEDIT "Enable support for linking pcre2test with libedit." OFF)
ENDIF(EDITLINE_FOUND)
IF(PCRE2_SUPPORT_LIBEDIT)
INCLUDE_DIRECTORIES(${EDITLINE_INCLUDE_DIR})
ENDIF(PCRE2_SUPPORT_LIBEDIT)
# readline lib
IF(READLINE_FOUND)
OPTION (PCRE2_SUPPORT_LIBREADLINE "Enable support for linking pcre2test with libreadline." ON)
ENDIF(READLINE_FOUND)
IF(PCRE2_SUPPORT_LIBREADLINE)
INCLUDE_DIRECTORIES(${READLINE_INCLUDE_DIR})
ENDIF(PCRE2_SUPPORT_LIBREADLINE)
# Prepare build configuration
IF(NOT BUILD_SHARED_LIBS)
SET(PCRE2_STATIC 1)
ENDIF(NOT BUILD_SHARED_LIBS)
IF(NOT PCRE2_BUILD_PCRE2_8 AND NOT PCRE2_BUILD_PCRE2_16 AND NOT PCRE2_BUILD_PCRE2_32)
MESSAGE(FATAL_ERROR "At least one of PCRE2_BUILD_PCRE2_8, PCRE2_BUILD_PCRE2_16 or PCRE2_BUILD_PCRE2_32 must be enabled")
ENDIF(NOT PCRE2_BUILD_PCRE2_8 AND NOT PCRE2_BUILD_PCRE2_16 AND NOT PCRE2_BUILD_PCRE2_32)
IF(PCRE2_BUILD_PCRE2_8)
SET(SUPPORT_PCRE2_8 1)
ENDIF(PCRE2_BUILD_PCRE2_8)
IF(PCRE2_BUILD_PCRE2_16)
SET(SUPPORT_PCRE2_16 1)
ENDIF(PCRE2_BUILD_PCRE2_16)
IF(PCRE2_BUILD_PCRE2_32)
SET(SUPPORT_PCRE2_32 1)
ENDIF(PCRE2_BUILD_PCRE2_32)
IF(PCRE2_BUILD_PCRE2GREP AND NOT PCRE2_BUILD_PCRE2_8)
MESSAGE(STATUS "** PCRE2_BUILD_PCRE2_8 must be enabled for the pcre2grep program")
SET(PCRE2_BUILD_PCRE2GREP OFF)
ENDIF(PCRE2_BUILD_PCRE2GREP AND NOT PCRE2_BUILD_PCRE2_8)
IF(PCRE2_SUPPORT_LIBREADLINE AND PCRE2_SUPPORT_LIBEDIT)
MESSAGE(FATAL_ERROR "Only one of libreadline or libeditline can be specified")
ENDIF(PCRE2_SUPPORT_LIBREADLINE AND PCRE2_SUPPORT_LIBEDIT)
IF(PCRE2_SUPPORT_BSR_ANYCRLF)
SET(BSR_ANYCRLF 1)
ENDIF(PCRE2_SUPPORT_BSR_ANYCRLF)
IF(PCRE2_SUPPORT_UNICODE)
SET(SUPPORT_UNICODE 1)
ENDIF(PCRE2_SUPPORT_UNICODE)
IF(PCRE2_SUPPORT_JIT)
SET(SUPPORT_JIT 1)
ENDIF(PCRE2_SUPPORT_JIT)
IF(PCRE2_SUPPORT_PCRE2GREP_JIT)
SET(SUPPORT_PCRE2GREP_JIT 1)
ENDIF(PCRE2_SUPPORT_PCRE2GREP_JIT)
IF(PCRE2_SUPPORT_VALGRIND)
SET(SUPPORT_VALGRIND 1)
ENDIF(PCRE2_SUPPORT_VALGRIND)
# This next one used to reference ${READLINE_LIBRARY})
# but I was advised to add the NCURSES test as well, along with
# some modifications to cmake/FindReadline.cmake which should
# make it possible to override the default if necessary. PH
IF(PCRE2_SUPPORT_LIBREADLINE)
SET(SUPPORT_LIBREADLINE 1)
SET(PCRE2TEST_LIBS ${READLINE_LIBRARY} ${NCURSES_LIBRARY})
ENDIF(PCRE2_SUPPORT_LIBREADLINE)
# libedit is a plug-compatible alternative to libreadline
IF(PCRE2_SUPPORT_LIBEDIT)
SET(SUPPORT_LIBEDIT 1)
SET(PCRE2TEST_LIBS ${EDITLINE_LIBRARY} ${NCURSES_LIBRARY})
ENDIF(PCRE2_SUPPORT_LIBEDIT)
IF(PCRE2_SUPPORT_LIBZ)
SET(SUPPORT_LIBZ 1)
SET(PCRE2GREP_LIBS ${PCRE2GREP_LIBS} ${ZLIB_LIBRARIES})
ENDIF(PCRE2_SUPPORT_LIBZ)
IF(PCRE2_SUPPORT_LIBBZ2)
SET(SUPPORT_LIBBZ2 1)
SET(PCRE2GREP_LIBS ${PCRE2GREP_LIBS} ${BZIP2_LIBRARIES})
ENDIF(PCRE2_SUPPORT_LIBBZ2)
SET(NEWLINE_DEFAULT "")
IF(PCRE2_NEWLINE STREQUAL "CR")
SET(NEWLINE_DEFAULT "1")
ENDIF(PCRE2_NEWLINE STREQUAL "CR")
IF(PCRE2_NEWLINE STREQUAL "LF")
SET(NEWLINE_DEFAULT "2")
ENDIF(PCRE2_NEWLINE STREQUAL "LF")
IF(PCRE2_NEWLINE STREQUAL "CRLF")
SET(NEWLINE_DEFAULT "3")
ENDIF(PCRE2_NEWLINE STREQUAL "CRLF")
IF(PCRE2_NEWLINE STREQUAL "ANY")
SET(NEWLINE_DEFAULT "4")
ENDIF(PCRE2_NEWLINE STREQUAL "ANY")
IF(PCRE2_NEWLINE STREQUAL "ANYCRLF")
SET(NEWLINE_DEFAULT "5")
ENDIF(PCRE2_NEWLINE STREQUAL "ANYCRLF")
IF(NEWLINE_DEFAULT STREQUAL "")
MESSAGE(FATAL_ERROR "The PCRE2_NEWLINE variable must be set to one of the following values: \"LF\", \"CR\", \"CRLF\", \"ANY\", \"ANYCRLF\".")
ENDIF(NEWLINE_DEFAULT STREQUAL "")
IF(PCRE2_EBCDIC)
SET(EBCDIC 1)
ENDIF(PCRE2_EBCDIC)
IF(PCRE2_EBCDIC_NL25)
SET(EBCDIC 1)
SET(EBCDIC_NL25 1)
ENDIF(PCRE2_EBCDIC_NL25)
IF(PCRE2_HEAP_MATCH_RECURSE)
SET(HEAP_MATCH_RECURSE 1)
ENDIF(PCRE2_HEAP_MATCH_RECURSE)
# Output files
CONFIGURE_FILE(config-cmake.h.in
${PROJECT_BINARY_DIR}/config.h
@ONLY)
# Parse version numbers and date out of configure.ac
file(STRINGS ${PROJECT_SOURCE_DIR}/configure.ac
configure_lines
LIMIT_COUNT 50 # Read only the first 50 lines of the file
)
set(SEARCHED_VARIABLES "pcre2_major" "pcre2_minor" "pcre2_prerelease" "pcre2_date")
foreach(configure_line ${configure_lines})
foreach(_substitution_variable ${SEARCHED_VARIABLES})
string(TOUPPER ${_substitution_variable} _substitution_variable_upper)
if (NOT ${_substitution_variable_upper})
string(REGEX MATCH "m4_define\\(${_substitution_variable}, \\[(.*)\\]" MACTHED_STRING ${configure_line})
if (CMAKE_MATCH_1)
set(${_substitution_variable_upper} ${CMAKE_MATCH_1})
endif()
endif()
endforeach()
endforeach()
CONFIGURE_FILE(src/pcre2.h.in
${PROJECT_BINARY_DIR}/pcre2.h
@ONLY)
# What about pcre2-config and libpcre2.pc?
# Character table generation
OPTION(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF)
IF(PCRE2_REBUILD_CHARTABLES)
ADD_EXECUTABLE(dftables src/dftables.c)
ADD_CUSTOM_COMMAND(
COMMENT "Generating character tables (pcre2_chartables.c) for current locale"
DEPENDS dftables
COMMAND dftables
ARGS ${PROJECT_BINARY_DIR}/pcre2_chartables.c
OUTPUT ${PROJECT_BINARY_DIR}/pcre2_chartables.c
)
ELSE(PCRE2_REBUILD_CHARTABLES)
CONFIGURE_FILE(${PROJECT_SOURCE_DIR}/src/pcre2_chartables.c.dist
${PROJECT_BINARY_DIR}/pcre2_chartables.c
COPYONLY)
ENDIF(PCRE2_REBUILD_CHARTABLES)
# Source code
SET(PCRE2_HEADERS ${PROJECT_BINARY_DIR}/pcre2.h)
SET(PCRE2_SOURCES
src/pcre2_auto_possess.c
${PROJECT_BINARY_DIR}/pcre2_chartables.c
src/pcre2_compile.c
src/pcre2_config.c
src/pcre2_context.c
src/pcre2_dfa_match.c
src/pcre2_error.c
src/pcre2_jit_compile.c
src/pcre2_maketables.c
src/pcre2_match.c
src/pcre2_match_data.c
src/pcre2_newline.c
src/pcre2_ord2utf.c
src/pcre2_pattern_info.c
src/pcre2_serialize.c
src/pcre2_string_utils.c
src/pcre2_study.c
src/pcre2_substitute.c
src/pcre2_substring.c
src/pcre2_tables.c
src/pcre2_ucd.c
src/pcre2_valid_utf.c
src/pcre2_xclass.c
)
SET(PCRE2POSIX_HEADERS src/pcre2posix.h)
SET(PCRE2POSIX_SOURCES src/pcre2posix.c)
IF(MINGW AND NOT PCRE2_STATIC)
IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
ADD_CUSTOM_COMMAND(OUTPUT ${PROJECT_SOURCE_DIR}/pcre2.o
PRE-LINK
COMMAND windres ARGS pcre2.rc pcre2.o
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMENT Using pcre2 coff info in mingw build)
SET(PCRE2_SOURCES
${PCRE2_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2.o
)
ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
ADD_CUSTOM_COMMAND(OUTPUT ${PROJECT_SOURCE_DIR}/pcre2posix.o
PRE-LINK
COMMAND windres ARGS pcre2posix.rc pcre2posix.o
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMENT Using pcre2posix coff info in mingw build)
SET(PCRE2POSIX_SOURCES
${PCRE2POSIX_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2posix.o
)
ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
ENDIF(MINGW AND NOT PCRE2_STATIC)
IF(MSVC AND NOT PCRE2_STATIC)
IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
SET(PCRE2_SOURCES
${PCRE2_SOURCES} pcre2.rc)
ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
SET(PCRE2POSIX_SOURCES
${PCRE2POSIX_SOURCES} pcre2posix.rc)
ENDIF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
ENDIF(MSVC AND NOT PCRE2_STATIC)
# Build setup
ADD_DEFINITIONS(-DHAVE_CONFIG_H)
IF(MSVC)
ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE -D_CRT_SECURE_NO_WARNINGS)
ENDIF(MSVC)
SET(CMAKE_INCLUDE_CURRENT_DIR 1)
# needed to make sure to not link debug libs
# against release libs and vice versa
IF(WIN32)
SET(CMAKE_DEBUG_POSTFIX "d")
ENDIF(WIN32)
SET(targets)
# 8-bit library
IF(PCRE2_BUILD_PCRE2_8)
ADD_LIBRARY(pcre2-8 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
SET_PROPERTY(TARGET pcre2-8
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
SET(targets ${targets} pcre2-8)
ADD_LIBRARY(pcre2posix ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES})
SET_PROPERTY(TARGET pcre2posix
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
SET(targets ${targets} pcre2posix)
TARGET_LINK_LIBRARIES(pcre2posix pcre2-8)
IF(MINGW AND NOT PCRE2_STATIC)
IF(NON_STANDARD_LIB_PREFIX)
SET_TARGET_PROPERTIES(pcre2-8 pcre2posix PROPERTIES PREFIX "")
ENDIF(NON_STANDARD_LIB_PREFIX)
IF(NON_STANDARD_LIB_SUFFIX)
SET_TARGET_PROPERTIES(pcre2-8 pcre2posix PROPERTIES SUFFIX "-0.dll")
ENDIF(NON_STANDARD_LIB_SUFFIX)
ENDIF(MINGW AND NOT PCRE2_STATIC)
ENDIF(PCRE2_BUILD_PCRE2_8)
# 16-bit library
IF(PCRE2_BUILD_PCRE2_16)
ADD_LIBRARY(pcre2-16 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
SET_PROPERTY(TARGET pcre2-16
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16)
SET(targets ${targets} pcre2-16)
IF(MINGW AND NOT PCRE2_STATIC)
IF(NON_STANDARD_LIB_PREFIX)
SET_TARGET_PROPERTIES(pcre2-16 PROPERTIES PREFIX "")
ENDIF(NON_STANDARD_LIB_PREFIX)
IF(NON_STANDARD_LIB_SUFFIX)
SET_TARGET_PROPERTIES(pcre2-16 PROPERTIES SUFFIX "-0.dll")
ENDIF(NON_STANDARD_LIB_SUFFIX)
ENDIF(MINGW AND NOT PCRE2_STATIC)
ENDIF(PCRE2_BUILD_PCRE2_16)
# 32-bit library
IF(PCRE2_BUILD_PCRE2_32)
ADD_LIBRARY(pcre2-32 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
SET_PROPERTY(TARGET pcre2-32
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32)
SET(targets ${targets} pcre2-32)
IF(MINGW AND NOT PCRE2_STATIC)
IF(NON_STANDARD_LIB_PREFIX)
SET_TARGET_PROPERTIES(pcre2-32 PROPERTIES PREFIX "")
ENDIF(NON_STANDARD_LIB_PREFIX)
IF(NON_STANDARD_LIB_SUFFIX)
SET_TARGET_PROPERTIES(pcre2-32 PROPERTIES SUFFIX "-0.dll")
ENDIF(NON_STANDARD_LIB_SUFFIX)
ENDIF(MINGW AND NOT PCRE2_STATIC)
ENDIF(PCRE2_BUILD_PCRE2_32)
# Executables
IF(PCRE2_BUILD_PCRE2GREP)
ADD_EXECUTABLE(pcre2grep src/pcre2grep.c)
SET_PROPERTY(TARGET pcre2grep
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
SET(targets ${targets} pcre2grep)
TARGET_LINK_LIBRARIES(pcre2grep pcre2posix ${PCRE2GREP_LIBS})
ENDIF(PCRE2_BUILD_PCRE2GREP)
# Testing
IF(PCRE2_BUILD_TESTS)
ENABLE_TESTING()
SET(PCRE2TEST_SOURCES src/pcre2test.c)
ADD_EXECUTABLE(pcre2test ${PCRE2TEST_SOURCES})
SET(targets ${targets} pcre2test)
IF(PCRE2_BUILD_PCRE2_8)
LIST(APPEND PCRE2TEST_LIBS pcre2posix pcre2-8)
ENDIF(PCRE2_BUILD_PCRE2_8)
IF(PCRE2_BUILD_PCRE2_16)
LIST(APPEND PCRE2TEST_LIBS pcre2-16)
ENDIF(PCRE2_BUILD_PCRE2_16)
IF(PCRE2_BUILD_PCRE2_32)
LIST(APPEND PCRE2TEST_LIBS pcre2-32)
ENDIF(PCRE2_BUILD_PCRE2_32)
TARGET_LINK_LIBRARIES(pcre2test ${PCRE2TEST_LIBS})
IF(PCRE2_SUPPORT_JIT)
ADD_EXECUTABLE(pcre2_jit_test src/pcre2_jit_test.c)
SET(targets ${targets} pcre2_jit_test)
SET(PCRE2_JIT_TEST_LIBS )
IF(PCRE2_BUILD_PCRE2_8)
LIST(APPEND PCRE2_JIT_TEST_LIBS pcre2-8)
ENDIF(PCRE2_BUILD_PCRE2_8)
IF(PCRE2_BUILD_PCRE2_16)
LIST(APPEND PCRE2_JIT_TEST_LIBS pcre2-16)
ENDIF(PCRE2_BUILD_PCRE2_16)
IF(PCRE2_BUILD_PCRE2_32)
LIST(APPEND PCRE2_JIT_TEST_LIBS pcre2-32)
ENDIF(PCRE2_BUILD_PCRE2_32)
TARGET_LINK_LIBRARIES(pcre2_jit_test ${PCRE2_JIT_TEST_LIBS})
ENDIF(PCRE2_SUPPORT_JIT)
# exes in Debug location tested by the RunTest shell script
# via "make test"
IF(PCRE2_BUILD_PCRE2GREP)
GET_TARGET_PROPERTY(PCRE2GREP_EXE pcre2grep DEBUG_LOCATION)
ENDIF(PCRE2_BUILD_PCRE2GREP)
GET_TARGET_PROPERTY(PCRE2TEST_EXE pcre2test DEBUG_LOCATION)
# =================================================
# Write out a CTest configuration file
#
FILE(WRITE ${PROJECT_BINARY_DIR}/CTestCustom.ctest
"# This is a generated file.
MESSAGE(\"When testing is complete, review test output in the
\\\"${PROJECT_BINARY_DIR}/Testing/Temporary\\\" folder.\")
MESSAGE(\" \")
")
FILE(WRITE ${PROJECT_BINARY_DIR}/pcre2_test.sh
"#! /bin/sh
# This is a generated file.
. ${PROJECT_SOURCE_DIR}/RunTest
if test \"$?\" != \"0\"; then exit 1; fi
# End
")
IF(UNIX)
ADD_TEST(pcre2_test sh ${PROJECT_BINARY_DIR}/pcre2_test.sh)
ENDIF(UNIX)
IF(PCRE2_BUILD_PCRE2GREP)
FILE(WRITE ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh
"#! /bin/sh
# This is a generated file.
. ${PROJECT_SOURCE_DIR}/RunGrepTest
if test \"$?\" != \"0\"; then exit 1; fi
# End
")
IF(UNIX)
ADD_TEST(pcre2_grep_test sh ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh)
ENDIF(UNIX)
ENDIF(PCRE2_BUILD_PCRE2GREP)
IF(WIN32)
# Provide environment for executing the bat file version of RunTest
FILE(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} winsrc)
FILE(TO_NATIVE_PATH ${PROJECT_BINARY_DIR} winbin)
FILE(TO_NATIVE_PATH ${PCRE2TEST_EXE} winexe)
FILE(WRITE ${PROJECT_BINARY_DIR}/pcre2_test.bat
"\@REM This is a generated file.
\@echo off
setlocal
SET srcdir=\"${winsrc}\"
SET pcre2test=\"${winexe}\"
if not [%CMAKE_CONFIG_TYPE%]==[] SET pcre2test=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcre2test.exe\"
call %srcdir%\\RunTest.Bat
if errorlevel 1 exit /b 1
echo RunTest.bat tests successfully completed
")
ADD_TEST(NAME pcre2_test_bat
COMMAND pcre2_test.bat)
SET_TESTS_PROPERTIES(pcre2_test_bat PROPERTIES
PASS_REGULAR_EXPRESSION "RunTest\\.bat tests successfully completed")
IF("$ENV{OSTYPE}" STREQUAL "msys")
# Both the sh and bat file versions of RunTest are run if make test is used
# in msys
ADD_TEST(pcre2_test_sh sh.exe ${PROJECT_BINARY_DIR}/pcre2_test.sh)
IF(PCRE2_BUILD_PCRE2GREP)
ADD_TEST(pcre2_grep_test sh.exe ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh)
ENDIF(PCRE2_BUILD_PCRE2GREP)
ENDIF("$ENV{OSTYPE}" STREQUAL "msys")
ENDIF(WIN32)
# Changed to accommodate testing whichever location was just built
IF(PCRE2_SUPPORT_JIT)
ADD_TEST(pcre2_jit_test pcre2_jit_test)
ENDIF(PCRE2_SUPPORT_JIT)
ENDIF(PCRE2_BUILD_TESTS)
# Installation
SET(CMAKE_INSTALL_ALWAYS 1)
INSTALL(TARGETS ${targets}
RUNTIME DESTINATION bin
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib)
INSTALL(FILES ${PCRE2_HEADERS} ${PCRE2POSIX_HEADERS} DESTINATION include)
FILE(GLOB html ${PROJECT_SOURCE_DIR}/doc/html/*.html)
FILE(GLOB man1 ${PROJECT_SOURCE_DIR}/doc/*.1)
FILE(GLOB man3 ${PROJECT_SOURCE_DIR}/doc/*.3)
FOREACH(man ${man3})
GET_FILENAME_COMPONENT(man_tmp ${man} NAME)
SET(man3_new ${man3} ${man})
ENDFOREACH(man ${man3})
SET(man3 ${man3_new})
INSTALL(FILES ${man1} DESTINATION man/man1)
INSTALL(FILES ${man3} DESTINATION man/man3)
INSTALL(FILES ${html} DESTINATION share/doc/pcre2/html)
IF(MSVC AND INSTALL_MSVC_PDB)
INSTALL(FILES ${PROJECT_BINARY_DIR}/pcre2.pdb
${PROJECT_BINARY_DIR}/pcre2posix.pdb
DESTINATION bin
CONFIGURATIONS RelWithDebInfo)
INSTALL(FILES ${PROJECT_BINARY_DIR}/pcre2d.pdb
${PROJECT_BINARY_DIR}/pcre2posixd.pdb
DESTINATION bin
CONFIGURATIONS Debug)
ENDIF(MSVC AND INSTALL_MSVC_PDB)
# Help, only for nice output
IF(BUILD_SHARED_LIBS)
SET(BUILD_STATIC_LIBS OFF)
ELSE(BUILD_SHARED_LIBS)
SET(BUILD_STATIC_LIBS ON)
ENDIF(BUILD_SHARED_LIBS)
IF(PCRE2_SHOW_REPORT)
STRING(TOUPPER "${CMAKE_BUILD_TYPE}" buildtype)
IF (CMAKE_C_FLAGS)
SET(cfsp " ")
ENDIF(CMAKE_C_FLAGS)
MESSAGE(STATUS "")
MESSAGE(STATUS "")
MESSAGE(STATUS "PCRE2 configuration summary:")
MESSAGE(STATUS "")
MESSAGE(STATUS " Install prefix .................. : ${CMAKE_INSTALL_PREFIX}")
MESSAGE(STATUS " C compiler ...................... : ${CMAKE_C_COMPILER}")
MESSAGE(STATUS " C compiler flags ................ : ${CMAKE_C_FLAGS}${cfsp}${CMAKE_C_FLAGS_${buildtype}}")
MESSAGE(STATUS "")
MESSAGE(STATUS " Build 8 bit PCRE2 library ....... : ${PCRE2_BUILD_PCRE2_8}")
MESSAGE(STATUS " Build 16 bit PCRE2 library ...... : ${PCRE2_BUILD_PCRE2_16}")
MESSAGE(STATUS " Build 32 bit PCRE2 library ...... : ${PCRE2_BUILD_PCRE2_32}")
MESSAGE(STATUS " Enable JIT compiling support .... : ${PCRE2_SUPPORT_JIT}")
MESSAGE(STATUS " Enable Unicode support .......... : ${PCRE2_SUPPORT_UNICODE}")
MESSAGE(STATUS " Newline char/sequence ........... : ${PCRE2_NEWLINE}")
MESSAGE(STATUS " \\R matches only ANYCRLF ......... : ${PCRE2_SUPPORT_BSR_ANYCRLF}")
MESSAGE(STATUS " EBCDIC coding ................... : ${PCRE2_EBCDIC}")
MESSAGE(STATUS " EBCDIC coding with NL=0x25 ...... : ${PCRE2_EBCDIC_NL25}")
MESSAGE(STATUS " Rebuild char tables ............. : ${PCRE2_REBUILD_CHARTABLES}")
MESSAGE(STATUS " Use heap recursion .............. : ${PCRE2_HEAP_MATCH_RECURSE}")
MESSAGE(STATUS " Internal link size .............. : ${PCRE2_LINK_SIZE}")
MESSAGE(STATUS " Parentheses nest limit .......... : ${PCRE2_PARENS_NEST_LIMIT}")
MESSAGE(STATUS " Match limit ..................... : ${PCRE2_MATCH_LIMIT}")
MESSAGE(STATUS " Match limit recursion ........... : ${PCRE2_MATCH_LIMIT_RECURSION}")
MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}")
MESSAGE(STATUS " Build static libs ............... : ${BUILD_STATIC_LIBS}")
MESSAGE(STATUS " Build pcre2grep ................. : ${PCRE2_BUILD_PCRE2GREP}")
MESSAGE(STATUS " Enable JIT in pcre2grep ......... : ${PCRE2_SUPPORT_PCRE2GREP_JIT}")
MESSAGE(STATUS " Buffer size for pcre2grep ....... : ${PCRE2GREP_BUFSIZE}")
MESSAGE(STATUS " Build tests (implies pcre2test . : ${PCRE2_BUILD_TESTS}")
MESSAGE(STATUS " and pcre2grep)")
IF(ZLIB_FOUND)
MESSAGE(STATUS " Link pcre2grep with libz ........ : ${PCRE2_SUPPORT_LIBZ}")
ELSE(ZLIB_FOUND)
MESSAGE(STATUS " Link pcre2grep with libz ........ : Library not found" )
ENDIF(ZLIB_FOUND)
IF(BZIP2_FOUND)
MESSAGE(STATUS " Link pcre2grep with libbz2 ...... : ${PCRE2_SUPPORT_LIBBZ2}")
ELSE(BZIP2_FOUND)
MESSAGE(STATUS " Link pcre2grep with libbz2 ...... : Library not found" )
ENDIF(BZIP2_FOUND)
IF(EDITLINE_FOUND)
MESSAGE(STATUS " Link pcre2test with libeditline . : ${PCRE2_SUPPORT_LIBEDIT}")
ELSE(EDITLINE_FOUND)
MESSAGE(STATUS " Link pcre2test with libeditline . : Library not found" )
ENDIF(EDITLINE_FOUND)
IF(READLINE_FOUND)
MESSAGE(STATUS " Link pcre2test with libreadline . : ${PCRE2_SUPPORT_LIBREADLINE}")
ELSE(READLINE_FOUND)
MESSAGE(STATUS " Link pcre2test with libreadline . : Library not found" )
ENDIF(READLINE_FOUND)
MESSAGE(STATUS " Support Valgrind .................: ${PCRE2_SUPPORT_VALGRIND}")
IF(MINGW AND NOT PCRE2_STATIC)
MESSAGE(STATUS " Non-standard dll names (prefix) . : ${NON_STANDARD_LIB_PREFIX}")
MESSAGE(STATUS " Non-standard dll names (suffix) . : ${NON_STANDARD_LIB_SUFFIX}")
ENDIF(MINGW AND NOT PCRE2_STATIC)
IF(MSVC)
MESSAGE(STATUS " Install MSVC .pdb files ..........: ${INSTALL_MSVC_PDB}")
ENDIF(MSVC)
MESSAGE(STATUS "")
ENDIF(PCRE2_SHOW_REPORT)
# end CMakeLists.txt

674
pcre2/COPYING Normal file
View File

@ -0,0 +1,674 @@
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU General Public License is a free, copyleft license for
software and other kinds of works.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users. We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors. You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights. Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received. You must make sure that they, too, receive
or can get the source code. And you must show them these terms so they
know their rights.
Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.
For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software. For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.
Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so. This is fundamentally incompatible with the aim of
protecting users' freedom to change the software. The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable. Therefore, we
have designed this version of the GPL to prohibit the practice for those
products. If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.
Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary. To prevent this, the GPL assures that
patents cannot be used to render the program non-free.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Use with the GNU Affero General Public License.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:
<program> Copyright (C) <year> <name of author>
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, your program's commands
might be different; for a GUI interface, you would use an "about box".
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU GPL, see
<http://www.gnu.org/licenses/>.
The GNU General Public License does not permit incorporating your program
into proprietary programs. If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License. But first, please read
<http://www.gnu.org/philosophy/why-not-lgpl.html>.

367
pcre2/ChangeLog Normal file
View File

@ -0,0 +1,367 @@
Change Log for PCRE2
--------------------
Version 10.20 30-June-2015
--------------------------
1. Callouts with string arguments have been added.
2. Assertion code generator in JIT has been optimized.
3. The invalid pattern (?(?C) has a missing assertion condition at the end. The
pcre2_compile() function read past the end of the input before diagnosing an
error. This bug was discovered by the LLVM fuzzer.
4. Implemented pcre2_callout_enumerate().
5. Fix JIT compilation of conditional blocks whose assertion is converted to
(*FAIL). E.g: /(?(?!))/.
6. The pattern /(?(?!)^)/ caused references to random memory. This bug was
discovered by the LLVM fuzzer.
7. The assertion (?!) is optimized to (*FAIL). This was not handled correctly
when this assertion was used as a condition, for example (?(?!)a|b). In
pcre2_match() it worked by luck; in pcre2_dfa_match() it gave an incorrect
error about an unsupported item.
8. For some types of pattern, for example /Z*(|d*){216}/, the auto-
possessification code could take exponential time to complete. A recursion
depth limit of 1000 has been imposed to limit the resources used by this
optimization. This infelicity was discovered by the LLVM fuzzer.
9. A pattern such as /(*UTF)[\S\V\H]/, which contains a negated special class
such as \S in non-UCP mode, explicit wide characters (> 255) can be ignored
because \S ensures they are all in the class. The code for doing this was
interacting badly with the code for computing the amount of space needed to
compile the pattern, leading to a buffer overflow. This bug was discovered by
the LLVM fuzzer.
10. A pattern such as /((?2)+)((?1))/ which has mutual recursion nested inside
other kinds of group caused stack overflow at compile time. This bug was
discovered by the LLVM fuzzer.
11. A pattern such as /(?1)(?#?'){8}(a)/ which had a parenthesized comment
between a subroutine call and its quantifier was incorrectly compiled, leading
to buffer overflow or other errors. This bug was discovered by the LLVM fuzzer.
12. The illegal pattern /(?(?<E>.*!.*)?)/ was not being diagnosed as missing an
assertion after (?(. The code was failing to check the character after (?(?<
for the ! or = that would indicate a lookbehind assertion. This bug was
discovered by the LLVM fuzzer.
13. A pattern such as /X((?2)()*+){2}+/ which has a possessive quantifier with
a fixed maximum following a group that contains a subroutine reference was
incorrectly compiled and could trigger buffer overflow. This bug was discovered
by the LLVM fuzzer.
14. Negative relative recursive references such as (?-7) to non-existent
subpatterns were not being diagnosed and could lead to unpredictable behaviour.
This bug was discovered by the LLVM fuzzer.
15. The bug fixed in 14 was due to an integer variable that was unsigned when
it should have been signed. Some other "int" variables, having been checked,
have either been changed to uint32_t or commented as "must be signed".
16. A mutual recursion within a lookbehind assertion such as (?<=((?2))((?1)))
caused a stack overflow instead of the diagnosis of a non-fixed length
lookbehind assertion. This bug was discovered by the LLVM fuzzer.
17. The use of \K in a positive lookbehind assertion in a non-anchored pattern
(e.g. /(?<=\Ka)/) could make pcre2grep loop.
18. There was a similar problem to 17 in pcre2test for global matches, though
the code there did catch the loop.
19. If a greedy quantified \X was preceded by \C in UTF mode (e.g. \C\X*),
and a subsequent item in the pattern caused a non-match, backtracking over the
repeated \X did not stop, but carried on past the start of the subject, causing
reference to random memory and/or a segfault. There were also some other cases
where backtracking after \C could crash. This set of bugs was discovered by the
LLVM fuzzer.
20. The function for finding the minimum length of a matching string could take
a very long time if mutual recursion was present many times in a pattern, for
example, /((?2){73}(?2))((?1))/. A better mutual recursion detection method has
been implemented. This infelicity was discovered by the LLVM fuzzer.
21. Implemented PCRE2_NEVER_BACKSLASH_C.
22. The feature for string replication in pcre2test could read from freed
memory if the replication required a buffer to be extended, and it was not
working properly in 16-bit and 32-bit modes. This issue was discovered by a
fuzzer: see http://lcamtuf.coredump.cx/afl/.
23. Added the PCRE2_ALT_CIRCUMFLEX option.
24. Adjust the treatment of \8 and \9 to be the same as the current Perl
behaviour.
25. Static linking against the PCRE2 library using the pkg-config module was
failing on missing pthread symbols.
26. If a group that contained a recursive back reference also contained a
forward reference subroutine call followed by a non-forward-reference
subroutine call, for example /.((?2)(?R)\1)()/, pcre2_compile() failed to
compile correct code, leading to undefined behaviour or an internally detected
error. This bug was discovered by the LLVM fuzzer.
27. Quantification of certain items (e.g. atomic back references) could cause
incorrect code to be compiled when recursive forward references were involved.
For example, in this pattern: /(?1)()((((((\1++))\x85)+)|))/. This bug was
discovered by the LLVM fuzzer.
28. A repeated conditional group whose condition was a reference by name caused
a buffer overflow if there was more than one group with the given name. This
bug was discovered by the LLVM fuzzer.
29. A recursive back reference by name within a group that had the same name as
another group caused a buffer overflow. For example: /(?J)(?'d'(?'d'\g{d}))/.
This bug was discovered by the LLVM fuzzer.
30. A forward reference by name to a group whose number is the same as the
current group, for example in this pattern: /(?|(\k'Pm')|(?'Pm'))/, caused a
buffer overflow at compile time. This bug was discovered by the LLVM fuzzer.
31. Fix -fsanitize=undefined warnings for left shifts of 1 by 31 (it treats 1
as an int; fixed by writing it as 1u).
32. Fix pcre2grep compile when -std=c99 is used with gcc, though it still gives
a warning for "fileno" unless -std=gnu99 us used.
33. A lookbehind assertion within a set of mutually recursive subpatterns could
provoke a buffer overflow. This bug was discovered by the LLVM fuzzer.
34. Give an error for an empty subpattern name such as (?'').
35. Make pcre2test give an error if a pattern that follows #forbud_utf contains
\P, \p, or \X.
36. The way named subpatterns are handled has been refactored. There is now a
pre-pass over the regex which does nothing other than identify named
subpatterns and count the total captures. This means that information about
named patterns is known before the rest of the compile. In particular, it means
that forward references can be checked as they are encountered. Previously, the
code for handling forward references was contorted and led to several errors in
computing the memory requirements for some patterns, leading to buffer
overflows.
37. There was no check for integer overflow in subroutine calls such as (?123).
38. The table entry for \l in EBCDIC environments was incorrect, leading to its
being treated as a literal 'l' instead of causing an error.
39. If a non-capturing group containing a conditional group that could match
an empty string was repeated, it was not identified as matching an empty string
itself. For example: /^(?:(?(1)x|)+)+$()/.
40. In an EBCDIC environment, pcretest was mishandling the escape sequences
\a and \e in test subject lines.
41. In an EBCDIC environment, \a in a pattern was converted to the ASCII
instead of the EBCDIC value.
42. The handling of \c in an EBCDIC environment has been revised so that it is
now compatible with the specification in Perl's perlebcdic page.
43. Single character repetition in JIT has been improved. 20-30% speedup
was achieved on certain patterns.
44. The EBCDIC character 0x41 is a non-breaking space, equivalent to 0xa0 in
ASCII/Unicode. This has now been added to the list of characters that are
recognized as white space in EBCDIC.
45. When PCRE2 was compiled without Unicode support, the use of \p and \P gave
an error (correctly) when used outside a class, but did not give an error
within a class.
46. \h within a class was incorrectly compiled in EBCDIC environments.
47. JIT should return with error when the compiled pattern requires
more stack space than the maximum.
48. Fixed a memory leak in pcre2grep when a locale is set.
Version 10.10 06-March-2015
---------------------------
1. When a pattern is compiled, it remembers the highest back reference so that
when matching, if the ovector is too small, extra memory can be obtained to
use instead. A conditional subpattern whose condition is a check on a capture
having happened, such as, for example in the pattern /^(?:(a)|b)(?(1)A|B)/, is
another kind of back reference, but it was not setting the highest
backreference number. This mattered only if pcre2_match() was called with an
ovector that was too small to hold the capture, and there was no other kind of
back reference (a situation which is probably quite rare). The effect of the
bug was that the condition was always treated as FALSE when the capture could
not be consulted, leading to a incorrect behaviour by pcre2_match(). This bug
has been fixed.
2. Functions for serialization and deserialization of sets of compiled patterns
have been added.
3. The value that is returned by PCRE2_INFO_SIZE has been corrected to remove
excess code units at the end of the data block that may occasionally occur if
the code for calculating the size over-estimates. This change stops the
serialization code copying uninitialized data, to which valgrind objects. The
documentation of PCRE2_INFO_SIZE was incorrect in stating that the size did not
include the general overhead. This has been corrected.
4. All code units in every slot in the table of group names are now set, again
in order to avoid accessing uninitialized data when serializing.
5. The (*NO_JIT) feature is implemented.
6. If a bug that caused pcre2_compile() to use more memory than allocated was
triggered when using valgrind, the code in (3) above passed a stupidly large
value to valgrind. This caused a crash instead of an "internal error" return.
7. A reference to a duplicated named group (either a back reference or a test
for being set in a conditional) that occurred in a part of the pattern where
PCRE2_DUPNAMES was not set caused the amount of memory needed for the pattern
to be incorrectly calculated, leading to overwriting.
8. A mutually recursive set of back references such as (\2)(\1) caused a
segfault at compile time (while trying to find the minimum matching length).
The infinite loop is now broken (with the minimum length unset, that is, zero).
9. If an assertion that was used as a condition was quantified with a minimum
of zero, matching went wrong. In particular, if the whole group had unlimited
repetition and could match an empty string, a segfault was likely. The pattern
(?(?=0)?)+ is an example that caused this. Perl allows assertions to be
quantified, but not if they are being used as conditions, so the above pattern
is faulted by Perl. PCRE2 has now been changed so that it also rejects such
patterns.
10. The error message for an invalid quantifier has been changed from "nothing
to repeat" to "quantifier does not follow a repeatable item".
11. If a bad UTF string is compiled with NO_UTF_CHECK, it may succeed, but
scanning the compiled pattern in subsequent auto-possessification can get out
of step and lead to an unknown opcode. Previously this could have caused an
infinite loop. Now it generates an "internal error" error. This is a tidyup,
not a bug fix; passing bad UTF with NO_UTF_CHECK is documented as having an
undefined outcome.
12. A UTF pattern containing a "not" match of a non-ASCII character and a
subroutine reference could loop at compile time. Example: /[^\xff]((?1))/.
13. The locale test (RunTest 3) has been upgraded. It now checks that a locale
that is found in the output of "locale -a" can actually be set by pcre2test
before it is accepted. Previously, in an environment where a locale was listed
but would not set (an example does exist), the test would "pass" without
actually doing anything. Also the fr_CA locale has been added to the list of
locales that can be used.
14. Fixed a bug in pcre2_substitute(). If a replacement string ended in a
capturing group number without parentheses, the last character was incorrectly
literally included at the end of the replacement string.
15. A possessive capturing group such as (a)*+ with a minimum repeat of zero
failed to allow the zero-repeat case if pcre2_match() was called with an
ovector too small to capture the group.
16. Improved error message in pcre2test when setting the stack size (-S) fails.
17. Fixed two bugs in CMakeLists.txt: (1) Some lines had got lost in the
transfer from PCRE1, meaning that CMake configuration failed if "build tests"
was selected. (2) The file src/pcre2_serialize.c had not been added to the list
of PCRE2 sources, which caused a failure to build pcre2test.
18. Fixed typo in pcre2_serialize.c (DECL instead of DEFN) that causes problems
only on Windows.
19. Use binary input when reading back saved serialized patterns in pcre2test.
20. Added RunTest.bat for running the tests under Windows.
21. "make distclean" was not removing config.h, a file that may be created for
use with CMake.
22. A pattern such as "((?2){0,1999}())?", which has a group containing a
forward reference repeated a large (but limited) number of times within a
repeated outer group that has a zero minimum quantifier, caused incorrect code
to be compiled, leading to the error "internal error: previously-checked
referenced subpattern not found" when an incorrect memory address was read.
This bug was reported as "heap overflow", discovered by Kai Lu of Fortinet's
FortiGuard Labs. (Added 24-March-2015: CVE-2015-2325 was given to this.)
23. A pattern such as "((?+1)(\1))/" containing a forward reference subroutine
call within a group that also contained a recursive back reference caused
incorrect code to be compiled. This bug was reported as "heap overflow",
discovered by Kai Lu of Fortinet's FortiGuard Labs. (Added 24-March-2015:
CVE-2015-2326 was given to this.)
24. Computing the size of the JIT read-only data in advance has been a source
of various issues, and new ones are still appear unfortunately. To fix
existing and future issues, size computation is eliminated from the code,
and replaced by on-demand memory allocation.
25. A pattern such as /(?i)[A-`]/, where characters in the other case are
adjacent to the end of the range, and the range contained characters with more
than one other case, caused incorrect behaviour when compiled in UTF mode. In
that example, the range a-j was left out of the class.
Version 10.00 05-January-2015
-----------------------------
Version 10.00 is the first release of PCRE2, a revised API for the PCRE
library. Changes prior to 10.00 are logged in the ChangeLog file for the old
API, up to item 20 for release 8.36.
The code of the library was heavily revised as part of the new API
implementation. Details of each and every modification were not individually
logged. In addition to the API changes, the following changes were made. They
are either new functionality, or bug fixes and other noticeable changes of
behaviour that were implemented after the code had been forked.
1. Including Unicode support at build time is now enabled by default, but it
can optionally be disabled. It is not enabled by default at run time (no
change).
2. The test program, now called pcre2test, was re-specified and almost
completely re-written. Its input is not compatible with input for pcretest.
3. Patterns may start with (*NOTEMPTY) or (*NOTEMPTY_ATSTART) to set the
PCRE2_NOTEMPTY or PCRE2_NOTEMPTY_ATSTART options for every subject line that is
matched by that pattern.
4. For the benefit of those who use PCRE2 via some other application, that is,
not writing the function calls themselves, it is possible to check the PCRE2
version by matching a pattern such as /(?(VERSION>=10)yes|no)/ against a
string such as "yesno".
5. There are case-equivalent Unicode characters whose encodings use different
numbers of code units in UTF-8. U+023A and U+2C65 are one example. (It is
theoretically possible for this to happen in UTF-16 too.) If a backreference to
a group containing one of these characters was greedily repeated, and during
the match a backtrack occurred, the subject might be backtracked by the wrong
number of code units. For example, if /^(\x{23a})\1*(.)/ is matched caselessly
(and in UTF-8 mode) against "\x{23a}\x{2c65}\x{2c65}\x{2c65}", group 2 should
capture the final character, which is the three bytes E2, B1, and A5 in UTF-8.
Incorrect backtracking meant that group 2 captured only the last two bytes.
This bug has been fixed; the new code is slower, but it is used only when the
strings matched by the repetition are not all the same length.
6. A pattern such as /()a/ was not setting the "first character must be 'a'"
information. This applied to any pattern with a group that matched no
characters, for example: /(?:(?=.)|(?<!x))a/.
7. When an (*ACCEPT) is triggered inside capturing parentheses, it arranges for
those parentheses to be closed with whatever has been captured so far. However,
it was failing to mark any other groups between the highest capture so far and
the currrent group as "unset". Thus, the ovector for those groups contained
whatever was previously there. An example is the pattern /(x)|((*ACCEPT))/ when
matched against "abcd".
8. The pcre2_substitute() function has been implemented.
9. If an assertion used as a condition was quantified with a minimum of zero
(an odd thing to do, but it happened), SIGSEGV or other misbehaviour could
occur.
10. The PCRE2_NO_DOTSTAR_ANCHOR option has been implemented.
****

67
pcre2/CheckMan Executable file
View File

@ -0,0 +1,67 @@
#! /usr/bin/perl
# A script to scan PCRE2's man pages to check for typos in the control
# sequences. I use only a small set of the available repertoire, so it is
# straightforward to check that nothing else has slipped in by mistake. This
# script should be called in the doc directory.
$yield = 0;
while (scalar(@ARGV) > 0)
{
$line = 0;
$file = shift @ARGV;
open (IN, $file) || die "Failed to open $file\n";
while (<IN>)
{
$line++;
if (/^\s*$/)
{
printf "Empty line $line of $file\n";
$yield = 1;
}
elsif (/^\./)
{
if (!/^\.\s*$|
^\.B\s+\S|
^\.TH\s\S|
^\.SH\s\S|
^\.SS\s\S|
^\.TP(?:\s?\d+)?\s*$|
^\.SM\s*$|
^\.br\s*$|
^\.rs\s*$|
^\.sp\s*$|
^\.nf\s*$|
^\.fi\s*$|
^\.P\s*$|
^\.PP\s*$|
^\.\\"(?:\ HREF)?\s*$|
^\.\\"\sHTML\s<a\shref="[^"]+?">\s*$|
^\.\\"\sHTML\s<a\sname="[^"]+?"><\/a>\s*$|
^\.\\"\s<\/a>\s*$|
^\.\\"\sJOINSH\s*$|
^\.\\"\sJOIN\s*$/x
)
{
printf "Bad control line $line of $file\n";
$yield = 1;
}
}
else
{
if (/\\[^ef]|\\f[^IBP]/)
{
printf "Bad backslash in line $line of $file\n";
$yield = 1;
}
}
}
close(IN);
}
exit $yield;
# End

113
pcre2/CleanTxt Executable file
View File

@ -0,0 +1,113 @@
#! /usr/bin/perl -w
# Script to take the output of nroff -man and remove all the backspacing and
# the page footers and the screen commands etc so that it is more usefully
# readable online. In fact, in the latest nroff, intermediate footers don't
# seem to be generated any more.
$blankcount = 0;
$lastwascut = 0;
$firstheader = 1;
# Input on STDIN; output to STDOUT.
while (<STDIN>)
{
s/\x1b\[\d+m//g; # Remove screen controls "ESC [ number m"
s/.\x8//g; # Remove "char, backspace"
# Handle header lines. Retain only the first one we encounter, but remove
# the blank line that follows. Any others (e.g. at end of document) and the
# following blank line are dropped.
if (/^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/)
{
if ($firstheader)
{
$firstheader = 0;
print;
$lastprinted = $_;
$lastwascut = 0;
}
$_=<STDIN>; # Remove a blank that follows
next;
}
# Count runs of empty lines
if (/^\s*$/)
{
$blankcount++;
$lastwascut = 0;
next;
}
# If a chunk of lines has been cut out (page footer) and the next line
# has a different indentation, put back one blank line.
if ($lastwascut && $blankcount < 1 && defined($lastprinted))
{
($a) = $lastprinted =~ /^(\s*)/;
($b) = $_ =~ /^(\s*)/;
$blankcount++ if ($a ne $b);
}
# We get here only when we have a non-blank line in hand. If it was preceded
# by 3 or more blank lines, read the next 3 lines and see if they are blank.
# If so, remove all 7 lines, and remember that we have just done a cut.
if ($blankcount >= 3)
{
for ($i = 0; $i < 3; $i++)
{
$next[$i] = <STDIN>;
$next[$i] = "" if !defined $next[$i];
$next[$i] =~ s/\x1b\[\d+m//g; # Remove screen controls "ESC [ number m"
$next[$i] =~ s/.\x8//g; # Remove "char, backspace"
}
# Cut out chunks of the form <3 blanks><non-blank><3 blanks>
if ($next[0] =~ /^\s*$/ &&
$next[1] =~ /^\s*$/ &&
$next[2] =~ /^\s*$/)
{
$blankcount -= 3;
$lastwascut = 1;
}
# Otherwise output the saved blanks, the current, and the next three
# lines. Remember the last printed line.
else
{
for ($i = 0; $i < $blankcount; $i++) { print "\n"; }
print;
for ($i = 0; $i < 3; $i++)
{
$next[$i] =~ s/.\x8//g;
print $next[$i];
$lastprinted = $_;
}
$lastwascut = 0;
$blankcount = 0;
}
}
# This non-blank line is not preceded by 3 or more blank lines. Output
# any blanks there are, and the line. Remember it. Force two blank lines
# before headings.
else
{
$blankcount = 2 if /^\S/ && !/^Last updated/ && !/^Copyright/ &&
defined($lastprinted);
for ($i = 0; $i < $blankcount; $i++) { print "\n"; }
print;
$lastprinted = $_;
$lastwascut = 0;
$blankcount = 0;
}
}
# End

35
pcre2/Detrail Executable file
View File

@ -0,0 +1,35 @@
#!/usr/bin/perl
# This is a script for removing trailing whitespace from lines in files that
# are listed on the command line.
# This subroutine does the work for one file.
sub detrail {
my($file) = $_[0];
my($changed) = 0;
open(IN, "$file") || die "Can't open $file for input";
@lines = <IN>;
close(IN);
foreach (@lines)
{
if (/\s+\n$/)
{
s/\s+\n$/\n/;
$changed = 1;
}
}
if ($changed)
{
open(OUT, ">$file") || die "Can't open $file for output";
print OUT @lines;
close(OUT);
}
}
# This is the main program
$, = ""; # Output field separator
for ($i = 0; $i < @ARGV; $i++) { &detrail($ARGV[$i]); }
# End

604
pcre2/HACKING Normal file
View File

@ -0,0 +1,604 @@
Technical Notes about PCRE2
---------------------------
These are very rough technical notes that record potentially useful information
about PCRE2 internals. PCRE2 is a library based on the original PCRE library,
but with a revised (and incompatible) API. To avoid confusion, the original
library is referred to as PCRE1 below. For information about testing PCRE2, see
the pcre2test documentation and the comment at the head of the RunTest file.
PCRE1 releases were up to 8.3x when PCRE2 was developed. The 8.xx series will
continue for bugfixes if necessary. PCRE2 releases started at 10.00 to avoid
confusion with PCRE1.
Historical note 1
-----------------
Many years ago I implemented some regular expression functions to an algorithm
suggested by Martin Richards. These were not Unix-like in form, and were quite
restricted in what they could do by comparison with Perl. The interesting part
about the algorithm was that the amount of space required to hold the compiled
form of an expression was known in advance. The code to apply an expression did
not operate by backtracking, as the original Henry Spencer code and current
PCRE2 and Perl code does, but instead checked all possibilities simultaneously
by keeping a list of current states and checking all of them as it advanced
through the subject string. In the terminology of Jeffrey Friedl's book, it was
a "DFA algorithm", though it was not a traditional Finite State Machine (FSM).
When the pattern was all used up, all remaining states were possible matches,
and the one matching the longest subset of the subject string was chosen. This
did not necessarily maximize the individual wild portions of the pattern, as is
expected in Unix and Perl-style regular expressions.
Historical note 2
-----------------
By contrast, the code originally written by Henry Spencer (which was
subsequently heavily modified for Perl) compiles the expression twice: once in
a dummy mode in order to find out how much store will be needed, and then for
real. (The Perl version probably doesn't do this any more; I'm talking about
the original library.) The execution function operates by backtracking and
maximizing (or, optionally, minimizing, in Perl) the amount of the subject that
matches individual wild portions of the pattern. This is an "NFA algorithm" in
Friedl's terminology.
OK, here's the real stuff
-------------------------
For the set of functions that formed the original PCRE1 library (which are
unrelated to those mentioned above), I tried at first to invent an algorithm
that used an amount of store bounded by a multiple of the number of characters
in the pattern, to save on compiling time. However, because of the greater
complexity in Perl regular expressions, I couldn't do this. In any case, a
first pass through the pattern is helpful for other reasons.
Support for 16-bit and 32-bit data strings
-------------------------------------------
The library can be compiled in any combination of 8-bit, 16-bit or 32-bit
modes, creating up to three different libraries. In the description that
follows, the word "short" is used for a 16-bit data quantity, and the phrase
"code unit" is used for a quantity that is a byte in 8-bit mode, a short in
16-bit mode and a 32-bit word in 32-bit mode. The names of PCRE2 functions are
given in generic form, without the _8, _16, or _32 suffix.
Computing the memory requirement: how it was
--------------------------------------------
Up to and including release 6.7, PCRE1 worked by running a very degenerate
first pass to calculate a maximum memory requirement, and then a second pass to
do the real compile - which might use a bit less than the predicted amount of
memory. The idea was that this would turn out faster than the Henry Spencer
code because the first pass is degenerate and the second pass can just store
stuff straight into memory, which it knows is big enough.
Computing the memory requirement: how it is
-------------------------------------------
By the time I was working on a potential 6.8 release, the degenerate first pass
had become very complicated and hard to maintain. Indeed one of the early
things I did for 6.8 was to fix Yet Another Bug in the memory computation. Then
I had a flash of inspiration as to how I could run the real compile function in
a "fake" mode that enables it to compute how much memory it would need, while
actually only ever using a few hundred bytes of working memory, and without too
many tests of the mode that might slow it down. So I refactored the compiling
functions to work this way. This got rid of about 600 lines of source. It
should make future maintenance and development easier. As this was such a major
change, I never released 6.8, instead upping the number to 7.0 (other quite
major changes were also present in the 7.0 release).
A side effect of this work was that the previous limit of 200 on the nesting
depth of parentheses was removed. However, there was a downside: compiling ran
more slowly than before (30% or more, depending on the pattern) because it now
did a full analysis of the pattern. My hope was that this would not be a big
issue, and in the event, nobody has commented on it.
At release 8.34, a limit on the nesting depth of parentheses was re-introduced
(default 250, settable at build time) so as to put a limit on the amount of
system stack used by the compile function, which uses recursive function calls
for nested parenthesized groups. This is a safety feature for environments with
small stacks where the patterns are provided by users.
History repeated itself for release 10.20. A number of bugs relating to named
subpatterns had been discovered by fuzzers. Most of these were related to the
handling of forward references when it was not known if the named pattern was
unique. (References to non-unique names use a different opcode and more
memory.) The use of duplicate group numbers (the (?| facility) also caused
issues.
To get around these problems I adopted a new approach by adding a third pass,
really a "pre-pass", over the pattern, which does nothing other than identify
all the named subpatterns and their corresponding group numbers. This means
that the actual compile (both pre-pass and real compile) have full knowledge of
group names and numbers throughout. Several dozen lines of messy code were
eliminated, though the new pre-pass is not short (skipping over [] classes is
complicated).
Traditional matching function
-----------------------------
The "traditional", and original, matching function is called pcre2_match(), and
it implements an NFA algorithm, similar to the original Henry Spencer algorithm
and the way that Perl works. This is not surprising, since it is intended to be
as compatible with Perl as possible. This is the function most users of PCRE2
will use most of the time. If PCRE2 is compiled with just-in-time (JIT)
support, and studying a compiled pattern with JIT is successful, the JIT code
is run instead of the normal pcre2_match() code, but the result is the same.
Supplementary matching function
-------------------------------
There is also a supplementary matching function called pcre2_dfa_match(). This
implements a DFA matching algorithm that searches simultaneously for all
possible matches that start at one point in the subject string. (Going back to
my roots: see Historical Note 1 above.) This function intreprets the same
compiled pattern data as pcre2_match(); however, not all the facilities are
available, and those that are do not always work in quite the same way. See the
user documentation for details.
The algorithm that is used for pcre2_dfa_match() is not a traditional FSM,
because it may have a number of states active at one time. More work would be
needed at compile time to produce a traditional FSM where only one state is
ever active at once. I believe some other regex matchers work this way. JIT
support is not available for this kind of matching.
Changeable options
------------------
The /i, /m, or /s options (PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL, and
some others) may change in the middle of patterns. Their processing is handled
entirely at compile time by generating different opcodes for the different
settings. The runtime functions do not need to keep track of an options state.
Format of compiled patterns
---------------------------
The compiled form of a pattern is a vector of unsigned code units (bytes in
8-bit mode, shorts in 16-bit mode, 32-bit words in 32-bit mode), containing
items of variable length. The first code unit in an item contains an opcode,
and the length of the item is either implicit in the opcode or contained in the
data that follows it.
In many cases listed below, LINK_SIZE data values are specified for offsets
within the compiled pattern. LINK_SIZE always specifies a number of bytes. The
default value for LINK_SIZE is 2, except for the 32-bit library, where it can
only be 4. The 8-bit library can be compiled to used 3-byte or 4-byte values,
and the 16-bit library can be compiled to use 4-byte values, though this
impairs performance. Specifing a LINK_SIZE larger than 2 for these libraries is
necessary only when patterns whose compiled length is greater than 64K code
units are going to be processed. When a LINK_SIZE value uses more than one code
unit, the most significant unit is first.
In this description, we assume the "normal" compilation options. Data values
that are counts (e.g. quantifiers) are always two bytes long in 8-bit mode
(most significant byte first), or one code unit in 16-bit and 32-bit modes.
Opcodes with no following data
------------------------------
These items are all just one unit long
OP_END end of pattern
OP_ANY match any one character other than newline
OP_ALLANY match any one character, including newline
OP_ANYBYTE match any single code unit, even in UTF-8/16 mode
OP_SOD match start of data: \A
OP_SOM, start of match (subject + offset): \G
OP_SET_SOM, set start of match (\K)
OP_CIRC ^ (start of data)
OP_CIRCM ^ multiline mode (start of data or after newline)
OP_NOT_WORD_BOUNDARY \W
OP_WORD_BOUNDARY \w
OP_NOT_DIGIT \D
OP_DIGIT \d
OP_NOT_HSPACE \H
OP_HSPACE \h
OP_NOT_WHITESPACE \S
OP_WHITESPACE \s
OP_NOT_VSPACE \V
OP_VSPACE \v
OP_NOT_WORDCHAR \W
OP_WORDCHAR \w
OP_EODN match end of data or newline at end: \Z
OP_EOD match end of data: \z
OP_DOLL $ (end of data, or before final newline)
OP_DOLLM $ multiline mode (end of data or before newline)
OP_EXTUNI match an extended Unicode grapheme cluster
OP_ANYNL match any Unicode newline sequence
OP_ASSERT_ACCEPT )
OP_ACCEPT ) These are Perl 5.10's "backtracking control
OP_COMMIT ) verbs". If OP_ACCEPT is inside capturing
OP_FAIL ) parentheses, it may be preceded by one or more
OP_PRUNE ) OP_CLOSE, each followed by a count that
OP_SKIP ) indicates which parentheses must be closed.
OP_THEN )
OP_ASSERT_ACCEPT is used when (*ACCEPT) is encountered within an assertion.
This ends the assertion, not the entire pattern match. The assertion (?!) is
always optimized to OP_FAIL.
Backtracking control verbs with optional data
---------------------------------------------
(*THEN) without an argument generates the opcode OP_THEN and no following data.
OP_MARK is followed by the mark name, preceded by a length in one code unit,
and followed by a binary zero. For (*PRUNE), (*SKIP), and (*THEN) with
arguments, the opcodes OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used,
with the name following in the same format as OP_MARK.
Matching literal characters
---------------------------
The OP_CHAR opcode is followed by a single character that is to be matched
casefully. For caseless matching, OP_CHARI is used. In UTF-8 or UTF-16 modes,
the character may be more than one code unit long. In UTF-32 mode, characters
are always exactly one code unit long.
If there is only one character in a character class, OP_CHAR or OP_CHARI is
used for a positive class, and OP_NOT or OP_NOTI for a negative one (that is,
for something like [^a]).
Repeating single characters
---------------------------
The common repeats (*, +, ?), when applied to a single character, use the
following opcodes, which come in caseful and caseless versions:
Caseful Caseless
OP_STAR OP_STARI
OP_MINSTAR OP_MINSTARI
OP_POSSTAR OP_POSSTARI
OP_PLUS OP_PLUSI
OP_MINPLUS OP_MINPLUSI
OP_POSPLUS OP_POSPLUSI
OP_QUERY OP_QUERYI
OP_MINQUERY OP_MINQUERYI
OP_POSQUERY OP_POSQUERYI
Each opcode is followed by the character that is to be repeated. In ASCII or
UTF-32 modes, these are two-code-unit items; in UTF-8 or UTF-16 modes, the
length is variable. Those with "MIN" in their names are the minimizing
versions. Those with "POS" in their names are possessive versions. Other kinds
of repeat make use of these opcodes:
Caseful Caseless
OP_UPTO OP_UPTOI
OP_MINUPTO OP_MINUPTOI
OP_POSUPTO OP_POSUPTOI
OP_EXACT OP_EXACTI
Each of these is followed by a count and then the repeated character. The count
is two bytes long in 8-bit mode (most significant byte first), or one code unit
in 16-bit and 32-bit modes.
OP_UPTO matches from 0 to the given number. A repeat with a non-zero minimum
and a fixed maximum is coded as an OP_EXACT followed by an OP_UPTO (or
OP_MINUPTO or OPT_POSUPTO).
Another set of matching repeating opcodes (called OP_NOTSTAR, OP_NOTSTARI,
etc.) are used for repeated, negated, single-character classes such as [^a]*.
The normal single-character opcodes (OP_STAR, etc.) are used for repeated
positive single-character classes.
Repeating character types
-------------------------
Repeats of things like \d are done exactly as for single characters, except
that instead of a character, the opcode for the type (e.g. OP_DIGIT) is stored
in the next code unit. The opcodes are:
OP_TYPESTAR
OP_TYPEMINSTAR
OP_TYPEPOSSTAR
OP_TYPEPLUS
OP_TYPEMINPLUS
OP_TYPEPOSPLUS
OP_TYPEQUERY
OP_TYPEMINQUERY
OP_TYPEPOSQUERY
OP_TYPEUPTO
OP_TYPEMINUPTO
OP_TYPEPOSUPTO
OP_TYPEEXACT
Match by Unicode property
-------------------------
OP_PROP and OP_NOTPROP are used for positive and negative matches of a
character by testing its Unicode property (the \p and \P escape sequences).
Each is followed by two code units that encode the desired property as a type
and a value. The types are a set of #defines of the form PT_xxx, and the values
are enumerations of the form ucp_xx, defined in the pcre2_ucp.h source file.
The value is relevant only for PT_GC (General Category), PT_PC (Particular
Category), and PT_SC (Script).
Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
three code units: OP_PROP or OP_NOTPROP, and then the desired property type and
value.
Character classes
-----------------
If there is only one character in a class, OP_CHAR or OP_CHARI is used for a
positive class, and OP_NOT or OP_NOTI for a negative one (that is, for
something like [^a]).
A set of repeating opcodes (called OP_NOTSTAR etc.) are used for repeated,
negated, single-character classes. The normal single-character opcodes
(OP_STAR, etc.) are used for repeated positive single-character classes.
When there is more than one character in a class, and all the code points are
less than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a
negative one. In either case, the opcode is followed by a 32-byte (16-short,
8-word) bit map containing a 1 bit for every character that is acceptable. The
bits are counted from the least significant end of each unit. In caseless mode,
bits for both cases are set.
The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8 and
16-bit and 32-bit modes, subject characters with values greater than 255 can be
handled correctly. For OP_CLASS they do not match, whereas for OP_NCLASS they
do.
For classes containing characters with values greater than 255 or that contain
\p or \P, OP_XCLASS is used. It optionally uses a bit map if any acceptable
code points are less than 256, followed by a list of pairs (for a range) and/or
single characters and/or properties. In caseless mode, both cases are
explicitly listed.
OP_XCLASS is followed by a LINK_SIZE value containing the total length of the
opcode and its data. This is followed by a code unit containing flag bits:
XCL_NOT indicates that this is a negative class, and XCL_MAP indicates that a
bit map is present. There follows the bit map, if XCL_MAP is set, and then a
sequence of items coded as follows:
XCL_END marks the end of the list
XCL_SINGLE one character follows
XCL_RANGE two characters follow
XCL_PROP a Unicode property (type, value) follows
XCL_NOTPROP a Unicode property (type, value) follows
If a range starts with a code point less than 256 and ends with one greater
than 255, it is split into two ranges, with characters less than 256 being
indicated in the bit map, and the rest with XCL_RANGE.
When XCL_NOT is set, the bit map, if present, contains bits for characters that
are allowed (exactly as for OP_NCLASS), but the list of items that follow it
specifies characters and properties that are not allowed.
Back references
---------------
OP_REF (caseful) or OP_REFI (caseless) is followed by a count containing the
reference number when the reference is to a unique capturing group (either by
number or by name). When named groups are used, there may be more than one
group with the same name. In this case, a reference to such a group by name
generates OP_DNREF or OP_DNREFI. These are followed by two counts: the index
(not the byte offset) in the group name table of the first entry for the
required name, followed by the number of groups with the same name. The
matching code can then search for the first one that is set.
Repeating character classes and back references
-----------------------------------------------
Single-character classes are handled specially (see above). This section
applies to other classes and also to back references. In both cases, the repeat
information follows the base item. The matching code looks at the following
opcode to see if it is one of these:
OP_CRSTAR
OP_CRMINSTAR
OP_CRPOSSTAR
OP_CRPLUS
OP_CRMINPLUS
OP_CRPOSPLUS
OP_CRQUERY
OP_CRMINQUERY
OP_CRPOSQUERY
OP_CRRANGE
OP_CRMINRANGE
OP_CRPOSRANGE
All but the last three are single-code-unit items, with no data. The others are
followed by the minimum and maximum repeat counts.
Brackets and alternation
------------------------
A pair of non-capturing round brackets is wrapped round each expression at
compile time, so alternation always happens in the context of brackets.
[Note for North Americans: "bracket" to some English speakers, including
myself, can be round, square, curly, or pointy. Hence this usage rather than
"parentheses".]
Non-capturing brackets use the opcode OP_BRA, capturing brackets use OP_CBRA. A
bracket opcode is followed by a LINK_SIZE value which gives the offset to the
next alternative OP_ALT or, if there aren't any branches, to the matching
OP_KET opcode. Each OP_ALT is followed by a LINK_SIZE value giving the offset
to the next one, or to the OP_KET opcode. For capturing brackets, the bracket
number is a count that immediately follows the offset.
OP_KET is used for subpatterns that do not repeat indefinitely, and OP_KETRMIN
and OP_KETRMAX are used for indefinite repetitions, minimally or maximally
respectively (see below for possessive repetitions). All three are followed by
a LINK_SIZE value giving (as a positive number) the offset back to the matching
bracket opcode.
If a subpattern is quantified such that it is permitted to match zero times, it
is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are
single-unit opcodes that tell the matcher that skipping the following
subpattern entirely is a valid match. In the case of the first two, not
skipping the pattern is also valid (greedy and non-greedy). The third is used
when a pattern has the quantifier {0,0}. It cannot be entirely discarded,
because it may be called as a subroutine from elsewhere in the pattern.
A subpattern with an indefinite maximum repetition is replicated in the
compiled data its minimum number of times (or once with OP_BRAZERO if the
minimum is zero), with the final copy terminating with OP_KETRMIN or OP_KETRMAX
as appropriate.
A subpattern with a bounded maximum repetition is replicated in a nested
fashion up to the maximum number of times, with OP_BRAZERO or OP_BRAMINZERO
before each replication after the minimum, so that, for example, (abc){2,5} is
compiled as (abc)(abc)((abc)((abc)(abc)?)?)?, except that each bracketed group
has the same number.
When a repeated subpattern has an unbounded upper limit, it is checked to see
whether it could match an empty string. If this is the case, the opcode in the
final replication is changed to OP_SBRA or OP_SCBRA. This tells the matcher
that it needs to check for matching an empty string when it hits OP_KETRMIN or
OP_KETRMAX, and if so, to break the loop.
Possessive brackets
-------------------
When a repeated group (capturing or non-capturing) is marked as possessive by
the "+" notation, e.g. (abc)++, different opcodes are used. Their names all
have POS on the end, e.g. OP_BRAPOS instead of OP_BRA and OP_SCBRAPOS instead
of OP_SCBRA. The end of such a group is marked by OP_KETRPOS. If the minimum
repetition is zero, the group is preceded by OP_BRAPOSZERO.
Once-only (atomic) groups
-------------------------
These are just like other subpatterns, but they start with the opcode
OP_ONCE or OP_ONCE_NC. The former is used when there are no capturing brackets
within the atomic group; the latter when there are. The distinction is needed
for when there is a backtrack to before the group - any captures within the
group must be reset, so it is necessary to retain backtracking points inside
the group, even after it is complete, in order to do this. When there are no
captures in an atomic group, all the backtracking can be discarded when it is
complete. This is more efficient, and also uses less stack.
The check for matching an empty string in an unbounded repeat is handled
entirely at runtime, so there are just these two opcodes for atomic groups.
Assertions
----------
Forward assertions are also just like other subpatterns, but starting with one
of the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes
OP_ASSERTBACK and OP_ASSERTBACK_NOT, and the first opcode inside the assertion
is OP_REVERSE, followed by a count of the number of characters to move back the
pointer in the subject string. In ASCII or UTF-32 mode, the count is also the
number of code units, but in UTF-8/16 mode each character may occupy more than
one code unit. A separate count is present in each alternative of a lookbehind
assertion, allowing them to have different (but fixed) lengths.
Conditional subpatterns
-----------------------
These are like other subpatterns, but they start with the opcode OP_COND, or
OP_SCOND for one that might match an empty string in an unbounded repeat.
If the condition is a back reference, this is stored at the start of the
subpattern using the opcode OP_CREF followed by a count containing the
reference number, provided that the reference is to a unique capturing group.
If the reference was by name and there is more than one group with that name,
OP_DNCREF is used instead. It is followed by two counts: the index in the group
names table, and the number of groups with the same name. The allows the
matcher to check if any group with the given name is set.
If the condition is "in recursion" (coded as "(?(R)"), or "in recursion of
group x" (coded as "(?(Rx)"), the group number is stored at the start of the
subpattern using the opcode OP_RREF (with a value of RREF_ANY (0xffff) for "the
whole pattern") or OP_DNRREF (with data as for OP_DNCREF).
For a DEFINE condition, OP_FALSE is used (with no associated data). During
compilation, however, a DEFINE condition is coded as OP_DEFINE so that, when
the conditional group is complete, there can be a check to ensure that it
contains only one top-level branch. Once this has happened, the opcode is
changed to OP_FALSE, so the matcher never sees OP_DEFINE.
There is a special PCRE2-specific condition of the form (VERSION[>]=x.y), which
tests the PCRE2 version number. This compiles into one of the opcodes OP_TRUE
or OP_FALSE.
If a condition is not a back reference, recursion test, DEFINE, or VERSION, it
must start with an assertion, whose opcode normally immediately follows OP_COND
or OP_SCOND. However, if automatic callouts are enabled, a callout is inserted
immediately before the assertion. It is also possible to insert a manual
callout at this point. Only assertion conditions may have callouts preceding
the condition.
A condition that is the negative assertion (?!) is optimized to OP_FAIL in all
parts of the pattern, so this is another opcode that may appear as a condition.
It is treated the same as OP_FALSE.
Recursion
---------
Recursion either matches the current pattern, or some subexpression. The opcode
OP_RECURSE is followed by a LINK_SIZE value that is the offset to the starting
bracket from the start of the whole pattern. OP_RECURSE is also used for
"subroutine" calls, even though they are not strictly a recursion. Repeated
recursions are automatically wrapped inside OP_ONCE brackets, because otherwise
some patterns broke them. A non-repeated recursion is not wrapped in OP_ONCE
brackets, but it is nevertheless still treated as an atomic group.
Callout
-------
A callout can nowadays have either a numerical argument or a string argument.
These use OP_CALLOUT or OP_CALLOUT_STR, respectively. In each case these are
followed by two LINK_SIZE values giving the offset in the pattern string to the
start of the following item, and another count giving the length of this item.
These values make it possible for pcre2test to output useful tracing
information using callouts.
In the case of a numeric callout, after these two values there is a single code
unit containing the callout number, in the range 0-255, with 255 being used for
callouts that are automatically inserted as a result of the PCRE2_AUTO_CALLOUT
option. Thus, this opcode item is of fixed length:
[OP_CALLOUT] [PATTERN_OFFSET] [PATTERN_LENGTH] [NUMBER]
For callouts with string arguments, OP_CALLOUT_STR has three more data items:
a LINK_SIZE value giving the complete length of the entire opcode item, a
LINK_SIZE item containing the offset within the pattern string to the start of
the string argument, and the string itself, preceded by its starting delimiter
and followed by a binary zero. When a callout function is called, a pointer to
the actual string is passed, but the delimiter can be accessed as string[-1] if
the application needs it. In the 8-bit library, the callout in /X(?C'abc')Y/ is
compiled as the following bytes (decimal numbers represent binary values):
[OP_CALLOUT] [0] [10] [0] [1] [0] [14] [0] [5] ['] [a] [b] [c] [0]
-------- ------- -------- -------
| | | |
------- LINK_SIZE items ------
Opcode table checking
---------------------
The last opcode that is defined in pcre2_internal.h is OP_TABLE_LENGTH. This is
not a real opcode, but is used to check that tables indexed by opcode are the
correct length, in order to catch updating errors.
Philip Hazel
June 2015

370
pcre2/INSTALL Normal file
View File

@ -0,0 +1,370 @@
Installation Instructions
*************************
Copyright (C) 1994-1996, 1999-2002, 2004-2013 Free Software Foundation,
Inc.
Copying and distribution of this file, with or without modification,
are permitted in any medium without royalty provided the copyright
notice and this notice are preserved. This file is offered as-is,
without warranty of any kind.
Basic Installation
==================
Briefly, the shell command `./configure && make && make install'
should configure, build, and install this package. The following
more-detailed instructions are generic; see the `README' file for
instructions specific to this package. Some packages provide this
`INSTALL' file but do not implement all of the features documented
below. The lack of an optional feature in a given package is not
necessarily a bug. More recommendations for GNU packages can be found
in *note Makefile Conventions: (standards)Makefile Conventions.
The `configure' shell script attempts to guess correct values for
various system-dependent variables used during compilation. It uses
those values to create a `Makefile' in each directory of the package.
It may also create one or more `.h' files containing system-dependent
definitions. Finally, it creates a shell script `config.status' that
you can run in the future to recreate the current configuration, and a
file `config.log' containing compiler output (useful mainly for
debugging `configure').
It can also use an optional file (typically called `config.cache'
and enabled with `--cache-file=config.cache' or simply `-C') that saves
the results of its tests to speed up reconfiguring. Caching is
disabled by default to prevent problems with accidental use of stale
cache files.
If you need to do unusual things to compile the package, please try
to figure out how `configure' could check whether to do them, and mail
diffs or instructions to the address given in the `README' so they can
be considered for the next release. If you are using the cache, and at
some point `config.cache' contains results you don't want to keep, you
may remove or edit it.
The file `configure.ac' (or `configure.in') is used to create
`configure' by a program called `autoconf'. You need `configure.ac' if
you want to change it or regenerate `configure' using a newer version
of `autoconf'.
The simplest way to compile this package is:
1. `cd' to the directory containing the package's source code and type
`./configure' to configure the package for your system.
Running `configure' might take a while. While running, it prints
some messages telling which features it is checking for.
2. Type `make' to compile the package.
3. Optionally, type `make check' to run any self-tests that come with
the package, generally using the just-built uninstalled binaries.
4. Type `make install' to install the programs and any data files and
documentation. When installing into a prefix owned by root, it is
recommended that the package be configured and built as a regular
user, and only the `make install' phase executed with root
privileges.
5. Optionally, type `make installcheck' to repeat any self-tests, but
this time using the binaries in their final installed location.
This target does not install anything. Running this target as a
regular user, particularly if the prior `make install' required
root privileges, verifies that the installation completed
correctly.
6. You can remove the program binaries and object files from the
source code directory by typing `make clean'. To also remove the
files that `configure' created (so you can compile the package for
a different kind of computer), type `make distclean'. There is
also a `make maintainer-clean' target, but that is intended mainly
for the package's developers. If you use it, you may have to get
all sorts of other programs in order to regenerate files that came
with the distribution.
7. Often, you can also type `make uninstall' to remove the installed
files again. In practice, not all packages have tested that
uninstallation works correctly, even though it is required by the
GNU Coding Standards.
8. Some packages, particularly those that use Automake, provide `make
distcheck', which can by used by developers to test that all other
targets like `make install' and `make uninstall' work correctly.
This target is generally not run by end users.
Compilers and Options
=====================
Some systems require unusual options for compilation or linking that
the `configure' script does not know about. Run `./configure --help'
for details on some of the pertinent environment variables.
You can give `configure' initial values for configuration parameters
by setting variables in the command line or in the environment. Here
is an example:
./configure CC=c99 CFLAGS=-g LIBS=-lposix
*Note Defining Variables::, for more details.
Compiling For Multiple Architectures
====================================
You can compile the package for more than one kind of computer at the
same time, by placing the object files for each architecture in their
own directory. To do this, you can use GNU `make'. `cd' to the
directory where you want the object files and executables to go and run
the `configure' script. `configure' automatically checks for the
source code in the directory that `configure' is in and in `..'. This
is known as a "VPATH" build.
With a non-GNU `make', it is safer to compile the package for one
architecture at a time in the source code directory. After you have
installed the package for one architecture, use `make distclean' before
reconfiguring for another architecture.
On MacOS X 10.5 and later systems, you can create libraries and
executables that work on multiple system types--known as "fat" or
"universal" binaries--by specifying multiple `-arch' options to the
compiler but only a single `-arch' option to the preprocessor. Like
this:
./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
CPP="gcc -E" CXXCPP="g++ -E"
This is not guaranteed to produce working output in all cases, you
may have to build one architecture at a time and combine the results
using the `lipo' tool if you have problems.
Installation Names
==================
By default, `make install' installs the package's commands under
`/usr/local/bin', include files under `/usr/local/include', etc. You
can specify an installation prefix other than `/usr/local' by giving
`configure' the option `--prefix=PREFIX', where PREFIX must be an
absolute file name.
You can specify separate installation prefixes for
architecture-specific files and architecture-independent files. If you
pass the option `--exec-prefix=PREFIX' to `configure', the package uses
PREFIX as the prefix for installing programs and libraries.
Documentation and other data files still use the regular prefix.
In addition, if you use an unusual directory layout you can give
options like `--bindir=DIR' to specify different values for particular
kinds of files. Run `configure --help' for a list of the directories
you can set and what kinds of files go in them. In general, the
default for these options is expressed in terms of `${prefix}', so that
specifying just `--prefix' will affect all of the other directory
specifications that were not explicitly provided.
The most portable way to affect installation locations is to pass the
correct locations to `configure'; however, many packages provide one or
both of the following shortcuts of passing variable assignments to the
`make install' command line to change installation locations without
having to reconfigure or recompile.
The first method involves providing an override variable for each
affected directory. For example, `make install
prefix=/alternate/directory' will choose an alternate location for all
directory configuration variables that were expressed in terms of
`${prefix}'. Any directories that were specified during `configure',
but not in terms of `${prefix}', must each be overridden at install
time for the entire installation to be relocated. The approach of
makefile variable overrides for each directory variable is required by
the GNU Coding Standards, and ideally causes no recompilation.
However, some platforms have known limitations with the semantics of
shared libraries that end up requiring recompilation when using this
method, particularly noticeable in packages that use GNU Libtool.
The second method involves providing the `DESTDIR' variable. For
example, `make install DESTDIR=/alternate/directory' will prepend
`/alternate/directory' before all installation names. The approach of
`DESTDIR' overrides is not required by the GNU Coding Standards, and
does not work on platforms that have drive letters. On the other hand,
it does better at avoiding recompilation issues, and works well even
when some directory options were not specified in terms of `${prefix}'
at `configure' time.
Optional Features
=================
If the package supports it, you can cause programs to be installed
with an extra prefix or suffix on their names by giving `configure' the
option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
Some packages pay attention to `--enable-FEATURE' options to
`configure', where FEATURE indicates an optional part of the package.
They may also pay attention to `--with-PACKAGE' options, where PACKAGE
is something like `gnu-as' or `x' (for the X Window System). The
`README' should mention any `--enable-' and `--with-' options that the
package recognizes.
For packages that use the X Window System, `configure' can usually
find the X include and library files automatically, but if it doesn't,
you can use the `configure' options `--x-includes=DIR' and
`--x-libraries=DIR' to specify their locations.
Some packages offer the ability to configure how verbose the
execution of `make' will be. For these packages, running `./configure
--enable-silent-rules' sets the default to minimal output, which can be
overridden with `make V=1'; while running `./configure
--disable-silent-rules' sets the default to verbose, which can be
overridden with `make V=0'.
Particular systems
==================
On HP-UX, the default C compiler is not ANSI C compatible. If GNU
CC is not installed, it is recommended to use the following options in
order to use an ANSI C compiler:
./configure CC="cc -Ae -D_XOPEN_SOURCE=500"
and if that doesn't work, install pre-built binaries of GCC for HP-UX.
HP-UX `make' updates targets which have the same time stamps as
their prerequisites, which makes it generally unusable when shipped
generated files such as `configure' are involved. Use GNU `make'
instead.
On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot
parse its `<wchar.h>' header file. The option `-nodtk' can be used as
a workaround. If GNU CC is not installed, it is therefore recommended
to try
./configure CC="cc"
and if that doesn't work, try
./configure CC="cc -nodtk"
On Solaris, don't put `/usr/ucb' early in your `PATH'. This
directory contains several dysfunctional programs; working variants of
these programs are available in `/usr/bin'. So, if you need `/usr/ucb'
in your `PATH', put it _after_ `/usr/bin'.
On Haiku, software installed for all users goes in `/boot/common',
not `/usr/local'. It is recommended to use the following options:
./configure --prefix=/boot/common
Specifying the System Type
==========================
There may be some features `configure' cannot figure out
automatically, but needs to determine by the type of machine the package
will run on. Usually, assuming the package is built to be run on the
_same_ architectures, `configure' can figure that out, but if it prints
a message saying it cannot guess the machine type, give it the
`--build=TYPE' option. TYPE can either be a short name for the system
type, such as `sun4', or a canonical name which has the form:
CPU-COMPANY-SYSTEM
where SYSTEM can have one of these forms:
OS
KERNEL-OS
See the file `config.sub' for the possible values of each field. If
`config.sub' isn't included in this package, then this package doesn't
need to know the machine type.
If you are _building_ compiler tools for cross-compiling, you should
use the option `--target=TYPE' to select the type of system they will
produce code for.
If you want to _use_ a cross compiler, that generates code for a
platform different from the build platform, you should specify the
"host" platform (i.e., that on which the generated programs will
eventually be run) with `--host=TYPE'.
Sharing Defaults
================
If you want to set default values for `configure' scripts to share,
you can create a site shell script called `config.site' that gives
default values for variables like `CC', `cache_file', and `prefix'.
`configure' looks for `PREFIX/share/config.site' if it exists, then
`PREFIX/etc/config.site' if it exists. Or, you can set the
`CONFIG_SITE' environment variable to the location of the site script.
A warning: not all `configure' scripts look for a site script.
Defining Variables
==================
Variables not defined in a site shell script can be set in the
environment passed to `configure'. However, some packages may run
configure again during the build, and the customized values of these
variables may be lost. In order to avoid this problem, you should set
them in the `configure' command line, using `VAR=value'. For example:
./configure CC=/usr/local2/bin/gcc
causes the specified `gcc' to be used as the C compiler (unless it is
overridden in the site shell script).
Unfortunately, this technique does not work for `CONFIG_SHELL' due to
an Autoconf limitation. Until the limitation is lifted, you can use
this workaround:
CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash
`configure' Invocation
======================
`configure' recognizes the following options to control how it
operates.
`--help'
`-h'
Print a summary of all of the options to `configure', and exit.
`--help=short'
`--help=recursive'
Print a summary of the options unique to this package's
`configure', and exit. The `short' variant lists options used
only in the top level, while the `recursive' variant lists options
also present in any nested packages.
`--version'
`-V'
Print the version of Autoconf used to generate the `configure'
script, and exit.
`--cache-file=FILE'
Enable the cache: use and save the results of the tests in FILE,
traditionally `config.cache'. FILE defaults to `/dev/null' to
disable caching.
`--config-cache'
`-C'
Alias for `--cache-file=config.cache'.
`--quiet'
`--silent'
`-q'
Do not print messages saying which checks are being made. To
suppress all normal output, redirect it to `/dev/null' (any error
messages will still be shown).
`--srcdir=DIR'
Look for the package's source code in directory DIR. Usually
`configure' can determine that directory automatically.
`--prefix=DIR'
Use DIR as the installation prefix. *note Installation Names::
for more details, including other options available for fine-tuning
the installation locations.
`--no-create'
`-n'
Run the configure checks, but stop before creating any output
files.
`configure' also accepts some other, not widely useful, options. Run
`configure --help' for more details.

83
pcre2/LICENCE Normal file
View File

@ -0,0 +1,83 @@
PCRE2 LICENCE
-------------
PCRE2 is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Release 10 of PCRE2 is distributed under the terms of the "BSD" licence, as
specified below. The documentation for PCRE2, supplied in the "doc"
directory, is distributed under the same terms as the software itself. The data
in the testdata directory is not copyrighted and is in the public domain.
The basic library functions are written in C and are freestanding. Also
included in the distribution is a just-in-time compiler that can be used to
optimize pattern matching. This is an optional feature that can be omitted when
the library is built.
THE BASIC LIBRARY FUNCTIONS
---------------------------
Written by: Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
University of Cambridge Computing Service,
Cambridge, England.
Copyright (c) 1997-2015 University of Cambridge
All rights reserved.
PCRE2 JUST-IN-TIME COMPILATION SUPPORT
--------------------------------------
Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
Copyright(c) 2010-2015 Zoltan Herczeg
All rights reserved.
STACK-LESS JUST-IN-TIME COMPILER
--------------------------------
Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
Copyright(c) 2009-2015 Zoltan Herczeg
All rights reserved.
THE "BSD" LICENCE
-----------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of any
contributors may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
End

774
pcre2/Makefile.am Normal file
View File

@ -0,0 +1,774 @@
## Process this file with automake to produce Makefile.in.
AUTOMAKE_OPTIONS = subdir-objects
ACLOCAL_AMFLAGS = -I m4
AM_CPPFLAGS = -I$(builddir)/src -I$(srcdir)/src
## Specify the documentation files that are distributed.
dist_doc_DATA = \
AUTHORS \
COPYING \
ChangeLog \
LICENCE \
NEWS \
README \
doc/pcre2.txt \
doc/pcre2-config.txt \
doc/pcre2grep.txt \
doc/pcre2test.txt
dist_html_DATA = \
doc/html/NON-AUTOTOOLS-BUILD.txt \
doc/html/README.txt \
doc/html/index.html \
doc/html/pcre2-config.html \
doc/html/pcre2.html \
doc/html/pcre2_callout_enumerate.html \
doc/html/pcre2_code_free.html \
doc/html/pcre2_compile.html \
doc/html/pcre2_compile_context_copy.html \
doc/html/pcre2_compile_context_create.html \
doc/html/pcre2_compile_context_free.html \
doc/html/pcre2_config.html \
doc/html/pcre2_dfa_match.html \
doc/html/pcre2_general_context_copy.html \
doc/html/pcre2_general_context_create.html \
doc/html/pcre2_general_context_free.html \
doc/html/pcre2_get_error_message.html \
doc/html/pcre2_get_mark.html \
doc/html/pcre2_get_ovector_count.html \
doc/html/pcre2_get_ovector_pointer.html \
doc/html/pcre2_get_startchar.html \
doc/html/pcre2_jit_compile.html \
doc/html/pcre2_jit_free_unused_memory.html \
doc/html/pcre2_jit_match.html \
doc/html/pcre2_jit_stack_assign.html \
doc/html/pcre2_jit_stack_create.html \
doc/html/pcre2_jit_stack_free.html \
doc/html/pcre2_maketables.html \
doc/html/pcre2_match.html \
doc/html/pcre2_match_context_copy.html \
doc/html/pcre2_match_context_create.html \
doc/html/pcre2_match_context_free.html \
doc/html/pcre2_match_data_create.html \
doc/html/pcre2_match_data_create_from_pattern.html \
doc/html/pcre2_match_data_free.html \
doc/html/pcre2_pattern_info.html \
doc/html/pcre2_serialize_decode.html \
doc/html/pcre2_serialize_encode.html \
doc/html/pcre2_serialize_free.html \
doc/html/pcre2_serialize_get_number_of_codes.html \
doc/html/pcre2_set_bsr.html \
doc/html/pcre2_set_callout.html \
doc/html/pcre2_set_character_tables.html \
doc/html/pcre2_set_compile_recursion_guard.html \
doc/html/pcre2_set_match_limit.html \
doc/html/pcre2_set_newline.html \
doc/html/pcre2_set_parens_nest_limit.html \
doc/html/pcre2_set_recursion_limit.html \
doc/html/pcre2_set_recursion_memory_management.html \
doc/html/pcre2_substitute.html \
doc/html/pcre2_substring_copy_byname.html \
doc/html/pcre2_substring_copy_bynumber.html \
doc/html/pcre2_substring_free.html \
doc/html/pcre2_substring_get_byname.html \
doc/html/pcre2_substring_get_bynumber.html \
doc/html/pcre2_substring_length_byname.html \
doc/html/pcre2_substring_length_bynumber.html \
doc/html/pcre2_substring_list_free.html \
doc/html/pcre2_substring_list_get.html \
doc/html/pcre2_substring_nametable_scan.html \
doc/html/pcre2_substring_number_from_name.html \
doc/html/pcre2api.html \
doc/html/pcre2build.html \
doc/html/pcre2callout.html \
doc/html/pcre2compat.html \
doc/html/pcre2demo.html \
doc/html/pcre2grep.html \
doc/html/pcre2jit.html \
doc/html/pcre2limits.html \
doc/html/pcre2matching.html \
doc/html/pcre2partial.html \
doc/html/pcre2pattern.html \
doc/html/pcre2perform.html \
doc/html/pcre2posix.html \
doc/html/pcre2sample.html \
doc/html/pcre2serialize.html \
doc/html/pcre2stack.html \
doc/html/pcre2syntax.html \
doc/html/pcre2test.html \
doc/html/pcre2unicode.html
dist_man_MANS = \
doc/pcre2-config.1 \
doc/pcre2.3 \
doc/pcre2_callout_enumerate.3 \
doc/pcre2_code_free.3 \
doc/pcre2_compile.3 \
doc/pcre2_compile_context_copy.3 \
doc/pcre2_compile_context_create.3 \
doc/pcre2_compile_context_free.3 \
doc/pcre2_config.3 \
doc/pcre2_dfa_match.3 \
doc/pcre2_general_context_copy.3 \
doc/pcre2_general_context_create.3 \
doc/pcre2_general_context_free.3 \
doc/pcre2_get_error_message.3 \
doc/pcre2_get_mark.3 \
doc/pcre2_get_ovector_count.3 \
doc/pcre2_get_ovector_pointer.3 \
doc/pcre2_get_startchar.3 \
doc/pcre2_jit_compile.3 \
doc/pcre2_jit_free_unused_memory.3 \
doc/pcre2_jit_match.3 \
doc/pcre2_jit_stack_assign.3 \
doc/pcre2_jit_stack_create.3 \
doc/pcre2_jit_stack_free.3 \
doc/pcre2_maketables.3 \
doc/pcre2_match.3 \
doc/pcre2_match_context_copy.3 \
doc/pcre2_match_context_create.3 \
doc/pcre2_match_context_free.3 \
doc/pcre2_match_data_create.3 \
doc/pcre2_match_data_create_from_pattern.3 \
doc/pcre2_match_data_free.3 \
doc/pcre2_pattern_info.3 \
doc/pcre2_serialize_decode.3 \
doc/pcre2_serialize_encode.3 \
doc/pcre2_serialize_free.3 \
doc/pcre2_serialize_get_number_of_codes.3 \
doc/pcre2_set_bsr.3 \
doc/pcre2_set_callout.3 \
doc/pcre2_set_character_tables.3 \
doc/pcre2_set_compile_recursion_guard.3 \
doc/pcre2_set_match_limit.3 \
doc/pcre2_set_newline.3 \
doc/pcre2_set_parens_nest_limit.3 \
doc/pcre2_set_recursion_limit.3 \
doc/pcre2_set_recursion_memory_management.3 \
doc/pcre2_substitute.3 \
doc/pcre2_substring_copy_byname.3 \
doc/pcre2_substring_copy_bynumber.3 \
doc/pcre2_substring_free.3 \
doc/pcre2_substring_get_byname.3 \
doc/pcre2_substring_get_bynumber.3 \
doc/pcre2_substring_length_byname.3 \
doc/pcre2_substring_length_bynumber.3 \
doc/pcre2_substring_list_free.3 \
doc/pcre2_substring_list_get.3 \
doc/pcre2_substring_nametable_scan.3 \
doc/pcre2_substring_number_from_name.3 \
doc/pcre2api.3 \
doc/pcre2build.3 \
doc/pcre2callout.3 \
doc/pcre2compat.3 \
doc/pcre2demo.3 \
doc/pcre2grep.1 \
doc/pcre2jit.3 \
doc/pcre2limits.3 \
doc/pcre2matching.3 \
doc/pcre2partial.3 \
doc/pcre2pattern.3 \
doc/pcre2perform.3 \
doc/pcre2posix.3 \
doc/pcre2sample.3 \
doc/pcre2serialize.3 \
doc/pcre2stack.3 \
doc/pcre2syntax.3 \
doc/pcre2test.1 \
doc/pcre2unicode.3
# The Libtool libraries to install. We'll add to this later.
lib_LTLIBRARIES =
# Unit tests you want to run when people type 'make check'.
# TESTS is for binary unit tests, check_SCRIPTS for script-based tests
TESTS =
check_SCRIPTS =
dist_noinst_SCRIPTS =
# Some of the binaries we make are to be installed, and others are
# (non-user-visible) helper programs needed to build the libraries.
bin_PROGRAMS =
noinst_PROGRAMS =
# Additional files to delete on 'make clean', 'make distclean',
# and 'make maintainer-clean'.
CLEANFILES =
DISTCLEANFILES = src/config.h.in~ config.h
MAINTAINERCLEANFILES =
# Additional files to bundle with the distribution, over and above what
# the Autotools include by default.
EXTRA_DIST =
# These files contain additional m4 macros that are used by autoconf.
EXTRA_DIST += \
m4/ax_pthread.m4 m4/pcre2_visibility.m4
# These files contain maintenance information
EXTRA_DIST += \
NON-AUTOTOOLS-BUILD \
HACKING
# These files are used in the preparation of a release
EXTRA_DIST += \
PrepareRelease \
CheckMan \
CleanTxt \
Detrail \
132html \
doc/index.html.src
# These files are usable versions of pcre2.h and config.h that are distributed
# for the benefit of people who are building PCRE2 manually, without the
# Autotools support.
EXTRA_DIST += \
src/pcre2.h.generic \
src/config.h.generic
# The only difference between pcre2.h.in and pcre2.h is the setting of the PCRE
# version number. Therefore, we can create the generic version just by copying.
src/pcre2.h.generic: src/pcre2.h.in configure.ac
rm -f $@
cp -p src/pcre2.h $@
# It is more complicated for config.h.generic. We need the version that results
# from a default configuration so as to get all the default values for PCRE
# configuration macros such as MATCH_LIMIT and NEWLINE. We can get this by
# doing a configure in a temporary directory. However, some trickery is needed,
# because the source directory may already be configured. If you just try
# running configure in a new directory, it complains. For this reason, we move
# config.status out of the way while doing the default configuration. The
# resulting config.h is munged by perl to put #ifdefs round any #defines for
# macros with values, and to #undef all boolean macros such as HAVE_xxx and
# SUPPORT_xxx. We also get rid of any gcc-specific visibility settings. Make
# sure that PCRE2_EXP_DEFN is unset (in case it has visibility settings).
src/config.h.generic: configure.ac
rm -rf $@ _generic
mkdir _generic
cs=$(srcdir)/config.status; test ! -f $$cs || mv -f $$cs $$cs.aside
cd _generic && $(abs_top_srcdir)/configure || :
cs=$(srcdir)/config.status; test ! -f $$cs.aside || mv -f $$cs.aside $$cs
test -f _generic/src/config.h
perl -n \
-e 'BEGIN{$$blank=0;}' \
-e 'if(/PCRE2_EXP_DEFN/){print"/* #undef PCRE2_EXP_DEFN */\n";$$blank=0;next;}' \
-e 'if(/to make a symbol visible/){next;}' \
-e 'if(/__attribute__ \(\(visibility/){next;}' \
-e 'if(/LT_OBJDIR/){print"/* This is ignored unless you are using libtool. */\n";}' \
-e 'if(/^#define\s((?:HAVE|SUPPORT|STDC)_\w+)/){print"/* #undef $$1 */\n";$$blank=0;next;}' \
-e 'if(/^#define\s(?!PACKAGE|VERSION)(\w+)/){print"#ifndef $$1\n$$_#endif\n";$$blank=0;next;}' \
-e 'if(/^\s*$$/){print unless $$blank; $$blank=1;} else{print;$$blank=0;}' \
_generic/src/config.h >$@
rm -rf _generic
MAINTAINERCLEANFILES += src/pcre2.h.generic src/config.h.generic
# These are the header files we'll install. We do not distribute pcre2.h
# because it is generated from pcre2.h.in.
nodist_include_HEADERS = src/pcre2.h
include_HEADERS = src/pcre2posix.h
# This is the "config" script.
bin_SCRIPTS = pcre2-config
## ---------------------------------------------------------------
## The dftables program is used to rebuild character tables before compiling
## PCRE2, if --enable-rebuild-chartables is specified. It is not a user-visible
## program. The default (when --enable-rebuild-chartables is not specified) is
## to copy a distributed set of tables that are defined for ASCII code. In this
## case, dftables is not needed.
if WITH_REBUILD_CHARTABLES
noinst_PROGRAMS += dftables
dftables_SOURCES = src/dftables.c
src/pcre2_chartables.c: dftables$(EXEEXT)
rm -f $@
./dftables$(EXEEXT) $@
else
src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.dist
rm -f $@
$(LN_S) $(abs_srcdir)/src/pcre2_chartables.c.dist $(abs_builddir)/src/pcre2_chartables.c
endif # WITH_REBUILD_CHARTABLES
BUILT_SOURCES = src/pcre2_chartables.c
NODIST_SOURCES = src/pcre2_chartables.c
## Define the list of common sources, then arrange to build whichever of the
## 8-, 16-, or 32-bit libraries are configured.
COMMON_SOURCES = \
src/pcre2_auto_possess.c \
src/pcre2_compile.c \
src/pcre2_config.c \
src/pcre2_context.c \
src/pcre2_dfa_match.c \
src/pcre2_error.c \
src/pcre2_internal.h \
src/pcre2_intmodedep.h \
src/pcre2_jit_compile.c \
src/pcre2_maketables.c \
src/pcre2_match.c \
src/pcre2_match_data.c \
src/pcre2_newline.c \
src/pcre2_ord2utf.c \
src/pcre2_pattern_info.c \
src/pcre2_serialize.c \
src/pcre2_string_utils.c \
src/pcre2_study.c \
src/pcre2_substitute.c \
src/pcre2_substring.c \
src/pcre2_tables.c \
src/pcre2_ucd.c \
src/pcre2_ucp.h \
src/pcre2_valid_utf.c \
src/pcre2_xclass.c
if WITH_PCRE2_8
lib_LTLIBRARIES += libpcre2-8.la
libpcre2_8_la_SOURCES = \
$(COMMON_SOURCES)
nodist_libpcre2_8_la_SOURCES = \
$(NODIST_SOURCES)
libpcre2_8_la_CFLAGS = \
-DPCRE2_CODE_UNIT_WIDTH=8 \
$(VISIBILITY_CFLAGS) \
$(AM_CFLAGS)
libpcre2_8_la_LIBADD =
endif # WITH_PCRE2_8
if WITH_PCRE2_16
lib_LTLIBRARIES += libpcre2-16.la
libpcre2_16_la_SOURCES = \
$(COMMON_SOURCES)
nodist_libpcre2_16_la_SOURCES = \
$(NODIST_SOURCES)
libpcre2_16_la_CFLAGS = \
-DPCRE2_CODE_UNIT_WIDTH=16 \
$(VISIBILITY_CFLAGS) \
$(AM_CFLAGS)
libpcre2_16_la_LIBADD =
endif # WITH_PCRE2_16
if WITH_PCRE2_32
lib_LTLIBRARIES += libpcre2-32.la
libpcre2_32_la_SOURCES = \
$(COMMON_SOURCES)
nodist_libpcre2_32_la_SOURCES = \
$(NODIST_SOURCES)
libpcre2_32_la_CFLAGS = \
-DPCRE2_CODE_UNIT_WIDTH=32 \
$(VISIBILITY_CFLAGS) \
$(AM_CFLAGS)
libpcre2_32_la_LIBADD =
endif # WITH_PCRE2_32
# The pcre2_chartables.c.dist file is the default version of
# pcre2_chartables.c, used unless --enable-rebuild-chartables is specified.
EXTRA_DIST += src/pcre2_chartables.c.dist
CLEANFILES += src/pcre2_chartables.c
# The JIT compiler lives in a separate directory, but its files are #included
# when pcre2_jit_compile.c is processed, so they must be distributed.
EXTRA_DIST += \
src/sljit/sljitConfig.h \
src/sljit/sljitConfigInternal.h \
src/sljit/sljitExecAllocator.c \
src/sljit/sljitLir.c \
src/sljit/sljitLir.h \
src/sljit/sljitNativeARM_32.c \
src/sljit/sljitNativeARM_64.c \
src/sljit/sljitNativeARM_T2_32.c \
src/sljit/sljitNativeMIPS_32.c \
src/sljit/sljitNativeMIPS_64.c \
src/sljit/sljitNativeMIPS_common.c \
src/sljit/sljitNativePPC_32.c \
src/sljit/sljitNativePPC_64.c \
src/sljit/sljitNativePPC_common.c \
src/sljit/sljitNativeSPARC_32.c \
src/sljit/sljitNativeSPARC_common.c \
src/sljit/sljitNativeTILEGX-encoder.c \
src/sljit/sljitNativeTILEGX_64.c \
src/sljit/sljitNativeX86_32.c \
src/sljit/sljitNativeX86_64.c \
src/sljit/sljitNativeX86_common.c \
src/sljit/sljitUtils.c
# Some of the JIT sources are also in separate files that are #included.
EXTRA_DIST += \
src/pcre2_jit_match.c \
src/pcre2_jit_misc.c
if WITH_PCRE2_8
libpcre2_8_la_LDFLAGS = $(EXTRA_LIBPCRE2_8_LDFLAGS)
endif # WITH_PCRE2_8
if WITH_PCRE2_16
libpcre2_16_la_LDFLAGS = $(EXTRA_LIBPCRE2_16_LDFLAGS)
endif # WITH_PCRE2_16
if WITH_PCRE2_32
libpcre2_32_la_LDFLAGS = $(EXTRA_LIBPCRE2_32_LDFLAGS)
endif # WITH_PCRE2_32
if WITH_VALGRIND
if WITH_PCRE2_8
libpcre2_8_la_CFLAGS += $(VALGRIND_CFLAGS)
endif # WITH_PCRE2_8
if WITH_PCRE2_16
libpcre2_16_la_CFLAGS += $(VALGRIND_CFLAGS)
endif # WITH_PCRE2_16
if WITH_PCRE2_32
libpcre2_32_la_CFLAGS += $(VALGRIND_CFLAGS)
endif # WITH_PCRE2_32
endif # WITH_VALGRIND
if WITH_GCOV
if WITH_PCRE2_8
libpcre2_8_la_CFLAGS += $(GCOV_CFLAGS)
endif # WITH_PCRE2_8
if WITH_PCRE2_16
libpcre2_16_la_CFLAGS += $(GCOV_CFLAGS)
endif # WITH_PCRE2_16
if WITH_PCRE2_32
libpcre2_32_la_CFLAGS += $(GCOV_CFLAGS)
endif # WITH_PCRE2_32
endif # WITH_GCOV
## A version of the 8-bit library that has a POSIX API.
if WITH_PCRE2_8
lib_LTLIBRARIES += libpcre2-posix.la
libpcre2_posix_la_SOURCES = src/pcre2posix.c
libpcre2_posix_la_CFLAGS = \
-DPCRE2_CODE_UNIT_WIDTH=8 \
$(VISIBILITY_CFLAGS) $(AM_CFLAGS)
libpcre2_posix_la_LDFLAGS = $(EXTRA_LIBPCRE2_POSIX_LDFLAGS)
libpcre2_posix_la_LIBADD = libpcre2-8.la
if WITH_GCOV
libpcre2_posix_la_CFLAGS += $(GCOV_CFLAGS)
endif # WITH_GCOV
endif # WITH_PCRE2_8
## Build pcre2grep if the 8-bit library is enabled
if WITH_PCRE2_8
bin_PROGRAMS += pcre2grep
pcre2grep_SOURCES = src/pcre2grep.c
pcre2grep_CFLAGS = $(AM_CFLAGS)
pcre2grep_LDADD = $(LIBZ) $(LIBBZ2)
pcre2grep_LDADD += libpcre2-8.la
if WITH_GCOV
pcre2grep_CFLAGS += $(GCOV_CFLAGS)
pcre2grep_LDADD += $(GCOV_LIBS)
endif # WITH_GCOV
endif # WITH_PCRE2_8
## -------- Testing ----------
## If JIT support is enabled, arrange for the JIT test program to run.
if WITH_JIT
TESTS += pcre2_jit_test
noinst_PROGRAMS += pcre2_jit_test
pcre2_jit_test_SOURCES = src/pcre2_jit_test.c
pcre2_jit_test_CFLAGS = $(AM_CFLAGS)
pcre2_jit_test_LDADD =
if WITH_PCRE2_8
pcre2_jit_test_LDADD += libpcre2-8.la
endif # WITH_PCRE2_8
if WITH_PCRE2_16
pcre2_jit_test_LDADD += libpcre2-16.la
endif # WITH_PCRE2_16
if WITH_PCRE2_32
pcre2_jit_test_LDADD += libpcre2-32.la
endif # WITH_PCRE2_32
if WITH_GCOV
pcre2_jit_test_CFLAGS += $(GCOV_CFLAGS)
pcre2_jit_test_LDADD += $(GCOV_LIBS)
endif # WITH_GCOV
endif # WITH_JIT
# Build the general pcre2test program. The file src/pcre2_printint.c is
# #included by pcre2test as many times as needed, at different code unit
# widths.
bin_PROGRAMS += pcre2test
EXTRA_DIST += src/pcre2_printint.c
pcre2test_SOURCES = src/pcre2test.c
pcre2test_CFLAGS = $(AM_CFLAGS)
pcre2test_LDADD = $(LIBREADLINE)
if WITH_PCRE2_8
pcre2test_LDADD += libpcre2-8.la libpcre2-posix.la
endif # WITH_PCRE2_8
if WITH_PCRE2_16
pcre2test_LDADD += libpcre2-16.la
endif # WITH_PCRE2_16
if WITH_PCRE2_32
pcre2test_LDADD += libpcre2-32.la
endif # WITH_PCRE2_32
if WITH_VALGRIND
pcre2test_CFLAGS += $(VALGRIND_CFLAGS)
endif # WITH_VALGRIND
if WITH_GCOV
pcre2test_CFLAGS += $(GCOV_CFLAGS)
pcre2test_LDADD += $(GCOV_LIBS)
endif # WITH_GCOV
## The main library tests. Each test is a binary plus a script that runs that
## binary in various ways. We install these test binaries in case folks find it
## helpful.
TESTS += RunTest
dist_noinst_SCRIPTS += RunTest
EXTRA_DIST += RunTest.bat
## When the 8-bit library is configured, pcre2grep will have been built.
if WITH_PCRE2_8
TESTS += RunGrepTest
dist_noinst_SCRIPTS += RunGrepTest
endif # WITH_PCRE2_8
## Distribute all the test data files
EXTRA_DIST += \
testdata/grepbinary \
testdata/grepfilelist \
testdata/grepinput \
testdata/grepinput3 \
testdata/grepinput8 \
testdata/grepinputv \
testdata/grepinputx \
testdata/greplist \
testdata/grepoutput \
testdata/grepoutput8 \
testdata/grepoutputN \
testdata/greppatN4 \
testdata/testinput1 \
testdata/testinput2 \
testdata/testinput3 \
testdata/testinput4 \
testdata/testinput5 \
testdata/testinput6 \
testdata/testinput7 \
testdata/testinput8 \
testdata/testinput9 \
testdata/testinput10 \
testdata/testinput11 \
testdata/testinput12 \
testdata/testinput13 \
testdata/testinput14 \
testdata/testinput15 \
testdata/testinput16 \
testdata/testinput17 \
testdata/testinput18 \
testdata/testinput19 \
testdata/testinputEBC \
testdata/testoutput1 \
testdata/testoutput2 \
testdata/testoutput3 \
testdata/testoutput3A \
testdata/testoutput3B \
testdata/testoutput4 \
testdata/testoutput5 \
testdata/testoutput6 \
testdata/testoutput7 \
testdata/testoutput8-16 \
testdata/testoutput8-32 \
testdata/testoutput8-8 \
testdata/testoutput9 \
testdata/testoutput10 \
testdata/testoutput11-16 \
testdata/testoutput11-32 \
testdata/testoutput12-16 \
testdata/testoutput12-32 \
testdata/testoutput13 \
testdata/testoutput14 \
testdata/testoutput15 \
testdata/testoutput16 \
testdata/testoutput17 \
testdata/testoutput18 \
testdata/testoutput19 \
testdata/testoutputEBC \
testdata/wintestinput3 \
testdata/wintestoutput3 \
perltest.sh
# RunTest and RunGrepTest should clean up after themselves, but just in case
# they don't, add their working files to CLEANFILES.
CLEANFILES += \
testSinput \
test3input \
test3output \
test3outputA \
test3outputB \
testtry \
teststdout \
teststderr \
teststderrgrep \
testtemp1grep \
testtemp2grep \
testtrygrep \
testNinputgrep
## ------------ End of testing -------------
# PCRE2 demonstration program. Not built automatcally. The point is that the
# users should build it themselves. So just distribute the source.
EXTRA_DIST += src/pcre2demo.c
# We have .pc files for pkg-config users.
pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA =
if WITH_PCRE2_8
pkgconfig_DATA += libpcre2-8.pc libpcre2-posix.pc
endif
if WITH_PCRE2_16
pkgconfig_DATA += libpcre2-16.pc
endif
if WITH_PCRE2_32
pkgconfig_DATA += libpcre2-32.pc
endif
# gcov/lcov code coverage reporting
#
# Coverage reporting targets:
#
# coverage: Create a coverage report from 'make check'
# coverage-baseline: Capture baseline coverage information
# coverage-reset: This zeros the coverage counters only
# coverage-report: This creates the coverage report only
# coverage-clean-report: This removes the generated coverage report
# without cleaning the coverage data itself
# coverage-clean-data: This removes the captured coverage data without
# removing the coverage files created at compile time (*.gcno)
# coverage-clean: This cleans all coverage data including the generated
# coverage report.
if WITH_GCOV
COVERAGE_TEST_NAME = $(PACKAGE)
COVERAGE_NAME = $(PACKAGE)-$(VERSION)
COVERAGE_OUTPUT_FILE = $(COVERAGE_NAME)-coverage.info
COVERAGE_OUTPUT_DIR = $(COVERAGE_NAME)-coverage
COVERAGE_LCOV_EXTRA_FLAGS =
COVERAGE_GENHTML_EXTRA_FLAGS =
coverage_quiet = $(coverage_quiet_$(V))
coverage_quiet_ = $(coverage_quiet_$(AM_DEFAULT_VERBOSITY))
coverage_quiet_0 = --quiet
coverage-check: all
-$(MAKE) $(AM_MAKEFLAGS) -k check
coverage-baseline:
$(LCOV) $(coverage_quiet) \
--directory $(top_builddir) \
--output-file "$(COVERAGE_OUTPUT_FILE)" \
--capture \
--initial
coverage-report:
$(LCOV) $(coverage_quiet) \
--directory $(top_builddir) \
--capture \
--output-file "$(COVERAGE_OUTPUT_FILE).tmp" \
--test-name "$(COVERAGE_TEST_NAME)" \
--no-checksum \
--compat-libtool \
$(COVERAGE_LCOV_EXTRA_FLAGS)
$(LCOV) $(coverage_quiet) \
--directory $(top_builddir) \
--output-file "$(COVERAGE_OUTPUT_FILE)" \
--remove "$(COVERAGE_OUTPUT_FILE).tmp" \
"/tmp/*" \
"/usr/include/*" \
"$(includedir)/*"
-@rm -f "$(COVERAGE_OUTPUT_FILE).tmp"
LANG=C $(GENHTML) $(coverage_quiet) \
--prefix $(top_builddir) \
--output-directory "$(COVERAGE_OUTPUT_DIR)" \
--title "$(PACKAGE) $(VERSION) Code Coverage Report" \
--show-details "$(COVERAGE_OUTPUT_FILE)" \
--legend \
$(COVERAGE_GENHTML_EXTRA_FLAGS)
@echo "Code coverage report written to file://$(abs_builddir)/$(COVERAGE_OUTPUT_DIR)/index.html"
coverage-reset:
-$(LCOV) $(coverage_quiet) --zerocounters --directory $(top_builddir)
coverage-clean-report:
-rm -f "$(COVERAGE_OUTPUT_FILE)" "$(COVERAGE_OUTPUT_FILE).tmp"
-rm -rf "$(COVERAGE_OUTPUT_DIR)"
coverage-clean-data:
-find $(top_builddir) -name "*.gcda" -delete
coverage-clean: coverage-reset coverage-clean-report coverage-clean-data
-find $(top_builddir) -name "*.gcno" -delete
coverage-distclean: coverage-clean
coverage: coverage-reset coverage-baseline coverage-check coverage-report
clean-local: coverage-clean
distclean-local: coverage-distclean
.PHONY: coverage coverage-baseline coverage-check coverage-report coverage-reset coverage-clean-report coverage-clean-data coverage-clean coverage-distclean
# Without coverage support, still arrange for 'make distclean' to get rid of
# any coverage files that may have been left from a different configuration.
else
coverage:
@echo "Configuring with --enable-coverage is required to generate code coverage report."
DISTCLEANFILES += src/*.gcda src/*.gcno
distclean-local:
rm -rf $(PACKAGE)-$(VERSION)-coverage*
endif # WITH_GCOV
## CMake support
EXTRA_DIST += \
cmake/COPYING-CMAKE-SCRIPTS \
cmake/FindPackageHandleStandardArgs.cmake \
cmake/FindReadline.cmake \
cmake/FindEditline.cmake \
CMakeLists.txt \
config-cmake.h.in
## end Makefile.am

3087
pcre2/Makefile.in Normal file

File diff suppressed because it is too large Load Diff

47
pcre2/NEWS Normal file
View File

@ -0,0 +1,47 @@
News about PCRE2 releases
-------------------------
Version 10.20 30-June-2015
--------------------------
1. Callouts with string arguments and the pcre2_callout_enumerate() function
have been implemented.
2. The PCRE2_NEVER_BACKSLASH_C option, which locks out the use of \C, is added.
3. The PCRE2_ALT_CIRCUMFLEX option lets ^ match after a newline at the end of a
subject in multiline mode.
4. The way named subpatterns are handled has been refactored. The previous
approach had several bugs.
5. The handling of \c in EBCDIC environments has been changed to conform to the
perlebcdic document. This is an incompatible change.
6. Bugs have been mended, many of them discovered by fuzzers.
Version 10.10 06-March-2015
---------------------------
1. Serialization and de-serialization functions have been added to the API,
making it possible to save and restore sets of compiled patterns, though
restoration must be done in the same environment that was used for compilation.
2. The (*NO_JIT) feature has been added; this makes it possible for a pattern
creator to specify that JIT is not to be used.
3. A number of bugs have been fixed. In particular, bugs that caused building
on Windows using CMake to fail have been mended.
Version 10.00 05-January-2015
-----------------------------
Version 10.00 is the first release of PCRE2, a revised API for the PCRE
library. Changes prior to 10.00 are logged in the ChangeLog file for the old
API, up to item 20 for release 8.36. New programs are recommended to use the
new library. Programs that use the original (PCRE1) API will need changing
before linking with the new library.
****

391
pcre2/NON-AUTOTOOLS-BUILD Normal file
View File

@ -0,0 +1,391 @@
Building PCRE2 without using autotools
--------------------------------------
This document has been converted from the PCRE1 document. I have removed a
number of sections about building in various environments, as they applied only
to PCRE1 and are probably out of date.
This document contains the following sections:
General
Generic instructions for the PCRE2 C library
Stack size in Windows environments
Linking programs in Windows environments
Calling conventions in Windows environments
Comments about Win32 builds
Building PCRE2 on Windows with CMake
Testing with RunTest.bat
Building PCRE2 on native z/OS and z/VM
GENERAL
The basic PCRE2 library consists entirely of code written in Standard C, and so
should compile successfully on any system that has a Standard C compiler and
library.
The PCRE2 distribution includes a "configure" file for use by the
configure/make (autotools) build system, as found in many Unix-like
environments. The README file contains information about the options for
"configure".
There is also support for CMake, which some users prefer, especially in Windows
environments, though it can also be run in Unix-like environments. See the
section entitled "Building PCRE2 on Windows with CMake" below.
Versions of src/config.h and src/pcre2.h are distributed in the PCRE2 tarballs
under the names src/config.h.generic and src/pcre2.h.generic. These are
provided for those who build PCRE2 without using "configure" or CMake. If you
use "configure" or CMake, the .generic versions are not used.
GENERIC INSTRUCTIONS FOR THE PCRE2 C LIBRARY
The following are generic instructions for building the PCRE2 C library "by
hand". If you are going to use CMake, this section does not apply to you; you
can skip ahead to the CMake section.
(1) Copy or rename the file src/config.h.generic as src/config.h, and edit the
macro settings that it contains to whatever is appropriate for your
environment. In particular, you can alter the definition of the NEWLINE
macro to specify what character(s) you want to be interpreted as line
terminators.
When you compile any of the PCRE2 modules, you must specify
-DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
sources.
An alternative approach is not to edit src/config.h, but to use -D on the
compiler command line to make any changes that you need to the
configuration options. In this case -DHAVE_CONFIG_H must not be set.
NOTE: There have been occasions when the way in which certain parameters
in src/config.h are used has changed between releases. (In the
configure/make world, this is handled automatically.) When upgrading to a
new release, you are strongly advised to review src/config.h.generic
before re-using what you had previously.
(2) Copy or rename the file src/pcre2.h.generic as src/pcre2.h.
(3) EITHER:
Copy or rename file src/pcre2_chartables.c.dist as
src/pcre2_chartables.c.
OR:
Compile src/dftables.c as a stand-alone program (using -DHAVE_CONFIG_H
if you have set up src/config.h), and then run it with the single
argument "src/pcre2_chartables.c". This generates a set of standard
character tables and writes them to that file. The tables are generated
using the default C locale for your system. If you want to use a locale
that is specified by LC_xxx environment variables, add the -L option to
the dftables command. You must use this method if you are building on a
system that uses EBCDIC code.
The tables in src/pcre2_chartables.c are defaults. The caller of PCRE2 can
specify alternative tables at run time.
(4) For an 8-bit library, compile the following source files from the src
directory, setting -DPCRE2_CODE_UNIT_WIDTH=8 as a compiler option. Also
set -DHAVE_CONFIG_H if you have set up src/config.h with your
configuration, or else use other -D settings to change the configuration
as required.
pcre2_auto_possess.c
pcre2_chartables.c
pcre2_compile.c
pcre2_config.c
pcre2_context.c
pcre2_dfa_match.c
pcre2_error.c
pcre2_jit_compile.c
pcre2_maketables.c
pcre2_match.c
pcre2_match_data.c
pcre2_newline.c
pcre2_ord2utf.c
pcre2_pattern_info.c
pcre2_serialize.c
pcre2_string_utils.c
pcre2_study.c
pcre2_substitute.c
pcre2_substring.c
pcre2_tables.c
pcre2_ucd.c
pcre2_valid_utf.c
pcre2_xclass.c
Make sure that you include -I. in the compiler command (or equivalent for
an unusual compiler) so that all included PCRE2 header files are first
sought in the src directory under the current directory. Otherwise you run
the risk of picking up a previously-installed file from somewhere else.
Note that you must compile pcre2_jit_compile.c, even if you have not
defined SUPPORT_JIT in src/config.h, because when JIT support is not
configured, dummy functions are compiled. When JIT support IS configured,
pcre2_compile.c #includes other files from the sljit subdirectory, where
there should be 16 files, all of whose names begin with "sljit". It also
#includes src/pcre2_jit_match.c and src/pcre2_jit_misc.c, so you should
not compile these yourself.
(5) Now link all the compiled code into an object library in whichever form
your system keeps such libraries. This is the basic PCRE2 C 8-bit library.
If your system has static and shared libraries, you may have to do this
once for each type.
(6) If you want to build a 16-bit library or 32-bit library (as well as, or
instead of the 8-bit library) just supply 16 or 32 as the value of
-DPCRE2_CODE_UNIT_WIDTH when you are compiling.
(7) If you want to build the POSIX wrapper functions (which apply only to the
8-bit library), ensure that you have the src/pcre2posix.h file and then
compile src/pcre2posix.c. Link the result (on its own) as the pcre2posix
library.
(8) The pcre2test program can be linked with any combination of the 8-bit,
16-bit and 32-bit libraries (depending on what you selected in
src/config.h). Compile src/pcre2test.c; don't forget -DHAVE_CONFIG_H if
necessary, but do NOT define PCRE2_CODE_UNIT_WIDTH. Then link with the
appropriate library/ies. If you compiled an 8-bit library, pcre2test also
needs the pcre2posix wrapper library.
(9) Run pcre2test on the testinput files in the testdata directory, and check
that the output matches the corresponding testoutput files. There are
comments about what each test does in the section entitled "Testing PCRE2"
in the README file. If you compiled more than one of the 8-bit, 16-bit and
32-bit libraries, you need to run pcre2test with the -16 option to do
16-bit tests and with the -32 option to do 32-bit tests.
Some tests are relevant only when certain build-time options are selected.
For example, test 4 is for Unicode support, and will not run if you have
built PCRE2 without it. See the comments at the start of each testinput
file. If you have a suitable Unix-like shell, the RunTest script will run
the appropriate tests for you. The command "RunTest list" will output a
list of all the tests.
Note that the supplied files are in Unix format, with just LF characters
as line terminators. You may need to edit them to change this if your
system uses a different convention.
(10) If you have built PCRE2 with SUPPORT_JIT, the JIT features can be tested
by running pcre2test with the -jit option. This is done automatically by
the RunTest script. You might also like to build and run the freestanding
JIT test program, src/pcre2_jit_test.c.
(11) If you want to use the pcre2grep command, compile and link
src/pcre2grep.c; it uses only the basic 8-bit PCRE2 library (it does not
need the pcre2posix library).
STACK SIZE IN WINDOWS ENVIRONMENTS
The default processor stack size of 1Mb in some Windows environments is too
small for matching patterns that need much recursion. In particular, test 2 may
fail because of this. Normally, running out of stack causes a crash, but there
have been cases where the test program has just died silently. See your linker
documentation for how to increase stack size if you experience problems. If you
are using CMake (see "BUILDING PCRE2 ON WINDOWS WITH CMAKE" below) and the gcc
compiler, you can increase the stack size for pcre2test and pcre2grep by
setting the CMAKE_EXE_LINKER_FLAGS variable to "-Wl,--stack,8388608" (for
example). The Linux default of 8Mb is a reasonable choice for the stack, though
even that can be too small for some pattern/subject combinations.
PCRE2 has a compile configuration option to disable the use of stack for
recursion so that heap is used instead. However, pattern matching is
significantly slower when this is done. There is more about stack usage in the
"pcre2stack" documentation.
LINKING PROGRAMS IN WINDOWS ENVIRONMENTS
If you want to statically link a program against a PCRE2 library in the form of
a non-dll .a file, you must define PCRE2_STATIC before including src/pcre2.h.
CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS
It is possible to compile programs to use different calling conventions using
MSVC. Search the web for "calling conventions" for more information. To make it
easier to change the calling convention for the exported functions in the
PCRE2 library, the macro PCRE2_CALL_CONVENTION is present in all the external
definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is
not set, it defaults to empty; the default calling convention is then used
(which is what is wanted most of the time).
COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE2 ON WINDOWS WITH CMAKE")
There are two ways of building PCRE2 using the "configure, make, make install"
paradigm on Windows systems: using MinGW or using Cygwin. These are not at all
the same thing; they are completely different from each other. There is also
support for building using CMake, which some users find a more straightforward
way of building PCRE2 under Windows.
The MinGW home page (http://www.mingw.org/) says this:
MinGW: A collection of freely available and freely distributable Windows
specific header files and import libraries combined with GNU toolsets that
allow one to produce native Windows programs that do not rely on any
3rd-party C runtime DLLs.
The Cygwin home page (http://www.cygwin.com/) says this:
Cygwin is a Linux-like environment for Windows. It consists of two parts:
. A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing
substantial Linux API functionality
. A collection of tools which provide Linux look and feel.
On both MinGW and Cygwin, PCRE2 should build correctly using:
./configure && make && make install
This should create two libraries called libpcre2-8 and libpcre2-posix. These
are independent libraries: when you link with libpcre2-posix you must also link
with libpcre2-8, which contains the basic functions.
Using Cygwin's compiler generates libraries and executables that depend on
cygwin1.dll. If a library that is generated this way is distributed,
cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL
licence, this forces not only PCRE2 to be under the GPL, but also the entire
application. A distributor who wants to keep their own code proprietary must
purchase an appropriate Cygwin licence.
MinGW has no such restrictions. The MinGW compiler generates a library or
executable that can run standalone on Windows without any third party dll or
licensing issues.
But there is more complication:
If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is
to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a
front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's
gcc and MinGW's gcc). So, a user can:
. Build native binaries by using MinGW or by getting Cygwin and using
-mno-cygwin.
. Build binaries that depend on cygwin1.dll by using Cygwin with the normal
compiler flags.
The test files that are supplied with PCRE2 are in UNIX format, with LF
characters as line terminators. Unless your PCRE2 library uses a default
newline option that includes LF as a valid newline, it may be necessary to
change the line terminators in the test files to get some of the tests to work.
BUILDING PCRE2 ON WINDOWS WITH CMAKE
CMake is an alternative configuration facility that can be used instead of
"configure". CMake creates project files (make files, solution files, etc.)
tailored to numerous development environments, including Visual Studio,
Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
spaces in the names for your CMake installation and your PCRE2 source and build
directories.
The following instructions were contributed by a PCRE1 user, but they should
also work for PCRE2. If they are not followed exactly, errors may occur. In the
event that errors do occur, it is recommended that you delete the CMake cache
before attempting to repeat the CMake build process. In the CMake GUI, the
cache can be deleted by selecting "File > Delete Cache".
1. Install the latest CMake version available from http://www.cmake.org/, and
ensure that cmake\bin is on your path.
2. Unzip (retaining folder structure) the PCRE2 source tree into a source
directory such as C:\pcre2. You should ensure your local date and time
is not earlier than the file dates in your source dir if the release is
very new.
3. Create a new, empty build directory, preferably a subdirectory of the
source dir. For example, C:\pcre2\pcre2-xx\build.
4. Run cmake-gui from the Shell envirornment of your build tool, for example,
Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try
to start Cmake from the Windows Start menu, as this can lead to errors.
5. Enter C:\pcre2\pcre2-xx and C:\pcre2\pcre2-xx\build for the source and
build directories, respectively.
6. Hit the "Configure" button.
7. Select the particular IDE / build tool that you are using (Visual
Studio, MSYS makefiles, MinGW makefiles, etc.)
8. The GUI will then list several configuration options. This is where
you can disable Unicode support or select other PCRE2 optional features.
9. Hit "Configure" again. The adjacent "Generate" button should now be
active.
10. Hit "Generate".
11. The build directory should now contain a usable build system, be it a
solution file for Visual Studio, makefiles for MinGW, etc. Exit from
cmake-gui and use the generated build system with your compiler or IDE.
E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE2
solution, select the desired configuration (Debug, or Release, etc.) and
build the ALL_BUILD project.
12. If during configuration with cmake-gui you've elected to build the test
programs, you can execute them by building the test project. E.g., for
MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The
most recent build configuration is targeted by the tests. A summary of
test results is presented. Complete test output is subsequently
available for review in Testing\Temporary under your build dir.
TESTING WITH RUNTEST.BAT
If configured with CMake, building the test project ("make test" or building
ALL_TESTS in Visual Studio) creates (and runs) pcre2_test.bat (and depending
on your configuration options, possibly other test programs) in the build
directory. The pcre2_test.bat script runs RunTest.bat with correct source and
exe paths.
For manual testing with RunTest.bat, provided the build dir is a subdirectory
of the source directory: Open command shell window. Chdir to the location
of your pcre2test.exe and pcre2grep.exe programs. Call RunTest.bat with
"..\RunTest.Bat" or "..\..\RunTest.bat" as appropriate.
To run only a particular test with RunTest.Bat provide a test number argument.
Otherwise:
1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe
have been created.
2. Edit RunTest.bat to indentify the full or relative location of
the pcre2 source (wherein which the testdata folder resides), e.g.:
set srcdir=C:\pcre2\pcre2-10.00
3. In a Windows command environment, chdir to the location of your bat and
exe programs.
4. Run RunTest.bat. Test outputs will automatically be compared to expected
results, and discrepancies will be identified in the console output.
To independently test the just-in-time compiler, run pcre2_jit_test.exe.
BUILDING PCRE2 ON NATIVE Z/OS AND Z/VM
z/OS and z/VM are operating systems for mainframe computers, produced by IBM.
The character code used is EBCDIC, not ASCII or Unicode. In z/OS, UNIX APIs and
applications can be supported through UNIX System Services, and in such an
environment PCRE2 can be built in the same way as in other systems. However, in
native z/OS (without UNIX System Services) and in z/VM, special ports are
required. For details, please see this web site:
http://www.zaconsultants.net
The site currently has ports for PCRE1 releases, but PCRE2 should follow in due
course.
You may also download PCRE1 from WWW.CBTTAPE.ORG, file 882. Everything, source
and executable, is in EBCDIC and native z/OS file formats and this is the
recommended download site.
=============================
Last Updated: 15 June 2015

239
pcre2/PrepareRelease Executable file
View File

@ -0,0 +1,239 @@
#/bin/sh
# Script to prepare the files for building a PCRE2 release. It does some
# processing of the documentation, detrails files, and creates pcre2.h.generic
# and config.h.generic (for use by builders who can't run ./configure).
# You must run this script before runnning "make dist". If its first argument
# is "doc", it stops after preparing the documentation. There are no other
# arguments. The script makes use of the following files:
# 132html A Perl script that converts a .1 or .3 man page into HTML. It
# "knows" the relevant troff constructs that are used in the PCRE2
# man pages.
# CheckMan A Perl script that checks man pages for typos in the mark up.
# CleanTxt A Perl script that cleans up the output of "nroff -man" by
# removing backspaces and other redundant text so as to produce
# a readable .txt file.
# Detrail A Perl script that removes trailing spaces from files.
# doc/index.html.src
# A file that is copied as index.html into the doc/html directory
# when the HTML documentation is built. It works like this so that
# doc/html can be deleted and re-created from scratch.
# README & NON-AUTOTOOLS-BUILD
# These files are copied into the doc/html directory, with .txt
# extensions so that they can by hyperlinked from the HTML
# documentation, because some people just go to the HTML without
# looking for text files.
# First, sort out the documentation. Remove pcre2demo.3 first because it won't
# pass the markup check (it is created below, using markup that none of the
# other pages use).
cd doc
echo Processing documentation
/bin/rm -f pcre2demo.3
# Check the remaining man pages
perl ../CheckMan *.1 *.3
if [ $? != 0 ] ; then exit 1; fi
# Make Text form of the documentation. It needs some mangling to make it
# tidy for online reading. Concatenate all the .3 stuff, but omit the
# individual function pages.
cat <<End >pcre2.txt
-----------------------------------------------------------------------------
This file contains a concatenation of the PCRE2 man pages, converted to plain
text format for ease of searching with a text editor, or for use on systems
that do not have a man page processor. The small individual files that give
synopses of each function in the library have not been included. Neither has
the pcre2demo program. There are separate text files for the pcre2grep and
pcre2test commands.
-----------------------------------------------------------------------------
End
echo "Making pcre2.txt"
for file in pcre2 pcre2api pcre2build pcre2callout pcre2compat pcre2jit \
pcre2limits pcre2matching pcre2partial pcre2unicode ; do
#for file in \
# pcre2syntax \
# pcre2precompile pcre2perform pcre2posix pcre2sample \
# pcre2stack ; do
echo " Processing $file.3"
nroff -c -man $file.3 >$file.rawtxt
perl ../CleanTxt <$file.rawtxt >>pcre2.txt
/bin/rm $file.rawtxt
echo "------------------------------------------------------------------------------" >>pcre2.txt
if [ "$file" != "pcre2sample" ] ; then
echo " " >>pcre2.txt
echo " " >>pcre2.txt
fi
done
# The three commands
for file in pcre2test pcre2grep pcre2-config ; do
echo Making $file.txt
nroff -c -man $file.1 >$file.rawtxt
perl ../CleanTxt <$file.rawtxt >$file.txt
/bin/rm $file.rawtxt
done
# Make pcre2demo.3 from the pcre2demo.c source file
echo "Making pcre2demo.3"
perl <<"END" >pcre2demo.3
open(IN, "../src/pcre2demo.c") || die "Failed to open src/pcre2demo.c\n";
open(OUT, ">pcre2demo.3") || die "Failed to open pcre2demo.3\n";
print OUT ".\\\" Start example.\n" .
".de EX\n" .
". nr mE \\\\n(.f\n" .
". nf\n" .
". nh\n" .
". ft CW\n" .
"..\n" .
".\n" .
".\n" .
".\\\" End example.\n" .
".de EE\n" .
". ft \\\\n(mE\n" .
". fi\n" .
". hy \\\\n(HY\n" .
"..\n" .
".\n" .
".EX\n" ;
while (<IN>)
{
s/\\/\\e/g;
print OUT;
}
print OUT ".EE\n";
close(IN);
close(OUT);
END
if [ $? != 0 ] ; then exit 1; fi
# Make HTML form of the documentation.
echo "Making HTML documentation"
/bin/rm html/*
cp index.html.src html/index.html
cp ../README html/README.txt
cp ../NON-AUTOTOOLS-BUILD html/NON-AUTOTOOLS-BUILD.txt
for file in *.1 ; do
base=`basename $file .1`
echo " Making $base.html"
perl ../132html -toc $base <$file >html/$base.html
done
# Exclude table of contents for function summaries. It seems that expr
# forces an anchored regex. Also exclude them for small pages that have
# only one section.
for file in *.3 ; do
base=`basename $file .3`
toc=-toc
if [ `expr $base : '.*_'` -ne 0 ] ; then toc="" ; fi
if [ "$base" = "pcre2sample" ] || \
[ "$base" = "pcre2stack" ] || \
[ "$base" = "pcre2compat" ] || \
[ "$base" = "pcre2limits" ] || \
[ "$base" = "pcre2perform" ] || \
[ "$base" = "pcre2unicode" ] ; then
toc=""
fi
echo " Making $base.html"
perl ../132html $toc $base <$file >html/$base.html
if [ $? != 0 ] ; then exit 1; fi
done
# End of documentation processing; stop if only documentation required.
cd ..
echo Documentation done
if [ "$1" = "doc" ] ; then exit; fi
# These files are detrailed; do not detrail the test data because there may be
# significant trailing spaces. Do not detrail RunTest.bat, because it has CRLF
# line endings and the detrail script removes all trailing white space. The
# configure files are also omitted from the detrailing.
files="\
Makefile.am \
configure.ac \
README \
LICENCE \
COPYING \
AUTHORS \
NEWS \
NON-AUTOTOOLS-BUILD \
INSTALL \
132html \
CleanTxt \
Detrail \
ChangeLog \
CMakeLists.txt \
RunGrepTest \
RunTest \
pcre2-config.in \
perltest.sh \
libpcre2-8.pc.in \
libpcre2-16.pc.in \
libpcre2-32.pc.in \
libpcre2-posix.pc.in \
src/dftables.c \
src/pcre2.h.in \
src/pcre2_auto_possess.c \
src/pcre2_compile.c \
src/pcre2_config.c \
src/pcre2_context.c \
src/pcre2_dfa_match.c \
src/pcre2_error.c \
src/pcre2_internal.h \
src/pcre2_intmodedep.h \
src/pcre2_jit_compile.c \
src/pcre2_jit_match.c \
src/pcre2_jit_misc.c \
src/pcre2_jit_test.c \
src/pcre2_maketables.c \
src/pcre2_match.c \
src/pcre2_match_data.c \
src/pcre2_newline.c \
src/pcre2_ord2utf.c \
src/pcre2_pattern_info.c \
src/pcre2_printint.c \
src/pcre2_string_utils.c \
src/pcre2_study.c \
src/pcre2_substring.c \
src/pcre2_tables.c \
src/pcre2_ucd.c \
src/pcre2_ucp.h \
src/pcre2_valid_utf.c \
src/pcre2_xclass.c \
src/pcre2demo.c \
src/pcre2grep.c \
src/pcre2posix.c \
src/pcre2posix.h \
src/pcre2test.c"
echo Detrailing
perl ./Detrail $files doc/p* doc/html/*
echo Done
#End

835
pcre2/README Normal file
View File

@ -0,0 +1,835 @@
README file for PCRE2 (Perl-compatible regular expression library)
------------------------------------------------------------------
PCRE2 is a re-working of the original PCRE library to provide an entirely new
API. The latest release of PCRE2 is always available in three alternative
formats from:
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.tar.gz
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.tar.bz2
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.zip
There is a mailing list for discussion about the development of PCRE (both the
original and new APIs) at pcre-dev@exim.org. You can access the archives and
subscribe or manage your subscription here:
https://lists.exim.org/mailman/listinfo/pcre-dev
Please read the NEWS file if you are upgrading from a previous release.
The contents of this README file are:
The PCRE2 APIs
Documentation for PCRE2
Contributions by users of PCRE2
Building PCRE2 on non-Unix-like systems
Building PCRE2 without using autotools
Building PCRE2 using autotools
Retrieving configuration information
Shared libraries
Cross-compiling using autotools
Making new tarballs
Testing PCRE2
Character tables
File manifest
The PCRE2 APIs
--------------
PCRE2 is written in C, and it has its own API. There are three sets of
functions, one for the 8-bit library, which processes strings of bytes, one for
the 16-bit library, which processes strings of 16-bit values, and one for the
32-bit library, which processes strings of 32-bit values. There are no C++
wrappers.
The distribution does contain a set of C wrapper functions for the 8-bit
library that are based on the POSIX regular expression API (see the pcre2posix
man page). These can be found in a library called libpcre2posix. Note that this
just provides a POSIX calling interface to PCRE2; the regular expressions
themselves still follow Perl syntax and semantics. The POSIX API is restricted,
and does not give full access to all of PCRE2's facilities.
The header file for the POSIX-style functions is called pcre2posix.h. The
official POSIX name is regex.h, but I did not want to risk possible problems
with existing files of that name by distributing it that way. To use PCRE2 with
an existing program that uses the POSIX API, pcre2posix.h will have to be
renamed or pointed at by a link.
If you are using the POSIX interface to PCRE2 and there is already a POSIX
regex library installed on your system, as well as worrying about the regex.h
header file (as mentioned above), you must also take care when linking programs
to ensure that they link with PCRE2's libpcre2posix library. Otherwise they may
pick up the POSIX functions of the same name from the other library.
One way of avoiding this confusion is to compile PCRE2 with the addition of
-Dregcomp=PCRE2regcomp (and similarly for the other POSIX functions) to the
compiler flags (CFLAGS if you are using "configure" -- see below). This has the
effect of renaming the functions so that the names no longer clash. Of course,
you have to do the same thing for your applications, or write them using the
new names.
Documentation for PCRE2
-----------------------
If you install PCRE2 in the normal way on a Unix-like system, you will end up
with a set of man pages whose names all start with "pcre2". The one that is
just called "pcre2" lists all the others. In addition to these man pages, the
PCRE2 documentation is supplied in two other forms:
1. There are files called doc/pcre2.txt, doc/pcre2grep.txt, and
doc/pcre2test.txt in the source distribution. The first of these is a
concatenation of the text forms of all the section 3 man pages except the
listing of pcre2demo.c and those that summarize individual functions. The
other two are the text forms of the section 1 man pages for the pcre2grep
and pcre2test commands. These text forms are provided for ease of scanning
with text editors or similar tools. They are installed in
<prefix>/share/doc/pcre2, where <prefix> is the installation prefix
(defaulting to /usr/local).
2. A set of files containing all the documentation in HTML form, hyperlinked
in various ways, and rooted in a file called index.html, is distributed in
doc/html and installed in <prefix>/share/doc/pcre2/html.
Building PCRE2 on non-Unix-like systems
---------------------------------------
For a non-Unix-like system, please read the comments in the file
NON-AUTOTOOLS-BUILD, though if your system supports the use of "configure" and
"make" you may be able to build PCRE2 using autotools in the same way as for
many Unix-like systems.
PCRE2 can also be configured using CMake, which can be run in various ways
(command line, GUI, etc). This creates Makefiles, solution files, etc. The file
NON-AUTOTOOLS-BUILD has information about CMake.
PCRE2 has been compiled on many different operating systems. It should be
straightforward to build PCRE2 on any system that has a Standard C compiler and
library, because it uses only Standard C functions.
Building PCRE2 without using autotools
--------------------------------------
The use of autotools (in particular, libtool) is problematic in some
environments, even some that are Unix or Unix-like. See the NON-AUTOTOOLS-BUILD
file for ways of building PCRE2 without using autotools.
Building PCRE2 using autotools
------------------------------
The following instructions assume the use of the widely used "configure; make;
make install" (autotools) process.
To build PCRE2 on system that supports autotools, first run the "configure"
command from the PCRE2 distribution directory, with your current directory set
to the directory where you want the files to be created. This command is a
standard GNU "autoconf" configuration script, for which generic instructions
are supplied in the file INSTALL.
Most commonly, people build PCRE2 within its own distribution directory, and in
this case, on many systems, just running "./configure" is sufficient. However,
the usual methods of changing standard defaults are available. For example:
CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local
This command specifies that the C compiler should be run with the flags '-O2
-Wall' instead of the default, and that "make install" should install PCRE2
under /opt/local instead of the default /usr/local.
If you want to build in a different directory, just run "configure" with that
directory as current. For example, suppose you have unpacked the PCRE2 source
into /source/pcre2/pcre2-xxx, but you want to build it in
/build/pcre2/pcre2-xxx:
cd /build/pcre2/pcre2-xxx
/source/pcre2/pcre2-xxx/configure
PCRE2 is written in C and is normally compiled as a C library. However, it is
possible to build it as a C++ library, though the provided building apparatus
does not have any features to support this.
There are some optional features that can be included or omitted from the PCRE2
library. They are also documented in the pcre2build man page.
. By default, both shared and static libraries are built. You can change this
by adding one of these options to the "configure" command:
--disable-shared
--disable-static
(See also "Shared libraries on Unix-like systems" below.)
. By default, only the 8-bit library is built. If you add --enable-pcre2-16 to
the "configure" command, the 16-bit library is also built. If you add
--enable-pcre2-32 to the "configure" command, the 32-bit library is also
built. If you want only the 16-bit or 32-bit library, use --disable-pcre2-8
to disable building the 8-bit library.
. If you want to include support for just-in-time compiling, which can give
large performance improvements on certain platforms, add --enable-jit to the
"configure" command. This support is available only for certain hardware
architectures. If you try to enable it on an unsupported architecture, there
will be a compile time error.
. When JIT support is enabled, pcre2grep automatically makes use of it, unless
you add --disable-pcre2grep-jit to the "configure" command.
. If you do not want to make use of the support for UTF-8 Unicode character
strings in the 8-bit library, UTF-16 Unicode character strings in the 16-bit
library, or UTF-32 Unicode character strings in the 32-bit library, you can
add --disable-unicode to the "configure" command. This reduces the size of
the libraries. It is not possible to configure one library with Unicode
support, and another without, in the same configuration.
When Unicode support is available, the use of a UTF encoding still has to be
enabled by setting the PCRE2_UTF option at run time or starting a pattern
with (*UTF). When PCRE2 is compiled with Unicode support, its input can only
either be ASCII or UTF-8/16/32, even when running on EBCDIC platforms. It is
not possible to use both --enable-unicode and --enable-ebcdic at the same
time.
As well as supporting UTF strings, Unicode support includes support for the
\P, \p, and \X sequences that recognize Unicode character properties.
However, only the basic two-letter properties such as Lu are supported.
Escape sequences such as \d and \w in patterns do not by default make use of
Unicode properties, but can be made to do so by setting the PCRE2_UCP option
or starting a pattern with (*UCP).
. You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any
of the preceding, or any of the Unicode newline sequences, as indicating the
end of a line. Whatever you specify at build time is the default; the caller
of PCRE2 can change the selection at run time. The default newline indicator
is a single LF character (the Unix standard). You can specify the default
newline indicator by adding --enable-newline-is-cr, --enable-newline-is-lf,
--enable-newline-is-crlf, --enable-newline-is-anycrlf, or
--enable-newline-is-any to the "configure" command, respectively.
If you specify --enable-newline-is-cr or --enable-newline-is-crlf, some of
the standard tests will fail, because the lines in the test files end with
LF. Even if the files are edited to change the line endings, there are likely
to be some failures. With --enable-newline-is-anycrlf or
--enable-newline-is-any, many tests should succeed, but there may be some
failures.
. By default, the sequence \R in a pattern matches any Unicode line ending
sequence. This is independent of the option specifying what PCRE2 considers
to be the end of a line (see above). However, the caller of PCRE2 can
restrict \R to match only CR, LF, or CRLF. You can make this the default by
adding --enable-bsr-anycrlf to the "configure" command (bsr = "backslash R").
. PCRE2 has a counter that limits the depth of nesting of parentheses in a
pattern. This limits the amount of system stack that a pattern uses when it
is compiled. The default is 250, but you can change it by setting, for
example,
--with-parens-nest-limit=500
. PCRE2 has a counter that can be set to limit the amount of resources it uses
when matching a pattern. If the limit is exceeded during a match, the match
fails. The default is ten million. You can change the default by setting, for
example,
--with-match-limit=500000
on the "configure" command. This is just the default; individual calls to
pcre2_match() can supply their own value. There is more discussion on the
pcre2api man page.
. There is a separate counter that limits the depth of recursive function calls
during a matching process. This also has a default of ten million, which is
essentially "unlimited". You can change the default by setting, for example,
--with-match-limit-recursion=500000
Recursive function calls use up the runtime stack; running out of stack can
cause programs to crash in strange ways. There is a discussion about stack
sizes in the pcre2stack man page.
. In the 8-bit library, the default maximum compiled pattern size is around
64K. You can increase this by adding --with-link-size=3 to the "configure"
command. PCRE2 then uses three bytes instead of two for offsets to different
parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
the same as --with-link-size=4, which (in both libraries) uses four-byte
offsets. Increasing the internal link size reduces performance in the 8-bit
and 16-bit libraries. In the 32-bit library, the link size setting is
ignored, as 4-byte offsets are always used.
. You can build PCRE2 so that its internal match() function that is called from
pcre2_match() does not call itself recursively. Instead, it uses memory
blocks obtained from the heap to save data that would otherwise be saved on
the stack. To build PCRE2 like this, use
--disable-stack-for-recursion
on the "configure" command. PCRE2 runs more slowly in this mode, but it may
be necessary in environments with limited stack sizes. This applies only to
the normal execution of the pcre2_match() function; if JIT support is being
successfully used, it is not relevant. Equally, it does not apply to
pcre2_dfa_match(), which does not use deeply nested recursion. There is a
discussion about stack sizes in the pcre2stack man page.
. For speed, PCRE2 uses four tables for manipulating and identifying characters
whose code point values are less than 256. By default, it uses a set of
tables for ASCII encoding that is part of the distribution. If you specify
--enable-rebuild-chartables
a program called dftables is compiled and run in the default C locale when
you obey "make". It builds a source file called pcre2_chartables.c. If you do
not specify this option, pcre2_chartables.c is created as a copy of
pcre2_chartables.c.dist. See "Character tables" below for further
information.
. It is possible to compile PCRE2 for use on systems that use EBCDIC as their
character code (as opposed to ASCII/Unicode) by specifying
--enable-ebcdic --disable-unicode
This automatically implies --enable-rebuild-chartables (see above). However,
when PCRE2 is built this way, it always operates in EBCDIC. It cannot support
both EBCDIC and UTF-8/16/32. There is a second option, --enable-ebcdic-nl25,
which specifies that the code value for the EBCDIC NL character is 0x25
instead of the default 0x15.
. If you specify --enable-debug, additional debugging code is included in the
build. This option is intended for use by the PCRE2 maintainers.
. In environments where valgrind is installed, if you specify
--enable-valgrind
PCRE2 will use valgrind annotations to mark certain memory regions as
unaddressable. This allows it to detect invalid memory accesses, and is
mostly useful for debugging PCRE2 itself.
. In environments where the gcc compiler is used and lcov version 1.6 or above
is installed, if you specify
--enable-coverage
the build process implements a code coverage report for the test suite. The
report is generated by running "make coverage". If ccache is installed on
your system, it must be disabled when building PCRE2 for coverage reporting.
You can do this by setting the environment variable CCACHE_DISABLE=1 before
running "make" to build PCRE2. There is more information about coverage
reporting in the "pcre2build" documentation.
. The pcre2grep program currently supports only 8-bit data files, and so
requires the 8-bit PCRE2 library. It is possible to compile pcre2grep to use
libz and/or libbz2, in order to read .gz and .bz2 files (respectively), by
specifying one or both of
--enable-pcre2grep-libz
--enable-pcre2grep-libbz2
Of course, the relevant libraries must be installed on your system.
. The default size (in bytes) of the internal buffer used by pcre2grep can be
set by, for example:
--with-pcre2grep-bufsize=51200
The value must be a plain integer. The default is 20480.
. It is possible to compile pcre2test so that it links with the libreadline
or libedit libraries, by specifying, respectively,
--enable-pcre2test-libreadline or --enable-pcre2test-libedit
If this is done, when pcre2test's input is from a terminal, it reads it using
the readline() function. This provides line-editing and history facilities.
Note that libreadline is GPL-licenced, so if you distribute a binary of
pcre2test linked in this way, there may be licensing issues. These can be
avoided by linking with libedit (which has a BSD licence) instead.
Enabling libreadline causes the -lreadline option to be added to the
pcre2test build. In many operating environments with a sytem-installed
readline library this is sufficient. However, in some environments (e.g. if
an unmodified distribution version of readline is in use), it may be
necessary to specify something like LIBS="-lncurses" as well. This is
because, to quote the readline INSTALL, "Readline uses the termcap functions,
but does not link with the termcap or curses library itself, allowing
applications which link with readline the to choose an appropriate library."
If you get error messages about missing functions tgetstr, tgetent, tputs,
tgetflag, or tgoto, this is the problem, and linking with the ncurses library
should fix it.
The "configure" script builds the following files for the basic C library:
. Makefile the makefile that builds the library
. src/config.h build-time configuration options for the library
. src/pcre2.h the public PCRE2 header file
. pcre2-config script that shows the building settings such as CFLAGS
that were set for "configure"
. libpcre2-8.pc )
. libpcre2-16.pc ) data for the pkg-config command
. libpcre2-32.pc )
. libpcre2-posix.pc )
. libtool script that builds shared and/or static libraries
Versions of config.h and pcre2.h are distributed in the src directory of PCRE2
tarballs under the names config.h.generic and pcre2.h.generic. These are
provided for those who have to build PCRE2 without using "configure" or CMake.
If you use "configure" or CMake, the .generic versions are not used.
The "configure" script also creates config.status, which is an executable
script that can be run to recreate the configuration, and config.log, which
contains compiler output from tests that "configure" runs.
Once "configure" has run, you can run "make". This builds whichever of the
libraries libpcre2-8, libpcre2-16 and libpcre2-32 are configured, and a test
program called pcre2test. If you enabled JIT support with --enable-jit, another
test program called pcre2_jit_test is built as well. If the 8-bit library is
built, libpcre2-posix and the pcre2grep command are also built. Running
"make" with the -j option may speed up compilation on multiprocessor systems.
The command "make check" runs all the appropriate tests. Details of the PCRE2
tests are given below in a separate section of this document. The -j option of
"make" can also be used when running the tests.
You can use "make install" to install PCRE2 into live directories on your
system. The following are installed (file names are all relative to the
<prefix> that is set when "configure" is run):
Commands (bin):
pcre2test
pcre2grep (if 8-bit support is enabled)
pcre2-config
Libraries (lib):
libpcre2-8 (if 8-bit support is enabled)
libpcre2-16 (if 16-bit support is enabled)
libpcre2-32 (if 32-bit support is enabled)
libpcre2-posix (if 8-bit support is enabled)
Configuration information (lib/pkgconfig):
libpcre2-8.pc
libpcre2-16.pc
libpcre2-32.pc
libpcre2-posix.pc
Header files (include):
pcre2.h
pcre2posix.h
Man pages (share/man/man{1,3}):
pcre2grep.1
pcre2test.1
pcre2-config.1
pcre2.3
pcre2*.3 (lots more pages, all starting "pcre2")
HTML documentation (share/doc/pcre2/html):
index.html
*.html (lots more pages, hyperlinked from index.html)
Text file documentation (share/doc/pcre2):
AUTHORS
COPYING
ChangeLog
LICENCE
NEWS
README
pcre2.txt (a concatenation of the man(3) pages)
pcre2test.txt the pcre2test man page
pcre2grep.txt the pcre2grep man page
pcre2-config.txt the pcre2-config man page
If you want to remove PCRE2 from your system, you can run "make uninstall".
This removes all the files that "make install" installed. However, it does not
remove any directories, because these are often shared with other programs.
Retrieving configuration information
------------------------------------
Running "make install" installs the command pcre2-config, which can be used to
recall information about the PCRE2 configuration and installation. For example:
pcre2-config --version
prints the version number, and
pcre2-config --libs8
outputs information about where the 8-bit library is installed. This command
can be included in makefiles for programs that use PCRE2, saving the programmer
from having to remember too many details. Run pcre2-config with no arguments to
obtain a list of possible arguments.
The pkg-config command is another system for saving and retrieving information
about installed libraries. Instead of separate commands for each library, a
single command is used. For example:
pkg-config --libs libpcre2-16
The data is held in *.pc files that are installed in a directory called
<prefix>/lib/pkgconfig.
Shared libraries
----------------
The default distribution builds PCRE2 as shared libraries and static libraries,
as long as the operating system supports shared libraries. Shared library
support relies on the "libtool" script which is built as part of the
"configure" process.
The libtool script is used to compile and link both shared and static
libraries. They are placed in a subdirectory called .libs when they are newly
built. The programs pcre2test and pcre2grep are built to use these uninstalled
libraries (by means of wrapper scripts in the case of shared libraries). When
you use "make install" to install shared libraries, pcre2grep and pcre2test are
automatically re-built to use the newly installed shared libraries before being
installed themselves. However, the versions left in the build directory still
use the uninstalled libraries.
To build PCRE2 using static libraries only you must use --disable-shared when
configuring it. For example:
./configure --prefix=/usr/gnu --disable-shared
Then run "make" in the usual way. Similarly, you can use --disable-static to
build only shared libraries.
Cross-compiling using autotools
-------------------------------
You can specify CC and CFLAGS in the normal way to the "configure" command, in
order to cross-compile PCRE2 for some other host. However, you should NOT
specify --enable-rebuild-chartables, because if you do, the dftables.c source
file is compiled and run on the local host, in order to generate the inbuilt
character tables (the pcre2_chartables.c file). This will probably not work,
because dftables.c needs to be compiled with the local compiler, not the cross
compiler.
When --enable-rebuild-chartables is not specified, pcre2_chartables.c is
created by making a copy of pcre2_chartables.c.dist, which is a default set of
tables that assumes ASCII code. Cross-compiling with the default tables should
not be a problem.
If you need to modify the character tables when cross-compiling, you should
move pcre2_chartables.c.dist out of the way, then compile dftables.c by hand
and run it on the local host to make a new version of pcre2_chartables.c.dist.
Then when you cross-compile PCRE2 this new version of the tables will be used.
Making new tarballs
-------------------
The command "make dist" creates three PCRE2 tarballs, in tar.gz, tar.bz2, and
zip formats. The command "make distcheck" does the same, but then does a trial
build of the new distribution to ensure that it works.
If you have modified any of the man page sources in the doc directory, you
should first run the PrepareRelease script before making a distribution. This
script creates the .txt and HTML forms of the documentation from the man pages.
Testing PCRE2
------------
To test the basic PCRE2 library on a Unix-like system, run the RunTest script.
There is another script called RunGrepTest that tests the pcre2grep command.
When JIT support is enabled, a third test program called pcre2_jit_test is
built. Both the scripts and all the program tests are run if you obey "make
check". For other environments, see the instructions in NON-AUTOTOOLS-BUILD.
The RunTest script runs the pcre2test test program (which is documented in its
own man page) on each of the relevant testinput files in the testdata
directory, and compares the output with the contents of the corresponding
testoutput files. RunTest uses a file called testtry to hold the main output
from pcre2test. Other files whose names begin with "test" are used as working
files in some tests.
Some tests are relevant only when certain build-time options were selected. For
example, the tests for UTF-8/16/32 features are run only when Unicode support
is available. RunTest outputs a comment when it skips a test.
Many (but not all) of the tests that are not skipped are run twice if JIT
support is available. On the second run, JIT compilation is forced. This
testing can be suppressed by putting "nojit" on the RunTest command line.
The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
libraries that are enabled. If you want to run just one set of tests, call
RunTest with either the -8, -16 or -32 option.
If valgrind is installed, you can run the tests under it by putting "valgrind"
on the RunTest command line. To run pcre2test on just one or more specific test
files, give their numbers as arguments to RunTest, for example:
RunTest 2 7 11
You can also specify ranges of tests such as 3-6 or 3- (meaning 3 to the
end), or a number preceded by ~ to exclude a test. For example:
Runtest 3-15 ~10
This runs tests 3 to 15, excluding test 10, and just ~13 runs all the tests
except test 13. Whatever order the arguments are in, the tests are always run
in numerical order.
You can also call RunTest with the single argument "list" to cause it to output
a list of tests.
The test sequence starts with "test 0", which is a special test that has no
input file, and whose output is not checked. This is because it will be
different on different hardware and with different configurations. The test
exists in order to exercise some of pcre2test's code that would not otherwise
be run.
Tests 1 and 2 can always be run, as they expect only plain text strings (not
UTF) and make no use of Unicode properties. The first test file can be fed
directly into the perltest.sh script to check that Perl gives the same results.
The only difference you should see is in the first few lines, where the Perl
version is given instead of the PCRE2 version. The second set of tests check
auxiliary functions, error detection, and run-time flags that are specific to
PCRE2. It also uses the debugging flags to check some of the internals of
pcre2_compile().
If you build PCRE2 with a locale setting that is not the standard C locale, the
character tables may be different (see next paragraph). In some cases, this may
cause failures in the second set of tests. For example, in a locale where the
isprint() function yields TRUE for characters in the range 128-255, the use of
[:isascii:] inside a character class defines a different set of characters, and
this shows up in this test as a difference in the compiled code, which is being
listed for checking. For example, where the comparison test output contains
[\x00-\x7f] the test might contain [\x00-\xff], and similarly in some other
cases. This is not a bug in PCRE2.
Test 3 checks pcre2_maketables(), the facility for building a set of character
tables for a specific locale and using them instead of the default tables. The
script uses the "locale" command to check for the availability of the "fr_FR",
"french", or "fr" locale, and uses the first one that it finds. If the "locale"
command fails, or if its output doesn't include "fr_FR", "french", or "fr" in
the list of available locales, the third test cannot be run, and a comment is
output to say why. If running this test produces an error like this:
** Failed to set locale "fr_FR"
it means that the given locale is not available on your system, despite being
listed by "locale". This does not mean that PCRE2 is broken. There are three
alternative output files for the third test, because three different versions
of the French locale have been encountered. The test passes if its output
matches any one of them.
Tests 4 and 5 check UTF and Unicode property support, test 4 being compatible
with the perltest.sh script, and test 5 checking PCRE2-specific things.
Tests 6 and 7 check the pcre2_dfa_match() alternative matching function, in
non-UTF mode and UTF-mode with Unicode property support, respectively.
Test 8 checks some internal offsets and code size features; it is run only when
the default "link size" of 2 is set (in other cases the sizes change) and when
Unicode support is enabled.
Tests 9 and 10 are run only in 8-bit mode, and tests 11 and 12 are run only in
16-bit and 32-bit modes. These are tests that generate different output in
8-bit mode. Each pair are for general cases and Unicode support, respectively.
Test 13 checks the handling of non-UTF characters greater than 255 by
pcre2_dfa_match() in 16-bit and 32-bit modes.
Test 14 contains a number of tests that must not be run with JIT. They check,
among other non-JIT things, the match-limiting features of the intepretive
matcher.
Test 15 is run only when JIT support is not available. It checks that an
attempt to use JIT has the expected behaviour.
Test 16 is run only when JIT support is available. It checks JIT complete and
partial modes, match-limiting under JIT, and other JIT-specific features.
Tests 17 and 18 are run only in 8-bit mode. They check the POSIX interface to
the 8-bit library, without and with Unicode support, respectively.
Test 19 checks the serialization functions by writing a set of compiled
patterns to a file, and then reloading and checking them.
Character tables
----------------
For speed, PCRE2 uses four tables for manipulating and identifying characters
whose code point values are less than 256. By default, a set of tables that is
built into the library is used. The pcre2_maketables() function can be called
by an application to create a new set of tables in the current locale. This are
passed to PCRE2 by calling pcre2_set_character_tables() to put a pointer into a
compile context.
The source file called pcre2_chartables.c contains the default set of tables.
By default, this is created as a copy of pcre2_chartables.c.dist, which
contains tables for ASCII coding. However, if --enable-rebuild-chartables is
specified for ./configure, a different version of pcre2_chartables.c is built
by the program dftables (compiled from dftables.c), which uses the ANSI C
character handling functions such as isalnum(), isalpha(), isupper(),
islower(), etc. to build the table sources. This means that the default C
locale which is set for your system will control the contents of these default
tables. You can change the default tables by editing pcre2_chartables.c and
then re-building PCRE2. If you do this, you should take care to ensure that the
file does not get automatically re-generated. The best way to do this is to
move pcre2_chartables.c.dist out of the way and replace it with your customized
tables.
When the dftables program is run as a result of --enable-rebuild-chartables,
it uses the default C locale that is set on your system. It does not pay
attention to the LC_xxx environment variables. In other words, it uses the
system's default locale rather than whatever the compiling user happens to have
set. If you really do want to build a source set of character tables in a
locale that is specified by the LC_xxx variables, you can run the dftables
program by hand with the -L option. For example:
./dftables -L pcre2_chartables.c.special
The first two 256-byte tables provide lower casing and case flipping functions,
respectively. The next table consists of three 32-byte bit maps which identify
digits, "word" characters, and white space, respectively. These are used when
building 32-byte bit maps that represent character classes for code points less
than 256. The final 256-byte table has bits indicating various character types,
as follows:
1 white space character
2 letter
4 decimal digit
8 hexadecimal digit
16 alphanumeric or '_'
128 regular expression metacharacter or binary zero
You should not alter the set of characters that contain the 128 bit, as that
will cause PCRE2 to malfunction.
File manifest
-------------
The distribution should contain the files listed below.
(A) Source files for the PCRE2 library functions and their headers are found in
the src directory:
src/dftables.c auxiliary program for building pcre2_chartables.c
when --enable-rebuild-chartables is specified
src/pcre2_chartables.c.dist a default set of character tables that assume
ASCII coding; unless --enable-rebuild-chartables is
specified, used by copying to pcre2_chartables.c
src/pcre2posix.c )
src/pcre2_auto_possess.c )
src/pcre2_compile.c )
src/pcre2_config.c )
src/pcre2_context.c )
src/pcre2_dfa_match.c )
src/pcre2_error.c )
src/pcre2_jit_compile.c )
src/pcre2_jit_match.c ) sources for the functions in the library,
src/pcre2_jit_misc.c ) and some internal functions that they use
src/pcre2_maketables.c )
src/pcre2_match.c )
src/pcre2_match_data.c )
src/pcre2_newline.c )
src/pcre2_ord2utf.c )
src/pcre2_pattern_info.c )
src/pcre2_serialize.c )
src/pcre2_string_utils.c )
src/pcre2_study.c )
src/pcre2_substitute.c )
src/pcre2_substring.c )
src/pcre2_tables.c )
src/pcre2_ucd.c )
src/pcre2_valid_utf.c )
src/pcre2_xclass.c )
src/pcre2_printint.c debugging function that is used by pcre2test,
src/config.h.in template for config.h, when built by "configure"
src/pcre2.h.in template for pcre2.h when built by "configure"
src/pcre2posix.h header for the external POSIX wrapper API
src/pcre2_internal.h header for internal use
src/pcre2_intmodedep.h a mode-specific internal header
src/pcre2_ucp.h header for Unicode property handling
sljit/* source files for the JIT compiler
(B) Source files for programs that use PCRE2:
src/pcre2demo.c simple demonstration of coding calls to PCRE2
src/pcre2grep.c source of a grep utility that uses PCRE2
src/pcre2test.c comprehensive test program
src/pcre2_printint.c part of pcre2test
src/pcre2_jit_test.c JIT test program
(C) Auxiliary files:
132html script to turn "man" pages into HTML
AUTHORS information about the author of PCRE2
ChangeLog log of changes to the code
CleanTxt script to clean nroff output for txt man pages
Detrail script to remove trailing spaces
HACKING some notes about the internals of PCRE2
INSTALL generic installation instructions
LICENCE conditions for the use of PCRE2
COPYING the same, using GNU's standard name
Makefile.in ) template for Unix Makefile, which is built by
) "configure"
Makefile.am ) the automake input that was used to create
) Makefile.in
NEWS important changes in this release
NON-AUTOTOOLS-BUILD notes on building PCRE2 without using autotools
PrepareRelease script to make preparations for "make dist"
README this file
RunTest a Unix shell script for running tests
RunGrepTest a Unix shell script for pcre2grep tests
aclocal.m4 m4 macros (generated by "aclocal")
config.guess ) files used by libtool,
config.sub ) used only when building a shared library
configure a configuring shell script (built by autoconf)
configure.ac ) the autoconf input that was used to build
) "configure" and config.h
depcomp ) script to find program dependencies, generated by
) automake
doc/*.3 man page sources for PCRE2
doc/*.1 man page sources for pcre2grep and pcre2test
doc/index.html.src the base HTML page
doc/html/* HTML documentation
doc/pcre2.txt plain text version of the man pages
doc/pcre2test.txt plain text documentation of test program
install-sh a shell script for installing files
libpcre2-8.pc.in template for libpcre2-8.pc for pkg-config
libpcre2-16.pc.in template for libpcre2-16.pc for pkg-config
libpcre2-32.pc.in template for libpcre2-32.pc for pkg-config
libpcre2posix.pc.in template for libpcre2posix.pc for pkg-config
ltmain.sh file used to build a libtool script
missing ) common stub for a few missing GNU programs while
) installing, generated by automake
mkinstalldirs script for making install directories
perltest.sh Script for running a Perl test program
pcre2-config.in source of script which retains PCRE2 information
testdata/testinput* test data for main library tests
testdata/testoutput* expected test results
testdata/grep* input and output for pcre2grep tests
testdata/* other supporting test files
(D) Auxiliary files for cmake support
cmake/COPYING-CMAKE-SCRIPTS
cmake/FindPackageHandleStandardArgs.cmake
cmake/FindEditline.cmake
cmake/FindReadline.cmake
CMakeLists.txt
config-cmake.h.in
(E) Auxiliary files for building PCRE2 "by hand"
pcre2.h.generic ) a version of the public PCRE2 header file
) for use in non-"configure" environments
config.h.generic ) a version of config.h for use in non-"configure"
) environments
Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
Last updated: 24 April 2015

612
pcre2/RunGrepTest Executable file
View File

@ -0,0 +1,612 @@
#! /bin/sh
# Run pcre2grep tests. The assumption is that the PCRE2 tests check the library
# itself. What we are checking here is the file handling and options that are
# supported by pcre2grep. This script must be run in the build directory.
# Set the C locale, so that sort(1) behaves predictably.
LC_ALL=C
export LC_ALL
# Remove any non-default colouring and aliases that the caller may have set.
unset PCRE2GREP_COLOUR PCRE2GREP_COLOR
unset cp ls mv rm
# Remember the current (build) directory, set the program to be tested, and
# valgrind settings when requested.
builddir=`pwd`
pcre2grep=$builddir/pcre2grep
if [ ! -x $pcre2grep ] ; then
echo "** $pcre2grep does not exist or is not execuatble."
exit 1
fi
valgrind=
while [ $# -gt 0 ] ; do
case $1 in
valgrind) valgrind="valgrind -q --leak-check=no --smc-check=all";;
*) echo "RunGrepTest: Unknown argument $1"; exit 1;;
esac
shift
done
echo " "
pcre2grep_version=`$pcre2grep -V`
if [ "$valgrind" = "" ] ; then
echo "Testing $pcre2grep_version"
else
echo "Testing $pcre2grep_version using valgrind"
fi
# Set up a suitable "diff" command for comparison. Some systems have a diff
# that lacks a -u option. Try to deal with this; better do the test for the -b
# option as well.
cf="diff"
diff -b /dev/null /dev/null 2>/dev/null && cf="diff -b"
diff -u /dev/null /dev/null 2>/dev/null && cf="diff -u"
diff -ub /dev/null /dev/null 2>/dev/null && cf="diff -ub"
# If this test is being run from "make check", $srcdir will be set. If not, set
# it to the current or parent directory, whichever one contains the test data.
# Subsequently, we run most of the pcre2grep tests in the source directory so
# that the file names in the output are always the same.
if [ -z "$srcdir" -o ! -d "$srcdir/testdata" ] ; then
if [ -d "./testdata" ] ; then
srcdir=.
elif [ -d "../testdata" ] ; then
srcdir=..
else
echo "Cannot find the testdata directory"
exit 1
fi
fi
# Check for the availability of UTF-8 support
./pcre2test -C unicode >/dev/null
utf8=$?
# ------ Function to run and check a special pcre2grep arguments test -------
checkspecial()
{
$valgrind ./pcre2grep $1 >>testtrygrep 2>&1
if [ $? -ne $2 ] ; then
echo "** pcre2grep $1 failed - check testtrygrep"
exit 1
fi
}
# ------ Normal tests ------
echo "Testing pcre2grep main features"
echo "---------------------------- Test 1 ------------------------------" >testtrygrep
(cd $srcdir; $valgrind $pcre2grep PATTERN ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 2 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep '^PATTERN' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 3 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -in PATTERN ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 4 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -ic PATTERN ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 5 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -in PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 6 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -inh PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 7 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -il PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 8 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -l PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 9 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -q PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 10 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -q NEVER-PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 11 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -vn pattern ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 12 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -ix pattern ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 13 -----------------------------" >>testtrygrep
echo seventeen >testtemp1grep
(cd $srcdir; $valgrind $pcre2grep -f./testdata/greplist -f $builddir/testtemp1grep ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 14 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -w pat ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 15 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep 'abc^*' ./testdata/grepinput) 2>>testtrygrep >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 16 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep abc ./testdata/grepinput ./testdata/nonexistfile) 2>>testtrygrep >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 17 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -M 'the\noutput' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 18 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -Mn '(the\noutput|dog\.\n--)' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 19 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -Mix 'Pattern' ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 20 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -Mixn 'complete pair\nof lines' ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 21 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -nA3 'four' ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 22 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -nB3 'four' ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 23 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -C3 'four' ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 24 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -A9 'four' ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 25 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -nB9 'four' ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 26 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -A9 -B9 'four' ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 27 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -A10 'four' ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 28 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -nB10 'four' ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 29 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -C12 -B10 'four' ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 30 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -inB3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 31 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -inA3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 32 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -L 'fox' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 33 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep 'fox' ./testdata/grepnonexist) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 34 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -s 'fox' ./testdata/grepnonexist) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 35 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -L -r --include=grepinputx --include grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 36 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -L -r --include=grepinput --exclude 'grepinput$' --exclude=grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 37 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep '^(a+)*\d' ./testdata/grepinput) >>testtrygrep 2>teststderrgrep
echo "RC=$?" >>testtrygrep
echo "======== STDERR ========" >>testtrygrep
cat teststderrgrep >>testtrygrep
echo "---------------------------- Test 38 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep '>\x00<' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 39 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -A1 'before the binary zero' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 40 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -B1 'after the binary zero' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 41 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -B1 -o '\w+ the binary zero' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 42 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -B1 -onH '\w+ the binary zero' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 43 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -on 'before|zero|after' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 44 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -on -e before -ezero -e after ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 45 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -on -f ./testdata/greplist -e binary ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 46 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -eabc -e '(unclosed' ./testdata/grepinput) 2>>testtrygrep >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 47 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -Fx "AB.VE
elephant" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 48 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -F "AB.VE
elephant" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 49 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -F -e DATA -e "AB.VE
elephant" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 50 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep "^(abc|def|ghi|jkl)" ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 51 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -Mv "brown\sfox" ./testdata/grepinputv) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 52 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep --colour=always jumps ./testdata/grepinputv) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 53 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep --file-offsets 'before|zero|after' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 54 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep --line-offsets 'before|zero|after' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 55 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -f./testdata/greplist --color=always ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 56 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -c lazy ./testdata/grepinput*) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 57 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -c -l lazy ./testdata/grepinput*) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 58 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep --regex=PATTERN ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 59 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep --regexp=PATTERN ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 60 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep --regex PATTERN ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 61 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep --regexp PATTERN ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 62 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep --match-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 63 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep --recursion-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 64 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -o1 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 65 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -o2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 66 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -o3 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 67 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -o12 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 68 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep --only-matching=2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 69 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -vn --colour=always pattern ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 70 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 71 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -o "^01|^02|^03" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 72 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep --color=always "^01|^02|^03" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 73 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -o --colour=always "^01|^02|^03" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 74 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -o "^01|02|^03" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 75 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep --color=always "^01|02|^03" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 76 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -o --colour=always "^01|02|^03" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 77 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -o "^01|^02|03" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 78 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep --color=always "^01|^02|03" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 79 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -o --colour=always "^01|^02|03" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 80 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -o "\b01|\b02" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 81 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep --color=always "\\b01|\\b02" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 82 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -o --colour=always "\\b01|\\b02" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 83 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep --buffer-size=100 "^a" ./testdata/grepinput3) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 84 -----------------------------" >>testtrygrep
echo testdata/grepinput3 >testtemp1grep
(cd $srcdir; $valgrind $pcre2grep --file-list ./testdata/grepfilelist --file-list $builddir/testtemp1grep "fox|complete|t7") >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 85 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep --file-list=./testdata/grepfilelist "dolor" ./testdata/grepinput3) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 86 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 87 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep "cat" ./testdata/grepbinary) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 88 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -v "cat" ./testdata/grepbinary) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 89 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -I "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 90 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep --binary-files=without-match "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 91 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -a "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 92 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep --binary-files=text "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 93 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep --text "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 94 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -L -r --include=grepinputx --include grepinput8 'fox' ./testdata/grepinput* | sort) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 95 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep --file-list ./testdata/grepfilelist --exclude grepinputv "fox|complete") >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 96 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -L -r --include-dir=testdata --exclude '^(?!grepinput)' 'fox' ./test* | sort) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 97 -----------------------------" >>testtrygrep
echo "grepinput$" >testtemp1grep
echo "grepinput8" >>testtemp1grep
(cd $srcdir; $valgrind $pcre2grep -L -r --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 98 -----------------------------" >>testtrygrep
echo "grepinput$" >testtemp1grep
echo "grepinput8" >>testtemp1grep
(cd $srcdir; $valgrind $pcre2grep -L -r --exclude=grepinput3 --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 99 -----------------------------" >>testtrygrep
echo "grepinput$" >testtemp1grep
echo "grepinput8" >testtemp2grep
(cd $srcdir; $valgrind $pcre2grep -L -r --include grepinput --exclude-from $builddir/testtemp1grep --exclude-from=$builddir/testtemp2grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 100 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -Ho2 --only-matching=1 -o3 '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 101 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -o3 -Ho2 -o12 --only-matching=1 -o3 --colour=always --om-separator='|' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 102 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -n "^$" ./testdata/grepinput3) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 103 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep --only-matching "^$" ./testdata/grepinput3) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 104 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -n --only-matching "^$" ./testdata/grepinput3) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 105 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep --colour=always "ipsum|" ./testdata/grepinput3) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 106 -----------------------------" >>testtrygrep
(cd $srcdir; echo "a" | $valgrind $pcre2grep -M "|a" ) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 107 -----------------------------" >>testtrygrep
echo "a" >testtemp1grep
echo "aaaaa" >>testtemp1grep
(cd $srcdir; $valgrind $pcre2grep --line-offsets '(?<=\Ka)' $builddir/testtemp1grep) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
# Now compare the results.
$cf $srcdir/testdata/grepoutput testtrygrep
if [ $? != 0 ] ; then exit 1; fi
# These tests require UTF-8 support
if [ $utf8 -ne 0 ] ; then
echo "Testing pcre2grep UTF-8 features"
echo "---------------------------- Test U1 ------------------------------" >testtrygrep
(cd $srcdir; $valgrind $pcre2grep -n -u --newline=any "^X" ./testdata/grepinput8) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test U2 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep -n -u -C 3 --newline=any "Match" ./testdata/grepinput8) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test U3 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep --line-offsets -u --newline=any '(?<=\K\x{17f})' ./testdata/grepinput8) >>testtrygrep
echo "RC=$?" >>testtrygrep
$cf $srcdir/testdata/grepoutput8 testtrygrep
if [ $? != 0 ] ; then exit 1; fi
else
echo "Skipping pcre2grep UTF-8 tests: no UTF-8 support in PCRE2 library"
fi
# We go to some contortions to try to ensure that the tests for the various
# newline settings will work in environments where the normal newline sequence
# is not \n. Do not use exported files, whose line endings might be changed.
# Instead, create an input file using printf so that its contents are exactly
# what we want. Note the messy fudge to get printf to write a string that
# starts with a hyphen. These tests are run in the build directory.
echo "Testing pcre2grep newline settings"
printf "abc\rdef\r\nghi\njkl" >testNinputgrep
printf "%c--------------------------- Test N1 ------------------------------\r\n" - >testtrygrep
$valgrind $pcre2grep -n -N CR "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
printf "%c--------------------------- Test N2 ------------------------------\r\n" - >>testtrygrep
$valgrind $pcre2grep -n --newline=crlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
printf "%c--------------------------- Test N3 ------------------------------\r\n" - >>testtrygrep
pattern=`printf 'def\rjkl'`
$valgrind $pcre2grep -n --newline=cr -F "$pattern" testNinputgrep >>testtrygrep
printf "%c--------------------------- Test N4 ------------------------------\r\n" - >>testtrygrep
$valgrind $pcre2grep -n --newline=crlf -F -f $srcdir/testdata/greppatN4 testNinputgrep >>testtrygrep
printf "%c--------------------------- Test N5 ------------------------------\r\n" - >>testtrygrep
$valgrind $pcre2grep -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
printf "%c--------------------------- Test N6 ------------------------------\r\n" - >>testtrygrep
$valgrind $pcre2grep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
$cf $srcdir/testdata/grepoutputN testtrygrep
if [ $? != 0 ] ; then exit 1; fi
# Finally, some tests to exercise code that is not tested above, just to be
# sure that it runs OK. Doing this improves the coverage statistics. The output
# is not checked.
echo "Testing miscellaneous pcre2grep arguments (unchecked)"
echo '' >testtrygrep
checkspecial '-xxxxx' 2
checkspecial '--help' 0
checkspecial '--line-buffered --colour=auto abc /dev/null' 1
# Clean up local working files
rm -f testNinputgrep teststderrgrep testtrygrep testtemp1grep testtemp2grep
exit 0
# End

749
pcre2/RunTest Executable file
View File

@ -0,0 +1,749 @@
#! /bin/sh
###############################################################################
# Run the PCRE2 tests using the pcre2test program. The appropriate tests are
# selected, depending on which build-time options were used.
#
# When JIT support is available, all appropriate tests are run with and without
# JIT, unless "-nojit" is given on the command line. There are also two tests
# for JIT-specific features, one to be run when JIT support is available
# (unless "-nojit" is specified), and one when it is not.
#
# Whichever of the 8-, 16- and 32-bit libraries exist are tested. It is also
# possible to select which to test by giving "-8", "-16" or "-32" on the
# command line.
#
# As well as "-nojit", "-8", "-16", and "-32", arguments for this script are
# individual test numbers, ranges of tests such as 3-6 or 3- (meaning 3 to the
# end), or a number preceded by ~ to exclude a test. For example, "3-15 ~10"
# runs tests 3 to 15, excluding test 10, and just "~10" runs all the tests
# except test 10. Whatever order the arguments are in, the tests are always run
# in numerical order.
#
# Inappropriate tests are automatically skipped (with a comment to say so). For
# example, if JIT support is not compiled, test 16 is skipped, whereas if JIT
# support is compiled, test 15 is skipped.
#
# Other arguments can be one of the words "-valgrind", "-valgrind-log", or
# "-sim" followed by an argument to run cross-compiled executables under a
# simulator, for example:
#
# RunTest 3 -sim "qemu-arm -s 8388608"
#
# For backwards compatibility, -nojit, -valgrind, -valgrind-log, and -sim may
# be given without the leading "-" character.
#
# There are two special cases where only one argument is allowed:
#
# If the first and only argument is "ebcdic", the script runs the special
# EBCDIC test that can be useful for checking certain EBCDIC features, even
# when run in an ASCII environment. PCRE2 must be built with EBCDIC support for
# this test to be run.
#
# If the script is obeyed as "RunTest list", a list of available tests is
# output, but none of them are run.
###############################################################################
# Define test titles in variables so that they can be output as a list. Some
# of them are modified (e.g. with -8 or -16) when used in the actual tests.
title0="Test 0: Unchecked pcre2test argument tests (to improve coverage)"
title1="Test 1: Main non-UTF, non-UCP functionality (compatible with Perl >= 5.10)"
title2="Test 2: API, errors, internals, and non-Perl stuff"
title3="Test 3: Locale-specific features"
title4A="Test 4: UTF"
title4B=" and Unicode property support (compatible with Perl >= 5.10)"
title5A="Test 5: API, internals, and non-Perl stuff for UTF"
title5B=" and UCP support"
title6="Test 6: DFA matching main non-UTF, non-UCP functionality"
title7A="Test 7: DFA matching with UTF"
title7B=" and Unicode property support"
title8="Test 8: Internal offsets and code size tests"
title9="Test 9: Specials for the basic 8-bit library"
title10="Test 10: Specials for the 8-bit library with UTF-8 and UCP support"
title11="Test 11: Specials for the basic 16-bit and 32-bit libraries"
title12="Test 12: Specials for the 16-bit and 32-bit libraries UTF and UCP support"
title13="Test 13: DFA specials for the basic 16-bit and 32-bit libraries"
title14="Test 14: Non-JIT limits and other non-JIT tests"
title15="Test 15: JIT-specific features when JIT is not available"
title16="Test 16: JIT-specific features when JIT is available"
title17="Test 17: Tests of the POSIX interface, excluding UTF/UCP"
title18="Test 18: Tests of the POSIX interface with UTF/UCP"
title19="Test 19: Serialization tests"
maxtest=18
if [ $# -eq 1 -a "$1" = "list" ]; then
echo $title0
echo $title1
echo $title2 "(not UTF or UCP)"
echo $title3
echo $title4A $title4B
echo $title5A $title5B
echo $title6
echo $title7A $title7B
echo $title8
echo $title9
echo $title10
echo $title11
echo $title12
echo $title13
echo $title14
echo $title15
echo $title16
echo $title17
echo $title18
echo $title19
exit 0
fi
# Set up a suitable "diff" command for comparison. Some systems
# have a diff that lacks a -u option. Try to deal with this.
cf="diff"
diff -u /dev/null /dev/null 2>/dev/null && cf="diff -u"
# Find the test data
if [ -n "$srcdir" -a -d "$srcdir" ] ; then
testdata="$srcdir/testdata"
elif [ -d "./testdata" ] ; then
testdata=./testdata
elif [ -d "../testdata" ] ; then
testdata=../testdata
else
echo "Cannot find the testdata directory"
exit 1
fi
# ------ Function to check results of a test -------
# This function is called with three parameters:
#
# $1 the value of $? after a call to pcre2test
# $2 the suffix of the output file to compare with
# $3 the $opt value (empty, -jit, or -dfa)
#
# Note: must define using name(), not "function name", for Solaris.
checkresult()
{
if [ $1 -ne 0 ] ; then
echo "** pcre2test failed - check testtry"
exit 1
fi
case "$3" in
-jit) with=" with JIT";;
-dfa) with=" with DFA";;
*) with="";;
esac
$cf $testdata/testoutput$2 testtry
if [ $? != 0 ] ; then
echo ""
echo "** Test $2 failed$with"
exit 1
fi
echo " OK$with"
}
# ------ Function to run and check a special pcre2test arguments test -------
checkspecial()
{
$valgrind ./pcre2test $1 >>testtry
if [ $? -ne 0 ] ; then
echo "** pcre2test $1 failed - check testtry"
exit 1
fi
}
# ------ Special EBCDIC Test -------
if [ $# -eq 1 -a "$1" = "ebcdic" ]; then
$valgrind ./pcre2test -C ebcdic >/dev/null
ebcdic=$?
if [ $ebcdic -ne 1 ] ; then
echo "Cannot run EBCDIC tests: EBCDIC support not compiled"
exit 1
fi
for opt in "" "-dfa"; do
./pcre2test -q $opt $testdata/testinputEBC >testtry
checkresult $? EBC "$opt"
done
exit 0
fi
# ------ Normal Tests ------
# Default values
arg8=
arg16=
arg32=
nojit=
sim=
skip=
valgrind=
# This is in case the caller has set aliases (as I do - PH)
unset cp ls mv rm
# Process options and select which tests to run; for those that are explicitly
# requested, check that the necessary optional facilities are available.
do0=no
do1=no
do2=no
do3=no
do4=no
do5=no
do6=no
do7=no
do8=no
do9=no
do10=no
do11=no
do12=no
do13=no
do14=no
do15=no
do16=no
do17=no
do18=no
do19=no
while [ $# -gt 0 ] ; do
case $1 in
0) do0=yes;;
1) do1=yes;;
2) do2=yes;;
3) do3=yes;;
4) do4=yes;;
5) do5=yes;;
6) do6=yes;;
7) do7=yes;;
8) do8=yes;;
9) do9=yes;;
10) do10=yes;;
11) do11=yes;;
12) do12=yes;;
13) do13=yes;;
14) do14=yes;;
15) do15=yes;;
16) do16=yes;;
17) do17=yes;;
18) do18=yes;;
19) do19=yes;;
-8) arg8=yes;;
-16) arg16=yes;;
-32) arg32=yes;;
nojit|-nojit) nojit=yes;;
sim|-sim) shift; sim=$1;;
valgrind|-valgrind) valgrind="valgrind --tool=memcheck -q --smc-check=all";;
valgrind-log|-valgrind-log) valgrind="valgrind --tool=memcheck --num-callers=30 --leak-check=no --error-limit=no --smc-check=all --log-file=report.%p ";;
~*)
if expr "$1" : '~[0-9][0-9]*$' >/dev/null; then
skip="$skip `expr "$1" : '~\([0-9]*\)*$'`"
else
echo "Unknown option or test selector '$1'"; exit 1
fi
;;
*-*)
if expr "$1" : '[0-9][0-9]*-[0-9]*$' >/dev/null; then
tf=`expr "$1" : '\([0-9]*\)'`
tt=`expr "$1" : '.*-\([0-9]*\)'`
if [ "$tt" = "" ] ; then tt=$maxtest; fi
if expr \( "$tt" ">" "$maxtest" \) >/dev/null; then
echo "Invalid test range '$1'"; exit 1
fi
while expr "$tf" "<=" "$tt" >/dev/null; do
eval do${tf}=yes
tf=`expr $tf + 1`
done
else
echo "Invalid test range '$1'"; exit 1
fi
;;
*) echo "Unknown option or test selector '$1'"; exit 1;;
esac
shift
done
# Find which optional facilities are available.
$sim ./pcre2test -C linksize >/dev/null
link_size=$?
if [ $link_size -lt 2 ] ; then
echo "RunTest: Failed to find internal link size"
exit 1
fi
if [ $link_size -gt 4 ] ; then
echo "RunTest: Failed to find internal link size"
exit 1
fi
# If it is possible to set the system stack size, arrange to set a value for
# test 2, which needs more than the even the Linux default when PCRE2 has been
# compiled with -fsanitize=address.
$sim ./pcre2test -S 1 /dev/null /dev/null
if [ $? -eq 0 ] ; then
test2stack="-S 16"
else
test2stack=""
fi
# All of 8-bit, 16-bit, and 32-bit character strings may be supported, but only
# one need be.
$sim ./pcre2test -C pcre2-8 >/dev/null
support8=$?
$sim ./pcre2test -C pcre2-16 >/dev/null
support16=$?
$sim ./pcre2test -C pcre2-32 >/dev/null
support32=$?
# Initialize all bitsizes skipped
test8=skip
test16=skip
test32=skip
# If no bitsize arguments, select all that are available
if [ "$arg8$arg16$arg32" = "" ] ; then
if [ $support8 -ne 0 ] ; then
test8=-8
fi
if [ $support16 -ne 0 ] ; then
test16=-16
fi
if [ $support32 -ne 0 ] ; then
test32=-32
fi
# Otherwise, select requested bit sizes
else
if [ "$arg8" = yes ] ; then
if [ $support8 -eq 0 ] ; then
echo "Cannot run 8-bit library tests: 8-bit library not compiled"
exit 1
fi
test8=-8
fi
if [ "$arg16" = yes ] ; then
if [ $support16 -eq 0 ] ; then
echo "Cannot run 16-bit library tests: 16-bit library not compiled"
exit 1
fi
test16=-16
fi
if [ "$arg32" = yes ] ; then
if [ $support32 -eq 0 ] ; then
echo "Cannot run 32-bit library tests: 32-bit library not compiled"
exit 1
fi
test32=-32
fi
fi
# UTF support is implied by Unicode support, and it always applies to all bit
# sizes if both are supported; we can't have UTF-8 support without UTF-16 or
# UTF-32 support.
$sim ./pcre2test -C unicode >/dev/null
utf=$?
jitopt=
$sim ./pcre2test -C jit >/dev/null
jit=$?
if [ $jit -ne 0 -a "$nojit" != "yes" ] ; then
jitopt=-jit
fi
# If no specific tests were requested, select all. Those that are not
# relevant will be automatically skipped.
if [ $do0 = no -a $do1 = no -a $do2 = no -a $do3 = no -a \
$do4 = no -a $do5 = no -a $do6 = no -a $do7 = no -a \
$do8 = no -a $do9 = no -a $do10 = no -a $do11 = no -a \
$do12 = no -a $do13 = no -a $do14 = no -a $do15 = no -a \
$do16 = no -a $do17 = no -a $do18 = no -a $do19 = no \
]; then
do0=yes
do1=yes
do2=yes
do3=yes
do4=yes
do5=yes
do6=yes
do7=yes
do8=yes
do9=yes
do10=yes
do11=yes
do12=yes
do13=yes
do14=yes
do15=yes
do16=yes
do17=yes
do18=yes
do19=yes
fi
# Handle any explicit skips at this stage, so that an argument list may consist
# only of explicit skips.
for i in $skip; do eval do$i=no; done
# Show which release and which test data
echo ""
echo PCRE2 C library tests using test data from $testdata
$sim ./pcre2test /dev/null
echo ""
for bmode in "$test8" "$test16" "$test32"; do
case "$bmode" in
skip) continue;;
-16) if [ "$test8$test32" != "skipskip" ] ; then echo ""; fi
bits=16; echo "---- Testing 16-bit library ----"; echo "";;
-32) if [ "$test8$test16" != "skipskip" ] ; then echo ""; fi
bits=32; echo "---- Testing 32-bit library ----"; echo "";;
-8) bits=8; echo "---- Testing 8-bit library ----"; echo "";;
esac
# Test 0 is a special test. Its output is not checked, because it will
# be different on different hardware and with different configurations.
# Running this test just exercises the code.
if [ $do0 = yes ] ; then
echo $title0
echo '/abc/jit,memory' >testSinput
echo ' abc' >>testSinput
echo '' >testtry
checkspecial '-C'
checkspecial '--help'
checkspecial '-S 1 -t 10 testSinput'
echo " OK"
fi
# Primary non-UTF test, compatible with JIT and all versions of Perl >= 5.8
if [ $do1 = yes ] ; then
echo $title1
for opt in "" $jitopt; do
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput1 testtry
checkresult $? 1 "$opt"
done
fi
# PCRE2 tests that are not Perl-compatible: API, errors, internals
if [ $do2 = yes ] ; then
echo $title2 "(excluding UTF-$bits)"
for opt in "" $jitopt; do
$sim $valgrind ./pcre2test -q $test2stack $bmode $opt $testdata/testinput2 testtry
if [ $? = 0 ] ; then
checkresult $? 2 "$opt"
else
echo " "
echo "** Test 2 requires a lot of stack. If it has crashed with a"
echo "** segmentation fault, it may be that you do not have enough"
echo "** stack available by default. Please see the 'pcre2stack' man"
echo "** page for a discussion of PCRE2's stack usage."
echo " "
exit 1
fi
done
fi
# Locale-specific tests, provided that either the "fr_FR", "fr_CA", "french"
# or "fr" locale is available. The first two are Unix-like standards; the
# last two are for Windows. Unfortunately, different versions of the French
# locale give different outputs for some items. This test passes if the
# output matches any one of the alternative output files.
if [ $do3 = yes ] ; then
locale=
# In some environments locales that are listed by the "locale -a"
# command do not seem to work with setlocale(). Therefore, we do
# a preliminary test to see if pcre2test can set one before going
# on to use it.
for loc in 'fr_FR' 'french' 'fr' 'fr_CA'; do
locale -a | grep "^$loc\$" >/dev/null
if [ $? -eq 0 ] ; then
echo "/a/locale=$loc" | \
$sim $valgrind ./pcre2test -q $bmode | \
grep "Failed to set locale" >/dev/null
if [ $? -ne 0 ] ; then
locale=$loc
if [ "$locale" = "fr_FR" ] ; then
infile=$testdata/testinput3
outfile=$testdata/testoutput3
outfile2=$testdata/testoutput3A
outfile3=$testdata/testoutput3B
else
infile=test3input
outfile=test3output
outfile2=test3outputA
outfile3=test3outputB
sed "s/fr_FR/$loc/" $testdata/testinput3 >test3input
sed "s/fr_FR/$loc/" $testdata/testoutput3 >test3output
sed "s/fr_FR/$loc/" $testdata/testoutput3A >test3outputA
sed "s/fr_FR/$loc/" $testdata/testoutput3B >test3outputB
fi
break
fi
fi
done
if [ "$locale" != "" ] ; then
echo $title3 "(using '$locale' locale)"
for opt in "" $jitopt; do
$sim $valgrind ./pcre2test -q $bmode $opt $infile testtry
if [ $? = 0 ] ; then
case "$opt" in
-jit) with=" with JIT";;
*) with="";;
esac
if $cf $outfile testtry >teststdout || \
$cf $outfile2 testtry >teststdout || \
$cf $outfile3 testtry >teststdout
then
echo " OK$with"
else
echo "** Locale test did not run successfully$with. The output did not match"
echo " $outfile, $outfile2 or $outfile3."
echo " This may mean that there is a problem with the locale settings rather"
echo " than a bug in PCRE2."
exit 1
fi
else exit 1
fi
done
else
echo "Cannot test locale-specific features - none of the 'fr_FR', 'fr_CA',"
echo "'fr' or 'french' locales can be set, or the \"locale\" command is"
echo "not available to check for them."
echo " "
fi
fi
# Tests for UTF and Unicode property support
if [ $do4 = yes ] ; then
echo ${title4A}-${bits}${title4B}
if [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
for opt in "" $jitopt; do
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput4 testtry
checkresult $? 4 "$opt"
done
fi
fi
if [ $do5 = yes ] ; then
echo ${title5A}-${bits}$title5B
if [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
for opt in "" $jitopt; do
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput5 testtry
checkresult $? 5 "$opt"
done
fi
fi
# Tests for DFA matching support
if [ $do6 = yes ] ; then
echo $title6
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput6 testtry
checkresult $? 6 ""
fi
if [ $do7 = yes ] ; then
echo ${title7A}-${bits}$title7B
if [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput7 testtry
checkresult $? 7 ""
fi
fi
# Test of internal offsets and code sizes. This test is run only when there
# is UTF/UCP support and the link size is 2. The actual tests are
# mostly the same as in some of the above, but in this test we inspect some
# offsets and sizes that require a known link size. This is a doublecheck for
# the maintainer, just in case something changes unexpectely. The output from
# this test is different in 8-bit, 16-bit, and 32-bit modes, so there are
# mode-specific output files.
if [ $do8 = yes ] ; then
echo $title8
if [ $link_size -ne 2 ] ; then
echo " Skipped because link size is not 2"
elif [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput8 testtry
checkresult $? 8-$bits ""
fi
fi
# Tests for 8-bit-specific features
if [ "$do9" = yes ] ; then
echo $title9
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
echo " Skipped when running 16/32-bit tests"
else
for opt in "" $jitopt; do
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput9 testtry
checkresult $? 9 "$opt"
done
fi
fi
# Tests for UTF-8 and UCP 8-bit-specific features
if [ "$do10" = yes ] ; then
echo $title10
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
echo " Skipped when running 16/32-bit tests"
elif [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
for opt in "" $jitopt; do
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput10 testtry
checkresult $? 10 "$opt"
done
fi
fi
# Tests for 16-bit and 32-bit features. Output is different for the two widths.
if [ $do11 = yes ] ; then
echo $title11
if [ "$bits" = "8" ] ; then
echo " Skipped when running 8-bit tests"
else
for opt in "" $jitopt; do
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput11 testtry
checkresult $? 11-$bits "$opt"
done
fi
fi
# Tests for 16-bit and 32-bit features with UTF-16/32 and UCP support. Output
# is different for the two widths.
if [ $do12 = yes ] ; then
echo $title12
if [ "$bits" = "8" ] ; then
echo " Skipped when running 8-bit tests"
elif [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
for opt in "" $jitopt; do
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput12 testtry
checkresult $? 12-$bits "$opt"
done
fi
fi
# Tests for 16/32-bit-specific features in DFA non-UTF modes
if [ $do13 = yes ] ; then
echo $title13
if [ "$bits" = "8" ] ; then
echo " Skipped when running 8-bit tests"
else
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput13 testtry
checkresult $? 13 ""
fi
fi
# Test non-JIT match and recursion limits
if [ $do14 = yes ] ; then
echo $title14
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput14 testtry
checkresult $? 14 ""
fi
# Test JIT-specific features when JIT is not available
if [ $do15 = yes ] ; then
echo $title15
if [ $jit -ne 0 ] ; then
echo " Skipped because JIT is available"
else
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput15 testtry
checkresult $? 15 ""
fi
fi
# Test JIT-specific features when JIT is available
if [ $do16 = yes ] ; then
echo $title16
if [ $jit -eq 0 -o "$nojit" = "yes" ] ; then
echo " Skipped because JIT is not available or nojit was specified"
else
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput16 testtry
checkresult $? 16 ""
fi
fi
# Tests for the POSIX interface without UTF/UCP (8-bit only)
if [ $do17 = yes ] ; then
echo $title17
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
echo " Skipped when running 16/32-bit tests"
else
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput17 testtry
checkresult $? 17 ""
fi
fi
# Tests for the POSIX interface with UTF/UCP (8-bit only)
if [ $do18 = yes ] ; then
echo $title18
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
echo " Skipped when running 16/32-bit tests"
elif [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput18 testtry
checkresult $? 18 ""
fi
fi
# Serialization tests
if [ $do19 = yes ] ; then
echo $title19
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput19 testtry
checkresult $? 19 ""
fi
# End of loop for 8/16/32-bit tests
done
# Clean up local working files
rm -f testSinput test3input testsaved1 testsaved2 test3output test3outputA test3outputB teststdout teststderr testtry
# End

463
pcre2/RunTest.bat Normal file
View File

@ -0,0 +1,463 @@
@echo off
@rem
@rem MS Windows batch file to run pcre2test on testfiles with the correct
@rem options. This file must use CRLF linebreaks to function properly,
@rem and requires both pcre2test and pcre2grep.
@rem
@rem ------------------------ HISTORY ----------------------------------
@rem This file was originally contributed to PCRE1 by Ralf Junker, and touched
@rem up by Daniel Richard G. Tests 10-12 added by Philip H.
@rem Philip H also changed test 3 to use "wintest" files.
@rem
@rem Updated by Tom Fortmann to support explicit test numbers on the command
@rem line. Added argument validation and added error reporting.
@rem
@rem Sheri Pierce added logic to skip feature dependent tests
@rem tests 4 5 9 15 and 18 require utf support
@rem tests 6 7 10 16 and 19 require ucp support
@rem 11 requires ucp and link size 2
@rem 12 requires presence of jit support
@rem 13 requires absence of jit support
@rem Sheri P also added override tests for study and jit testing
@rem Zoltan Herczeg added libpcre16 support
@rem Zoltan Herczeg added libpcre32 support
@rem -------------------------------------------------------------------
@rem
@rem The file was converted for PCRE2 by PH, February 2015.
setlocal enabledelayedexpansion
if [%srcdir%]==[] (
if exist testdata\ set srcdir=.)
if [%srcdir%]==[] (
if exist ..\testdata\ set srcdir=..)
if [%srcdir%]==[] (
if exist ..\..\testdata\ set srcdir=..\..)
if NOT exist %srcdir%\testdata\ (
Error: echo distribution testdata folder not found!
call :conferror
exit /b 1
goto :eof
)
if [%pcre2test%]==[] set pcre2test=.\pcre2test.exe
echo source dir is %srcdir%
echo pcre2test=%pcre2test%
if NOT exist %pcre2test% (
echo Error: %pcre2test% not found!
echo.
call :conferror
exit /b 1
)
%pcre2test% -C linksize >NUL
set link_size=%ERRORLEVEL%
%pcre2test% -C pcre2-8 >NUL
set support8=%ERRORLEVEL%
%pcre2test% -C pcre2-16 >NUL
set support16=%ERRORLEVEL%
%pcre2test% -C pcre2-32 >NUL
set support32=%ERRORLEVEL%
%pcre2test% -C unicode >NUL
set unicode=%ERRORLEVEL%
%pcre2test% -C jit >NUL
set jit=%ERRORLEVEL%
if %support8% EQU 1 (
if not exist testout8 md testout8
if not exist testoutjit8 md testoutjit8
)
if %support16% EQU 1 (
if not exist testout16 md testout16
if not exist testoutjit16 md testoutjit16
)
if %support16% EQU 1 (
if not exist testout32 md testout32
if not exist testoutjit32 md testoutjit32
)
set do1=no
set do2=no
set do3=no
set do4=no
set do5=no
set do6=no
set do7=no
set do8=no
set do9=no
set do10=no
set do11=no
set do12=no
set do13=no
set do14=no
set do15=no
set do16=no
set do17=no
set do18=no
set do19=no
set all=yes
for %%a in (%*) do (
set valid=no
for %%v in (1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19) do if %%v == %%a set valid=yes
if "!valid!" == "yes" (
set do%%a=yes
set all=no
) else (
echo Invalid test number - %%a!
echo Usage %0 [ test_number ] ...
echo Where test_number is one or more optional test numbers 1 through 19, default is all tests.
exit /b 1
)
)
set failed="no"
if "%all%" == "yes" (
set do1=yes
set do2=yes
set do3=yes
set do4=yes
set do5=yes
set do6=yes
set do7=yes
set do8=yes
set do9=yes
set do10=yes
set do11=yes
set do12=yes
set do13=yes
set do14=yes
set do15=yes
set do16=yes
set do17=yes
set do18=yes
set do19=yes
)
@echo RunTest.bat's pcre2test output is written to newly created subfolders
@echo named testout{8,16,32} and testoutjit{8,16,32}.
@echo.
set mode=
set bits=8
:nextMode
if "%mode%" == "" (
if %support8% EQU 0 goto modeSkip
echo.
echo ---- Testing 8-bit library ----
echo.
)
if "%mode%" == "-16" (
if %support16% EQU 0 goto modeSkip
echo.
echo ---- Testing 16-bit library ----
echo.
)
if "%mode%" == "-32" (
if %support32% EQU 0 goto modeSkip
echo.
echo ---- Testing 32-bit library ----
echo.
)
if "%do1%" == "yes" call :do1
if "%do2%" == "yes" call :do2
if "%do3%" == "yes" call :do3
if "%do4%" == "yes" call :do4
if "%do5%" == "yes" call :do5
if "%do6%" == "yes" call :do6
if "%do7%" == "yes" call :do7
if "%do8%" == "yes" call :do8
if "%do9%" == "yes" call :do9
if "%do10%" == "yes" call :do10
if "%do11%" == "yes" call :do11
if "%do12%" == "yes" call :do12
if "%do13%" == "yes" call :do13
if "%do14%" == "yes" call :do14
if "%do15%" == "yes" call :do15
if "%do16%" == "yes" call :do16
if "%do17%" == "yes" call :do17
if "%do18%" == "yes" call :do18
if "%do19%" == "yes" call :do19
:modeSkip
if "%mode%" == "" (
set mode=-16
set bits=16
goto nextMode
)
if "%mode%" == "-16" (
set mode=-32
set bits=32
goto nextMode
)
@rem If mode is -32, testing is finished
if %failed% == "yes" (
echo In above output, one or more of the various tests failed!
exit /b 1
)
echo All OK
goto :eof
:runsub
@rem Function to execute pcre2test and compare the output
@rem Arguments are as follows:
@rem
@rem 1 = test number
@rem 2 = outputdir
@rem 3 = test name use double quotes
@rem 4 - 9 = pcre2test options
if [%1] == [] (
echo Missing test number argument!
exit /b 1
)
if [%2] == [] (
echo Missing outputdir!
exit /b 1
)
if [%3] == [] (
echo Missing test name argument!
exit /b 1
)
set testinput=testinput%1
set testoutput=testoutput%1
if exist %srcdir%\testdata\win%testinput% (
set testinput=wintestinput%1
set testoutput=wintestoutput%1
)
echo Test %1: %3
%pcre2test% %mode% %4 %5 %6 %7 %8 %9 %srcdir%\testdata\%testinput% >%2%bits%\%testoutput%
if errorlevel 1 (
echo. failed executing command-line:
echo. %pcre2test% %mode% %4 %5 %6 %7 %8 %9 %srcdir%\testdata\%testinput% ^>%2%bits%\%testoutput%
set failed="yes"
goto :eof
)
set type=
if [%1]==[8] (
set type=-%bits%
)
if [%1]==[11] (
set type=-%bits%
)
if [%1]==[12] (
set type=-%bits%
)
fc /n %srcdir%\testdata\%testoutput%%type% %2%bits%\%testoutput% >NUL
if errorlevel 1 (
echo. failed comparison: fc /n %srcdir%\testdata\%testoutput% %2%bits%\%testoutput%
if [%1]==[2] (
echo.
echo ** Test 2 requires a lot of stack. PCRE2 can be configured to
echo ** use heap for recursion. Otherwise, to pass Test 2
echo ** you generally need to allocate 8 mb stack to PCRE2.
echo ** See the 'pcre2stack' page for a discussion of PCRE2's
echo ** stack usage.
echo.
)
if [%1]==[3] (
echo.
echo ** Test 3 failure usually means french locale is not
echo ** available on the system, rather than a bug or problem with PCRE2.
echo.
goto :eof
)
set failed="yes"
goto :eof
)
echo. Passed.
goto :eof
:do1
call :runsub 1 testout "Main non-UTF, non-UCP functionality (Compatible with Perl >= 5.10)" -q
if %jit% EQU 1 call :runsub 1 testoutjit "Test with JIT Override" -q -jit
goto :eof
:do2
call :runsub 2 testout "API, errors, internals, and non-Perl stuff" -q
if %jit% EQU 1 call :runsub 2 testoutjit "Test with JIT Override" -q -jit
goto :eof
:do3
call :runsub 3 testout "Locale-specific features" -q
if %jit% EQU 1 call :runsub 3 testoutjit "Test with JIT Override" -q -jit
goto :eof
:do4
if %unicode% EQU 0 (
echo Test 4 Skipped due to absence of Unicode support.
goto :eof
)
call :runsub 4 testout "UTF-%bits% and Unicode property support - (Compatible with Perl >= 5.10)" -q
if %jit% EQU 1 call :runsub 4 testoutjit "Test with JIT Override" -q -jit
goto :eof
:do5
if %unicode% EQU 0 (
echo Test 5 Skipped due to absence of Unicode support.
goto :eof
)
call :runsub 5 testout "API, internals, and non-Perl stuff for UTF-%bits% and UCP" -q
if %jit% EQU 1 call :runsub 5 testoutjit "Test with JIT Override" -q -jit
goto :eof
:do6
call :runsub 6 testout "DFA matching main non-UTF, non-UCP functionality" -q -dfa
goto :eof
:do7
if %unicode% EQU 0 (
echo Test 7 Skipped due to absence of Unicode support.
goto :eof
)
call :runsub 7 testout "DFA matching with UTF-%bits% and Unicode property support" -q -dfa
goto :eof
:do8
if NOT %link_size% EQU 2 (
echo Test 8 Skipped because link size is not 2.
goto :eof
)
if %unicode% EQU 0 (
echo Test 8 Skipped due to absence of Unicode support.
goto :eof
)
call :runsub 8 testout "Internal offsets and code size tests" -q
goto :eof
:do9
if NOT %bits% EQU 8 (
echo Test 9 Skipped when running 16/32-bit tests.
goto :eof
)
call :runsub 9 testout "Specials for the basic 8-bit library" -q
if %jit% EQU 1 call :runsub 9 testoutjit "Test with JIT Override" -q -jit
goto :eof
:do10
if NOT %bits% EQU 8 (
echo Test 10 Skipped when running 16/32-bit tests.
goto :eof
)
if %unicode% EQU 0 (
echo Test 10 Skipped due to absence of Unicode support.
goto :eof
)
call :runsub 10 testout "Specials for the 8-bit library with Unicode support" -q
if %jit% EQU 1 call :runsub 10 testoutjit "Test with JIT Override" -q -jit
goto :eof
:do11
if %bits% EQU 8 (
echo Test 11 Skipped when running 8-bit tests.
goto :eof
)
call :runsub 11 testout "Specials for the basic 16/32-bit library" -q
if %jit% EQU 1 call :runsub 11 testoutjit "Test with JIT Override" -q -jit
goto :eof
:do12
if %bits% EQU 8 (
echo Test 12 Skipped when running 8-bit tests.
goto :eof
)
if %unicode% EQU 0 (
echo Test 12 Skipped due to absence of Unicode support.
goto :eof
)
call :runsub 12 testout "Specials for the 16/32-bit library with Unicode support" -q
if %jit% EQU 1 call :runsub 12 testoutjit "Test with JIT Override" -q -jit
goto :eof
:do13
if %bits% EQU 8 (
echo Test 13 Skipped when running 8-bit tests.
goto :eof
)
call :runsub 13 testout "DFA specials for the basic 16/32-bit library" -q -dfa
goto :eof
:do14
call :runsub 14 testout "Non-JIT limits and other non_JIT tests" -q
goto :eof
:do15
if %jit% EQU 1 (
echo Test 15 Skipped due to presence of JIT support.
goto :eof
)
call :runsub 15 testout "JIT-specific features when JIT is not available" -q
goto :eof
:do16
if %jit% EQU 0 (
echo Test 16 Skipped due to absence of JIT support.
goto :eof
)
call :runsub 16 testout "JIT-specific features when JIT is available" -q
goto :eof
:do17
if %bits% EQU 16 (
echo Test 17 Skipped when running 16-bit tests.
goto :eof
)
if %bits% EQU 32 (
echo Test 17 Skipped when running 32-bit tests.
goto :eof
)
call :runsub 17 testout "POSIX interface, excluding UTF-8 and UCP" -q
goto :eof
:do18
if %bits% EQU 16 (
echo Test 18 Skipped when running 16-bit tests.
goto :eof
)
if %bits% EQU 32 (
echo Test 18 Skipped when running 32-bit tests.
goto :eof
)
call :runsub 1 testout "POSIX interface with UTF-8 and UCP" -q
goto :eof
:do19
call :runsub 1 testout "Serialization tests" -q
goto :eof
:conferror
@echo.
@echo Either your build is incomplete or you have a configuration error.
@echo.
@echo If configured with cmake and executed via "make test" or the MSVC "RUN_TESTS"
@echo project, pcre2_test.bat defines variables and automatically calls RunTest.bat.
@echo For manual testing of all available features, after configuring with cmake
@echo and building, you can run the built pcre2_test.bat. For best results with
@echo cmake builds and tests avoid directories with full path names that include
@echo spaces for source or build.
@echo.
@echo Otherwise, if the build dir is in a subdir of the source dir, testdata needed
@echo for input and verification should be found automatically when (from the
@echo location of the the built exes) you call RunTest.bat. By default RunTest.bat
@echo runs all tests compatible with the linked pcre2 library but it can be given
@echo a test number as an argument.
@echo.
@echo If the build dir is not under the source dir you can either copy your exes
@echo to the source folder or copy RunTest.bat and the testdata folder to the
@echo location of your built exes and then run RunTest.bat.
@echo.
goto :eof

1434
pcre2/aclocal.m4 vendored Normal file

File diff suppressed because it is too large Load Diff

270
pcre2/ar-lib Executable file
View File

@ -0,0 +1,270 @@
#! /bin/sh
# Wrapper for Microsoft lib.exe
me=ar-lib
scriptversion=2012-03-01.08; # UTC
# Copyright (C) 2010-2014 Free Software Foundation, Inc.
# Written by Peter Rosin <peda@lysator.liu.se>.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# This file is maintained in Automake, please report
# bugs to <bug-automake@gnu.org> or send patches to
# <automake-patches@gnu.org>.
# func_error message
func_error ()
{
echo "$me: $1" 1>&2
exit 1
}
file_conv=
# func_file_conv build_file
# Convert a $build file to $host form and store it in $file
# Currently only supports Windows hosts.
func_file_conv ()
{
file=$1
case $file in
/ | /[!/]*) # absolute file, and not a UNC file
if test -z "$file_conv"; then
# lazily determine how to convert abs files
case `uname -s` in
MINGW*)
file_conv=mingw
;;
CYGWIN*)
file_conv=cygwin
;;
*)
file_conv=wine
;;
esac
fi
case $file_conv in
mingw)
file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
;;
cygwin)
file=`cygpath -m "$file" || echo "$file"`
;;
wine)
file=`winepath -w "$file" || echo "$file"`
;;
esac
;;
esac
}
# func_at_file at_file operation archive
# Iterate over all members in AT_FILE performing OPERATION on ARCHIVE
# for each of them.
# When interpreting the content of the @FILE, do NOT use func_file_conv,
# since the user would need to supply preconverted file names to
# binutils ar, at least for MinGW.
func_at_file ()
{
operation=$2
archive=$3
at_file_contents=`cat "$1"`
eval set x "$at_file_contents"
shift
for member
do
$AR -NOLOGO $operation:"$member" "$archive" || exit $?
done
}
case $1 in
'')
func_error "no command. Try '$0 --help' for more information."
;;
-h | --h*)
cat <<EOF
Usage: $me [--help] [--version] PROGRAM ACTION ARCHIVE [MEMBER...]
Members may be specified in a file named with @FILE.
EOF
exit $?
;;
-v | --v*)
echo "$me, version $scriptversion"
exit $?
;;
esac
if test $# -lt 3; then
func_error "you must specify a program, an action and an archive"
fi
AR=$1
shift
while :
do
if test $# -lt 2; then
func_error "you must specify a program, an action and an archive"
fi
case $1 in
-lib | -LIB \
| -ltcg | -LTCG \
| -machine* | -MACHINE* \
| -subsystem* | -SUBSYSTEM* \
| -verbose | -VERBOSE \
| -wx* | -WX* )
AR="$AR $1"
shift
;;
*)
action=$1
shift
break
;;
esac
done
orig_archive=$1
shift
func_file_conv "$orig_archive"
archive=$file
# strip leading dash in $action
action=${action#-}
delete=
extract=
list=
quick=
replace=
index=
create=
while test -n "$action"
do
case $action in
d*) delete=yes ;;
x*) extract=yes ;;
t*) list=yes ;;
q*) quick=yes ;;
r*) replace=yes ;;
s*) index=yes ;;
S*) ;; # the index is always updated implicitly
c*) create=yes ;;
u*) ;; # TODO: don't ignore the update modifier
v*) ;; # TODO: don't ignore the verbose modifier
*)
func_error "unknown action specified"
;;
esac
action=${action#?}
done
case $delete$extract$list$quick$replace,$index in
yes,* | ,yes)
;;
yesyes*)
func_error "more than one action specified"
;;
*)
func_error "no action specified"
;;
esac
if test -n "$delete"; then
if test ! -f "$orig_archive"; then
func_error "archive not found"
fi
for member
do
case $1 in
@*)
func_at_file "${1#@}" -REMOVE "$archive"
;;
*)
func_file_conv "$1"
$AR -NOLOGO -REMOVE:"$file" "$archive" || exit $?
;;
esac
done
elif test -n "$extract"; then
if test ! -f "$orig_archive"; then
func_error "archive not found"
fi
if test $# -gt 0; then
for member
do
case $1 in
@*)
func_at_file "${1#@}" -EXTRACT "$archive"
;;
*)
func_file_conv "$1"
$AR -NOLOGO -EXTRACT:"$file" "$archive" || exit $?
;;
esac
done
else
$AR -NOLOGO -LIST "$archive" | sed -e 's/\\/\\\\/g' | while read member
do
$AR -NOLOGO -EXTRACT:"$member" "$archive" || exit $?
done
fi
elif test -n "$quick$replace"; then
if test ! -f "$orig_archive"; then
if test -z "$create"; then
echo "$me: creating $orig_archive"
fi
orig_archive=
else
orig_archive=$archive
fi
for member
do
case $1 in
@*)
func_file_conv "${1#@}"
set x "$@" "@$file"
;;
*)
func_file_conv "$1"
set x "$@" "$file"
;;
esac
shift
shift
done
if test -n "$orig_archive"; then
$AR -NOLOGO -OUT:"$archive" "$orig_archive" "$@" || exit $?
else
$AR -NOLOGO -OUT:"$archive" "$@" || exit $?
fi
elif test -n "$list"; then
if test ! -f "$orig_archive"; then
func_error "archive not found"
fi
$AR -NOLOGO -LIST "$archive" || exit $?
fi

View File

@ -0,0 +1,22 @@
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

347
pcre2/compile Executable file
View File

@ -0,0 +1,347 @@
#! /bin/sh
# Wrapper for compilers which do not understand '-c -o'.
scriptversion=2012-10-14.11; # UTC
# Copyright (C) 1999-2014 Free Software Foundation, Inc.
# Written by Tom Tromey <tromey@cygnus.com>.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# This file is maintained in Automake, please report
# bugs to <bug-automake@gnu.org> or send patches to
# <automake-patches@gnu.org>.
nl='
'
# We need space, tab and new line, in precisely that order. Quoting is
# there to prevent tools from complaining about whitespace usage.
IFS=" "" $nl"
file_conv=
# func_file_conv build_file lazy
# Convert a $build file to $host form and store it in $file
# Currently only supports Windows hosts. If the determined conversion
# type is listed in (the comma separated) LAZY, no conversion will
# take place.
func_file_conv ()
{
file=$1
case $file in
/ | /[!/]*) # absolute file, and not a UNC file
if test -z "$file_conv"; then
# lazily determine how to convert abs files
case `uname -s` in
MINGW*)
file_conv=mingw
;;
CYGWIN*)
file_conv=cygwin
;;
*)
file_conv=wine
;;
esac
fi
case $file_conv/,$2, in
*,$file_conv,*)
;;
mingw/*)
file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
;;
cygwin/*)
file=`cygpath -m "$file" || echo "$file"`
;;
wine/*)
file=`winepath -w "$file" || echo "$file"`
;;
esac
;;
esac
}
# func_cl_dashL linkdir
# Make cl look for libraries in LINKDIR
func_cl_dashL ()
{
func_file_conv "$1"
if test -z "$lib_path"; then
lib_path=$file
else
lib_path="$lib_path;$file"
fi
linker_opts="$linker_opts -LIBPATH:$file"
}
# func_cl_dashl library
# Do a library search-path lookup for cl
func_cl_dashl ()
{
lib=$1
found=no
save_IFS=$IFS
IFS=';'
for dir in $lib_path $LIB
do
IFS=$save_IFS
if $shared && test -f "$dir/$lib.dll.lib"; then
found=yes
lib=$dir/$lib.dll.lib
break
fi
if test -f "$dir/$lib.lib"; then
found=yes
lib=$dir/$lib.lib
break
fi
if test -f "$dir/lib$lib.a"; then
found=yes
lib=$dir/lib$lib.a
break
fi
done
IFS=$save_IFS
if test "$found" != yes; then
lib=$lib.lib
fi
}
# func_cl_wrapper cl arg...
# Adjust compile command to suit cl
func_cl_wrapper ()
{
# Assume a capable shell
lib_path=
shared=:
linker_opts=
for arg
do
if test -n "$eat"; then
eat=
else
case $1 in
-o)
# configure might choose to run compile as 'compile cc -o foo foo.c'.
eat=1
case $2 in
*.o | *.[oO][bB][jJ])
func_file_conv "$2"
set x "$@" -Fo"$file"
shift
;;
*)
func_file_conv "$2"
set x "$@" -Fe"$file"
shift
;;
esac
;;
-I)
eat=1
func_file_conv "$2" mingw
set x "$@" -I"$file"
shift
;;
-I*)
func_file_conv "${1#-I}" mingw
set x "$@" -I"$file"
shift
;;
-l)
eat=1
func_cl_dashl "$2"
set x "$@" "$lib"
shift
;;
-l*)
func_cl_dashl "${1#-l}"
set x "$@" "$lib"
shift
;;
-L)
eat=1
func_cl_dashL "$2"
;;
-L*)
func_cl_dashL "${1#-L}"
;;
-static)
shared=false
;;
-Wl,*)
arg=${1#-Wl,}
save_ifs="$IFS"; IFS=','
for flag in $arg; do
IFS="$save_ifs"
linker_opts="$linker_opts $flag"
done
IFS="$save_ifs"
;;
-Xlinker)
eat=1
linker_opts="$linker_opts $2"
;;
-*)
set x "$@" "$1"
shift
;;
*.cc | *.CC | *.cxx | *.CXX | *.[cC]++)
func_file_conv "$1"
set x "$@" -Tp"$file"
shift
;;
*.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO])
func_file_conv "$1" mingw
set x "$@" "$file"
shift
;;
*)
set x "$@" "$1"
shift
;;
esac
fi
shift
done
if test -n "$linker_opts"; then
linker_opts="-link$linker_opts"
fi
exec "$@" $linker_opts
exit 1
}
eat=
case $1 in
'')
echo "$0: No command. Try '$0 --help' for more information." 1>&2
exit 1;
;;
-h | --h*)
cat <<\EOF
Usage: compile [--help] [--version] PROGRAM [ARGS]
Wrapper for compilers which do not understand '-c -o'.
Remove '-o dest.o' from ARGS, run PROGRAM with the remaining
arguments, and rename the output as expected.
If you are trying to build a whole package this is not the
right script to run: please start by reading the file 'INSTALL'.
Report bugs to <bug-automake@gnu.org>.
EOF
exit $?
;;
-v | --v*)
echo "compile $scriptversion"
exit $?
;;
cl | *[/\\]cl | cl.exe | *[/\\]cl.exe )
func_cl_wrapper "$@" # Doesn't return...
;;
esac
ofile=
cfile=
for arg
do
if test -n "$eat"; then
eat=
else
case $1 in
-o)
# configure might choose to run compile as 'compile cc -o foo foo.c'.
# So we strip '-o arg' only if arg is an object.
eat=1
case $2 in
*.o | *.obj)
ofile=$2
;;
*)
set x "$@" -o "$2"
shift
;;
esac
;;
*.c)
cfile=$1
set x "$@" "$1"
shift
;;
*)
set x "$@" "$1"
shift
;;
esac
fi
shift
done
if test -z "$ofile" || test -z "$cfile"; then
# If no '-o' option was seen then we might have been invoked from a
# pattern rule where we don't need one. That is ok -- this is a
# normal compilation that the losing compiler can handle. If no
# '.c' file was seen then we are probably linking. That is also
# ok.
exec "$@"
fi
# Name of file we expect compiler to create.
cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'`
# Create the lock directory.
# Note: use '[/\\:.-]' here to ensure that we don't use the same name
# that we are using for the .o file. Also, base the name on the expected
# object file name, since that is what matters with a parallel build.
lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d
while true; do
if mkdir "$lockdir" >/dev/null 2>&1; then
break
fi
sleep 1
done
# FIXME: race condition here if user kills between mkdir and trap.
trap "rmdir '$lockdir'; exit 1" 1 2 15
# Run the compile.
"$@"
ret=$?
if test -f "$cofile"; then
test "$cofile" = "$ofile" || mv "$cofile" "$ofile"
elif test -f "${cofile}bj"; then
test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile"
fi
rmdir "$lockdir"
exit $ret
# Local Variables:
# mode: shell-script
# sh-indentation: 2
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC"
# time-stamp-end: "; # UTC"
# End:

47
pcre2/config-cmake.h.in Normal file
View File

@ -0,0 +1,47 @@
/* config.h for CMake builds */
#cmakedefine HAVE_DIRENT_H 1
#cmakedefine HAVE_INTTYPES_H 1
#cmakedefine HAVE_STDINT_H 1
#cmakedefine HAVE_STRERROR 1
#cmakedefine HAVE_SYS_STAT_H 1
#cmakedefine HAVE_SYS_TYPES_H 1
#cmakedefine HAVE_UNISTD_H 1
#cmakedefine HAVE_WINDOWS_H 1
#cmakedefine HAVE_BCOPY 1
#cmakedefine HAVE_MEMMOVE 1
#cmakedefine PCRE2_STATIC 1
#cmakedefine SUPPORT_PCRE2_8 1
#cmakedefine SUPPORT_PCRE2_16 1
#cmakedefine SUPPORT_PCRE2_32 1
#cmakedefine PCRE2_DEBUG 1
#cmakedefine SUPPORT_LIBBZ2 1
#cmakedefine SUPPORT_LIBEDIT 1
#cmakedefine SUPPORT_LIBREADLINE 1
#cmakedefine SUPPORT_LIBZ 1
#cmakedefine SUPPORT_JIT 1
#cmakedefine SUPPORT_PCRE2GREP_JIT 1
#cmakedefine SUPPORT_UNICODE 1
#cmakedefine SUPPORT_VALGRIND 1
#cmakedefine BSR_ANYCRLF 1
#cmakedefine EBCDIC 1
#cmakedefine EBCDIC_NL25 1
#cmakedefine HEAP_MATCH_RECURSE 1
#define LINK_SIZE @PCRE2_LINK_SIZE@
#define MATCH_LIMIT @PCRE2_MATCH_LIMIT@
#define MATCH_LIMIT_RECURSION @PCRE2_MATCH_LIMIT_RECURSION@
#define NEWLINE_DEFAULT @NEWLINE_DEFAULT@
#define PARENS_NEST_LIMIT @PCRE2_PARENS_NEST_LIMIT@
#define PCRE2GREP_BUFSIZE @PCRE2GREP_BUFSIZE@
#define MAX_NAME_SIZE 32
#define MAX_NAME_COUNT 10000
/* end config.h for CMake builds */

1421
pcre2/config.guess vendored Executable file

File diff suppressed because it is too large Load Diff

1807
pcre2/config.sub vendored Executable file

File diff suppressed because it is too large Load Diff

18234
pcre2/configure vendored Executable file

File diff suppressed because it is too large Load Diff

905
pcre2/configure.ac Normal file
View File

@ -0,0 +1,905 @@
dnl Process this file with autoconf to produce a configure script.
dnl NOTE FOR MAINTAINERS: Do not use minor version numbers 08 or 09 because
dnl the leading zeros may cause them to be treated as invalid octal constants
dnl if a PCRE2 user writes code that uses PCRE2_MINOR as a number. There is now
dnl a check further down that throws an error if 08 or 09 are used.
dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might
dnl be defined as -RC2, for example. For real releases, it should be empty.
m4_define(pcre2_major, [10])
m4_define(pcre2_minor, [20])
m4_define(pcre2_prerelease, [])
m4_define(pcre2_date, [2015-06-30])
# NOTE: The CMakeLists.txt file searches for the above variables in the first
# 50 lines of this file. Please update that if the variables above are moved.
# Libtool shared library interface versions (current:revision:age)
m4_define(libpcre2_8_version, [2:0:2])
m4_define(libpcre2_16_version, [2:0:2])
m4_define(libpcre2_32_version, [2:0:2])
m4_define(libpcre2_posix_version, [0:0:0])
AC_PREREQ(2.57)
AC_INIT(PCRE2, pcre2_major.pcre2_minor[]pcre2_prerelease, , pcre2)
AC_CONFIG_SRCDIR([src/pcre2.h.in])
AM_INIT_AUTOMAKE([dist-bzip2 dist-zip])
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
AC_CONFIG_HEADERS(src/config.h)
# This is a new thing required to stop a warning from automake 1.12
m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
# This was added at the suggestion of libtoolize (03-Jan-10)
AC_CONFIG_MACRO_DIR([m4])
# The default CFLAGS in Autoconf are "-g -O2" for gcc and just "-g" for any
# other compiler. There doesn't seem to be a standard way of getting rid of the
# -g (which I don't think is needed for a production library). This fudge seems
# to achieve the necessary. First, we remember the externally set values of
# CFLAGS. Then call the AC_PROG_CC macro to find the compiler - if CFLAGS is
# not set, it will be set to Autoconf's defaults. Afterwards, if the original
# values were not set, remove the -g from the Autoconf defaults.
remember_set_CFLAGS="$CFLAGS"
AC_PROG_CC
AM_PROG_CC_C_O
if test "x$remember_set_CFLAGS" = "x"
then
if test "$CFLAGS" = "-g -O2"
then
CFLAGS="-O2"
elif test "$CFLAGS" = "-g"
then
CFLAGS=""
fi
fi
# Check for a 64-bit integer type
AC_TYPE_INT64_T
AC_PROG_INSTALL
AC_LIBTOOL_WIN32_DLL
LT_INIT
AC_PROG_LN_S
# Check for GCC visibility feature
PCRE2_VISIBILITY
# Versioning
PCRE2_MAJOR="pcre2_major"
PCRE2_MINOR="pcre2_minor"
PCRE2_PRERELEASE="pcre2_prerelease"
PCRE2_DATE="pcre2_date"
if test "$PCRE2_MINOR" = "08" -o "$PCRE2_MINOR" = "09"
then
echo "***"
echo "*** Minor version number $PCRE2_MINOR must not be used. ***"
echo "*** Use only 00 to 07 or 10 onwards, to avoid octal issues. ***"
echo "***"
exit 1
fi
AC_SUBST(PCRE2_MAJOR)
AC_SUBST(PCRE2_MINOR)
AC_SUBST(PCRE2_PRERELEASE)
AC_SUBST(PCRE2_DATE)
# Set a more sensible default value for $(htmldir).
if test "x$htmldir" = 'x${docdir}'
then
htmldir='${docdir}/html'
fi
# Force an error for PCRE1 size options
AC_ARG_ENABLE(pcre8,,,enable_pcre8=no)
AC_ARG_ENABLE(pcre16,,,enable_pcre16=no)
AC_ARG_ENABLE(pcre32,,,enable_pcre32=no)
if test "$enable_pcre8$enable_pcre16$enable_pcre32" != "nonono"
then
echo "** ERROR: Use --[[en|dis]]able-pcre2-[[8|16|32]], not --[[en|dis]]able-pcre[[8|16|32]]"
exit 1
fi
# Handle --disable-pcre2-8 (enabled by default)
AC_ARG_ENABLE(pcre2-8,
AS_HELP_STRING([--disable-pcre2-8],
[disable 8 bit character support]),
, enable_pcre2_8=unset)
AC_SUBST(enable_pcre2_8)
# Handle --enable-pcre2-16 (disabled by default)
AC_ARG_ENABLE(pcre2-16,
AS_HELP_STRING([--enable-pcre2-16],
[enable 16 bit character support]),
, enable_pcre2_16=unset)
AC_SUBST(enable_pcre2_16)
# Handle --enable-pcre2-32 (disabled by default)
AC_ARG_ENABLE(pcre2-32,
AS_HELP_STRING([--enable-pcre2-32],
[enable 32 bit character support]),
, enable_pcre2_32=unset)
AC_SUBST(enable_pcre2_32)
# Handle --dnable-debug (disabled by default)
AC_ARG_ENABLE(debug,
AS_HELP_STRING([--enable-debug],
[enable debugging code]),
, enable_debug=no)
# Handle --enable-jit (disabled by default)
AC_ARG_ENABLE(jit,
AS_HELP_STRING([--enable-jit],
[enable Just-In-Time compiling support]),
, enable_jit=no)
# Handle --disable-pcre2grep-jit (enabled by default)
AC_ARG_ENABLE(pcre2grep-jit,
AS_HELP_STRING([--disable-pcre2grep-jit],
[disable JIT support in pcre2grep]),
, enable_pcre2grep_jit=yes)
# Handle --enable-rebuild-chartables
AC_ARG_ENABLE(rebuild-chartables,
AS_HELP_STRING([--enable-rebuild-chartables],
[rebuild character tables in current locale]),
, enable_rebuild_chartables=no)
# Handle --disable-unicode (enabled by default)
AC_ARG_ENABLE(unicode,
AS_HELP_STRING([--disable-unicode],
[disable Unicode support]),
, enable_unicode=unset)
# Handle newline options
ac_pcre2_newline=lf
AC_ARG_ENABLE(newline-is-cr,
AS_HELP_STRING([--enable-newline-is-cr],
[use CR as newline character]),
ac_pcre2_newline=cr)
AC_ARG_ENABLE(newline-is-lf,
AS_HELP_STRING([--enable-newline-is-lf],
[use LF as newline character (default)]),
ac_pcre2_newline=lf)
AC_ARG_ENABLE(newline-is-crlf,
AS_HELP_STRING([--enable-newline-is-crlf],
[use CRLF as newline sequence]),
ac_pcre2_newline=crlf)
AC_ARG_ENABLE(newline-is-anycrlf,
AS_HELP_STRING([--enable-newline-is-anycrlf],
[use CR, LF, or CRLF as newline sequence]),
ac_pcre2_newline=anycrlf)
AC_ARG_ENABLE(newline-is-any,
AS_HELP_STRING([--enable-newline-is-any],
[use any valid Unicode newline sequence]),
ac_pcre2_newline=any)
enable_newline="$ac_pcre2_newline"
# Handle --enable-bsr-anycrlf
AC_ARG_ENABLE(bsr-anycrlf,
AS_HELP_STRING([--enable-bsr-anycrlf],
[\R matches only CR, LF, CRLF by default]),
, enable_bsr_anycrlf=no)
# Handle --enable-ebcdic
AC_ARG_ENABLE(ebcdic,
AS_HELP_STRING([--enable-ebcdic],
[assume EBCDIC coding rather than ASCII; incompatible with --enable-utf; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]),
, enable_ebcdic=no)
# Handle --enable-ebcdic-nl25
AC_ARG_ENABLE(ebcdic-nl25,
AS_HELP_STRING([--enable-ebcdic-nl25],
[set EBCDIC code for NL to 0x25 instead of 0x15; it implies --enable-ebcdic]),
, enable_ebcdic_nl25=no)
# Handle --disable-stack-for-recursion
AC_ARG_ENABLE(stack-for-recursion,
AS_HELP_STRING([--disable-stack-for-recursion],
[don't use stack recursion when matching]),
, enable_stack_for_recursion=yes)
# Handle --enable-pcre2grep-libz
AC_ARG_ENABLE(pcre2grep-libz,
AS_HELP_STRING([--enable-pcre2grep-libz],
[link pcre2grep with libz to handle .gz files]),
, enable_pcre2grep_libz=no)
# Handle --enable-pcre2grep-libbz2
AC_ARG_ENABLE(pcre2grep-libbz2,
AS_HELP_STRING([--enable-pcre2grep-libbz2],
[link pcre2grep with libbz2 to handle .bz2 files]),
, enable_pcre2grep_libbz2=no)
# Handle --with-pcre2grep-bufsize=N
AC_ARG_WITH(pcre2grep-bufsize,
AS_HELP_STRING([--with-pcre2grep-bufsize=N],
[pcre2grep buffer size (default=20480, minimum=8192)]),
, with_pcre2grep_bufsize=20480)
# Handle --enable-pcre2test-libedit
AC_ARG_ENABLE(pcre2test-libedit,
AS_HELP_STRING([--enable-pcre2test-libedit],
[link pcre2test with libedit]),
, enable_pcre2test_libedit=no)
# Handle --enable-pcre2test-libreadline
AC_ARG_ENABLE(pcre2test-libreadline,
AS_HELP_STRING([--enable-pcre2test-libreadline],
[link pcre2test with libreadline]),
, enable_pcre2test_libreadline=no)
# Handle --with-link-size=N
AC_ARG_WITH(link-size,
AS_HELP_STRING([--with-link-size=N],
[internal link size (2, 3, or 4 allowed; default=2)]),
, with_link_size=2)
# Handle --with-parens-nest-limit=N
AC_ARG_WITH(parens-nest-limit,
AS_HELP_STRING([--with-parens-nest-limit=N],
[nested parentheses limit (default=250)]),
, with_parens_nest_limit=250)
# Handle --with-match-limit=N
AC_ARG_WITH(match-limit,
AS_HELP_STRING([--with-match-limit=N],
[default limit on internal looping (default=10000000)]),
, with_match_limit=10000000)
# Handle --with-match-limit_recursion=N
#
# Note: In config.h, the default is to define MATCH_LIMIT_RECURSION
# symbolically as MATCH_LIMIT, which in turn is defined to be some numeric
# value (e.g. 10000000). MATCH_LIMIT_RECURSION can otherwise be set to some
# different numeric value (or even the same numeric value as MATCH_LIMIT,
# though no longer defined in terms of the latter).
#
AC_ARG_WITH(match-limit-recursion,
AS_HELP_STRING([--with-match-limit-recursion=N],
[default limit on internal recursion (default=MATCH_LIMIT)]),
, with_match_limit_recursion=MATCH_LIMIT)
# Handle --enable-valgrind
AC_ARG_ENABLE(valgrind,
AS_HELP_STRING([--enable-valgrind],
[valgrind support]),
, enable_valgrind=no)
# Enable code coverage reports using gcov
AC_ARG_ENABLE(coverage,
AS_HELP_STRING([--enable-coverage],
[enable code coverage reports using gcov]),
, enable_coverage=no)
# Set the default value for pcre2-8
if test "x$enable_pcre2_8" = "xunset"
then
enable_pcre2_8=yes
fi
# Set the default value for pcre2-16
if test "x$enable_pcre2_16" = "xunset"
then
enable_pcre2_16=no
fi
# Set the default value for pcre2-32
if test "x$enable_pcre2_32" = "xunset"
then
enable_pcre2_32=no
fi
# Make sure at least one library is selected
if test "x$enable_pcre2_8$enable_pcre2_16$enable_pcre2_32" = "xnonono"
then
AC_MSG_ERROR([At least one of the 8, 16 or 32 bit libraries must be enabled])
fi
# Unicode is enabled by default.
if test "x$enable_unicode" = "xunset"
then
enable_unicode=yes
fi
# Convert the newline identifier into the appropriate integer value. These must
# agree with the PCRE2_NEWLINE_xxx values in pcre2.h.
case "$enable_newline" in
cr) ac_pcre2_newline_value=1 ;;
lf) ac_pcre2_newline_value=2 ;;
crlf) ac_pcre2_newline_value=3 ;;
any) ac_pcre2_newline_value=4 ;;
anycrlf) ac_pcre2_newline_value=5 ;;
*)
AC_MSG_ERROR([invalid argument \"$enable_newline\" to --enable-newline option])
;;
esac
# --enable-ebcdic-nl25 implies --enable-ebcdic
if test "x$enable_ebcdic_nl25" = "xyes"; then
enable_ebcdic=yes
fi
# Make sure that if enable_ebcdic is set, rebuild_chartables is also enabled.
# Also check that UTF support is not requested, because PCRE2 cannot handle
# EBCDIC and UTF in the same build. To do so it would need to use different
# character constants depending on the mode.
#
if test "x$enable_ebcdic" = "xyes"; then
enable_rebuild_chartables=yes
if test "x$enable_unicode" = "xyes"; then
AC_MSG_ERROR([support for EBCDIC and Unicode cannot be enabled at the same time])
fi
fi
# Check argument to --with-link-size
case "$with_link_size" in
2|3|4) ;;
*)
AC_MSG_ERROR([invalid argument \"$with_link_size\" to --with-link-size option])
;;
esac
AH_TOP([
/* PCRE2 is written in Standard C, but there are a few non-standard things it
can cope with, allowing it to run on SunOS4 and other "close to standard"
systems.
In environments that support the GNU autotools, config.h.in is converted into
config.h by the "configure" script. In environments that use CMake,
config-cmake.in is converted into config.h. If you are going to build PCRE2 "by
hand" without using "configure" or CMake, you should copy the distributed
config.h.generic to config.h, and edit the macro definitions to be the way you
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
so that config.h is included at the start of every source.
Alternatively, you can avoid editing by using -D on the compiler command line
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
but if you do, default values will be taken from config.h for non-boolean
macros that are not defined on the command line.
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be defined
(conventionally to 1) for TRUE, and not defined at all for FALSE. All such
macros are listed as a commented #undef in config.h.generic. Macros such as
MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
sure both macros are undefined; an emulation function will then be used. */])
# Checks for header files.
AC_HEADER_STDC
AC_CHECK_HEADERS(limits.h sys/types.h sys/stat.h dirent.h)
AC_CHECK_HEADERS([windows.h], [HAVE_WINDOWS_H=1])
# Conditional compilation
AM_CONDITIONAL(WITH_PCRE2_8, test "x$enable_pcre2_8" = "xyes")
AM_CONDITIONAL(WITH_PCRE2_16, test "x$enable_pcre2_16" = "xyes")
AM_CONDITIONAL(WITH_PCRE2_32, test "x$enable_pcre2_32" = "xyes")
AM_CONDITIONAL(WITH_DEBUG, test "x$enable_debug" = "xyes")
AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes")
AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes")
AM_CONDITIONAL(WITH_UNICODE, test "x$enable_unicode" = "xyes")
AM_CONDITIONAL(WITH_VALGRIND, test "x$enable_valgrind" = "xyes")
# Checks for typedefs, structures, and compiler characteristics.
AC_C_CONST
AC_TYPE_SIZE_T
# Checks for library functions.
AC_CHECK_FUNCS(bcopy memmove strerror)
# Check for the availability of libz (aka zlib)
AC_CHECK_HEADERS([zlib.h], [HAVE_ZLIB_H=1])
AC_CHECK_LIB([z], [gzopen], [HAVE_LIBZ=1])
# Check for the availability of libbz2. Originally we just used AC_CHECK_LIB,
# as for libz. However, this had the following problem, diagnosed and fixed by
# a user:
#
# - libbz2 uses the Pascal calling convention (WINAPI) for the functions
# under Win32.
# - The standard autoconf AC_CHECK_LIB fails to include "bzlib.h",
# therefore missing the function definition.
# - The compiler thus generates a "C" signature for the test function.
# - The linker fails to find the "C" function.
# - PCRE2 fails to configure if asked to do so against libbz2.
#
# Solution:
#
# - Replace the AC_CHECK_LIB test with a custom test.
AC_CHECK_HEADERS([bzlib.h], [HAVE_BZLIB_H=1])
# Original test
# AC_CHECK_LIB([bz2], [BZ2_bzopen], [HAVE_LIBBZ2=1])
#
# Custom test follows
AC_MSG_CHECKING([for libbz2])
OLD_LIBS="$LIBS"
LIBS="$LIBS -lbz2"
AC_LINK_IFELSE([AC_LANG_PROGRAM([[
#ifdef HAVE_BZLIB_H
#include <bzlib.h>
#endif]],
[[return (int)BZ2_bzopen("conftest", "rb");]])],
[AC_MSG_RESULT([yes]);HAVE_LIBBZ2=1; break;],
AC_MSG_RESULT([no]))
LIBS="$OLD_LIBS"
# Check for the availabiity of libreadline
if test "$enable_pcre2test_libreadline" = "yes"; then
AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_H=1])
AC_CHECK_HEADERS([readline/history.h], [HAVE_HISTORY_H=1])
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lreadline"],
[unset ac_cv_lib_readline_readline;
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-ltinfo"],
[unset ac_cv_lib_readline_readline;
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lcurses"],
[unset ac_cv_lib_readline_readline;
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lncurses"],
[unset ac_cv_lib_readline_readline;
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lncursesw"],
[unset ac_cv_lib_readline_readline;
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-ltermcap"],
[LIBREADLINE=""],
[-ltermcap])],
[-lncursesw])],
[-lncurses])],
[-lcurses])],
[-ltinfo])])
AC_SUBST(LIBREADLINE)
if test -n "$LIBREADLINE"; then
if test "$LIBREADLINE" != "-lreadline"; then
echo "-lreadline needs $LIBREADLINE"
LIBREADLINE="-lreadline $LIBREADLINE"
fi
fi
fi
# Check for the availability of libedit. Different distributions put its
# headers in different places. Try to cover the most common ones.
if test "$enable_pcre2test_libedit" = "yes"; then
AC_CHECK_HEADERS([editline/readline.h], [HAVE_EDITLINE_READLINE_H=1],
[AC_CHECK_HEADERS([edit/readline/readline.h], [HAVE_READLINE_READLINE_H=1],
[AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_READLINE_H=1])])])
AC_CHECK_LIB([edit], [readline], [LIBEDIT="-ledit"])
fi
# This facilitates -ansi builds under Linux
dnl AC_DEFINE([_GNU_SOURCE], [], [Enable GNU extensions in glibc])
PCRE2_STATIC_CFLAG=""
if test "x$enable_shared" = "xno" ; then
AC_DEFINE([PCRE2_STATIC], [1], [
Define to any value if linking statically (TODO: make nice with Libtool)])
PCRE2_STATIC_CFLAG="-DPCRE2_STATIC"
fi
AC_SUBST(PCRE2_STATIC_CFLAG)
# Here is where PCRE2-specific defines are handled
if test "$enable_pcre2_8" = "yes"; then
AC_DEFINE([SUPPORT_PCRE2_8], [], [
Define to any value to enable the 8 bit PCRE2 library.])
fi
if test "$enable_pcre2_16" = "yes"; then
AC_DEFINE([SUPPORT_PCRE2_16], [], [
Define to any value to enable the 16 bit PCRE2 library.])
fi
if test "$enable_pcre2_32" = "yes"; then
AC_DEFINE([SUPPORT_PCRE2_32], [], [
Define to any value to enable the 32 bit PCRE2 library.])
fi
if test "$enable_debug" = "yes"; then
AC_DEFINE([PCRE2_DEBUG], [], [
Define to any value to include debugging code.])
fi
# Unless running under Windows, JIT support requires pthreads.
if test "$enable_jit" = "yes"; then
if test "$HAVE_WINDOWS_H" != "1"; then
AX_PTHREAD([], [AC_MSG_ERROR([JIT support requires pthreads])])
CC="$PTHREAD_CC"
CFLAGS="$PTHREAD_CFLAGS $CFLAGS"
LIBS="$PTHREAD_LIBS $LIBS"
fi
AC_DEFINE([SUPPORT_JIT], [], [
Define to any value to enable support for Just-In-Time compiling.])
else
enable_pcre2grep_jit="no"
fi
if test "$enable_pcre2grep_jit" = "yes"; then
AC_DEFINE([SUPPORT_PCRE2GREP_JIT], [], [
Define to any value to enable JIT support in pcre2grep.])
fi
if test "$enable_unicode" = "yes"; then
AC_DEFINE([SUPPORT_UNICODE], [], [
Define to any value to enable support for Unicode and UTF encoding.
This will work even in an EBCDIC environment, but it is incompatible
with the EBCDIC macro. That is, PCRE2 can support *either* EBCDIC
code *or* ASCII/Unicode, but not both at once.])
fi
if test "$enable_stack_for_recursion" = "no"; then
AC_DEFINE([HEAP_MATCH_RECURSE], [], [
PCRE2 uses recursive function calls to handle backtracking while
matching. This can sometimes be a problem on systems that have
stacks of limited size. Define HEAP_MATCH_RECURSE to any value to get a
version that doesn't use recursion in the match() function; instead
it creates its own stack by steam using memory from the heap. For more
detail, see the comments and other stuff just above the match() function.])
fi
if test "$enable_pcre2grep_libz" = "yes"; then
AC_DEFINE([SUPPORT_LIBZ], [], [
Define to any value to allow pcre2grep to be linked with libz, so that it is
able to handle .gz files.])
fi
if test "$enable_pcre2grep_libbz2" = "yes"; then
AC_DEFINE([SUPPORT_LIBBZ2], [], [
Define to any value to allow pcre2grep to be linked with libbz2, so that it
is able to handle .bz2 files.])
fi
if test $with_pcre2grep_bufsize -lt 8192 ; then
AC_MSG_WARN([$with_pcre2grep_bufsize is too small for --with-pcre2grep-bufsize; using 8192])
with_pcre2grep_bufsize="8192"
else
if test $? -gt 1 ; then
AC_MSG_ERROR([Bad value for --with-pcre2grep-bufsize])
fi
fi
AC_DEFINE_UNQUOTED([PCRE2GREP_BUFSIZE], [$with_pcre2grep_bufsize], [
The value of PCRE2GREP_BUFSIZE determines the size of buffer used by pcre2grep
to hold parts of the file it is searching. This is also the minimum value.
The actual amount of memory used by pcre2grep is three times this number,
because it allows for the buffering of "before" and "after" lines.])
if test "$enable_pcre2test_libedit" = "yes"; then
AC_DEFINE([SUPPORT_LIBEDIT], [], [
Define to any value to allow pcre2test to be linked with libedit.])
LIBREADLINE="$LIBEDIT"
elif test "$enable_pcre2test_libreadline" = "yes"; then
AC_DEFINE([SUPPORT_LIBREADLINE], [], [
Define to any value to allow pcre2test to be linked with libreadline.])
fi
AC_DEFINE_UNQUOTED([NEWLINE_DEFAULT], [$ac_pcre2_newline_value], [
The value of NEWLINE_DEFAULT determines the default newline character
sequence. PCRE2 client programs can override this by selecting other values
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY),
and 5 (ANYCRLF).])
if test "$enable_bsr_anycrlf" = "yes"; then
AC_DEFINE([BSR_ANYCRLF], [], [
By default, the \R escape sequence matches any Unicode line ending
character or sequence of characters. If BSR_ANYCRLF is defined (to any
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
The build-time default can be overridden by the user of PCRE2 at runtime.])
fi
AC_DEFINE_UNQUOTED([LINK_SIZE], [$with_link_size], [
The value of LINK_SIZE determines the number of bytes used to store
links as offsets within the compiled regex. The default is 2, which
allows for compiled patterns up to 64K long. This covers the vast
majority of cases. However, PCRE2 can also be compiled to use 3 or 4
bytes instead. This allows for longer patterns in extreme cases.])
AC_DEFINE_UNQUOTED([PARENS_NEST_LIMIT], [$with_parens_nest_limit], [
The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
parentheses (of any kind) in a pattern. This limits the amount of system
stack that is used while compiling a pattern.])
AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
The value of MATCH_LIMIT determines the default number of times the
internal match() function can be called during a single execution of
pcre2_match(). There is a runtime interface for setting a different
limit. The limit exists in order to catch runaway regular
expressions that take for ever to determine that they do not match.
The default is set very large so that it does not accidentally catch
legitimate cases.])
AC_DEFINE_UNQUOTED([MATCH_LIMIT_RECURSION], [$with_match_limit_recursion], [
The above limit applies to all calls of match(), whether or not they
increase the recursion depth. In some environments it is desirable
to limit the depth of recursive calls of match() more strictly, in
order to restrict the maximum amount of stack (or heap, if
HEAP_MATCH_RECURSE is defined) that is used. The value of
MATCH_LIMIT_RECURSION applies only to recursive calls of match(). To
have any useful effect, it must be less than the value of
MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT.
There is a runtime method for setting a different limit.])
AC_DEFINE([MAX_NAME_SIZE], [32], [
This limit is parameterized just in case anybody ever wants to
change it. Care must be taken if it is increased, because it guards
against integer overflow caused by enormously large patterns.])
AC_DEFINE([MAX_NAME_COUNT], [10000], [
This limit is parameterized just in case anybody ever wants to
change it. Care must be taken if it is increased, because it guards
against integer overflow caused by enormously large patterns.])
AH_VERBATIM([PCRE2_EXP_DEFN], [
/* If you are compiling for a system other than a Unix-like system or
Win32, and it needs some magic to be inserted before the definition
of a function that is exported by the library, define this macro to
contain the relevant magic. If you do not define this macro, a suitable
__declspec value is used for Windows systems; in other environments
"extern" is used for a C compiler and "extern C" for a C++ compiler.
This macro apears at the start of every exported function that is part
of the external API. It does not appear on functions that are "external"
in the C sense, but which are internal to the library. */
#undef PCRE2_EXP_DEFN])
if test "$enable_ebcdic" = "yes"; then
AC_DEFINE_UNQUOTED([EBCDIC], [], [
If you are compiling for a system that uses EBCDIC instead of ASCII
character codes, define this macro to any value. When EBCDIC is set, PCRE2
assumes that all input strings are in EBCDIC. If you do not define this
macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It
is not possible to build a version of PCRE2 that supports both EBCDIC and
UTF-8/16/32.])
fi
if test "$enable_ebcdic_nl25" = "yes"; then
AC_DEFINE_UNQUOTED([EBCDIC_NL25], [], [
In an EBCDIC environment, define this macro to any value to arrange for
the NL character to be 0x25 instead of the default 0x15. NL plays the role
that LF does in an ASCII/Unicode environment.])
fi
if test "$enable_valgrind" = "yes"; then
AC_DEFINE_UNQUOTED([SUPPORT_VALGRIND], [], [
Define to any value for valgrind support to find invalid memory reads.])
fi
# Platform specific issues
NO_UNDEFINED=
EXPORT_ALL_SYMBOLS=
case $host_os in
cygwin* | mingw* )
if test X"$enable_shared" = Xyes; then
NO_UNDEFINED="-no-undefined"
EXPORT_ALL_SYMBOLS="-Wl,--export-all-symbols"
fi
;;
esac
# The extra LDFLAGS for each particular library. The libpcre2*_version values
# are m4 variables, assigned above.
EXTRA_LIBPCRE2_8_LDFLAGS="$EXTRA_LIBPCRE2_8_LDFLAGS \
$NO_UNDEFINED -version-info libpcre2_8_version"
EXTRA_LIBPCRE2_16_LDFLAGS="$EXTRA_LIBPCRE2_16_LDFLAGS \
$NO_UNDEFINED -version-info libpcre2_16_version"
EXTRA_LIBPCRE2_32_LDFLAGS="$EXTRA_LIBPCRE2_32_LDFLAGS \
$NO_UNDEFINED -version-info libpcre2_32_version"
EXTRA_LIBPCRE2_POSIX_LDFLAGS="$EXTRA_LIBPCRE2_POSIX_LDFLAGS \
$NO_UNDEFINED -version-info libpcre2_posix_version"
AC_SUBST(EXTRA_LIBPCRE2_8_LDFLAGS)
AC_SUBST(EXTRA_LIBPCRE2_16_LDFLAGS)
AC_SUBST(EXTRA_LIBPCRE2_32_LDFLAGS)
AC_SUBST(EXTRA_LIBPCRE2_POSIX_LDFLAGS)
# When we run 'make distcheck', use these arguments. Turning off compiler
# optimization makes it run faster.
DISTCHECK_CONFIGURE_FLAGS="CFLAGS='' CXXFLAGS='' --enable-pcre2-16 --enable-pcre2-32 --enable-jit --enable-utf"
AC_SUBST(DISTCHECK_CONFIGURE_FLAGS)
# Check that, if --enable-pcre2grep-libz or --enable-pcre2grep-libbz2 is
# specified, the relevant library is available.
if test "$enable_pcre2grep_libz" = "yes"; then
if test "$HAVE_ZLIB_H" != "1"; then
echo "** Cannot --enable-pcre2grep-libz because zlib.h was not found"
exit 1
fi
if test "$HAVE_LIBZ" != "1"; then
echo "** Cannot --enable-pcre2grep-libz because libz was not found"
exit 1
fi
LIBZ="-lz"
fi
AC_SUBST(LIBZ)
if test "$enable_pcre2grep_libbz2" = "yes"; then
if test "$HAVE_BZLIB_H" != "1"; then
echo "** Cannot --enable-pcre2grep-libbz2 because bzlib.h was not found"
exit 1
fi
if test "$HAVE_LIBBZ2" != "1"; then
echo "** Cannot --enable-pcre2grep-libbz2 because libbz2 was not found"
exit 1
fi
LIBBZ2="-lbz2"
fi
AC_SUBST(LIBBZ2)
# Similarly for --enable-pcre2test-readline
if test "$enable_pcre2test_libedit" = "yes"; then
if test "$enable_pcre2test_libreadline" = "yes"; then
echo "** Cannot use both --enable-pcre2test-libedit and --enable-pcre2test-readline"
exit 1
fi
if test "$HAVE_EDITLINE_READLINE_H" != "1" -a \
"$HAVE_READLINE_READLINE_H" != "1"; then
echo "** Cannot --enable-pcre2test-libedit because neither editline/readline.h"
echo "** nor readline/readline.h was found."
exit 1
fi
if test -z "$LIBEDIT"; then
echo "** Cannot --enable-pcre2test-libedit because libedit library was not found."
exit 1
fi
fi
if test "$enable_pcre2test_libreadline" = "yes"; then
if test "$HAVE_READLINE_H" != "1"; then
echo "** Cannot --enable-pcre2test-readline because readline/readline.h was not found."
exit 1
fi
if test "$HAVE_HISTORY_H" != "1"; then
echo "** Cannot --enable-pcre2test-readline because readline/history.h was not found."
exit 1
fi
if test -z "$LIBREADLINE"; then
echo "** Cannot --enable-pcre2test-readline because readline library was not found."
exit 1
fi
fi
# Handle valgrind support
if test "$enable_valgrind" = "yes"; then
m4_ifdef([PKG_CHECK_MODULES],
[PKG_CHECK_MODULES([VALGRIND],[valgrind])],
[AC_MSG_ERROR([pkg-config not supported])])
fi
# Handle code coverage reporting support
if test "$enable_coverage" = "yes"; then
if test "x$GCC" != "xyes"; then
AC_MSG_ERROR([Code coverage reports can only be generated when using GCC])
fi
# ccache is incompatible with gcov
AC_PATH_PROG([SHTOOL],[shtool],[false])
case `$SHTOOL path $CC` in
*ccache*) cc_ccache=yes;;
*) cc_ccache=no;;
esac
if test "$cc_ccache" = "yes"; then
if test -z "$CCACHE_DISABLE" -o "$CCACHE_DISABLE" != "1"; then
AC_MSG_ERROR([must export CCACHE_DISABLE=1 to disable ccache for code coverage])
fi
fi
AC_ARG_VAR([LCOV],[the ltp lcov program])
AC_PATH_PROG([LCOV],[lcov],[false])
if test "x$LCOV" = "xfalse"; then
AC_MSG_ERROR([lcov not found])
fi
AC_ARG_VAR([GENHTML],[the ltp genhtml program])
AC_PATH_PROG([GENHTML],[genhtml],[false])
if test "x$GENHTML" = "xfalse"; then
AC_MSG_ERROR([genhtml not found])
fi
# Set flags needed for gcov
GCOV_CFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
GCOV_CXXFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
GCOV_LIBS="-lgcov"
AC_SUBST([GCOV_CFLAGS])
AC_SUBST([GCOV_CXXFLAGS])
AC_SUBST([GCOV_LIBS])
fi # enable_coverage
AM_CONDITIONAL([WITH_GCOV],[test "x$enable_coverage" = "xyes"])
# Produce these files, in addition to config.h.
AC_CONFIG_FILES(
Makefile
libpcre2-8.pc
libpcre2-16.pc
libpcre2-32.pc
libpcre2-posix.pc
pcre2-config
src/pcre2.h
)
# Make the generated script files executable.
AC_CONFIG_COMMANDS([script-chmod], [chmod a+x pcre2-config])
# Make sure that pcre2_chartables.c is removed in case the method for
# creating it was changed by reconfiguration.
AC_CONFIG_COMMANDS([delete-old-chartables], [rm -f pcre2_chartables.c])
AC_OUTPUT
# Print out a nice little message after configure is run displaying the
# chosen options.
ebcdic_nl_code=n/a
if test "$enable_ebcdic_nl25" = "yes"; then
ebcdic_nl_code=0x25
elif test "$enable_ebcdic" = "yes"; then
ebcdic_nl_code=0x15
fi
cat <<EOF
$PACKAGE-$VERSION configuration summary:
Install prefix .................. : ${prefix}
C preprocessor .................. : ${CPP}
C compiler ...................... : ${CC}
Linker .......................... : ${LD}
C preprocessor flags ............ : ${CPPFLAGS}
C compiler flags ................ : ${CFLAGS} ${VISIBILITY_CFLAGS}
Linker flags .................... : ${LDFLAGS}
Extra libraries ................. : ${LIBS}
Build 8-bit pcre2 library ....... : ${enable_pcre2_8}
Build 16-bit pcre2 library ...... : ${enable_pcre2_16}
Build 32-bit pcre2 library ...... : ${enable_pcre2_32}
Include debugging code .......... : ${enable_debug}
Enable JIT compiling support .... : ${enable_jit}
Enable Unicode support .......... : ${enable_unicode}
Newline char/sequence ........... : ${enable_newline}
\R matches only ANYCRLF ......... : ${enable_bsr_anycrlf}
EBCDIC coding ................... : ${enable_ebcdic}
EBCDIC code for NL .............. : ${ebcdic_nl_code}
Rebuild char tables ............. : ${enable_rebuild_chartables}
Use stack recursion ............. : ${enable_stack_for_recursion}
Internal link size .............. : ${with_link_size}
Nested parentheses limit ........ : ${with_parens_nest_limit}
Match limit ..................... : ${with_match_limit}
Match limit recursion ........... : ${with_match_limit_recursion}
Build shared libs ............... : ${enable_shared}
Build static libs ............... : ${enable_static}
Use JIT in pcre2grep ............ : ${enable_pcre2grep_jit}
Buffer size for pcre2grep ....... : ${with_pcre2grep_bufsize}
Link pcre2grep with libz ........ : ${enable_pcre2grep_libz}
Link pcre2grep with libbz2 ...... : ${enable_pcre2grep_libbz2}
Link pcre2test with libedit ..... : ${enable_pcre2test_libedit}
Link pcre2test with libreadline . : ${enable_pcre2test_libreadline}
Valgrind support ................ : ${enable_valgrind}
Code coverage ................... : ${enable_coverage}
EOF
dnl end configure.ac

791
pcre2/depcomp Executable file
View File

@ -0,0 +1,791 @@
#! /bin/sh
# depcomp - compile a program generating dependencies as side-effects
scriptversion=2013-05-30.07; # UTC
# Copyright (C) 1999-2014 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
case $1 in
'')
echo "$0: No command. Try '$0 --help' for more information." 1>&2
exit 1;
;;
-h | --h*)
cat <<\EOF
Usage: depcomp [--help] [--version] PROGRAM [ARGS]
Run PROGRAMS ARGS to compile a file, generating dependencies
as side-effects.
Environment variables:
depmode Dependency tracking mode.
source Source file read by 'PROGRAMS ARGS'.
object Object file output by 'PROGRAMS ARGS'.
DEPDIR directory where to store dependencies.
depfile Dependency file to output.
tmpdepfile Temporary file to use when outputting dependencies.
libtool Whether libtool is used (yes/no).
Report bugs to <bug-automake@gnu.org>.
EOF
exit $?
;;
-v | --v*)
echo "depcomp $scriptversion"
exit $?
;;
esac
# Get the directory component of the given path, and save it in the
# global variables '$dir'. Note that this directory component will
# be either empty or ending with a '/' character. This is deliberate.
set_dir_from ()
{
case $1 in
*/*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`;;
*) dir=;;
esac
}
# Get the suffix-stripped basename of the given path, and save it the
# global variable '$base'.
set_base_from ()
{
base=`echo "$1" | sed -e 's|^.*/||' -e 's/\.[^.]*$//'`
}
# If no dependency file was actually created by the compiler invocation,
# we still have to create a dummy depfile, to avoid errors with the
# Makefile "include basename.Plo" scheme.
make_dummy_depfile ()
{
echo "#dummy" > "$depfile"
}
# Factor out some common post-processing of the generated depfile.
# Requires the auxiliary global variable '$tmpdepfile' to be set.
aix_post_process_depfile ()
{
# If the compiler actually managed to produce a dependency file,
# post-process it.
if test -f "$tmpdepfile"; then
# Each line is of the form 'foo.o: dependency.h'.
# Do two passes, one to just change these to
# $object: dependency.h
# and one to simply output
# dependency.h:
# which is needed to avoid the deleted-header problem.
{ sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile"
sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile"
} > "$depfile"
rm -f "$tmpdepfile"
else
make_dummy_depfile
fi
}
# A tabulation character.
tab=' '
# A newline character.
nl='
'
# Character ranges might be problematic outside the C locale.
# These definitions help.
upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ
lower=abcdefghijklmnopqrstuvwxyz
digits=0123456789
alpha=${upper}${lower}
if test -z "$depmode" || test -z "$source" || test -z "$object"; then
echo "depcomp: Variables source, object and depmode must be set" 1>&2
exit 1
fi
# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po.
depfile=${depfile-`echo "$object" |
sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`}
tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
rm -f "$tmpdepfile"
# Avoid interferences from the environment.
gccflag= dashmflag=
# Some modes work just like other modes, but use different flags. We
# parameterize here, but still list the modes in the big case below,
# to make depend.m4 easier to write. Note that we *cannot* use a case
# here, because this file can only contain one case statement.
if test "$depmode" = hp; then
# HP compiler uses -M and no extra arg.
gccflag=-M
depmode=gcc
fi
if test "$depmode" = dashXmstdout; then
# This is just like dashmstdout with a different argument.
dashmflag=-xM
depmode=dashmstdout
fi
cygpath_u="cygpath -u -f -"
if test "$depmode" = msvcmsys; then
# This is just like msvisualcpp but w/o cygpath translation.
# Just convert the backslash-escaped backslashes to single forward
# slashes to satisfy depend.m4
cygpath_u='sed s,\\\\,/,g'
depmode=msvisualcpp
fi
if test "$depmode" = msvc7msys; then
# This is just like msvc7 but w/o cygpath translation.
# Just convert the backslash-escaped backslashes to single forward
# slashes to satisfy depend.m4
cygpath_u='sed s,\\\\,/,g'
depmode=msvc7
fi
if test "$depmode" = xlc; then
# IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information.
gccflag=-qmakedep=gcc,-MF
depmode=gcc
fi
case "$depmode" in
gcc3)
## gcc 3 implements dependency tracking that does exactly what
## we want. Yay! Note: for some reason libtool 1.4 doesn't like
## it if -MD -MP comes after the -MF stuff. Hmm.
## Unfortunately, FreeBSD c89 acceptance of flags depends upon
## the command line argument order; so add the flags where they
## appear in depend2.am. Note that the slowdown incurred here
## affects only configure: in makefiles, %FASTDEP% shortcuts this.
for arg
do
case $arg in
-c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;;
*) set fnord "$@" "$arg" ;;
esac
shift # fnord
shift # $arg
done
"$@"
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
mv "$tmpdepfile" "$depfile"
;;
gcc)
## Note that this doesn't just cater to obsosete pre-3.x GCC compilers.
## but also to in-use compilers like IMB xlc/xlC and the HP C compiler.
## (see the conditional assignment to $gccflag above).
## There are various ways to get dependency output from gcc. Here's
## why we pick this rather obscure method:
## - Don't want to use -MD because we'd like the dependencies to end
## up in a subdir. Having to rename by hand is ugly.
## (We might end up doing this anyway to support other compilers.)
## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
## -MM, not -M (despite what the docs say). Also, it might not be
## supported by the other compilers which use the 'gcc' depmode.
## - Using -M directly means running the compiler twice (even worse
## than renaming).
if test -z "$gccflag"; then
gccflag=-MD,
fi
"$@" -Wp,"$gccflag$tmpdepfile"
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
echo "$object : \\" > "$depfile"
# The second -e expression handles DOS-style file names with drive
# letters.
sed -e 's/^[^:]*: / /' \
-e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
## This next piece of magic avoids the "deleted header file" problem.
## The problem is that when a header file which appears in a .P file
## is deleted, the dependency causes make to die (because there is
## typically no way to rebuild the header). We avoid this by adding
## dummy dependencies for each header file. Too bad gcc doesn't do
## this for us directly.
## Some versions of gcc put a space before the ':'. On the theory
## that the space means something, we add a space to the output as
## well. hp depmode also adds that space, but also prefixes the VPATH
## to the object. Take care to not repeat it in the output.
## Some versions of the HPUX 10.20 sed can't process this invocation
## correctly. Breaking it into two sed invocations is a workaround.
tr ' ' "$nl" < "$tmpdepfile" \
| sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \
| sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
hp)
# This case exists only to let depend.m4 do its work. It works by
# looking at the text of this script. This case will never be run,
# since it is checked for above.
exit 1
;;
sgi)
if test "$libtool" = yes; then
"$@" "-Wp,-MDupdate,$tmpdepfile"
else
"$@" -MDupdate "$tmpdepfile"
fi
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files
echo "$object : \\" > "$depfile"
# Clip off the initial element (the dependent). Don't try to be
# clever and replace this with sed code, as IRIX sed won't handle
# lines with more than a fixed number of characters (4096 in
# IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines;
# the IRIX cc adds comments like '#:fec' to the end of the
# dependency line.
tr ' ' "$nl" < "$tmpdepfile" \
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \
| tr "$nl" ' ' >> "$depfile"
echo >> "$depfile"
# The second pass generates a dummy entry for each header file.
tr ' ' "$nl" < "$tmpdepfile" \
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
>> "$depfile"
else
make_dummy_depfile
fi
rm -f "$tmpdepfile"
;;
xlc)
# This case exists only to let depend.m4 do its work. It works by
# looking at the text of this script. This case will never be run,
# since it is checked for above.
exit 1
;;
aix)
# The C for AIX Compiler uses -M and outputs the dependencies
# in a .u file. In older versions, this file always lives in the
# current directory. Also, the AIX compiler puts '$object:' at the
# start of each line; $object doesn't have directory information.
# Version 6 uses the directory in both cases.
set_dir_from "$object"
set_base_from "$object"
if test "$libtool" = yes; then
tmpdepfile1=$dir$base.u
tmpdepfile2=$base.u
tmpdepfile3=$dir.libs/$base.u
"$@" -Wc,-M
else
tmpdepfile1=$dir$base.u
tmpdepfile2=$dir$base.u
tmpdepfile3=$dir$base.u
"$@" -M
fi
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
exit $stat
fi
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
do
test -f "$tmpdepfile" && break
done
aix_post_process_depfile
;;
tcc)
# tcc (Tiny C Compiler) understand '-MD -MF file' since version 0.9.26
# FIXME: That version still under development at the moment of writing.
# Make that this statement remains true also for stable, released
# versions.
# It will wrap lines (doesn't matter whether long or short) with a
# trailing '\', as in:
#
# foo.o : \
# foo.c \
# foo.h \
#
# It will put a trailing '\' even on the last line, and will use leading
# spaces rather than leading tabs (at least since its commit 0394caf7
# "Emit spaces for -MD").
"$@" -MD -MF "$tmpdepfile"
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
# Each non-empty line is of the form 'foo.o : \' or ' dep.h \'.
# We have to change lines of the first kind to '$object: \'.
sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile"
# And for each line of the second kind, we have to emit a 'dep.h:'
# dummy dependency, to avoid the deleted-header problem.
sed -n -e 's|^ *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile"
rm -f "$tmpdepfile"
;;
## The order of this option in the case statement is important, since the
## shell code in configure will try each of these formats in the order
## listed in this file. A plain '-MD' option would be understood by many
## compilers, so we must ensure this comes after the gcc and icc options.
pgcc)
# Portland's C compiler understands '-MD'.
# Will always output deps to 'file.d' where file is the root name of the
# source file under compilation, even if file resides in a subdirectory.
# The object file name does not affect the name of the '.d' file.
# pgcc 10.2 will output
# foo.o: sub/foo.c sub/foo.h
# and will wrap long lines using '\' :
# foo.o: sub/foo.c ... \
# sub/foo.h ... \
# ...
set_dir_from "$object"
# Use the source, not the object, to determine the base name, since
# that's sadly what pgcc will do too.
set_base_from "$source"
tmpdepfile=$base.d
# For projects that build the same source file twice into different object
# files, the pgcc approach of using the *source* file root name can cause
# problems in parallel builds. Use a locking strategy to avoid stomping on
# the same $tmpdepfile.
lockdir=$base.d-lock
trap "
echo '$0: caught signal, cleaning up...' >&2
rmdir '$lockdir'
exit 1
" 1 2 13 15
numtries=100
i=$numtries
while test $i -gt 0; do
# mkdir is a portable test-and-set.
if mkdir "$lockdir" 2>/dev/null; then
# This process acquired the lock.
"$@" -MD
stat=$?
# Release the lock.
rmdir "$lockdir"
break
else
# If the lock is being held by a different process, wait
# until the winning process is done or we timeout.
while test -d "$lockdir" && test $i -gt 0; do
sleep 1
i=`expr $i - 1`
done
fi
i=`expr $i - 1`
done
trap - 1 2 13 15
if test $i -le 0; then
echo "$0: failed to acquire lock after $numtries attempts" >&2
echo "$0: check lockdir '$lockdir'" >&2
exit 1
fi
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
# Each line is of the form `foo.o: dependent.h',
# or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
# Do two passes, one to just change these to
# `$object: dependent.h' and one to simply `dependent.h:'.
sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
# Some versions of the HPUX 10.20 sed can't process this invocation
# correctly. Breaking it into two sed invocations is a workaround.
sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \
| sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
hp2)
# The "hp" stanza above does not work with aCC (C++) and HP's ia64
# compilers, which have integrated preprocessors. The correct option
# to use with these is +Maked; it writes dependencies to a file named
# 'foo.d', which lands next to the object file, wherever that
# happens to be.
# Much of this is similar to the tru64 case; see comments there.
set_dir_from "$object"
set_base_from "$object"
if test "$libtool" = yes; then
tmpdepfile1=$dir$base.d
tmpdepfile2=$dir.libs/$base.d
"$@" -Wc,+Maked
else
tmpdepfile1=$dir$base.d
tmpdepfile2=$dir$base.d
"$@" +Maked
fi
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile1" "$tmpdepfile2"
exit $stat
fi
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2"
do
test -f "$tmpdepfile" && break
done
if test -f "$tmpdepfile"; then
sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile"
# Add 'dependent.h:' lines.
sed -ne '2,${
s/^ *//
s/ \\*$//
s/$/:/
p
}' "$tmpdepfile" >> "$depfile"
else
make_dummy_depfile
fi
rm -f "$tmpdepfile" "$tmpdepfile2"
;;
tru64)
# The Tru64 compiler uses -MD to generate dependencies as a side
# effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'.
# At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
# dependencies in 'foo.d' instead, so we check for that too.
# Subdirectories are respected.
set_dir_from "$object"
set_base_from "$object"
if test "$libtool" = yes; then
# Libtool generates 2 separate objects for the 2 libraries. These
# two compilations output dependencies in $dir.libs/$base.o.d and
# in $dir$base.o.d. We have to check for both files, because
# one of the two compilations can be disabled. We should prefer
# $dir$base.o.d over $dir.libs/$base.o.d because the latter is
# automatically cleaned when .libs/ is deleted, while ignoring
# the former would cause a distcleancheck panic.
tmpdepfile1=$dir$base.o.d # libtool 1.5
tmpdepfile2=$dir.libs/$base.o.d # Likewise.
tmpdepfile3=$dir.libs/$base.d # Compaq CCC V6.2-504
"$@" -Wc,-MD
else
tmpdepfile1=$dir$base.d
tmpdepfile2=$dir$base.d
tmpdepfile3=$dir$base.d
"$@" -MD
fi
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
exit $stat
fi
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
do
test -f "$tmpdepfile" && break
done
# Same post-processing that is required for AIX mode.
aix_post_process_depfile
;;
msvc7)
if test "$libtool" = yes; then
showIncludes=-Wc,-showIncludes
else
showIncludes=-showIncludes
fi
"$@" $showIncludes > "$tmpdepfile"
stat=$?
grep -v '^Note: including file: ' "$tmpdepfile"
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
echo "$object : \\" > "$depfile"
# The first sed program below extracts the file names and escapes
# backslashes for cygpath. The second sed program outputs the file
# name when reading, but also accumulates all include files in the
# hold buffer in order to output them again at the end. This only
# works with sed implementations that can handle large buffers.
sed < "$tmpdepfile" -n '
/^Note: including file: *\(.*\)/ {
s//\1/
s/\\/\\\\/g
p
}' | $cygpath_u | sort -u | sed -n '
s/ /\\ /g
s/\(.*\)/'"$tab"'\1 \\/p
s/.\(.*\) \\/\1:/
H
$ {
s/.*/'"$tab"'/
G
p
}' >> "$depfile"
echo >> "$depfile" # make sure the fragment doesn't end with a backslash
rm -f "$tmpdepfile"
;;
msvc7msys)
# This case exists only to let depend.m4 do its work. It works by
# looking at the text of this script. This case will never be run,
# since it is checked for above.
exit 1
;;
#nosideeffect)
# This comment above is used by automake to tell side-effect
# dependency tracking mechanisms from slower ones.
dashmstdout)
# Important note: in order to support this mode, a compiler *must*
# always write the preprocessed file to stdout, regardless of -o.
"$@" || exit $?
# Remove the call to Libtool.
if test "$libtool" = yes; then
while test "X$1" != 'X--mode=compile'; do
shift
done
shift
fi
# Remove '-o $object'.
IFS=" "
for arg
do
case $arg in
-o)
shift
;;
$object)
shift
;;
*)
set fnord "$@" "$arg"
shift # fnord
shift # $arg
;;
esac
done
test -z "$dashmflag" && dashmflag=-M
# Require at least two characters before searching for ':'
# in the target name. This is to cope with DOS-style filenames:
# a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise.
"$@" $dashmflag |
sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile"
rm -f "$depfile"
cat < "$tmpdepfile" > "$depfile"
# Some versions of the HPUX 10.20 sed can't process this sed invocation
# correctly. Breaking it into two sed invocations is a workaround.
tr ' ' "$nl" < "$tmpdepfile" \
| sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
| sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
dashXmstdout)
# This case only exists to satisfy depend.m4. It is never actually
# run, as this mode is specially recognized in the preamble.
exit 1
;;
makedepend)
"$@" || exit $?
# Remove any Libtool call
if test "$libtool" = yes; then
while test "X$1" != 'X--mode=compile'; do
shift
done
shift
fi
# X makedepend
shift
cleared=no eat=no
for arg
do
case $cleared in
no)
set ""; shift
cleared=yes ;;
esac
if test $eat = yes; then
eat=no
continue
fi
case "$arg" in
-D*|-I*)
set fnord "$@" "$arg"; shift ;;
# Strip any option that makedepend may not understand. Remove
# the object too, otherwise makedepend will parse it as a source file.
-arch)
eat=yes ;;
-*|$object)
;;
*)
set fnord "$@" "$arg"; shift ;;
esac
done
obj_suffix=`echo "$object" | sed 's/^.*\././'`
touch "$tmpdepfile"
${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
rm -f "$depfile"
# makedepend may prepend the VPATH from the source file name to the object.
# No need to regex-escape $object, excess matching of '.' is harmless.
sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile"
# Some versions of the HPUX 10.20 sed can't process the last invocation
# correctly. Breaking it into two sed invocations is a workaround.
sed '1,2d' "$tmpdepfile" \
| tr ' ' "$nl" \
| sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
| sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile" "$tmpdepfile".bak
;;
cpp)
# Important note: in order to support this mode, a compiler *must*
# always write the preprocessed file to stdout.
"$@" || exit $?
# Remove the call to Libtool.
if test "$libtool" = yes; then
while test "X$1" != 'X--mode=compile'; do
shift
done
shift
fi
# Remove '-o $object'.
IFS=" "
for arg
do
case $arg in
-o)
shift
;;
$object)
shift
;;
*)
set fnord "$@" "$arg"
shift # fnord
shift # $arg
;;
esac
done
"$@" -E \
| sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
-e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
| sed '$ s: \\$::' > "$tmpdepfile"
rm -f "$depfile"
echo "$object : \\" > "$depfile"
cat < "$tmpdepfile" >> "$depfile"
sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
msvisualcpp)
# Important note: in order to support this mode, a compiler *must*
# always write the preprocessed file to stdout.
"$@" || exit $?
# Remove the call to Libtool.
if test "$libtool" = yes; then
while test "X$1" != 'X--mode=compile'; do
shift
done
shift
fi
IFS=" "
for arg
do
case "$arg" in
-o)
shift
;;
$object)
shift
;;
"-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
set fnord "$@"
shift
shift
;;
*)
set fnord "$@" "$arg"
shift
shift
;;
esac
done
"$@" -E 2>/dev/null |
sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile"
rm -f "$depfile"
echo "$object : \\" > "$depfile"
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile"
echo "$tab" >> "$depfile"
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile"
rm -f "$tmpdepfile"
;;
msvcmsys)
# This case exists only to let depend.m4 do its work. It works by
# looking at the text of this script. This case will never be run,
# since it is checked for above.
exit 1
;;
none)
exec "$@"
;;
*)
echo "Unknown depmode $depmode" 1>&2
exit 1
;;
esac
exit 0
# Local Variables:
# mode: shell-script
# sh-indentation: 2
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC"
# time-stamp-end: "; # UTC"
# End:

View File

@ -0,0 +1,391 @@
Building PCRE2 without using autotools
--------------------------------------
This document has been converted from the PCRE1 document. I have removed a
number of sections about building in various environments, as they applied only
to PCRE1 and are probably out of date.
This document contains the following sections:
General
Generic instructions for the PCRE2 C library
Stack size in Windows environments
Linking programs in Windows environments
Calling conventions in Windows environments
Comments about Win32 builds
Building PCRE2 on Windows with CMake
Testing with RunTest.bat
Building PCRE2 on native z/OS and z/VM
GENERAL
The basic PCRE2 library consists entirely of code written in Standard C, and so
should compile successfully on any system that has a Standard C compiler and
library.
The PCRE2 distribution includes a "configure" file for use by the
configure/make (autotools) build system, as found in many Unix-like
environments. The README file contains information about the options for
"configure".
There is also support for CMake, which some users prefer, especially in Windows
environments, though it can also be run in Unix-like environments. See the
section entitled "Building PCRE2 on Windows with CMake" below.
Versions of src/config.h and src/pcre2.h are distributed in the PCRE2 tarballs
under the names src/config.h.generic and src/pcre2.h.generic. These are
provided for those who build PCRE2 without using "configure" or CMake. If you
use "configure" or CMake, the .generic versions are not used.
GENERIC INSTRUCTIONS FOR THE PCRE2 C LIBRARY
The following are generic instructions for building the PCRE2 C library "by
hand". If you are going to use CMake, this section does not apply to you; you
can skip ahead to the CMake section.
(1) Copy or rename the file src/config.h.generic as src/config.h, and edit the
macro settings that it contains to whatever is appropriate for your
environment. In particular, you can alter the definition of the NEWLINE
macro to specify what character(s) you want to be interpreted as line
terminators.
When you compile any of the PCRE2 modules, you must specify
-DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
sources.
An alternative approach is not to edit src/config.h, but to use -D on the
compiler command line to make any changes that you need to the
configuration options. In this case -DHAVE_CONFIG_H must not be set.
NOTE: There have been occasions when the way in which certain parameters
in src/config.h are used has changed between releases. (In the
configure/make world, this is handled automatically.) When upgrading to a
new release, you are strongly advised to review src/config.h.generic
before re-using what you had previously.
(2) Copy or rename the file src/pcre2.h.generic as src/pcre2.h.
(3) EITHER:
Copy or rename file src/pcre2_chartables.c.dist as
src/pcre2_chartables.c.
OR:
Compile src/dftables.c as a stand-alone program (using -DHAVE_CONFIG_H
if you have set up src/config.h), and then run it with the single
argument "src/pcre2_chartables.c". This generates a set of standard
character tables and writes them to that file. The tables are generated
using the default C locale for your system. If you want to use a locale
that is specified by LC_xxx environment variables, add the -L option to
the dftables command. You must use this method if you are building on a
system that uses EBCDIC code.
The tables in src/pcre2_chartables.c are defaults. The caller of PCRE2 can
specify alternative tables at run time.
(4) For an 8-bit library, compile the following source files from the src
directory, setting -DPCRE2_CODE_UNIT_WIDTH=8 as a compiler option. Also
set -DHAVE_CONFIG_H if you have set up src/config.h with your
configuration, or else use other -D settings to change the configuration
as required.
pcre2_auto_possess.c
pcre2_chartables.c
pcre2_compile.c
pcre2_config.c
pcre2_context.c
pcre2_dfa_match.c
pcre2_error.c
pcre2_jit_compile.c
pcre2_maketables.c
pcre2_match.c
pcre2_match_data.c
pcre2_newline.c
pcre2_ord2utf.c
pcre2_pattern_info.c
pcre2_serialize.c
pcre2_string_utils.c
pcre2_study.c
pcre2_substitute.c
pcre2_substring.c
pcre2_tables.c
pcre2_ucd.c
pcre2_valid_utf.c
pcre2_xclass.c
Make sure that you include -I. in the compiler command (or equivalent for
an unusual compiler) so that all included PCRE2 header files are first
sought in the src directory under the current directory. Otherwise you run
the risk of picking up a previously-installed file from somewhere else.
Note that you must compile pcre2_jit_compile.c, even if you have not
defined SUPPORT_JIT in src/config.h, because when JIT support is not
configured, dummy functions are compiled. When JIT support IS configured,
pcre2_compile.c #includes other files from the sljit subdirectory, where
there should be 16 files, all of whose names begin with "sljit". It also
#includes src/pcre2_jit_match.c and src/pcre2_jit_misc.c, so you should
not compile these yourself.
(5) Now link all the compiled code into an object library in whichever form
your system keeps such libraries. This is the basic PCRE2 C 8-bit library.
If your system has static and shared libraries, you may have to do this
once for each type.
(6) If you want to build a 16-bit library or 32-bit library (as well as, or
instead of the 8-bit library) just supply 16 or 32 as the value of
-DPCRE2_CODE_UNIT_WIDTH when you are compiling.
(7) If you want to build the POSIX wrapper functions (which apply only to the
8-bit library), ensure that you have the src/pcre2posix.h file and then
compile src/pcre2posix.c. Link the result (on its own) as the pcre2posix
library.
(8) The pcre2test program can be linked with any combination of the 8-bit,
16-bit and 32-bit libraries (depending on what you selected in
src/config.h). Compile src/pcre2test.c; don't forget -DHAVE_CONFIG_H if
necessary, but do NOT define PCRE2_CODE_UNIT_WIDTH. Then link with the
appropriate library/ies. If you compiled an 8-bit library, pcre2test also
needs the pcre2posix wrapper library.
(9) Run pcre2test on the testinput files in the testdata directory, and check
that the output matches the corresponding testoutput files. There are
comments about what each test does in the section entitled "Testing PCRE2"
in the README file. If you compiled more than one of the 8-bit, 16-bit and
32-bit libraries, you need to run pcre2test with the -16 option to do
16-bit tests and with the -32 option to do 32-bit tests.
Some tests are relevant only when certain build-time options are selected.
For example, test 4 is for Unicode support, and will not run if you have
built PCRE2 without it. See the comments at the start of each testinput
file. If you have a suitable Unix-like shell, the RunTest script will run
the appropriate tests for you. The command "RunTest list" will output a
list of all the tests.
Note that the supplied files are in Unix format, with just LF characters
as line terminators. You may need to edit them to change this if your
system uses a different convention.
(10) If you have built PCRE2 with SUPPORT_JIT, the JIT features can be tested
by running pcre2test with the -jit option. This is done automatically by
the RunTest script. You might also like to build and run the freestanding
JIT test program, src/pcre2_jit_test.c.
(11) If you want to use the pcre2grep command, compile and link
src/pcre2grep.c; it uses only the basic 8-bit PCRE2 library (it does not
need the pcre2posix library).
STACK SIZE IN WINDOWS ENVIRONMENTS
The default processor stack size of 1Mb in some Windows environments is too
small for matching patterns that need much recursion. In particular, test 2 may
fail because of this. Normally, running out of stack causes a crash, but there
have been cases where the test program has just died silently. See your linker
documentation for how to increase stack size if you experience problems. If you
are using CMake (see "BUILDING PCRE2 ON WINDOWS WITH CMAKE" below) and the gcc
compiler, you can increase the stack size for pcre2test and pcre2grep by
setting the CMAKE_EXE_LINKER_FLAGS variable to "-Wl,--stack,8388608" (for
example). The Linux default of 8Mb is a reasonable choice for the stack, though
even that can be too small for some pattern/subject combinations.
PCRE2 has a compile configuration option to disable the use of stack for
recursion so that heap is used instead. However, pattern matching is
significantly slower when this is done. There is more about stack usage in the
"pcre2stack" documentation.
LINKING PROGRAMS IN WINDOWS ENVIRONMENTS
If you want to statically link a program against a PCRE2 library in the form of
a non-dll .a file, you must define PCRE2_STATIC before including src/pcre2.h.
CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS
It is possible to compile programs to use different calling conventions using
MSVC. Search the web for "calling conventions" for more information. To make it
easier to change the calling convention for the exported functions in the
PCRE2 library, the macro PCRE2_CALL_CONVENTION is present in all the external
definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is
not set, it defaults to empty; the default calling convention is then used
(which is what is wanted most of the time).
COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE2 ON WINDOWS WITH CMAKE")
There are two ways of building PCRE2 using the "configure, make, make install"
paradigm on Windows systems: using MinGW or using Cygwin. These are not at all
the same thing; they are completely different from each other. There is also
support for building using CMake, which some users find a more straightforward
way of building PCRE2 under Windows.
The MinGW home page (http://www.mingw.org/) says this:
MinGW: A collection of freely available and freely distributable Windows
specific header files and import libraries combined with GNU toolsets that
allow one to produce native Windows programs that do not rely on any
3rd-party C runtime DLLs.
The Cygwin home page (http://www.cygwin.com/) says this:
Cygwin is a Linux-like environment for Windows. It consists of two parts:
. A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing
substantial Linux API functionality
. A collection of tools which provide Linux look and feel.
On both MinGW and Cygwin, PCRE2 should build correctly using:
./configure && make && make install
This should create two libraries called libpcre2-8 and libpcre2-posix. These
are independent libraries: when you link with libpcre2-posix you must also link
with libpcre2-8, which contains the basic functions.
Using Cygwin's compiler generates libraries and executables that depend on
cygwin1.dll. If a library that is generated this way is distributed,
cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL
licence, this forces not only PCRE2 to be under the GPL, but also the entire
application. A distributor who wants to keep their own code proprietary must
purchase an appropriate Cygwin licence.
MinGW has no such restrictions. The MinGW compiler generates a library or
executable that can run standalone on Windows without any third party dll or
licensing issues.
But there is more complication:
If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is
to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a
front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's
gcc and MinGW's gcc). So, a user can:
. Build native binaries by using MinGW or by getting Cygwin and using
-mno-cygwin.
. Build binaries that depend on cygwin1.dll by using Cygwin with the normal
compiler flags.
The test files that are supplied with PCRE2 are in UNIX format, with LF
characters as line terminators. Unless your PCRE2 library uses a default
newline option that includes LF as a valid newline, it may be necessary to
change the line terminators in the test files to get some of the tests to work.
BUILDING PCRE2 ON WINDOWS WITH CMAKE
CMake is an alternative configuration facility that can be used instead of
"configure". CMake creates project files (make files, solution files, etc.)
tailored to numerous development environments, including Visual Studio,
Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
spaces in the names for your CMake installation and your PCRE2 source and build
directories.
The following instructions were contributed by a PCRE1 user, but they should
also work for PCRE2. If they are not followed exactly, errors may occur. In the
event that errors do occur, it is recommended that you delete the CMake cache
before attempting to repeat the CMake build process. In the CMake GUI, the
cache can be deleted by selecting "File > Delete Cache".
1. Install the latest CMake version available from http://www.cmake.org/, and
ensure that cmake\bin is on your path.
2. Unzip (retaining folder structure) the PCRE2 source tree into a source
directory such as C:\pcre2. You should ensure your local date and time
is not earlier than the file dates in your source dir if the release is
very new.
3. Create a new, empty build directory, preferably a subdirectory of the
source dir. For example, C:\pcre2\pcre2-xx\build.
4. Run cmake-gui from the Shell envirornment of your build tool, for example,
Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try
to start Cmake from the Windows Start menu, as this can lead to errors.
5. Enter C:\pcre2\pcre2-xx and C:\pcre2\pcre2-xx\build for the source and
build directories, respectively.
6. Hit the "Configure" button.
7. Select the particular IDE / build tool that you are using (Visual
Studio, MSYS makefiles, MinGW makefiles, etc.)
8. The GUI will then list several configuration options. This is where
you can disable Unicode support or select other PCRE2 optional features.
9. Hit "Configure" again. The adjacent "Generate" button should now be
active.
10. Hit "Generate".
11. The build directory should now contain a usable build system, be it a
solution file for Visual Studio, makefiles for MinGW, etc. Exit from
cmake-gui and use the generated build system with your compiler or IDE.
E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE2
solution, select the desired configuration (Debug, or Release, etc.) and
build the ALL_BUILD project.
12. If during configuration with cmake-gui you've elected to build the test
programs, you can execute them by building the test project. E.g., for
MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The
most recent build configuration is targeted by the tests. A summary of
test results is presented. Complete test output is subsequently
available for review in Testing\Temporary under your build dir.
TESTING WITH RUNTEST.BAT
If configured with CMake, building the test project ("make test" or building
ALL_TESTS in Visual Studio) creates (and runs) pcre2_test.bat (and depending
on your configuration options, possibly other test programs) in the build
directory. The pcre2_test.bat script runs RunTest.bat with correct source and
exe paths.
For manual testing with RunTest.bat, provided the build dir is a subdirectory
of the source directory: Open command shell window. Chdir to the location
of your pcre2test.exe and pcre2grep.exe programs. Call RunTest.bat with
"..\RunTest.Bat" or "..\..\RunTest.bat" as appropriate.
To run only a particular test with RunTest.Bat provide a test number argument.
Otherwise:
1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe
have been created.
2. Edit RunTest.bat to indentify the full or relative location of
the pcre2 source (wherein which the testdata folder resides), e.g.:
set srcdir=C:\pcre2\pcre2-10.00
3. In a Windows command environment, chdir to the location of your bat and
exe programs.
4. Run RunTest.bat. Test outputs will automatically be compared to expected
results, and discrepancies will be identified in the console output.
To independently test the just-in-time compiler, run pcre2_jit_test.exe.
BUILDING PCRE2 ON NATIVE Z/OS AND Z/VM
z/OS and z/VM are operating systems for mainframe computers, produced by IBM.
The character code used is EBCDIC, not ASCII or Unicode. In z/OS, UNIX APIs and
applications can be supported through UNIX System Services, and in such an
environment PCRE2 can be built in the same way as in other systems. However, in
native z/OS (without UNIX System Services) and in z/VM, special ports are
required. For details, please see this web site:
http://www.zaconsultants.net
The site currently has ports for PCRE1 releases, but PCRE2 should follow in due
course.
You may also download PCRE1 from WWW.CBTTAPE.ORG, file 882. Everything, source
and executable, is in EBCDIC and native z/OS file formats and this is the
recommended download site.
=============================
Last Updated: 15 June 2015

835
pcre2/doc/html/README.txt Normal file
View File

@ -0,0 +1,835 @@
README file for PCRE2 (Perl-compatible regular expression library)
------------------------------------------------------------------
PCRE2 is a re-working of the original PCRE library to provide an entirely new
API. The latest release of PCRE2 is always available in three alternative
formats from:
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.tar.gz
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.tar.bz2
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.zip
There is a mailing list for discussion about the development of PCRE (both the
original and new APIs) at pcre-dev@exim.org. You can access the archives and
subscribe or manage your subscription here:
https://lists.exim.org/mailman/listinfo/pcre-dev
Please read the NEWS file if you are upgrading from a previous release.
The contents of this README file are:
The PCRE2 APIs
Documentation for PCRE2
Contributions by users of PCRE2
Building PCRE2 on non-Unix-like systems
Building PCRE2 without using autotools
Building PCRE2 using autotools
Retrieving configuration information
Shared libraries
Cross-compiling using autotools
Making new tarballs
Testing PCRE2
Character tables
File manifest
The PCRE2 APIs
--------------
PCRE2 is written in C, and it has its own API. There are three sets of
functions, one for the 8-bit library, which processes strings of bytes, one for
the 16-bit library, which processes strings of 16-bit values, and one for the
32-bit library, which processes strings of 32-bit values. There are no C++
wrappers.
The distribution does contain a set of C wrapper functions for the 8-bit
library that are based on the POSIX regular expression API (see the pcre2posix
man page). These can be found in a library called libpcre2posix. Note that this
just provides a POSIX calling interface to PCRE2; the regular expressions
themselves still follow Perl syntax and semantics. The POSIX API is restricted,
and does not give full access to all of PCRE2's facilities.
The header file for the POSIX-style functions is called pcre2posix.h. The
official POSIX name is regex.h, but I did not want to risk possible problems
with existing files of that name by distributing it that way. To use PCRE2 with
an existing program that uses the POSIX API, pcre2posix.h will have to be
renamed or pointed at by a link.
If you are using the POSIX interface to PCRE2 and there is already a POSIX
regex library installed on your system, as well as worrying about the regex.h
header file (as mentioned above), you must also take care when linking programs
to ensure that they link with PCRE2's libpcre2posix library. Otherwise they may
pick up the POSIX functions of the same name from the other library.
One way of avoiding this confusion is to compile PCRE2 with the addition of
-Dregcomp=PCRE2regcomp (and similarly for the other POSIX functions) to the
compiler flags (CFLAGS if you are using "configure" -- see below). This has the
effect of renaming the functions so that the names no longer clash. Of course,
you have to do the same thing for your applications, or write them using the
new names.
Documentation for PCRE2
-----------------------
If you install PCRE2 in the normal way on a Unix-like system, you will end up
with a set of man pages whose names all start with "pcre2". The one that is
just called "pcre2" lists all the others. In addition to these man pages, the
PCRE2 documentation is supplied in two other forms:
1. There are files called doc/pcre2.txt, doc/pcre2grep.txt, and
doc/pcre2test.txt in the source distribution. The first of these is a
concatenation of the text forms of all the section 3 man pages except the
listing of pcre2demo.c and those that summarize individual functions. The
other two are the text forms of the section 1 man pages for the pcre2grep
and pcre2test commands. These text forms are provided for ease of scanning
with text editors or similar tools. They are installed in
<prefix>/share/doc/pcre2, where <prefix> is the installation prefix
(defaulting to /usr/local).
2. A set of files containing all the documentation in HTML form, hyperlinked
in various ways, and rooted in a file called index.html, is distributed in
doc/html and installed in <prefix>/share/doc/pcre2/html.
Building PCRE2 on non-Unix-like systems
---------------------------------------
For a non-Unix-like system, please read the comments in the file
NON-AUTOTOOLS-BUILD, though if your system supports the use of "configure" and
"make" you may be able to build PCRE2 using autotools in the same way as for
many Unix-like systems.
PCRE2 can also be configured using CMake, which can be run in various ways
(command line, GUI, etc). This creates Makefiles, solution files, etc. The file
NON-AUTOTOOLS-BUILD has information about CMake.
PCRE2 has been compiled on many different operating systems. It should be
straightforward to build PCRE2 on any system that has a Standard C compiler and
library, because it uses only Standard C functions.
Building PCRE2 without using autotools
--------------------------------------
The use of autotools (in particular, libtool) is problematic in some
environments, even some that are Unix or Unix-like. See the NON-AUTOTOOLS-BUILD
file for ways of building PCRE2 without using autotools.
Building PCRE2 using autotools
------------------------------
The following instructions assume the use of the widely used "configure; make;
make install" (autotools) process.
To build PCRE2 on system that supports autotools, first run the "configure"
command from the PCRE2 distribution directory, with your current directory set
to the directory where you want the files to be created. This command is a
standard GNU "autoconf" configuration script, for which generic instructions
are supplied in the file INSTALL.
Most commonly, people build PCRE2 within its own distribution directory, and in
this case, on many systems, just running "./configure" is sufficient. However,
the usual methods of changing standard defaults are available. For example:
CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local
This command specifies that the C compiler should be run with the flags '-O2
-Wall' instead of the default, and that "make install" should install PCRE2
under /opt/local instead of the default /usr/local.
If you want to build in a different directory, just run "configure" with that
directory as current. For example, suppose you have unpacked the PCRE2 source
into /source/pcre2/pcre2-xxx, but you want to build it in
/build/pcre2/pcre2-xxx:
cd /build/pcre2/pcre2-xxx
/source/pcre2/pcre2-xxx/configure
PCRE2 is written in C and is normally compiled as a C library. However, it is
possible to build it as a C++ library, though the provided building apparatus
does not have any features to support this.
There are some optional features that can be included or omitted from the PCRE2
library. They are also documented in the pcre2build man page.
. By default, both shared and static libraries are built. You can change this
by adding one of these options to the "configure" command:
--disable-shared
--disable-static
(See also "Shared libraries on Unix-like systems" below.)
. By default, only the 8-bit library is built. If you add --enable-pcre2-16 to
the "configure" command, the 16-bit library is also built. If you add
--enable-pcre2-32 to the "configure" command, the 32-bit library is also
built. If you want only the 16-bit or 32-bit library, use --disable-pcre2-8
to disable building the 8-bit library.
. If you want to include support for just-in-time compiling, which can give
large performance improvements on certain platforms, add --enable-jit to the
"configure" command. This support is available only for certain hardware
architectures. If you try to enable it on an unsupported architecture, there
will be a compile time error.
. When JIT support is enabled, pcre2grep automatically makes use of it, unless
you add --disable-pcre2grep-jit to the "configure" command.
. If you do not want to make use of the support for UTF-8 Unicode character
strings in the 8-bit library, UTF-16 Unicode character strings in the 16-bit
library, or UTF-32 Unicode character strings in the 32-bit library, you can
add --disable-unicode to the "configure" command. This reduces the size of
the libraries. It is not possible to configure one library with Unicode
support, and another without, in the same configuration.
When Unicode support is available, the use of a UTF encoding still has to be
enabled by setting the PCRE2_UTF option at run time or starting a pattern
with (*UTF). When PCRE2 is compiled with Unicode support, its input can only
either be ASCII or UTF-8/16/32, even when running on EBCDIC platforms. It is
not possible to use both --enable-unicode and --enable-ebcdic at the same
time.
As well as supporting UTF strings, Unicode support includes support for the
\P, \p, and \X sequences that recognize Unicode character properties.
However, only the basic two-letter properties such as Lu are supported.
Escape sequences such as \d and \w in patterns do not by default make use of
Unicode properties, but can be made to do so by setting the PCRE2_UCP option
or starting a pattern with (*UCP).
. You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any
of the preceding, or any of the Unicode newline sequences, as indicating the
end of a line. Whatever you specify at build time is the default; the caller
of PCRE2 can change the selection at run time. The default newline indicator
is a single LF character (the Unix standard). You can specify the default
newline indicator by adding --enable-newline-is-cr, --enable-newline-is-lf,
--enable-newline-is-crlf, --enable-newline-is-anycrlf, or
--enable-newline-is-any to the "configure" command, respectively.
If you specify --enable-newline-is-cr or --enable-newline-is-crlf, some of
the standard tests will fail, because the lines in the test files end with
LF. Even if the files are edited to change the line endings, there are likely
to be some failures. With --enable-newline-is-anycrlf or
--enable-newline-is-any, many tests should succeed, but there may be some
failures.
. By default, the sequence \R in a pattern matches any Unicode line ending
sequence. This is independent of the option specifying what PCRE2 considers
to be the end of a line (see above). However, the caller of PCRE2 can
restrict \R to match only CR, LF, or CRLF. You can make this the default by
adding --enable-bsr-anycrlf to the "configure" command (bsr = "backslash R").
. PCRE2 has a counter that limits the depth of nesting of parentheses in a
pattern. This limits the amount of system stack that a pattern uses when it
is compiled. The default is 250, but you can change it by setting, for
example,
--with-parens-nest-limit=500
. PCRE2 has a counter that can be set to limit the amount of resources it uses
when matching a pattern. If the limit is exceeded during a match, the match
fails. The default is ten million. You can change the default by setting, for
example,
--with-match-limit=500000
on the "configure" command. This is just the default; individual calls to
pcre2_match() can supply their own value. There is more discussion on the
pcre2api man page.
. There is a separate counter that limits the depth of recursive function calls
during a matching process. This also has a default of ten million, which is
essentially "unlimited". You can change the default by setting, for example,
--with-match-limit-recursion=500000
Recursive function calls use up the runtime stack; running out of stack can
cause programs to crash in strange ways. There is a discussion about stack
sizes in the pcre2stack man page.
. In the 8-bit library, the default maximum compiled pattern size is around
64K. You can increase this by adding --with-link-size=3 to the "configure"
command. PCRE2 then uses three bytes instead of two for offsets to different
parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
the same as --with-link-size=4, which (in both libraries) uses four-byte
offsets. Increasing the internal link size reduces performance in the 8-bit
and 16-bit libraries. In the 32-bit library, the link size setting is
ignored, as 4-byte offsets are always used.
. You can build PCRE2 so that its internal match() function that is called from
pcre2_match() does not call itself recursively. Instead, it uses memory
blocks obtained from the heap to save data that would otherwise be saved on
the stack. To build PCRE2 like this, use
--disable-stack-for-recursion
on the "configure" command. PCRE2 runs more slowly in this mode, but it may
be necessary in environments with limited stack sizes. This applies only to
the normal execution of the pcre2_match() function; if JIT support is being
successfully used, it is not relevant. Equally, it does not apply to
pcre2_dfa_match(), which does not use deeply nested recursion. There is a
discussion about stack sizes in the pcre2stack man page.
. For speed, PCRE2 uses four tables for manipulating and identifying characters
whose code point values are less than 256. By default, it uses a set of
tables for ASCII encoding that is part of the distribution. If you specify
--enable-rebuild-chartables
a program called dftables is compiled and run in the default C locale when
you obey "make". It builds a source file called pcre2_chartables.c. If you do
not specify this option, pcre2_chartables.c is created as a copy of
pcre2_chartables.c.dist. See "Character tables" below for further
information.
. It is possible to compile PCRE2 for use on systems that use EBCDIC as their
character code (as opposed to ASCII/Unicode) by specifying
--enable-ebcdic --disable-unicode
This automatically implies --enable-rebuild-chartables (see above). However,
when PCRE2 is built this way, it always operates in EBCDIC. It cannot support
both EBCDIC and UTF-8/16/32. There is a second option, --enable-ebcdic-nl25,
which specifies that the code value for the EBCDIC NL character is 0x25
instead of the default 0x15.
. If you specify --enable-debug, additional debugging code is included in the
build. This option is intended for use by the PCRE2 maintainers.
. In environments where valgrind is installed, if you specify
--enable-valgrind
PCRE2 will use valgrind annotations to mark certain memory regions as
unaddressable. This allows it to detect invalid memory accesses, and is
mostly useful for debugging PCRE2 itself.
. In environments where the gcc compiler is used and lcov version 1.6 or above
is installed, if you specify
--enable-coverage
the build process implements a code coverage report for the test suite. The
report is generated by running "make coverage". If ccache is installed on
your system, it must be disabled when building PCRE2 for coverage reporting.
You can do this by setting the environment variable CCACHE_DISABLE=1 before
running "make" to build PCRE2. There is more information about coverage
reporting in the "pcre2build" documentation.
. The pcre2grep program currently supports only 8-bit data files, and so
requires the 8-bit PCRE2 library. It is possible to compile pcre2grep to use
libz and/or libbz2, in order to read .gz and .bz2 files (respectively), by
specifying one or both of
--enable-pcre2grep-libz
--enable-pcre2grep-libbz2
Of course, the relevant libraries must be installed on your system.
. The default size (in bytes) of the internal buffer used by pcre2grep can be
set by, for example:
--with-pcre2grep-bufsize=51200
The value must be a plain integer. The default is 20480.
. It is possible to compile pcre2test so that it links with the libreadline
or libedit libraries, by specifying, respectively,
--enable-pcre2test-libreadline or --enable-pcre2test-libedit
If this is done, when pcre2test's input is from a terminal, it reads it using
the readline() function. This provides line-editing and history facilities.
Note that libreadline is GPL-licenced, so if you distribute a binary of
pcre2test linked in this way, there may be licensing issues. These can be
avoided by linking with libedit (which has a BSD licence) instead.
Enabling libreadline causes the -lreadline option to be added to the
pcre2test build. In many operating environments with a sytem-installed
readline library this is sufficient. However, in some environments (e.g. if
an unmodified distribution version of readline is in use), it may be
necessary to specify something like LIBS="-lncurses" as well. This is
because, to quote the readline INSTALL, "Readline uses the termcap functions,
but does not link with the termcap or curses library itself, allowing
applications which link with readline the to choose an appropriate library."
If you get error messages about missing functions tgetstr, tgetent, tputs,
tgetflag, or tgoto, this is the problem, and linking with the ncurses library
should fix it.
The "configure" script builds the following files for the basic C library:
. Makefile the makefile that builds the library
. src/config.h build-time configuration options for the library
. src/pcre2.h the public PCRE2 header file
. pcre2-config script that shows the building settings such as CFLAGS
that were set for "configure"
. libpcre2-8.pc )
. libpcre2-16.pc ) data for the pkg-config command
. libpcre2-32.pc )
. libpcre2-posix.pc )
. libtool script that builds shared and/or static libraries
Versions of config.h and pcre2.h are distributed in the src directory of PCRE2
tarballs under the names config.h.generic and pcre2.h.generic. These are
provided for those who have to build PCRE2 without using "configure" or CMake.
If you use "configure" or CMake, the .generic versions are not used.
The "configure" script also creates config.status, which is an executable
script that can be run to recreate the configuration, and config.log, which
contains compiler output from tests that "configure" runs.
Once "configure" has run, you can run "make". This builds whichever of the
libraries libpcre2-8, libpcre2-16 and libpcre2-32 are configured, and a test
program called pcre2test. If you enabled JIT support with --enable-jit, another
test program called pcre2_jit_test is built as well. If the 8-bit library is
built, libpcre2-posix and the pcre2grep command are also built. Running
"make" with the -j option may speed up compilation on multiprocessor systems.
The command "make check" runs all the appropriate tests. Details of the PCRE2
tests are given below in a separate section of this document. The -j option of
"make" can also be used when running the tests.
You can use "make install" to install PCRE2 into live directories on your
system. The following are installed (file names are all relative to the
<prefix> that is set when "configure" is run):
Commands (bin):
pcre2test
pcre2grep (if 8-bit support is enabled)
pcre2-config
Libraries (lib):
libpcre2-8 (if 8-bit support is enabled)
libpcre2-16 (if 16-bit support is enabled)
libpcre2-32 (if 32-bit support is enabled)
libpcre2-posix (if 8-bit support is enabled)
Configuration information (lib/pkgconfig):
libpcre2-8.pc
libpcre2-16.pc
libpcre2-32.pc
libpcre2-posix.pc
Header files (include):
pcre2.h
pcre2posix.h
Man pages (share/man/man{1,3}):
pcre2grep.1
pcre2test.1
pcre2-config.1
pcre2.3
pcre2*.3 (lots more pages, all starting "pcre2")
HTML documentation (share/doc/pcre2/html):
index.html
*.html (lots more pages, hyperlinked from index.html)
Text file documentation (share/doc/pcre2):
AUTHORS
COPYING
ChangeLog
LICENCE
NEWS
README
pcre2.txt (a concatenation of the man(3) pages)
pcre2test.txt the pcre2test man page
pcre2grep.txt the pcre2grep man page
pcre2-config.txt the pcre2-config man page
If you want to remove PCRE2 from your system, you can run "make uninstall".
This removes all the files that "make install" installed. However, it does not
remove any directories, because these are often shared with other programs.
Retrieving configuration information
------------------------------------
Running "make install" installs the command pcre2-config, which can be used to
recall information about the PCRE2 configuration and installation. For example:
pcre2-config --version
prints the version number, and
pcre2-config --libs8
outputs information about where the 8-bit library is installed. This command
can be included in makefiles for programs that use PCRE2, saving the programmer
from having to remember too many details. Run pcre2-config with no arguments to
obtain a list of possible arguments.
The pkg-config command is another system for saving and retrieving information
about installed libraries. Instead of separate commands for each library, a
single command is used. For example:
pkg-config --libs libpcre2-16
The data is held in *.pc files that are installed in a directory called
<prefix>/lib/pkgconfig.
Shared libraries
----------------
The default distribution builds PCRE2 as shared libraries and static libraries,
as long as the operating system supports shared libraries. Shared library
support relies on the "libtool" script which is built as part of the
"configure" process.
The libtool script is used to compile and link both shared and static
libraries. They are placed in a subdirectory called .libs when they are newly
built. The programs pcre2test and pcre2grep are built to use these uninstalled
libraries (by means of wrapper scripts in the case of shared libraries). When
you use "make install" to install shared libraries, pcre2grep and pcre2test are
automatically re-built to use the newly installed shared libraries before being
installed themselves. However, the versions left in the build directory still
use the uninstalled libraries.
To build PCRE2 using static libraries only you must use --disable-shared when
configuring it. For example:
./configure --prefix=/usr/gnu --disable-shared
Then run "make" in the usual way. Similarly, you can use --disable-static to
build only shared libraries.
Cross-compiling using autotools
-------------------------------
You can specify CC and CFLAGS in the normal way to the "configure" command, in
order to cross-compile PCRE2 for some other host. However, you should NOT
specify --enable-rebuild-chartables, because if you do, the dftables.c source
file is compiled and run on the local host, in order to generate the inbuilt
character tables (the pcre2_chartables.c file). This will probably not work,
because dftables.c needs to be compiled with the local compiler, not the cross
compiler.
When --enable-rebuild-chartables is not specified, pcre2_chartables.c is
created by making a copy of pcre2_chartables.c.dist, which is a default set of
tables that assumes ASCII code. Cross-compiling with the default tables should
not be a problem.
If you need to modify the character tables when cross-compiling, you should
move pcre2_chartables.c.dist out of the way, then compile dftables.c by hand
and run it on the local host to make a new version of pcre2_chartables.c.dist.
Then when you cross-compile PCRE2 this new version of the tables will be used.
Making new tarballs
-------------------
The command "make dist" creates three PCRE2 tarballs, in tar.gz, tar.bz2, and
zip formats. The command "make distcheck" does the same, but then does a trial
build of the new distribution to ensure that it works.
If you have modified any of the man page sources in the doc directory, you
should first run the PrepareRelease script before making a distribution. This
script creates the .txt and HTML forms of the documentation from the man pages.
Testing PCRE2
------------
To test the basic PCRE2 library on a Unix-like system, run the RunTest script.
There is another script called RunGrepTest that tests the pcre2grep command.
When JIT support is enabled, a third test program called pcre2_jit_test is
built. Both the scripts and all the program tests are run if you obey "make
check". For other environments, see the instructions in NON-AUTOTOOLS-BUILD.
The RunTest script runs the pcre2test test program (which is documented in its
own man page) on each of the relevant testinput files in the testdata
directory, and compares the output with the contents of the corresponding
testoutput files. RunTest uses a file called testtry to hold the main output
from pcre2test. Other files whose names begin with "test" are used as working
files in some tests.
Some tests are relevant only when certain build-time options were selected. For
example, the tests for UTF-8/16/32 features are run only when Unicode support
is available. RunTest outputs a comment when it skips a test.
Many (but not all) of the tests that are not skipped are run twice if JIT
support is available. On the second run, JIT compilation is forced. This
testing can be suppressed by putting "nojit" on the RunTest command line.
The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
libraries that are enabled. If you want to run just one set of tests, call
RunTest with either the -8, -16 or -32 option.
If valgrind is installed, you can run the tests under it by putting "valgrind"
on the RunTest command line. To run pcre2test on just one or more specific test
files, give their numbers as arguments to RunTest, for example:
RunTest 2 7 11
You can also specify ranges of tests such as 3-6 or 3- (meaning 3 to the
end), or a number preceded by ~ to exclude a test. For example:
Runtest 3-15 ~10
This runs tests 3 to 15, excluding test 10, and just ~13 runs all the tests
except test 13. Whatever order the arguments are in, the tests are always run
in numerical order.
You can also call RunTest with the single argument "list" to cause it to output
a list of tests.
The test sequence starts with "test 0", which is a special test that has no
input file, and whose output is not checked. This is because it will be
different on different hardware and with different configurations. The test
exists in order to exercise some of pcre2test's code that would not otherwise
be run.
Tests 1 and 2 can always be run, as they expect only plain text strings (not
UTF) and make no use of Unicode properties. The first test file can be fed
directly into the perltest.sh script to check that Perl gives the same results.
The only difference you should see is in the first few lines, where the Perl
version is given instead of the PCRE2 version. The second set of tests check
auxiliary functions, error detection, and run-time flags that are specific to
PCRE2. It also uses the debugging flags to check some of the internals of
pcre2_compile().
If you build PCRE2 with a locale setting that is not the standard C locale, the
character tables may be different (see next paragraph). In some cases, this may
cause failures in the second set of tests. For example, in a locale where the
isprint() function yields TRUE for characters in the range 128-255, the use of
[:isascii:] inside a character class defines a different set of characters, and
this shows up in this test as a difference in the compiled code, which is being
listed for checking. For example, where the comparison test output contains
[\x00-\x7f] the test might contain [\x00-\xff], and similarly in some other
cases. This is not a bug in PCRE2.
Test 3 checks pcre2_maketables(), the facility for building a set of character
tables for a specific locale and using them instead of the default tables. The
script uses the "locale" command to check for the availability of the "fr_FR",
"french", or "fr" locale, and uses the first one that it finds. If the "locale"
command fails, or if its output doesn't include "fr_FR", "french", or "fr" in
the list of available locales, the third test cannot be run, and a comment is
output to say why. If running this test produces an error like this:
** Failed to set locale "fr_FR"
it means that the given locale is not available on your system, despite being
listed by "locale". This does not mean that PCRE2 is broken. There are three
alternative output files for the third test, because three different versions
of the French locale have been encountered. The test passes if its output
matches any one of them.
Tests 4 and 5 check UTF and Unicode property support, test 4 being compatible
with the perltest.sh script, and test 5 checking PCRE2-specific things.
Tests 6 and 7 check the pcre2_dfa_match() alternative matching function, in
non-UTF mode and UTF-mode with Unicode property support, respectively.
Test 8 checks some internal offsets and code size features; it is run only when
the default "link size" of 2 is set (in other cases the sizes change) and when
Unicode support is enabled.
Tests 9 and 10 are run only in 8-bit mode, and tests 11 and 12 are run only in
16-bit and 32-bit modes. These are tests that generate different output in
8-bit mode. Each pair are for general cases and Unicode support, respectively.
Test 13 checks the handling of non-UTF characters greater than 255 by
pcre2_dfa_match() in 16-bit and 32-bit modes.
Test 14 contains a number of tests that must not be run with JIT. They check,
among other non-JIT things, the match-limiting features of the intepretive
matcher.
Test 15 is run only when JIT support is not available. It checks that an
attempt to use JIT has the expected behaviour.
Test 16 is run only when JIT support is available. It checks JIT complete and
partial modes, match-limiting under JIT, and other JIT-specific features.
Tests 17 and 18 are run only in 8-bit mode. They check the POSIX interface to
the 8-bit library, without and with Unicode support, respectively.
Test 19 checks the serialization functions by writing a set of compiled
patterns to a file, and then reloading and checking them.
Character tables
----------------
For speed, PCRE2 uses four tables for manipulating and identifying characters
whose code point values are less than 256. By default, a set of tables that is
built into the library is used. The pcre2_maketables() function can be called
by an application to create a new set of tables in the current locale. This are
passed to PCRE2 by calling pcre2_set_character_tables() to put a pointer into a
compile context.
The source file called pcre2_chartables.c contains the default set of tables.
By default, this is created as a copy of pcre2_chartables.c.dist, which
contains tables for ASCII coding. However, if --enable-rebuild-chartables is
specified for ./configure, a different version of pcre2_chartables.c is built
by the program dftables (compiled from dftables.c), which uses the ANSI C
character handling functions such as isalnum(), isalpha(), isupper(),
islower(), etc. to build the table sources. This means that the default C
locale which is set for your system will control the contents of these default
tables. You can change the default tables by editing pcre2_chartables.c and
then re-building PCRE2. If you do this, you should take care to ensure that the
file does not get automatically re-generated. The best way to do this is to
move pcre2_chartables.c.dist out of the way and replace it with your customized
tables.
When the dftables program is run as a result of --enable-rebuild-chartables,
it uses the default C locale that is set on your system. It does not pay
attention to the LC_xxx environment variables. In other words, it uses the
system's default locale rather than whatever the compiling user happens to have
set. If you really do want to build a source set of character tables in a
locale that is specified by the LC_xxx variables, you can run the dftables
program by hand with the -L option. For example:
./dftables -L pcre2_chartables.c.special
The first two 256-byte tables provide lower casing and case flipping functions,
respectively. The next table consists of three 32-byte bit maps which identify
digits, "word" characters, and white space, respectively. These are used when
building 32-byte bit maps that represent character classes for code points less
than 256. The final 256-byte table has bits indicating various character types,
as follows:
1 white space character
2 letter
4 decimal digit
8 hexadecimal digit
16 alphanumeric or '_'
128 regular expression metacharacter or binary zero
You should not alter the set of characters that contain the 128 bit, as that
will cause PCRE2 to malfunction.
File manifest
-------------
The distribution should contain the files listed below.
(A) Source files for the PCRE2 library functions and their headers are found in
the src directory:
src/dftables.c auxiliary program for building pcre2_chartables.c
when --enable-rebuild-chartables is specified
src/pcre2_chartables.c.dist a default set of character tables that assume
ASCII coding; unless --enable-rebuild-chartables is
specified, used by copying to pcre2_chartables.c
src/pcre2posix.c )
src/pcre2_auto_possess.c )
src/pcre2_compile.c )
src/pcre2_config.c )
src/pcre2_context.c )
src/pcre2_dfa_match.c )
src/pcre2_error.c )
src/pcre2_jit_compile.c )
src/pcre2_jit_match.c ) sources for the functions in the library,
src/pcre2_jit_misc.c ) and some internal functions that they use
src/pcre2_maketables.c )
src/pcre2_match.c )
src/pcre2_match_data.c )
src/pcre2_newline.c )
src/pcre2_ord2utf.c )
src/pcre2_pattern_info.c )
src/pcre2_serialize.c )
src/pcre2_string_utils.c )
src/pcre2_study.c )
src/pcre2_substitute.c )
src/pcre2_substring.c )
src/pcre2_tables.c )
src/pcre2_ucd.c )
src/pcre2_valid_utf.c )
src/pcre2_xclass.c )
src/pcre2_printint.c debugging function that is used by pcre2test,
src/config.h.in template for config.h, when built by "configure"
src/pcre2.h.in template for pcre2.h when built by "configure"
src/pcre2posix.h header for the external POSIX wrapper API
src/pcre2_internal.h header for internal use
src/pcre2_intmodedep.h a mode-specific internal header
src/pcre2_ucp.h header for Unicode property handling
sljit/* source files for the JIT compiler
(B) Source files for programs that use PCRE2:
src/pcre2demo.c simple demonstration of coding calls to PCRE2
src/pcre2grep.c source of a grep utility that uses PCRE2
src/pcre2test.c comprehensive test program
src/pcre2_printint.c part of pcre2test
src/pcre2_jit_test.c JIT test program
(C) Auxiliary files:
132html script to turn "man" pages into HTML
AUTHORS information about the author of PCRE2
ChangeLog log of changes to the code
CleanTxt script to clean nroff output for txt man pages
Detrail script to remove trailing spaces
HACKING some notes about the internals of PCRE2
INSTALL generic installation instructions
LICENCE conditions for the use of PCRE2
COPYING the same, using GNU's standard name
Makefile.in ) template for Unix Makefile, which is built by
) "configure"
Makefile.am ) the automake input that was used to create
) Makefile.in
NEWS important changes in this release
NON-AUTOTOOLS-BUILD notes on building PCRE2 without using autotools
PrepareRelease script to make preparations for "make dist"
README this file
RunTest a Unix shell script for running tests
RunGrepTest a Unix shell script for pcre2grep tests
aclocal.m4 m4 macros (generated by "aclocal")
config.guess ) files used by libtool,
config.sub ) used only when building a shared library
configure a configuring shell script (built by autoconf)
configure.ac ) the autoconf input that was used to build
) "configure" and config.h
depcomp ) script to find program dependencies, generated by
) automake
doc/*.3 man page sources for PCRE2
doc/*.1 man page sources for pcre2grep and pcre2test
doc/index.html.src the base HTML page
doc/html/* HTML documentation
doc/pcre2.txt plain text version of the man pages
doc/pcre2test.txt plain text documentation of test program
install-sh a shell script for installing files
libpcre2-8.pc.in template for libpcre2-8.pc for pkg-config
libpcre2-16.pc.in template for libpcre2-16.pc for pkg-config
libpcre2-32.pc.in template for libpcre2-32.pc for pkg-config
libpcre2posix.pc.in template for libpcre2posix.pc for pkg-config
ltmain.sh file used to build a libtool script
missing ) common stub for a few missing GNU programs while
) installing, generated by automake
mkinstalldirs script for making install directories
perltest.sh Script for running a Perl test program
pcre2-config.in source of script which retains PCRE2 information
testdata/testinput* test data for main library tests
testdata/testoutput* expected test results
testdata/grep* input and output for pcre2grep tests
testdata/* other supporting test files
(D) Auxiliary files for cmake support
cmake/COPYING-CMAKE-SCRIPTS
cmake/FindPackageHandleStandardArgs.cmake
cmake/FindEditline.cmake
cmake/FindReadline.cmake
CMakeLists.txt
config-cmake.h.in
(E) Auxiliary files for building PCRE2 "by hand"
pcre2.h.generic ) a version of the public PCRE2 header file
) for use in non-"configure" environments
config.h.generic ) a version of config.h for use in non-"configure"
) environments
Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
Last updated: 24 April 2015

264
pcre2/doc/html/index.html Normal file
View File

@ -0,0 +1,264 @@
<html>
<!-- This is a manually maintained file that is the root of the HTML version of
the PCRE2 documentation. When the HTML documents are built from the man
page versions, the entire doc/html directory is emptied, this file is then
copied into doc/html/index.html, and the remaining files therein are
created by the 132html script.
-->
<head>
<title>PCRE2 specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>Perl-compatible Regular Expressions (revised API: PCRE2)</h1>
<p>
The HTML documentation for PCRE2 consists of a number of pages that are listed
below in alphabetical order. If you are new to PCRE2, please read the first one
first.
</p>
<table>
<tr><td><a href="pcre2.html">pcre2</a></td>
<td>&nbsp;&nbsp;Introductory page</td></tr>
<tr><td><a href="pcre2-config.html">pcre2-config</a></td>
<td>&nbsp;&nbsp;Information about the installation configuration</td></tr>
<tr><td><a href="pcre2api.html">pcre2api</a></td>
<td>&nbsp;&nbsp;PCRE2's native API</td></tr>
<tr><td><a href="pcre2build.html">pcre2build</a></td>
<td>&nbsp;&nbsp;Building PCRE2</td></tr>
<tr><td><a href="pcre2callout.html">pcre2callout</a></td>
<td>&nbsp;&nbsp;The <i>callout</i> facility</td></tr>
<tr><td><a href="pcre2compat.html">pcre2compat</a></td>
<td>&nbsp;&nbsp;Compability with Perl</td></tr>
<tr><td><a href="pcre2demo.html">pcre2demo</a></td>
<td>&nbsp;&nbsp;A demonstration C program that uses the PCRE2 library</td></tr>
<tr><td><a href="pcre2grep.html">pcre2grep</a></td>
<td>&nbsp;&nbsp;The <b>pcre2grep</b> command</td></tr>
<tr><td><a href="pcre2jit.html">pcre2jit</a></td>
<td>&nbsp;&nbsp;Discussion of the just-in-time optimization support</td></tr>
<tr><td><a href="pcre2limits.html">pcre2limits</a></td>
<td>&nbsp;&nbsp;Details of size and other limits</td></tr>
<tr><td><a href="pcre2matching.html">pcre2matching</a></td>
<td>&nbsp;&nbsp;Discussion of the two matching algorithms</td></tr>
<tr><td><a href="pcre2partial.html">pcre2partial</a></td>
<td>&nbsp;&nbsp;Using PCRE2 for partial matching</td></tr>
<tr><td><a href="pcre2pattern.html">pcre2pattern</a></td>
<td>&nbsp;&nbsp;Specification of the regular expressions supported by PCRE2</td></tr>
<tr><td><a href="pcre2perform.html">pcre2perform</a></td>
<td>&nbsp;&nbsp;Some comments on performance</td></tr>
<tr><td><a href="pcre2posix.html">pcre2posix</a></td>
<td>&nbsp;&nbsp;The POSIX API to the PCRE2 8-bit library</td></tr>
<tr><td><a href="pcre2sample.html">pcre2sample</a></td>
<td>&nbsp;&nbsp;Discussion of the pcre2demo program</td></tr>
<tr><td><a href="pcre2serialize.html">pcre2serialize</a></td>
<td>&nbsp;&nbsp;Serializing functions for saving precompiled patterns</td></tr>
<tr><td><a href="pcre2stack.html">pcre2stack</a></td>
<td>&nbsp;&nbsp;Discussion of PCRE2's stack usage</td></tr>
<tr><td><a href="pcre2syntax.html">pcre2syntax</a></td>
<td>&nbsp;&nbsp;Syntax quick-reference summary</td></tr>
<tr><td><a href="pcre2test.html">pcre2test</a></td>
<td>&nbsp;&nbsp;The <b>pcre2test</b> command for testing PCRE2</td></tr>
<tr><td><a href="pcre2unicode.html">pcre2unicode</a></td>
<td>&nbsp;&nbsp;Discussion of Unicode and UTF-8/UTF-16/UTF-32 support</td></tr>
</table>
<p>
There are also individual pages that summarize the interface for each function
in the library.
</p>
<table>
<tr><td><a href="pcre2_callout_enumerate.html">pcre2_callout_enumerate</a></td>
<td>&nbsp;&nbsp;Enumerate callouts in a compiled pattern</td></tr>
<tr><td><a href="pcre2_code_free.html">pcre2_code_free</a></td>
<td>&nbsp;&nbsp;Free a compiled pattern</td></tr>
<tr><td><a href="pcre2_compile.html">pcre2_compile</a></td>
<td>&nbsp;&nbsp;Compile a regular expression pattern</td></tr>
<tr><td><a href="pcre2_compile_context_copy.html">pcre2_compile_context_copy</a></td>
<td>&nbsp;&nbsp;Copy a compile context</td></tr>
<tr><td><a href="pcre2_compile_context_create.html">pcre2_compile_context_create</a></td>
<td>&nbsp;&nbsp;Create a compile context</td></tr>
<tr><td><a href="pcre2_compile_context_free.html">pcre2_compile_context_free</a></td>
<td>&nbsp;&nbsp;Free a compile context</td></tr>
<tr><td><a href="pcre2_config.html">pcre2_config</a></td>
<td>&nbsp;&nbsp;Show build-time configuration options</td></tr>
<tr><td><a href="pcre2_dfa_match.html">pcre2_dfa_match</a></td>
<td>&nbsp;&nbsp;Match a compiled pattern to a subject string
(DFA algorithm; <i>not</i> Perl compatible)</td></tr>
<tr><td><a href="pcre2_general_context_copy.html">pcre2_general_context_copy</a></td>
<td>&nbsp;&nbsp;Copy a general context</td></tr>
<tr><td><a href="pcre2_general_context_create.html">pcre2_general_context_create</a></td>
<td>&nbsp;&nbsp;Create a general context</td></tr>
<tr><td><a href="pcre2_general_context_free.html">pcre2_general_context_free</a></td>
<td>&nbsp;&nbsp;Free a general context</td></tr>
<tr><td><a href="pcre2_get_error_message.html">pcre2_get_error_message</a></td>
<td>&nbsp;&nbsp;Free study data</td></tr>
<tr><td><a href="pcre2_get_mark.html">pcre2_get_mark</a></td>
<td>&nbsp;&nbsp;Get a (*MARK) name</td></tr>
<tr><td><a href="pcre2_get_ovector_count.html">pcre2_get_ovector_count</a></td>
<td>&nbsp;&nbsp;Get the ovector count</td></tr>
<tr><td><a href="pcre2_get_ovector_pointer.html">pcre2_get_ovector_pointer</a></td>
<td>&nbsp;&nbsp;Get a pointer to the ovector</td></tr>
<tr><td><a href="pcre2_get_startchar.html">pcre2_get_startchar</a></td>
<td>&nbsp;&nbsp;Get the starting character offset</td></tr>
<tr><td><a href="pcre2_jit_compile.html">pcre2_jit_compile</a></td>
<td>&nbsp;&nbsp;Process a compiled pattern with the JIT compiler</td></tr>
<tr><td><a href="pcre2_jit_free_unused_memory.html">pcre2_jit_free_unused_memory</a></td>
<td>&nbsp;&nbsp;Free unused JIT memory</td></tr>
<tr><td><a href="pcre2_jit_match.html">pcre2_jit_match</a></td>
<td>&nbsp;&nbsp;Fast path interface to JIT matching</td></tr>
<tr><td><a href="pcre2_jit_stack_assign.html">pcre2_jit_stack_assign</a></td>
<td>&nbsp;&nbsp;Assign stack for JIT matching</td></tr>
<tr><td><a href="pcre2_jit_stack_create.html">pcre2_jit_stack_create</a></td>
<td>&nbsp;&nbsp;Create a stack for JIT matching</td></tr>
<tr><td><a href="pcre2_jit_stack_free.html">pcre2_jit_stack_free</a></td>
<td>&nbsp;&nbsp;Free a JIT matching stack</td></tr>
<tr><td><a href="pcre2_maketables.html">pcre2_maketables</a></td>
<td>&nbsp;&nbsp;Build character tables in current locale</td></tr>
<tr><td><a href="pcre2_match.html">pcre2_match</a></td>
<td>&nbsp;&nbsp;Match a compiled pattern to a subject string
(Perl compatible)</td></tr>
<tr><td><a href="pcre2_match_context_copy.html">pcre2_match_context_copy</a></td>
<td>&nbsp;&nbsp;Copy a match context</td></tr>
<tr><td><a href="pcre2_match_context_create.html">pcre2_match_context_create</a></td>
<td>&nbsp;&nbsp;Create a match context</td></tr>
<tr><td><a href="pcre2_match_context_free.html">pcre2_match_context_free</a></td>
<td>&nbsp;&nbsp;Free a match context</td></tr>
<tr><td><a href="pcre2_match_data_create.html">pcre2_match_data_create</a></td>
<td>&nbsp;&nbsp;Create a match data block</td></tr>
<tr><td><a href="pcre2_match_data_create_from_pattern.html">pcre2_match_data_create_from_pattern</a></td>
<td>&nbsp;&nbsp;Create a match data block getting size from pattern</td></tr>
<tr><td><a href="pcre2_match_data_free.html">pcre2_match_data_free</a></td>
<td>&nbsp;&nbsp;Free a match data block</td></tr>
<tr><td><a href="pcre2_pattern_info.html">pcre2_pattern_info</a></td>
<td>&nbsp;&nbsp;Extract information about a pattern</td></tr>
<tr><td><a href="pcre2_serialize_decode.html">pcre2_serialize_decode</a></td>
<td>&nbsp;&nbsp;Decode serialized compiled patterns</td></tr>
<tr><td><a href="pcre2_serialize_encode.html">pcre2_serialize_encode</a></td>
<td>&nbsp;&nbsp;Serialize compiled patterns for save/restore</td></tr>
<tr><td><a href="pcre2_serialize_free.html">pcre2_serialize_free</a></td>
<td>&nbsp;&nbsp;Free serialized compiled patterns</td></tr>
<tr><td><a href="pcre2_serialize_get_number_of_codes.html">pcre2_serialize_get_number_of_codes</a></td>
<td>&nbsp;&nbsp;Get number of serialized compiled patterns</td></tr>
<tr><td><a href="pcre2_set_bsr.html">pcre2_set_bsr</a></td>
<td>&nbsp;&nbsp;Set \R convention</td></tr>
<tr><td><a href="pcre2_set_callout.html">pcre2_set_callout</a></td>
<td>&nbsp;&nbsp;Set up a callout function</td></tr>
<tr><td><a href="pcre2_set_character_tables.html">pcre2_set_character_tables</a></td>
<td>&nbsp;&nbsp;Set character tables</td></tr>
<tr><td><a href="pcre2_set_compile_recursion_guard.html">pcre2_set_compile_recursion_guard</a></td>
<td>&nbsp;&nbsp;Set up a compile recursion guard function</td></tr>
<tr><td><a href="pcre2_set_match_limit.html">pcre2_set_match_limit</a></td>
<td>&nbsp;&nbsp;Set the match limit</td></tr>
<tr><td><a href="pcre2_set_newline.html">pcre2_set_newline</a></td>
<td>&nbsp;&nbsp;Set the newline convention</td></tr>
<tr><td><a href="pcre2_set_parens_nest_limit.html">pcre2_set_parens_nest_limit</a></td>
<td>&nbsp;&nbsp;Set the parentheses nesting limit</td></tr>
<tr><td><a href="pcre2_set_recursion_limit.html">pcre2_set_recursion_limit</a></td>
<td>&nbsp;&nbsp;Set the match recursion limit</td></tr>
<tr><td><a href="pcre2_set_recursion_memory_management.html">pcre2_set_recursion_memory_management</a></td>
<td>&nbsp;&nbsp;Set match recursion memory management</td></tr>
<tr><td><a href="pcre2_substitute.html">pcre2_substitute</a></td>
<td>&nbsp;&nbsp;Match a compiled pattern to a subject string and do
substitutions</td></tr>
<tr><td><a href="pcre2_substring_copy_byname.html">pcre2_substring_copy_byname</a></td>
<td>&nbsp;&nbsp;Extract named substring into given buffer</td></tr>
<tr><td><a href="pcre2_substring_copy_bynumber.html">pcre2_substring_copy_bynumber</a></td>
<td>&nbsp;&nbsp;Extract numbered substring into given buffer</td></tr>
<tr><td><a href="pcre2_substring_free.html">pcre2_substring_free</a></td>
<td>&nbsp;&nbsp;Free extracted substring</td></tr>
<tr><td><a href="pcre2_substring_get_byname.html">pcre2_substring_get_byname</a></td>
<td>&nbsp;&nbsp;Extract named substring into new memory</td></tr>
<tr><td><a href="pcre2_substring_get_bynumber.html">pcre2_substring_get_bynumber</a></td>
<td>&nbsp;&nbsp;Extract numbered substring into new memory</td></tr>
<tr><td><a href="pcre2_substring_length_byname.html">pcre2_substring_length_byname</a></td>
<td>&nbsp;&nbsp;Find length of named substring</td></tr>
<tr><td><a href="pcre2_substring_length_bynumber.html">pcre2_substring_length_bynumber</a></td>
<td>&nbsp;&nbsp;Find length of numbered substring</td></tr>
<tr><td><a href="pcre2_substring_list_free.html">pcre2_substring_list_free</a></td>
<td>&nbsp;&nbsp;Free list of extracted substrings</td></tr>
<tr><td><a href="pcre2_substring_list_get.html">pcre2_substring_list_get</a></td>
<td>&nbsp;&nbsp;Extract all substrings into new memory</td></tr>
<tr><td><a href="pcre2_substring_nametable_scan.html">pcre2_substring_nametable_scan</a></td>
<td>&nbsp;&nbsp;Find table entries for given string name</td></tr>
<tr><td><a href="pcre2_substring_number_from_name.html">pcre2_substring_number_from_name</a></td>
<td>&nbsp;&nbsp;Convert captured string name to number</td></tr>
</table>
</html>

View File

@ -0,0 +1,102 @@
<html>
<head>
<title>pcre2-config specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2-config man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<ul>
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
<li><a name="TOC3" href="#SEC3">OPTIONS</a>
<li><a name="TOC4" href="#SEC4">SEE ALSO</a>
<li><a name="TOC5" href="#SEC5">AUTHOR</a>
<li><a name="TOC6" href="#SEC6">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
<P>
<b>pcre2-config [--prefix] [--exec-prefix] [--version]</b>
<b> [--libs8] [--libs16] [--libs32] [--libs-posix]</b>
<b> [--cflags] [--cflags-posix]</b>
</P>
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
<P>
<b>pcre2-config</b> returns the configuration of the installed PCRE2 libraries
and the options required to compile a program to use them. Some of the options
apply only to the 8-bit, or 16-bit, or 32-bit libraries, respectively, and are
not available for libraries that have not been built. If an unavailable option
is encountered, the "usage" information is output.
</P>
<br><a name="SEC3" href="#TOC1">OPTIONS</a><br>
<P>
<b>--prefix</b>
Writes the directory prefix used in the PCRE2 installation for architecture
independent files (<i>/usr</i> on many systems, <i>/usr/local</i> on some
systems) to the standard output.
</P>
<P>
<b>--exec-prefix</b>
Writes the directory prefix used in the PCRE2 installation for architecture
dependent files (normally the same as <b>--prefix</b>) to the standard output.
</P>
<P>
<b>--version</b>
Writes the version number of the installed PCRE2 libraries to the standard
output.
</P>
<P>
<b>--libs8</b>
Writes to the standard output the command line options required to link
with the 8-bit PCRE2 library (<b>-lpcre2-8</b> on many systems).
</P>
<P>
<b>--libs16</b>
Writes to the standard output the command line options required to link
with the 16-bit PCRE2 library (<b>-lpcre2-16</b> on many systems).
</P>
<P>
<b>--libs32</b>
Writes to the standard output the command line options required to link
with the 32-bit PCRE2 library (<b>-lpcre2-32</b> on many systems).
</P>
<P>
<b>--libs-posix</b>
Writes to the standard output the command line options required to link with
PCRE2's POSIX API wrapper library (<b>-lpcre2-posix</b> <b>-lpcre2-8</b> on many
systems).
</P>
<P>
<b>--cflags</b>
Writes to the standard output the command line options required to compile
files that use PCRE2 (this may include some <b>-I</b> options, but is blank on
many systems).
</P>
<P>
<b>--cflags-posix</b>
Writes to the standard output the command line options required to compile
files that use PCRE2's POSIX API wrapper library (this may include some
<b>-I</b> options, but is blank on many systems).
</P>
<br><a name="SEC4" href="#TOC1">SEE ALSO</a><br>
<P>
<b>pcre2(3)</b>
</P>
<br><a name="SEC5" href="#TOC1">AUTHOR</a><br>
<P>
This manual page was originally written by Mark Baker for the Debian GNU/Linux
system. It has been subsequently revised as a generic PCRE2 man page.
</P>
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
<P>
Last updated: 28 September 2014
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

196
pcre2/doc/html/pcre2.html Normal file
View File

@ -0,0 +1,196 @@
<html>
<head>
<title>pcre2 specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2 man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<ul>
<li><a name="TOC1" href="#SEC1">INTRODUCTION</a>
<li><a name="TOC2" href="#SEC2">SECURITY CONSIDERATIONS</a>
<li><a name="TOC3" href="#SEC3">USER DOCUMENTATION</a>
<li><a name="TOC4" href="#SEC4">AUTHOR</a>
<li><a name="TOC5" href="#SEC5">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">INTRODUCTION</a><br>
<P>
PCRE2 is the name used for a revised API for the PCRE library, which is a set
of functions, written in C, that implement regular expression pattern matching
using the same syntax and semantics as Perl, with just a few differences. Some
features that appeared in Python and the original PCRE before they appeared in
Perl are also available using the Python syntax. There is also some support for
one or two .NET and Oniguruma syntax items, and there are options for
requesting some minor changes that give better ECMAScript (aka JavaScript)
compatibility.
</P>
<P>
The source code for PCRE2 can be compiled to support 8-bit, 16-bit, or 32-bit
code units, which means that up to three separate libraries may be installed.
The original work to extend PCRE to 16-bit and 32-bit code units was done by
Zoltan Herczeg and Christian Persch, respectively. In all three cases, strings
can be interpreted either as one character per code unit, or as UTF-encoded
Unicode, with support for Unicode general category properties. Unicode support
is optional at build time (but is the default). However, processing strings as
UTF code units must be enabled explicitly at run time. The version of Unicode
in use can be discovered by running
<pre>
pcre2test -C
</PRE>
</P>
<P>
The three libraries contain identical sets of functions, with names ending in
_8, _16, or _32, respectively (for example, <b>pcre2_compile_8()</b>). However,
by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just
one code unit width can be written using generic names such as
<b>pcre2_compile()</b>, and the documentation is written assuming that this is
the case.
</P>
<P>
In addition to the Perl-compatible matching function, PCRE2 contains an
alternative function that matches the same compiled patterns in a different
way. In certain circumstances, the alternative function has some advantages.
For a discussion of the two matching algorithms, see the
<a href="pcre2matching.html"><b>pcre2matching</b></a>
page.
</P>
<P>
Details of exactly which Perl regular expression features are and are not
supported by PCRE2 are given in separate documents. See the
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
and
<a href="pcre2compat.html"><b>pcre2compat</b></a>
pages. There is a syntax summary in the
<a href="pcre2syntax.html"><b>pcre2syntax</b></a>
page.
</P>
<P>
Some features of PCRE2 can be included, excluded, or changed when the library
is built. The
<a href="pcre2_config.html"><b>pcre2_config()</b></a>
function makes it possible for a client to discover which features are
available. The features themselves are described in the
<a href="pcre2build.html"><b>pcre2build</b></a>
page. Documentation about building PCRE2 for various operating systems can be
found in the
<a href="README.txt"><b>README</b></a>
and
<a href="NON-AUTOTOOLS-BUILD.txt"><b>NON-AUTOTOOLS_BUILD</b></a>
files in the source distribution.
</P>
<P>
The libraries contains a number of undocumented internal functions and data
tables that are used by more than one of the exported external functions, but
which are not intended for use by external callers. Their names all begin with
"_pcre2", which hopefully will not provoke any name clashes. In some
environments, it is possible to control which external symbols are exported
when a shared library is built, and in these cases the undocumented symbols are
not exported.
</P>
<br><a name="SEC2" href="#TOC1">SECURITY CONSIDERATIONS</a><br>
<P>
If you are using PCRE2 in a non-UTF application that permits users to supply
arbitrary patterns for compilation, you should be aware of a feature that
allows users to turn on UTF support from within a pattern. For example, an
8-bit pattern that begins with "(*UTF)" turns on UTF-8 mode, which interprets
patterns and subjects as strings of UTF-8 code units instead of individual
8-bit characters. This causes both the pattern and any data against which it is
matched to be checked for UTF-8 validity. If the data string is very long, such
a check might use sufficiently many resources as to cause your application to
lose performance.
</P>
<P>
One way of guarding against this possibility is to use the
<b>pcre2_pattern_info()</b> function to check the compiled pattern's options for
PCRE2_UTF. Alternatively, you can set the PCRE2_NEVER_UTF option when calling
<b>pcre2_compile()</b>. This causes an compile time error if a pattern contains
a UTF-setting sequence.
</P>
<P>
The use of Unicode properties for character types such as \d can also be
enabled from within the pattern, by specifying "(*UCP)". This feature can be
disallowed by setting the PCRE2_NEVER_UCP option.
</P>
<P>
If your application is one that supports UTF, be aware that validity checking
can take time. If the same data string is to be matched many times, you can use
the PCRE2_NO_UTF_CHECK option for the second and subsequent matches to avoid
running redundant checks.
</P>
<P>
The use of the \C escape sequence in a UTF-8 or UTF-16 pattern can lead to
problems, because it may leave the current matching point in the middle of a
multi-code-unit character. The PCRE2_NEVER_BACKSLASH_C option can be used to
lock out the use of \C, causing a compile-time error if it is encountered.
</P>
<P>
Another way that performance can be hit is by running a pattern that has a very
large search tree against a string that will never match. Nested unlimited
repeats in a pattern are a common example. PCRE2 provides some protection
against this: see the <b>pcre2_set_match_limit()</b> function in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page.
</P>
<br><a name="SEC3" href="#TOC1">USER DOCUMENTATION</a><br>
<P>
The user documentation for PCRE2 comprises a number of different sections. In
the "man" format, each of these is a separate "man page". In the HTML format,
each is a separate page, linked from the index page. In the plain text format,
the descriptions of the <b>pcre2grep</b> and <b>pcre2test</b> programs are in
files called <b>pcre2grep.txt</b> and <b>pcre2test.txt</b>, respectively. The
remaining sections, except for the <b>pcre2demo</b> section (which is a program
listing), and the short pages for individual functions, are concatenated in
<b>pcre2.txt</b>, for ease of searching. The sections are as follows:
<pre>
pcre2 this document
pcre2-config show PCRE2 installation configuration information
pcre2api details of PCRE2's native C API
pcre2build building PCRE2
pcre2callout details of the callout feature
pcre2compat discussion of Perl compatibility
pcre2demo a demonstration C program that uses PCRE2
pcre2grep description of the <b>pcre2grep</b> command (8-bit only)
pcre2jit discussion of just-in-time optimization support
pcre2limits details of size and other limits
pcre2matching discussion of the two matching algorithms
pcre2partial details of the partial matching facility
pcre2pattern syntax and semantics of supported regular expression patterns
pcre2perform discussion of performance issues
pcre2posix the POSIX-compatible C API for the 8-bit library
pcre2sample discussion of the pcre2demo program
pcre2stack discussion of stack usage
pcre2syntax quick syntax reference
pcre2test description of the <b>pcre2test</b> command
pcre2unicode discussion of Unicode and UTF support
</pre>
In the "man" and HTML formats, there is also a short page for each C library
function, listing its arguments and results.
</P>
<br><a name="SEC4" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel
<br>
University Computing Service
<br>
Cambridge, England.
<br>
</P>
<P>
Putting an actual email address here is a spam magnet. If you want to email me,
use my two initials, followed by the two digits 10, at the domain cam.ac.uk.
</P>
<br><a name="SEC5" href="#TOC1">REVISION</a><br>
<P>
Last updated: 13 April 2015
<br>
Copyright &copy; 1997-2015 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,62 @@
<html>
<head>
<title>pcre2_callout_enumerate specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_callout_enumerate man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_callout_enumerate(const pcre2_code *<i>code</i>,</b>
<b> int (*<i>callback</i>)(pcre2_callout_enumerate_block *, void *),</b>
<b> void *<i>callout_data</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function scans a compiled regular expression and calls the <i>callback()</i>
function for each callout within the pattern. The yield of the function is zero
for success and non-zero otherwise. The arguments are:
<pre>
<i>code</i> Points to the compiled pattern
<i>callback</i> The callback function
<i>callout_data</i> User data that is passed to the callback
</pre>
The <i>callback()</i> function is passed a pointer to a data block containing
the following fields:
<pre>
<i>version</i> Block version number
<i>pattern_position</i> Offset to next item in pattern
<i>next_item_length</i> Length of next item in pattern
<i>callout_number</i> Number for numbered callouts
<i>callout_string_offset</i> Offset to string within pattern
<i>callout_string_length</i> Length of callout string
<i>callout_string</i> Points to callout string or is NULL
</pre>
The second argument is the callout data that was passed to
<b>pcre2_callout_enumerate()</b>. The <b>callback()</b> function must return zero
for success. Any other value causes the pattern scan to stop, with the value
being passed back as the result of <b>pcre2_callout_enumerate()</b>.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,39 @@
<html>
<head>
<title>pcre2_code_free specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_code_free man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>pcre2_code_free(pcre2_code *<i>code</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function frees the memory used for a compiled pattern, including any
memory used by the JIT compiler.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,91 @@
<html>
<head>
<title>pcre2_compile specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_compile man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>pcre2_code *pcre2_compile(PCRE2_SPTR <i>pattern</i>, PCRE2_SIZE <i>length</i>,</b>
<b> uint32_t <i>options</i>, int *<i>errorcode</i>, PCRE2_SIZE *<i>erroroffset,</i></b>
<b> pcre2_compile_context *<i>ccontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function compiles a regular expression pattern into an internal form. Its
arguments are:
<pre>
<i>pattern</i> A string containing expression to be compiled
<i>length</i> The length of the string or PCRE2_ZERO_TERMINATED
<i>options</i> Option bits
<i>errorcode</i> Where to put an error code
<i>erroffset</i> Where to put an error offset
<i>ccontext</i> Pointer to a compile context or NULL
</pre>
The length of the string and any error offset that is returned are in code
units, not characters. A compile context is needed only if you want to change
<pre>
What \R matches (Unicode newlines or CR, LF, CRLF only)
PCRE2's character tables
The newline character sequence
The compile time nested parentheses limit
</pre>
or provide an external function for stack size checking. The option bits are:
<pre>
PCRE2_ANCHORED Force pattern anchoring
PCRE2_ALT_BSUX Alternative handling of \u, \U, and \x
PCRE2_ALT_CIRCUMFLEX Alternative handling of ^ in multiline mode
PCRE2_AUTO_CALLOUT Compile automatic callouts
PCRE2_CASELESS Do caseless matching
PCRE2_DOLLAR_ENDONLY $ not to match newline at end
PCRE2_DOTALL . matches anything including NL
PCRE2_DUPNAMES Allow duplicate names for subpatterns
PCRE2_EXTENDED Ignore white space and # comments
PCRE2_FIRSTLINE Force matching to be before newline
PCRE2_MATCH_UNSET_BACKREF Match unset back references
PCRE2_MULTILINE ^ and $ match newlines within data
PCRE2_NEVER_BACKSLASH_C Lock out the use of \C in patterns
PCRE2_NEVER_UCP Lock out PCRE2_UCP, e.g. via (*UCP)
PCRE2_NEVER_UTF Lock out PCRE2_UTF, e.g. via (*UTF)
PCRE2_NO_AUTO_CAPTURE Disable numbered capturing paren-
theses (named ones available)
PCRE2_NO_AUTO_POSSESS Disable auto-possessification
PCRE2_NO_DOTSTAR_ANCHOR Disable automatic anchoring for .*
PCRE2_NO_START_OPTIMIZE Disable match-time start optimizations
PCRE2_NO_UTF_CHECK Do not check the pattern for UTF validity
(only relevant if PCRE2_UTF is set)
PCRE2_UCP Use Unicode properties for \d, \w, etc.
PCRE2_UNGREEDY Invert greediness of quantifiers
PCRE2_UTF Treat pattern and subjects as UTF strings
</pre>
PCRE2 must be built with Unicode support in order to use PCRE2_UTF, PCRE2_UCP
and related options.
</P>
<P>
The yield of the function is a pointer to a private data structure that
contains the compiled pattern, or NULL if an error was detected.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,41 @@
<html>
<head>
<title>pcre2_compile_context_copy specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_compile_context_copy man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>pcre2_compile_context *pcre2_compile_context_copy(</b>
<b> pcre2_compile_context *<i>ccontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function makes a new copy of a compile context, using the memory
allocation function that was used for the original context. The result is NULL
if the memory cannot be obtained.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,42 @@
<html>
<head>
<title>pcre2_compile_context_create specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_compile_context_create man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>pcre2_compile_context *pcre2_compile_context_create(</b>
<b> pcre2_general_context *<i>gcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function creates and initializes a new compile context. If its argument is
NULL, <b>malloc()</b> is used to get the necessary memory; otherwise the memory
allocation function within the general context is used. The result is NULL if
the memory could not be obtained.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,40 @@
<html>
<head>
<title>pcre2_compile_context_free specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_compile_context_free man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>void pcre2_compile_context_free(pcre2_compile_context *<i>ccontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function frees the memory occupied by a compile context, using the memory
freeing function from the general context with which it was created, or
<b>free()</b> if that was not set.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,83 @@
<html>
<head>
<title>pcre2_config specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_config man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_config(uint32_t <i>what</i>, void *<i>where</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function makes it possible for a client program to find out which optional
features are available in the version of the PCRE2 library it is using. The
arguments are as follows:
<pre>
<i>what</i> A code specifying what information is required
<i>where</i> Points to where to put the information
</pre>
If <i>where</i> is NULL, the function returns the amount of memory needed for
the requested information. When the information is a string, the value is in
code units; for other types of data it is in bytes.
</P>
<P>
If <b>where</b> is not NULL, for PCRE2_CONFIG_JITTARGET,
PCRE2_CONFIG_UNICODE_VERSION, and PCRE2_CONFIG_VERSION it must point to a
buffer that is large enough to hold the string. For all other codes it must
point to a uint32_t integer variable. The available codes are:
<pre>
PCRE2_CONFIG_BSR Indicates what \R matches by default:
PCRE2_BSR_UNICODE
PCRE2_BSR_ANYCRLF
PCRE2_CONFIG_JIT Availability of just-in-time compiler
support (1=yes 0=no)
PCRE2_CONFIG_JITTARGET Information about the target archi-
tecture for the JIT compiler
PCRE2_CONFIG_LINKSIZE Configured internal link size (2, 3, 4)
PCRE2_CONFIG_MATCHLIMIT Default internal resource limit
PCRE2_CONFIG_NEWLINE Code for the default newline sequence:
PCRE2_NEWLINE_CR
PCRE2_NEWLINE_LF
PCRE2_NEWLINE_CRLF
PCRE2_NEWLINE_ANY
PCRE2_NEWLINE_ANYCRLF
PCRE2_CONFIG_PARENSLIMIT Default parentheses nesting limit
PCRE2_CONFIG_RECURSIONLIMIT Internal recursion depth limit
PCRE2_CONFIG_STACKRECURSE Recursion implementation (1=stack
0=heap)
PCRE2_CONFIG_UNICODE Availability of Unicode support (1=yes
0=no)
PCRE2_CONFIG_UNICODE_VERSION The Unicode version (a string)
PCRE2_CONFIG_VERSION The PCRE2 version (a string)
</pre>
The function yields a non-negative value on success or the negative value
PCRE2_ERROR_BADOPTION otherwise. This is also the result for the
PCRE2_CONFIG_JITTARGET code if JIT support is not available. When a string is
requested, the function returns the number of code units used, including the
terminating zero.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,79 @@
<html>
<head>
<title>pcre2_dfa_match specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_dfa_match man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_dfa_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
<b> uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
<b> pcre2_match_context *<i>mcontext</i>,</b>
<b> int *<i>workspace</i>, PCRE2_SIZE <i>wscount</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function matches a compiled regular expression against a given subject
string, using an alternative matching algorithm that scans the subject string
just once (<i>not</i> Perl-compatible). (The Perl-compatible matching function
is <b>pcre2_match()</b>.) The arguments for this function are:
<pre>
<i>code</i> Points to the compiled pattern
<i>subject</i> Points to the subject string
<i>length</i> Length of the subject string
<i>startoffset</i> Offset in the subject at which to start matching
<i>options</i> Option bits
<i>match_data</i> Points to a match data block, for results
<i>mcontext</i> Points to a match context, or is NULL
<i>workspace</i> Points to a vector of ints used as working space
<i>wscount</i> Number of elements in the vector
</pre>
For <b>pcre2_dfa_match()</b>, a match context is needed only if you want to set
up a callout function. The <i>length</i> and <i>startoffset</i> values are code
units, not characters. The options are:
<pre>
PCRE2_ANCHORED Match only at the first position
PCRE2_NOTBOL Subject is not the beginning of a line
PCRE2_NOTEOL Subject is not the end of a line
PCRE2_NOTEMPTY An empty string is not a valid match
PCRE2_NOTEMPTY_ATSTART An empty string at the start of the subject
is not a valid match
PCRE2_NO_UTF_CHECK Do not check the subject for UTF
validity (only relevant if PCRE2_UTF
was set at compile time)
PCRE2_PARTIAL_SOFT Return PCRE2_ERROR_PARTIAL for a partial
match if no full matches are found
PCRE2_PARTIAL_HARD Return PCRE2_ERROR_PARTIAL for a partial match
even if there is a full match as well
PCRE2_DFA_RESTART Restart after a partial match
PCRE2_DFA_SHORTEST Return only the shortest match
</pre>
There are restrictions on what may appear in a pattern when using this matching
function. Details are given in the
<a href="pcre2matching.html"><b>pcre2matching</b></a>
documentation. For details of partial matching, see the
<a href="pcre2partial.html"><b>pcre2partial</b></a>
page. There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,42 @@
<html>
<head>
<title>pcre2_general_context_copy specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_general_context_copy man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>pcre2_general_context *pcre2_general_context_copy(</b>
<b> pcre2_general_context *<i>gcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function makes a new copy of a general context, using the memory
allocation functions in the context, if set, to get the necessary memory.
Otherwise <b>malloc()</b> is used. The result is NULL if the memory cannot be
obtained.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,44 @@
<html>
<head>
<title>pcre2_general_context_create specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_general_context_create man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>pcre2_general_context *pcre2_general_context_create(</b>
<b> void *(*<i>private_malloc</i>)(PCRE2_SIZE, void *),</b>
<b> void (*<i>private_free</i>)(void *, void *), void *<i>memory_data</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function creates and initializes a general context. The arguments define
custom memory management functions and a data value that is passed to them when
they are called. The <b>private_malloc()</b> function is used to get memory for
the context. If either of the first two arguments is NULL, the system memory
management function is used. The result is NULL if no memory could be obtained.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,39 @@
<html>
<head>
<title>pcre2_general_context_free specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_general_context_free man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>void pcre2_general_context_free(pcre2_general_context *<i>gcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function frees the memory occupied by a general context, using the memory
freeing function within the context, if set.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,48 @@
<html>
<head>
<title>pcre2_get_error_message specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_get_error_message man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_get_error_message(int <i>errorcode</i>, PCRE2_UCHAR *<i>buffer</i>,</b>
<b> PCRE2_SIZE <i>bufflen</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function provides a textual error message for each PCRE2 error code.
Compilation errors are positive numbers; UTF formatting errors and matching
errors are negative numbers. The arguments are:
<pre>
<i>errorcode</i> an error code (positive or negative)
<i>buffer</i> where to put the message
<i>bufflen</i> the length of the buffer (code units)
</pre>
The function returns the length of the message, excluding the trailing zero, or
a negative error code if the buffer is too small.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,43 @@
<html>
<head>
<title>pcre2_get_mark specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_get_mark man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>PCRE2_SPTR pcre2_get_mark(pcre2_match_data *<i>match_data</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
After a call of <b>pcre2_match()</b> that was passed the match block that is
this function's argument, this function returns a pointer to the last (*MARK)
name that was encountered. The name is zero-terminated, and is within the
compiled pattern. If no (*MARK) name is available, NULL is returned. A (*MARK)
name may be available after a failed match or a partial match, as well as after
a successful one.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,39 @@
<html>
<head>
<title>pcre2_get_ovector_count specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_get_ovector_count man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>uint32_t pcre2_get_ovector_count(pcre2_match_data *<i>match_data</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function returns the number of pairs of offsets in the ovector that forms
part of the given match data block.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,40 @@
<html>
<head>
<title>pcre2_get_ovector_pointer specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_get_ovector_pointer man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *<i>match_data</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function returns a pointer to the vector of offsets that forms part of the
given match data block. The number of pairs can be found by calling
<b>pcre2_get_ovector_count()</b>.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,44 @@
<html>
<head>
<title>pcre2_get_startchar specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_get_startchar man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *<i>match_data</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
After a successful call of <b>pcre2_match()</b> that was passed the match block
that is this function's argument, this function returns the code unit offset of
the character at which the successful match started. For a non-partial match,
this can be different to the value of <i>ovector[0]</i> if the pattern contains
the \K escape sequence. After a partial match, however, this value is always
the same as <i>ovector[0]</i> because \K does not affect the result of a
partial match.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,56 @@
<html>
<head>
<title>pcre2_jit_compile specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_jit_compile man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_jit_compile(pcre2_code *<i>code</i>, uint32_t <i>options</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function requests JIT compilation, which, if the just-in-time compiler is
available, further processes a compiled pattern into machine code that executes
much faster than the <b>pcre2_match()</b> interpretive matching function. Full
details are given in the
<a href="pcre2jit.html"><b>pcre2jit</b></a>
documentation.
</P>
<P>
The first argument is a pointer that was returned by a successful call to
<b>pcre2_compile()</b>, and the second must contain one or more of the following
bits:
<pre>
PCRE2_JIT_COMPLETE compile code for full matching
PCRE2_JIT_PARTIAL_SOFT compile code for soft partial matching
PCRE2_JIT_PARTIAL_HARD compile code for hard partial matching
</pre>
The yield of the function is 0 for success, or a negative error code otherwise.
In particular, PCRE2_ERROR_JIT_BADOPTION is returned if JIT is not supported or
if an unknown bit is set in <i>options</i>.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,43 @@
<html>
<head>
<title>pcre2_jit_free_unused_memory specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_jit_free_unused_memory man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>void pcre2_jit_free_unused_memory(pcre2_general_context *<i>gcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function frees unused JIT executable memory. The argument is a general
context, for custom memory management, or NULL for standard memory management.
JIT memory allocation retains some memory in order to improve future JIT
compilation speed. In low memory conditions,
\fBpcre2_jit_free_unused_memory()\fB can be used to cause this memory to be
freed.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,58 @@
<html>
<head>
<title>pcre2_jit_match specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_jit_match man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_jit_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
<b> uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
<b> pcre2_match_context *<i>mcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function matches a compiled regular expression that has been successfully
processed by the JIT compiler against a given subject string, using a matching
algorithm that is similar to Perl's. It is a "fast path" interface to JIT, and
it bypasses some of the sanity checks that <b>pcre2_match()</b> applies.
Its arguments are exactly the same as for
<a href="pcre2_match.html"><b>pcre2_match()</b>.</a>
</P>
<P>
The supported options are PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY,
PCRE2_NOTEMPTY_ATSTART, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Unsupported
options are ignored. The subject string is not checked for UTF validity.
</P>
<P>
The return values are the same as for <b>pcre2_match()</b> plus
PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or complete) is requested
that was not compiled. For details of partial matching, see the
<a href="pcre2partial.html"><b>pcre2partial</b></a>
page.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the JIT API in the
<a href="pcre2jit.html"><b>pcre2jit</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,70 @@
<html>
<head>
<title>pcre2_jit_stack_assign specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_jit_stack_assign man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>void pcre2_jit_stack_assign(pcre2_match_context *<i>mcontext</i>,</b>
<b> pcre2_jit_callback <i>callback_function</i>, void *<i>callback_data</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function provides control over the memory used by JIT as a run-time stack
when <b>pcre2_match()</b> or <b>pcre2_jit_match()</b> is called with a pattern
that has been successfully processed by the JIT compiler. The information that
determines which stack is used is put into a match context that is subsequently
passed to a matching function. The arguments of this function are:
<pre>
mcontext a pointer to a match context
callback a callback function
callback_data a JIT stack or a value to be passed to the callback
</PRE>
</P>
<P>
If <i>callback</i> is NULL and <i>callback_data</i> is NULL, an internal 32K
block on the machine stack is used.
</P>
<P>
If <i>callback</i> is NULL and <i>callback_data</i> is not NULL,
<i>callback_data</i> must be a valid JIT stack, the result of calling
<b>pcre2_jit_stack_create()</b>.
</P>
<P>
If <i>callback</i> not NULL, it is called with <i>callback_data</i> as an
argument at the start of matching, in order to set up a JIT stack. If the
result is NULL, the internal 32K stack is used; otherwise the return value must
be a valid JIT stack, the result of calling <b>pcre2_jit_stack_create()</b>.
</P>
<P>
You may safely use the same JIT stack for multiple patterns, as long as they
are all matched in the same thread. In a multithread application, each thread
must use its own JIT stack. For more details, see the
<a href="pcre2jit.html"><b>pcre2jit</b></a>
page.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,50 @@
<html>
<head>
<title>pcre2_jit_stack_create specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_jit_stack_create man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>pcre2_jit_stack *pcre2_jit_stack_create(PCRE2_SIZE <i>startsize</i>,</b>
<b> PCRE2_SIZE <i>maxsize</i>, pcre2_general_context *<i>gcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function is used to create a stack for use by the code compiled by the JIT
compiler. The first two arguments are a starting size for the stack, and a
maximum size to which it is allowed to grow. The final argument is a general
context, for memory allocation functions, or NULL for standard memory
allocation. The result can be passed to the JIT run-time code by calling
<b>pcre2_jit_stack_assign()</b> to associate the stack with a compiled pattern,
which can then be processed by <b>pcre2_match()</b>. If the "fast path" JIT
matcher, <b>pcre2_jit_match()</b> is used, the stack can be passed directly as
an argument. A maximum stack size of 512K to 1M should be more than enough for
any pattern. For more details, see the
<a href="pcre2jit.html"><b>pcre2jit</b></a>
page.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,42 @@
<html>
<head>
<title>pcre2_jit_stack_free specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_jit_stack_free man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>void pcre2_jit_stack_free(pcre2_jit_stack *<i>jit_stack</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function is used to free a JIT stack that was created by
<b>pcre2_jit_stack_create()</b> when it is no longer needed. For more details,
see the
<a href="pcre2jit.html"><b>pcre2jit</b></a>
page.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,48 @@
<html>
<head>
<title>pcre2_maketables specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_maketables man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>const unsigned char *pcre2_maketables(pcre22_general_context *<i>gcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function builds a set of character tables for character values less than
256. These can be passed to <b>pcre2_compile()</b> in a compile context in order
to override the internal, built-in tables (which were either defaulted or made
by <b>pcre2_maketables()</b> when PCRE2 was compiled). See the
<a href="pcre2_set_character_tables.html"><b>pcre2_set_character_tables()</b></a>
page. You might want to do this if you are using a non-standard locale.
</P>
<P>
If the argument is NULL, <b>malloc()</b> is used to get memory for the tables.
Otherwise it must point to a general context, which can supply pointers to a
custom memory manager. The function yields a pointer to the tables.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,76 @@
<html>
<head>
<title>pcre2_match specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_match man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
<b> uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
<b> pcre2_match_context *<i>mcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function matches a compiled regular expression against a given subject
string, using a matching algorithm that is similar to Perl's. It returns
offsets to captured substrings. Its arguments are:
<pre>
<i>code</i> Points to the compiled pattern
<i>subject</i> Points to the subject string
<i>length</i> Length of the subject string
<i>startoffset</i> Offset in the subject at which to start matching
<i>options</i> Option bits
<i>match_data</i> Points to a match data block, for results
<i>mcontext</i> Points to a match context, or is NULL
</pre>
A match context is needed only if you want to:
<pre>
Set up a callout function
Change the limit for calling the internal function <i>match()</i>
Change the limit for calling <i>match()</i> recursively
Set custom memory management when the heap is used for recursion
</pre>
The <i>length</i> and <i>startoffset</i> values are code
units, not characters. The options are:
<pre>
PCRE2_ANCHORED Match only at the first position
PCRE2_NOTBOL Subject string is not the beginning of a line
PCRE2_NOTEOL Subject string is not the end of a line
PCRE2_NOTEMPTY An empty string is not a valid match
PCRE2_NOTEMPTY_ATSTART An empty string at the start of the subject
is not a valid match
PCRE2_NO_UTF_CHECK Do not check the subject for UTF
validity (only relevant if PCRE2_UTF
was set at compile time)
PCRE2_PARTIAL_SOFT Return PCRE2_ERROR_PARTIAL for a partial
match if no full matches are found
PCRE2_PARTIAL_HARD Return PCRE2_ERROR_PARTIAL for a partial match
if that is found before a full match
</pre>
For details of partial matching, see the
<a href="pcre2partial.html"><b>pcre2partial</b></a>
page. There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,41 @@
<html>
<head>
<title>pcre2_match_context_copy specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_match_context_copy man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>pcre2_match_context *pcre2_match_context_copy(</b>
<b> pcre2_match_context *<i>mcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function makes a new copy of a match context, using the memory
allocation function that was used for the original context. The result is NULL
if the memory cannot be obtained.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,42 @@
<html>
<head>
<title>pcre2_match_context_create specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_match_context_create man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>pcre2_match_context *pcre2_match_context_create(</b>
<b> pcre2_general_context *<i>gcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function creates and initializes a new match context. If its argument is
NULL, <b>malloc()</b> is used to get the necessary memory; otherwise the memory
allocation function within the general context is used. The result is NULL if
the memory could not be obtained.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,40 @@
<html>
<head>
<title>pcre2_match_context_free specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_match_context_free man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>void pcre2_match_context_free(pcre2_match_context *<i>mcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function frees the memory occupied by a match context, using the memory
freeing function from the general context with which it was created, or
<b>free()</b> if that was not set.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,49 @@
<html>
<head>
<title>pcre2_match_data_create specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_match_data_create man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>pcre2_match_data_create(uint32_t <i>ovecsize</i>,</b>
<b> pcre2_general_context *<i>gcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function creates a new match data block, which is used for holding the
result of a match. The first argument specifies the number of pairs of offsets
that are required. These form the "output vector" (ovector) within the match
data block, and are used to identify the matched string and any captured
substrings. There is always one pair of offsets; if <b>ovecsize</b> is zero, it
is treated as one.
</P>
<P>
The second argument points to a general context, for custom memory management,
or is NULL for system memory management. The result of the function is NULL if
the memory for the block could not be obtained.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,50 @@
<html>
<head>
<title>pcre2_match_data_create_from_pattern specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_match_data_create_from_pattern man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>pcre2_match_data_create_from_pattern(const pcre2_code *<i>code</i>,</b>
<b> pcre2_general_context *<i>gcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function creates a new match data block, which is used for holding the
result of a match. The first argument points to a compiled pattern. The number
of capturing parentheses within the pattern is used to compute the number of
pairs of offsets that are required in the match data block. These form the
"output vector" (ovector) within the match data block, and are used to identify
the matched string and any captured substrings.
</P>
<P>
The second argument points to a general context, for custom memory management,
or is NULL to use the same memory allocator as was used for the compiled
pattern. The result of the function is NULL if the memory for the block could
not be obtained.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,40 @@
<html>
<head>
<title>pcre2_match_data_free specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_match_data_free man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>void pcre2_match_data_free(pcre2_match_data *<i>match_data</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function frees the memory occupied by a match data block, using the memory
freeing function from the general context with which it was created, or
<b>free()</b> if that was not set.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,106 @@
<html>
<head>
<title>pcre2_pattern_info specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_pattern_info man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_pattern_info(const pcre2 *<i>code</i>, uint32_t <i>what</i>, void *<i>where</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function returns information about a compiled pattern. Its arguments are:
<pre>
<i>code</i> Pointer to a compiled regular expression
<i>what</i> What information is required
<i>where</i> Where to put the information
</pre>
The recognized values for the <i>what</i> argument, and the information they
request are as follows:
<pre>
PCRE2_INFO_ALLOPTIONS Final options after compiling
PCRE2_INFO_ARGOPTIONS Options passed to <b>pcre2_compile()</b>
PCRE2_INFO_BACKREFMAX Number of highest back reference
PCRE2_INFO_BSR What \R matches:
PCRE2_BSR_UNICODE: Unicode line endings
PCRE2_BSR_ANYCRLF: CR, LF, or CRLF only
PCRE2_INFO_CAPTURECOUNT Number of capturing subpatterns
PCRE2_INFO_FIRSTBITMAP Bitmap of first code units, or NULL
PCRE2_INFO_FIRSTCODEUNIT First code unit when type is 1
PCRE2_INFO_FIRSTCODETYPE Type of start-of-match information
0 nothing set
1 first code unit is set
2 start of string or after newline
PCRE2_INFO_HASCRORLF Return 1 if explicit CR or LF matches
exist in the pattern
PCRE2_INFO_JCHANGED Return 1 if (?J) or (?-J) was used
PCRE2_INFO_JITSIZE Size of JIT compiled code, or 0
PCRE2_INFO_LASTCODEUNIT Last code unit when type is 1
PCRE2_INFO_LASTCODETYPE Type of must-be-present information
0 nothing set
1 code unit is set
PCRE2_INFO_MATCHEMPTY 1 if the pattern can match an
empty string, 0 otherwise
PCRE2_INFO_MATCHLIMIT Match limit if set,
otherwise PCRE2_ERROR_UNSET
PCRE2_INFO_MAXLOOKBEHIND Length (in characters) of the longest
lookbehind assertion
PCRE2_INFO_MINLENGTH Lower bound length of matching strings
PCRE2_INFO_NAMEENTRYSIZE Size of name table entries
PCRE2_INFO_NAMECOUNT Number of named subpatterns
PCRE2_INFO_NAMETABLE Pointer to name table
PCRE2_CONFIG_NEWLINE Code for the newline sequence:
PCRE2_NEWLINE_CR
PCRE2_NEWLINE_LF
PCRE2_NEWLINE_CRLF
PCRE2_NEWLINE_ANY
PCRE2_NEWLINE_ANYCRLF
PCRE2_INFO_RECURSIONLIMIT Recursion limit if set,
otherwise PCRE2_ERROR_UNSET
PCRE2_INFO_SIZE Size of compiled pattern
</pre>
If <i>where</i> is NULL, the function returns the amount of memory needed for
the requested information, in bytes. Otherwise, the <i>where</i> argument must
point to an unsigned 32-bit integer (uint32_t variable), except for the
following <i>what</i> values, when it must point to a variable of the type
shown:
<pre>
PCRE2_INFO_FIRSTBITMAP const uint8_t *
PCRE2_INFO_JITSIZE size_t
PCRE2_INFO_NAMETABLE PCRE2_SPTR
PCRE2_INFO_SIZE size_t
</pre>
The yield of the function is zero on success or:
<pre>
PCRE2_ERROR_NULL the argument <i>code</i> is NULL
PCRE2_ERROR_BADMAGIC the "magic number" was not found
PCRE2_ERROR_BADOPTION the value of <i>what</i> is invalid
PCRE2_ERROR_BADMODE the pattern was compiled in the wrong mode
PCRE2_ERROR_UNSET the requested information is not set
</PRE>
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,62 @@
<html>
<head>
<title>pcre2_serialize_decode specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_serialize_decode man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int32_t pcre2_serialize_decode(pcre2_code **<i>codes</i>,</b>
<b> int32_t <i>number_of_codes</i>, const uint32_t *<i>bytes</i>,</b>
<b> pcre2_general_context *<i>gcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function decodes a serialized set of compiled patterns back into a list of
individual patterns. Its arguments are:
<pre>
<i>codes</i> pointer to a vector in which to build the list
<i>number_of_codes</i> number of slots in the vector
<i>bytes</i> the serialized byte stream
<i>gcontext</i> pointer to a general context or NULL
</pre>
The <i>bytes</i> argument must point to a block of data that was originally
created by <b>pcre2_serialize_encode()</b>, though it may have been saved on
disc or elsewhere in the meantime. If there are more codes in the serialized
data than slots in the list, only those compiled patterns that will fit are
decoded. The yield of the function is the number of decoded patterns, or one of
the following negative error codes:
<pre>
PCRE2_ERROR_BADDATA <i>number_of_codes</i> is zero or less
PCRE2_ERROR_BADMAGIC mismatch of id bytes in <i>bytes</i>
PCRE2_ERROR_BADMODE mismatch of variable unit size or PCRE version
PCRE2_ERROR_MEMORY memory allocation failed
PCRE2_ERROR_NULL <i>codes</i> or <i>bytes</i> is NULL
</pre>
PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was compiled
on a system with different endianness.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,61 @@
<html>
<head>
<title>pcre2_serialize_encode specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_serialize_encode man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int32_t pcre2_serialize_encode(pcre2_code **<i>codes</i>,</b>
<b> int32_t <i>number_of_codes</i>, uint32_t **<i>serialized_bytes</i>,</b>
<b> PCRE2_SIZE *<i>serialized_size</i>, pcre2_general_context *<i>gcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function encodes a list of compiled patterns into a byte stream that can
be saved on disc or elsewhere. Its arguments are:
<pre>
<i>codes</i> pointer to a vector containing the list
<i>number_of_codes</i> number of slots in the vector
<i>serialized_bytes</i> set to point to the serialized byte stream
<i>serialized_size</i> set to the number of bytes in the byte stream
<i>gcontext</i> pointer to a general context or NULL
</pre>
The context argument is used to obtain memory for the byte stream. When the
serialized data is no longer needed, it must be freed by calling
<b>pcre2_serialize_free()</b>. The yield of the function is the number of
serialized patterns, or one of the following negative error codes:
<pre>
PCRE2_ERROR_BADDATA <i>number_of_codes</i> is zero or less
PCRE2_ERROR_BADMAGIC mismatch of id bytes in one of the patterns
PCRE2_ERROR_MEMORY memory allocation failed
PCRE2_ERROR_MIXEDTABLES the patterns do not all use the same tables
PCRE2_ERROR_NULL an argument other than <i>gcontext</i> is NULL
</pre>
PCRE2_ERROR_BADMAGIC means either that a pattern's code has been corrupted, or
that a slot in the vector does not point to a compiled pattern.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,40 @@
<html>
<head>
<title>pcre2_serialize_free specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_serialize_free man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>void pcre2_serialize_free(uint8_t *<i>bytes</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function frees the memory that was obtained by
<b>pcre2_serialize_encode()</b> to hold a serialized byte stream. The argument
must point to such a byte stream.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,49 @@
<html>
<head>
<title>pcre2_serialize_get_number_of_codes specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_serialize_get_number_of_codes man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int32_t pcre2_serialize_get_number_of_codes(const uint8_t *<i>bytes</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
The <i>bytes</i> argument must point to a serialized byte stream that was
originally created by <b>pcre2_serialize_encode()</b> (though it may have been
saved on disc or elsewhere in the meantime). The function returns the number of
serialized patterns in the byte stream, or one of the following negative error
codes:
<pre>
PCRE2_ERROR_BADMAGIC mismatch of id bytes in <i>bytes</i>
PCRE2_ERROR_BADMODE mismatch of variable unit size or PCRE version
PCRE2_ERROR_NULL the argument is NULL
</pre>
PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was compiled
on a system with different endianness.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,42 @@
<html>
<head>
<title>pcre2_set_bsr specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_set_bsr man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_set_bsr(pcre2_compile_context *<i>ccontext</i>,</b>
<b> uint32_t <i>value</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function sets the convention for processing \R within a compile context.
The second argument must be one of PCRE2_BSR_ANYCRLF or PCRE2_BSR_UNICODE. The
result is zero for success or PCRE2_ERROR_BADDATA if the second argument is
invalid.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,43 @@
<html>
<head>
<title>pcre2_set_callout specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_set_callout man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_set_callout(pcre2_match_context *<i>mcontext</i>,</b>
<b> int (*<i>callout_function</i>)(pcre2_callout_block *),</b>
<b> void *<i>callout_data</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function sets the callout fields in a match context (the first argument).
The second argument specifies a callout function, and the third argument is an
opaque data time that is passed to it. The result of this function is always
zero.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,42 @@
<html>
<head>
<title>pcre2_set_character_tables specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_set_character_tables man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_set_character_tables(pcre2_compile_context *<i>ccontext</i>,</b>
<b> const unsigned char *<i>tables</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function sets a pointer to custom character tables within a compile
context. The second argument must be the result of a call to
<b>pcre2_maketables()</b> or NULL to request the default tables. The result is
always zero.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,46 @@
<html>
<head>
<title>pcre2_set_compile_recursion_guard specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_set_compile_recursion_guard man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_set_compile_recursion_guard(pcre2_compile_context *<i>ccontext</i>,</b>
<b> int (*<i>guard_function</i>)(uint32_t, void *), void *<i>user_data</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function defines, within a compile context, a function that is called
whenever <b>pcre2_compile()</b> starts to compile a parenthesized part of a
pattern. The first argument to the function gives the current depth of
parenthesis nesting, and the second is user data that is supplied when the
function is set up. The callout function should return zero if all is well, or
non-zero to force an error. This feature is provided so that applications can
check the available system stack space, in order to avoid running out. The
result of <b>pcre2_set_compile_recursion_guard()</b> is always zero.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,40 @@
<html>
<head>
<title>pcre2_set_match_limit specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_set_match_limit man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b>
<b> uint32_t <i>value</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function sets the match limit field in a match context. The result is
always zero.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,50 @@
<html>
<head>
<title>pcre2_set_newline specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_set_newline man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_set_newline(pcre2_compile_context *<i>ccontext</i>,</b>
<b> uint32_t <i>value</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function sets the newline convention within a compile context. This
specifies which character(s) are recognized as newlines when compiling and
matching patterns. The second argument must be one of:
<pre>
PCRE2_NEWLINE_CR Carriage return only
PCRE2_NEWLINE_LF Linefeed only
PCRE2_NEWLINE_CRLF CR followed by LF only
PCRE2_NEWLINE_ANYCRLF Any of the above
PCRE2_NEWLINE_ANY Any Unicode newline sequence
</pre>
The result is zero for success or PCRE2_ERROR_BADDATA if the second argument is
invalid.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,40 @@
<html>
<head>
<title>pcre2_set_parens_nest_limit specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_set_parens_nest_limit man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_set_parens_nest_limit(pcre2_compile_context *<i>ccontext</i>,</b>
<b> uint32_t <i>value</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function sets, in a compile context, the maximum depth of nested
parentheses in a pattern. The result is always zero.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,40 @@
<html>
<head>
<title>pcre2_set_recursion_limit specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_set_recursion_limit man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_set_recursion_limit(pcre2_match_context *<i>mcontext</i>,</b>
<b> uint32_t <i>value</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function sets the recursion limit field in a match context. The result is
always zero.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,47 @@
<html>
<head>
<title>pcre2_set_recursion_memory_management specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_set_recursion_memory_management man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_set_recursion_memory_management(</b>
<b> pcre2_match_context *<i>mcontext</i>,</b>
<b> void *(*<i>private_malloc</i>)(PCRE2_SIZE, void *),</b>
<b> void (*<i>private_free</i>)(void *, void *), void *<i>memory_data</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function sets the match context fields for custom memory management when
PCRE2 is compiled to use the heap instead of the system stack for recursive
function calls while matching. When PCRE2 is compiled to use the stack (the
default) this function does nothing. The first argument is a match context, the
second and third specify the memory allocation and freeing functions, and the
final argument is an opaque value that is passed to them whenever they are
called. The result of this function is always zero.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,85 @@
<html>
<head>
<title>pcre2_substitute specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_substitute man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_substitute(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
<b> uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
<b> pcre2_match_context *<i>mcontext</i>, PCRE2_SPTR <i>replacement</i>,</b>
<b> PCRE2_SIZE <i>rlength</i>, PCRE2_UCHAR *<i>outputbuffer</i>,</b>
<b> PCRE2_SIZE *<i>outlengthptr</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function matches a compiled regular expression against a given subject
string, using a matching algorithm that is similar to Perl's. It then makes a
copy of the subject, substituting a replacement string for what was matched.
Its arguments are:
<pre>
<i>code</i> Points to the compiled pattern
<i>subject</i> Points to the subject string
<i>length</i> Length of the subject string
<i>startoffset</i> Offset in the subject at which to start matching
<i>options</i> Option bits
<i>match_data</i> Points to a match data block, or is NULL
<i>mcontext</i> Points to a match context, or is NULL
<i>replacement</i> Points to the replacement string
<i>rlength</i> Length of the replacement string
<i>outputbuffer</i> Points to the output buffer
<i>outlengthptr</i> Points to the length of the output buffer
</pre>
A match context is needed only if you want to:
<pre>
Set up a callout function
Change the limit for calling the internal function <i>match()</i>
Change the limit for calling <i>match()</i> recursively
Set custom memory management when the heap is used for recursion
</pre>
The <i>length</i>, <i>startoffset</i> and <i>rlength</i> values are code
units, not characters, as is the contents of the variable pointed at by
<i>outlengthptr</i>, which is updated to the actual length of the new string.
The options are:
<pre>
PCRE2_ANCHORED Match only at the first position
PCRE2_NOTBOL Subject string is not the beginning of a line
PCRE2_NOTEOL Subject string is not the end of a line
PCRE2_NOTEMPTY An empty string is not a valid match
PCRE2_NOTEMPTY_ATSTART An empty string at the start of the subject
is not a valid match
PCRE2_NO_UTF_CHECK Do not check the subject or replacement for
UTF validity (only relevant if PCRE2_UTF
was set at compile time)
PCRE2_SUBSTITUTE_GLOBAL Replace all occurrences in the subject
</pre>
The function returns the number of substitutions, which may be zero if there
were no matches. The result can be greater than one only when
PCRE2_SUBSTITUTE_GLOBAL is set.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,58 @@
<html>
<head>
<title>pcre2_substring_copy_byname specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_substring_copy_byname man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_substring_copy_byname(pcre2_match_data *<i>match_data</i>,</b>
<b> PCRE2_SPTR <i>name</i>, PCRE2_UCHAR *<i>buffer</i>, PCRE2_SIZE *<i>bufflen</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This is a convenience function for extracting a captured substring, identified
by name, into a given buffer. The arguments are:
<pre>
<i>match_data</i> The match data block for the match
<i>name</i> Name of the required substring
<i>buffer</i> Buffer to receive the string
<i>bufflen</i> Length of buffer (code units)
</pre>
The <i>bufflen</i> variable is updated to contain the length of the extracted
string, excluding the trailing zero. The yield of the function is zero for
success or one of the following error numbers:
<pre>
PCRE2_ERROR_NOSUBSTRING there are no groups of that name
PCRE2_ERROR_UNAVAILBLE the ovector was too small for that group
PCRE2_ERROR_UNSET the group did not participate in the match
PCRE2_ERROR_NOMEMORY the buffer is not big enough
</pre>
If there is more than one group with the given name, the first one that is set
is returned. In this situation PCRE2_ERROR_UNSET means that no group with the
given name was set.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,57 @@
<html>
<head>
<title>pcre2_substring_copy_bynumber specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_substring_copy_bynumber man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_substring_copy_bynumber(pcre2_match_data *<i>match_data</i>,</b>
<b> uint32_t <i>number</i>, PCRE2_UCHAR *<i>buffer</i>,</b>
<b> PCRE2_SIZE *<i>bufflen</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This is a convenience function for extracting a captured substring into a given
buffer. The arguments are:
<pre>
<i>match_data</i> The match data block for the match
<i>number</i> Number of the required substring
<i>buffer</i> Buffer to receive the string
<i>bufflen</i> Length of buffer
</pre>
The <i>bufflen</i> variable is updated with the length of the extracted string,
excluding the terminating zero. The yield of the function is zero for success
or one of the following error numbers:
<pre>
PCRE2_ERROR_NOSUBSTRING there are no groups of that number
PCRE2_ERROR_UNAVAILBLE the ovector was too small for that group
PCRE2_ERROR_UNSET the group did not participate in the match
PCRE2_ERROR_NOMEMORY the buffer is too small
</PRE>
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,41 @@
<html>
<head>
<title>pcre2_substring_free specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_substring_free man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>void pcre2_substring_free(PCRE2_UCHAR *<i>buffer</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This is a convenience function for freeing the memory obtained by a previous
call to <b>pcre2_substring_get_byname()</b> or
<b>pcre2_substring_get_bynumber()</b>. Its only argument is a pointer to the
string.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,60 @@
<html>
<head>
<title>pcre2_substring_get_byname specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_substring_get_byname man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_substring_get_byname(pcre2_match_data *<i>match_data</i>,</b>
<b> PCRE2_SPTR <i>name</i>, PCRE2_UCHAR **<i>bufferptr</i>, PCRE2_SIZE *<i>bufflen</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This is a convenience function for extracting a captured substring by name into
newly acquired memory. The arguments are:
<pre>
<i>match_data</i> The match data for the match
<i>name</i> Name of the required substring
<i>bufferptr</i> Where to put the string pointer
<i>bufflen</i> Where to put the string length
</pre>
The memory in which the substring is placed is obtained by calling the same
memory allocation function that was used for the match data block. The
convenience function <b>pcre2_substring_free()</b> can be used to free it when
it is no longer needed. The yield of the function is zero for success or one of
the following error numbers:
<pre>
PCRE2_ERROR_NOSUBSTRING there are no groups of that name
PCRE2_ERROR_UNAVAILBLE the ovector was too small for that group
PCRE2_ERROR_UNSET the group did not participate in the match
PCRE2_ERROR_NOMEMORY memory could not be obtained
</pre>
If there is more than one group with the given name, the first one that is set
is returned. In this situation PCRE2_ERROR_UNSET means that no group with the
given name was set.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,58 @@
<html>
<head>
<title>pcre2_substring_get_bynumber specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_substring_get_bynumber man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_substring_get_bynumber(pcre2_match_data *<i>match_data</i>,</b>
<b> uint32_t <i>number</i>, PCRE2_UCHAR **<i>bufferptr</i>, PCRE2_SIZE *<i>bufflen</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This is a convenience function for extracting a captured substring by number
into newly acquired memory. The arguments are:
<pre>
<i>match_data</i> The match data for the match
<i>number</i> Number of the required substring
<i>bufferptr</i> Where to put the string pointer
<i>bufflen</i> Where to put the string length
</pre>
The memory in which the substring is placed is obtained by calling the same
memory allocation function that was used for the match data block. The
convenience function <b>pcre2_substring_free()</b> can be used to free it when
it is no longer needed. The yield of the function is zero for success or one of
the following error numbers:
<pre>
PCRE2_ERROR_NOSUBSTRING there are no groups of that number
PCRE2_ERROR_UNAVAILBLE the ovector was too small for that group
PCRE2_ERROR_UNSET the group did not participate in the match
PCRE2_ERROR_NOMEMORY memory could not be obtained
</PRE>
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,46 @@
<html>
<head>
<title>pcre2_substring_length_byname specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_substring_length_byname man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_substring_length_byname(pcre2_match_data *<i>match_data</i>,</b>
<b> PCRE2_SPTR <i>name</i>, PCRE2_SIZE *<i>length</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function returns the length of a matched substring, identified by name.
The arguments are:
<pre>
<i>match_data</i> The match data block for the match
<i>name</i> The substring name
<i>length</i> Where to return the length
</pre>
The yield is zero on success, or an error code if the substring is not found.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,48 @@
<html>
<head>
<title>pcre2_substring_length_bynumber specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_substring_length_bynumber man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_substring_length_bynumber(pcre2_match_data *<i>match_data</i>,</b>
<b> uint32_t <i>number</i>, PCRE2_SIZE *<i>length</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function returns the length of a matched substring, identified by number.
The arguments are:
<pre>
<i>match_data</i> The match data block for the match
<i>number</i> The substring number
<i>length</i> Where to return the length, or NULL
</pre>
The third argument may be NULL if all you want to know is whether or not a
substring is set. The yield is zero on success, or a negative error code
otherwise. After a partial match, only substring 0 is available.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,40 @@
<html>
<head>
<title>pcre2_substring_list_free specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_substring_list_free man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>void pcre2_substring_list_free(PCRE2_SPTR *<i>list</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This is a convenience function for freeing the store obtained by a previous
call to <b>pcre2substring_list_get()</b>. Its only argument is a pointer to
the list of string pointers.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,56 @@
<html>
<head>
<title>pcre2_substring_list_get specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_substring_list_get man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_substring_list_get(pcre2_match_data *<i>match_data</i>,</b>
<b>" PCRE2_UCHAR ***<i>listptr</i>, PCRE2_SIZE **<i>lengthsptr</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This is a convenience function for extracting all the captured substrings after
a pattern match. It builds a list of pointers to the strings, and (optionally)
a second list that contains their lengths (in code units), excluding a
terminating zero that is added to each of them. All this is done in a single
block of memory that is obtained using the same memory allocation function that
was used to get the match data block. The convenience function
<b>pcre2_substring_list_free()</b> can be used to free it when it is no longer
needed. The arguments are:
<pre>
<i>match_data</i> The match data block
<i>listptr</i> Where to put a pointer to the list
<i>lengthsptr</i> Where to put a pointer to the lengths, or NULL
</pre>
A pointer to a list of pointers is put in the variable whose address is in
<i>listptr</i>. The list is terminated by a NULL pointer. If <i>lengthsptr</i> is
not NULL, a matching list of lengths is created, and its address is placed in
<i>lengthsptr</i>. The yield of the function is zero on success or
PCRE2_ERROR_NOMEMORY if sufficient memory could not be obtained.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,53 @@
<html>
<head>
<title>pcre2_substring_nametable_scan specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_substring_nametable_scan man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_substring_nametable_scan(const pcre2_code *<i>code</i>,</b>
<b> PCRE2_SPTR <i>name</i>, PCRE2_SPTR *<i>first</i>, PCRE2_SPTR *<i>last</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This convenience function finds, for a compiled pattern, the first and last
entries for a given name in the table that translates capturing parenthesis
names into numbers.
<pre>
<i>code</i> Compiled regular expression
<i>name</i> Name whose entries required
<i>first</i> Where to return a pointer to the first entry
<i>last</i> Where to return a pointer to the last entry
</pre>
When the name is found in the table, if <i>first</i> is NULL, the function
returns a group number, but if there is more than one matching entry, it is not
defined which one. Otherwise, when both pointers have been set, the yield of
the function is the length of each entry in code units. If the name is not
found, PCRE2_ERROR_NOSUBSTRING is returned.
</P>
<P>
There is a complete description of the PCRE2 native API, including the format of
the table entries, in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page, and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,50 @@
<html>
<head>
<title>pcre2_substring_number_from_name specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_substring_number_from_name man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_substring_number_from_name(const pcre2_code *<i>code</i>,</b>
<b> PCRE2_SPTR <i>name</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This convenience function finds the number of a named substring capturing
parenthesis in a compiled pattern, provided that it is a unique name. The
function arguments are:
<pre>
<i>code</i> Compiled regular expression
<i>name</i> Name whose number is required
</pre>
The yield of the function is the number of the parenthesis if the name is
found, or PCRE2_ERROR_NOSUBSTRING if it is not found. When duplicate names are
allowed (PCRE2_DUPNAMES is set), if the name is not unique,
PCRE2_ERROR_NOUNIQUESUBSTRING is returned. You can obtain the list of numbers
with the same name by calling <b>pcre2_substring_nametable_scan()</b>.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

2892
pcre2/doc/html/pcre2api.html Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,504 @@
<html>
<head>
<title>pcre2build specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2build man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<ul>
<li><a name="TOC1" href="#SEC1">BUILDING PCRE2</a>
<li><a name="TOC2" href="#SEC2">PCRE2 BUILD-TIME OPTIONS</a>
<li><a name="TOC3" href="#SEC3">BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a>
<li><a name="TOC4" href="#SEC4">BUILDING SHARED AND STATIC LIBRARIES</a>
<li><a name="TOC5" href="#SEC5">UNICODE AND UTF SUPPORT</a>
<li><a name="TOC6" href="#SEC6">JUST-IN-TIME COMPILER SUPPORT</a>
<li><a name="TOC7" href="#SEC7">NEWLINE RECOGNITION</a>
<li><a name="TOC8" href="#SEC8">WHAT \R MATCHES</a>
<li><a name="TOC9" href="#SEC9">HANDLING VERY LARGE PATTERNS</a>
<li><a name="TOC10" href="#SEC10">AVOIDING EXCESSIVE STACK USAGE</a>
<li><a name="TOC11" href="#SEC11">LIMITING PCRE2 RESOURCE USAGE</a>
<li><a name="TOC12" href="#SEC12">CREATING CHARACTER TABLES AT BUILD TIME</a>
<li><a name="TOC13" href="#SEC13">USING EBCDIC CODE</a>
<li><a name="TOC14" href="#SEC14">PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT</a>
<li><a name="TOC15" href="#SEC15">PCRE2GREP BUFFER SIZE</a>
<li><a name="TOC16" href="#SEC16">PCRE2TEST OPTION FOR LIBREADLINE SUPPORT</a>
<li><a name="TOC17" href="#SEC17">INCLUDING DEBUGGING CODE</a>
<li><a name="TOC18" href="#SEC18">DEBUGGING WITH VALGRIND SUPPORT</a>
<li><a name="TOC19" href="#SEC19">CODE COVERAGE REPORTING</a>
<li><a name="TOC20" href="#SEC20">SEE ALSO</a>
<li><a name="TOC21" href="#SEC21">AUTHOR</a>
<li><a name="TOC22" href="#SEC22">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">BUILDING PCRE2</a><br>
<P>
PCRE2 is distributed with a <b>configure</b> script that can be used to build
the library in Unix-like environments using the applications known as
Autotools. Also in the distribution are files to support building using
<b>CMake</b> instead of <b>configure</b>. The text file
<a href="README.txt"><b>README</b></a>
contains general information about building with Autotools (some of which is
repeated below), and also has some comments about building on various operating
systems. There is a lot more information about building PCRE2 without using
Autotools (including information about using <b>CMake</b> and building "by
hand") in the text file called
<a href="NON-AUTOTOOLS-BUILD.txt"><b>NON-AUTOTOOLS-BUILD</b>.</a>
You should consult this file as well as the
<a href="README.txt"><b>README</b></a>
file if you are building in a non-Unix-like environment.
</P>
<br><a name="SEC2" href="#TOC1">PCRE2 BUILD-TIME OPTIONS</a><br>
<P>
The rest of this document describes the optional features of PCRE2 that can be
selected when the library is compiled. It assumes use of the <b>configure</b>
script, where the optional features are selected or deselected by providing
options to <b>configure</b> before running the <b>make</b> command. However, the
same options can be selected in both Unix-like and non-Unix-like environments
if you are using <b>CMake</b> instead of <b>configure</b> to build PCRE2.
</P>
<P>
If you are not using Autotools or <b>CMake</b>, option selection can be done by
editing the <b>config.h</b> file, or by passing parameter settings to the
compiler, as described in
<a href="NON-AUTOTOOLS-BUILD.txt"><b>NON-AUTOTOOLS-BUILD</b>.</a>
</P>
<P>
The complete list of options for <b>configure</b> (which includes the standard
ones such as the selection of the installation directory) can be obtained by
running
<pre>
./configure --help
</pre>
The following sections include descriptions of options whose names begin with
--enable or --disable. These settings specify changes to the defaults for the
<b>configure</b> command. Because of the way that <b>configure</b> works,
--enable and --disable always come in pairs, so the complementary option always
exists as well, but as it specifies the default, it is not described.
</P>
<br><a name="SEC3" href="#TOC1">BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a><br>
<P>
By default, a library called <b>libpcre2-8</b> is built, containing functions
that take string arguments contained in vectors of bytes, interpreted either as
single-byte characters, or UTF-8 strings. You can also build two other
libraries, called <b>libpcre2-16</b> and <b>libpcre2-32</b>, which process
strings that are contained in vectors of 16-bit and 32-bit code units,
respectively. These can be interpreted either as single-unit characters or
UTF-16/UTF-32 strings. To build these additional libraries, add one or both of
the following to the <b>configure</b> command:
<pre>
--enable-pcre2-16
--enable-pcre2-32
</pre>
If you do not want the 8-bit library, add
<pre>
--disable-pcre2-8
</pre>
as well. At least one of the three libraries must be built. Note that the POSIX
wrapper is for the 8-bit library only, and that <b>pcre2grep</b> is an 8-bit
program. Neither of these are built if you select only the 16-bit or 32-bit
libraries.
</P>
<br><a name="SEC4" href="#TOC1">BUILDING SHARED AND STATIC LIBRARIES</a><br>
<P>
The Autotools PCRE2 building process uses <b>libtool</b> to build both shared
and static libraries by default. You can suppress an unwanted library by adding
one of
<pre>
--disable-shared
--disable-static
</pre>
to the <b>configure</b> command.
</P>
<br><a name="SEC5" href="#TOC1">UNICODE AND UTF SUPPORT</a><br>
<P>
By default, PCRE2 is built with support for Unicode and UTF character strings.
To build it without Unicode support, add
<pre>
--disable-unicode
</pre>
to the <b>configure</b> command. This setting applies to all three libraries. It
is not possible to build one library with Unicode support, and another without,
in the same configuration.
</P>
<P>
Of itself, Unicode support does not make PCRE2 treat strings as UTF-8, UTF-16
or UTF-32. To do that, applications that use the library can set the PCRE2_UTF
option when they call <b>pcre2_compile()</b> to compile a pattern.
Alternatively, patterns may be started with (*UTF) unless the application has
locked this out by setting PCRE2_NEVER_UTF.
</P>
<P>
UTF support allows the libraries to process character code points up to
0x10ffff in the strings that they handle. It also provides support for
accessing the Unicode properties of such characters, using pattern escapes such
as \P, \p, and \X. Only the general category properties such as <i>Lu</i> and
<i>Nd</i> are supported. Details are given in the
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
documentation.
</P>
<P>
Pattern escapes such as \d and \w do not by default make use of Unicode
properties. The application can request that they do by setting the PCRE2_UCP
option. Unless the application has set PCRE2_NEVER_UCP, a pattern may also
request this by starting with (*UCP).
</P>
<P>
The \C escape sequence, which matches a single code unit, even in a UTF mode,
can cause unpredictable behaviour because it may leave the current matching
point in the middle of a multi-code-unit character. It can be locked out by
setting the PCRE2_NEVER_BACKSLASH_C option.
</P>
<br><a name="SEC6" href="#TOC1">JUST-IN-TIME COMPILER SUPPORT</a><br>
<P>
Just-in-time compiler support is included in the build by specifying
<pre>
--enable-jit
</pre>
This support is available only for certain hardware architectures. If this
option is set for an unsupported architecture, a building error occurs.
See the
<a href="pcre2jit.html"><b>pcre2jit</b></a>
documentation for a discussion of JIT usage. When JIT support is enabled,
pcre2grep automatically makes use of it, unless you add
<pre>
--disable-pcre2grep-jit
</pre>
to the "configure" command.
</P>
<br><a name="SEC7" href="#TOC1">NEWLINE RECOGNITION</a><br>
<P>
By default, PCRE2 interprets the linefeed (LF) character as indicating the end
of a line. This is the normal newline character on Unix-like systems. You can
compile PCRE2 to use carriage return (CR) instead, by adding
<pre>
--enable-newline-is-cr
</pre>
to the <b>configure</b> command. There is also an --enable-newline-is-lf option,
which explicitly specifies linefeed as the newline character.
</P>
<P>
Alternatively, you can specify that line endings are to be indicated by the
two-character sequence CRLF (CR immediately followed by LF). If you want this,
add
<pre>
--enable-newline-is-crlf
</pre>
to the <b>configure</b> command. There is a fourth option, specified by
<pre>
--enable-newline-is-anycrlf
</pre>
which causes PCRE2 to recognize any of the three sequences CR, LF, or CRLF as
indicating a line ending. Finally, a fifth option, specified by
<pre>
--enable-newline-is-any
</pre>
causes PCRE2 to recognize any Unicode newline sequence. The Unicode newline
sequences are the three just mentioned, plus the single characters VT (vertical
tab, U+000B), FF (form feed, U+000C), NEL (next line, U+0085), LS (line
separator, U+2028), and PS (paragraph separator, U+2029).
</P>
<P>
Whatever default line ending convention is selected when PCRE2 is built can be
overridden by applications that use the library. At build time it is
conventional to use the standard for your operating system.
</P>
<br><a name="SEC8" href="#TOC1">WHAT \R MATCHES</a><br>
<P>
By default, the sequence \R in a pattern matches any Unicode newline sequence,
independently of what has been selected as the line ending sequence. If you
specify
<pre>
--enable-bsr-anycrlf
</pre>
the default is changed so that \R matches only CR, LF, or CRLF. Whatever is
selected when PCRE2 is built can be overridden by applications that use the
called.
</P>
<br><a name="SEC9" href="#TOC1">HANDLING VERY LARGE PATTERNS</a><br>
<P>
Within a compiled pattern, offset values are used to point from one part to
another (for example, from an opening parenthesis to an alternation
metacharacter). By default, in the 8-bit and 16-bit libraries, two-byte values
are used for these offsets, leading to a maximum size for a compiled pattern of
around 64K code units. This is sufficient to handle all but the most gigantic
patterns. Nevertheless, some people do want to process truly enormous patterns,
so it is possible to compile PCRE2 to use three-byte or four-byte offsets by
adding a setting such as
<pre>
--with-link-size=3
</pre>
to the <b>configure</b> command. The value given must be 2, 3, or 4. For the
16-bit library, a value of 3 is rounded up to 4. In these libraries, using
longer offsets slows down the operation of PCRE2 because it has to load
additional data when handling them. For the 32-bit library the value is always
4 and cannot be overridden; the value of --with-link-size is ignored.
</P>
<br><a name="SEC10" href="#TOC1">AVOIDING EXCESSIVE STACK USAGE</a><br>
<P>
When matching with the <b>pcre2_match()</b> function, PCRE2 implements
backtracking by making recursive calls to an internal function called
<b>match()</b>. In environments where the size of the stack is limited, this can
severely limit PCRE2's operation. (The Unix environment does not usually suffer
from this problem, but it may sometimes be necessary to increase the maximum
stack size. There is a discussion in the
<a href="pcre2stack.html"><b>pcre2stack</b></a>
documentation.) An alternative approach to recursion that uses memory from the
heap to remember data, instead of using recursive function calls, has been
implemented to work round the problem of limited stack size. If you want to
build a version of PCRE2 that works this way, add
<pre>
--disable-stack-for-recursion
</pre>
to the <b>configure</b> command. By default, the system functions <b>malloc()</b>
and <b>free()</b> are called to manage the heap memory that is required, but
custom memory management functions can be called instead. PCRE2 runs noticeably
more slowly when built in this way. This option affects only the
<b>pcre2_match()</b> function; it is not relevant for <b>pcre2_dfa_match()</b>.
</P>
<br><a name="SEC11" href="#TOC1">LIMITING PCRE2 RESOURCE USAGE</a><br>
<P>
Internally, PCRE2 has a function called <b>match()</b>, which it calls
repeatedly (sometimes recursively) when matching a pattern with the
<b>pcre2_match()</b> function. By controlling the maximum number of times this
function may be called during a single matching operation, a limit can be
placed on the resources used by a single call to <b>pcre2_match()</b>. The limit
can be changed at run time, as described in the
<a href="pcre2api.html"><b>pcre2api</b></a>
documentation. The default is 10 million, but this can be changed by adding a
setting such as
<pre>
--with-match-limit=500000
</pre>
to the <b>configure</b> command. This setting has no effect on the
<b>pcre2_dfa_match()</b> matching function.
</P>
<P>
In some environments it is desirable to limit the depth of recursive calls of
<b>match()</b> more strictly than the total number of calls, in order to
restrict the maximum amount of stack (or heap, if --disable-stack-for-recursion
is specified) that is used. A second limit controls this; it defaults to the
value that is set for --with-match-limit, which imposes no additional
constraints. However, you can set a lower limit by adding, for example,
<pre>
--with-match-limit-recursion=10000
</pre>
to the <b>configure</b> command. This value can also be overridden at run time.
</P>
<br><a name="SEC12" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
<P>
PCRE2 uses fixed tables for processing characters whose code points are less
than 256. By default, PCRE2 is built with a set of tables that are distributed
in the file <i>src/pcre2_chartables.c.dist</i>. These tables are for ASCII codes
only. If you add
<pre>
--enable-rebuild-chartables
</pre>
to the <b>configure</b> command, the distributed tables are no longer used.
Instead, a program called <b>dftables</b> is compiled and run. This outputs the
source for new set of tables, created in the default locale of your C run-time
system. (This method of replacing the tables does not work if you are cross
compiling, because <b>dftables</b> is run on the local host. If you need to
create alternative tables when cross compiling, you will have to do so "by
hand".)
</P>
<br><a name="SEC13" href="#TOC1">USING EBCDIC CODE</a><br>
<P>
PCRE2 assumes by default that it will run in an environment where the character
code is ASCII or Unicode, which is a superset of ASCII. This is the case for
most computer operating systems. PCRE2 can, however, be compiled to run in an
8-bit EBCDIC environment by adding
<pre>
--enable-ebcdic --disable-unicode
</pre>
to the <b>configure</b> command. This setting implies
--enable-rebuild-chartables. You should only use it if you know that you are in
an EBCDIC environment (for example, an IBM mainframe operating system).
</P>
<P>
It is not possible to support both EBCDIC and UTF-8 codes in the same version
of the library. Consequently, --enable-unicode and --enable-ebcdic are mutually
exclusive.
</P>
<P>
The EBCDIC character that corresponds to an ASCII LF is assumed to have the
value 0x15 by default. However, in some EBCDIC environments, 0x25 is used. In
such an environment you should use
<pre>
--enable-ebcdic-nl25
</pre>
as well as, or instead of, --enable-ebcdic. The EBCDIC character for CR has the
same value as in ASCII, namely, 0x0d. Whichever of 0x15 and 0x25 is <i>not</i>
chosen as LF is made to correspond to the Unicode NEL character (which, in
Unicode, is 0x85).
</P>
<P>
The options that select newline behaviour, such as --enable-newline-is-cr,
and equivalent run-time options, refer to these character values in an EBCDIC
environment.
</P>
<br><a name="SEC14" href="#TOC1">PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT</a><br>
<P>
By default, <b>pcre2grep</b> reads all files as plain text. You can build it so
that it recognizes files whose names end in <b>.gz</b> or <b>.bz2</b>, and reads
them with <b>libz</b> or <b>libbz2</b>, respectively, by adding one or both of
<pre>
--enable-pcre2grep-libz
--enable-pcre2grep-libbz2
</pre>
to the <b>configure</b> command. These options naturally require that the
relevant libraries are installed on your system. Configuration will fail if
they are not.
</P>
<br><a name="SEC15" href="#TOC1">PCRE2GREP BUFFER SIZE</a><br>
<P>
<b>pcre2grep</b> uses an internal buffer to hold a "window" on the file it is
scanning, in order to be able to output "before" and "after" lines when it
finds a match. The size of the buffer is controlled by a parameter whose
default value is 20K. The buffer itself is three times this size, but because
of the way it is used for holding "before" lines, the longest line that is
guaranteed to be processable is the parameter size. You can change the default
parameter value by adding, for example,
<pre>
--with-pcre2grep-bufsize=50K
</pre>
to the <b>configure</b> command. The caller of \fPpcre2grep\fP can override this
value by using --buffer-size on the command line..
</P>
<br><a name="SEC16" href="#TOC1">PCRE2TEST OPTION FOR LIBREADLINE SUPPORT</a><br>
<P>
If you add one of
<pre>
--enable-pcre2test-libreadline
--enable-pcre2test-libedit
</pre>
to the <b>configure</b> command, <b>pcre2test</b> is linked with the
<b>libreadline</b> or<b>libedit</b> library, respectively, and when its input is
from a terminal, it reads it using the <b>readline()</b> function. This provides
line-editing and history facilities. Note that <b>libreadline</b> is
GPL-licensed, so if you distribute a binary of <b>pcre2test</b> linked in this
way, there may be licensing issues. These can be avoided by linking instead
with <b>libedit</b>, which has a BSD licence.
</P>
<P>
Setting --enable-pcre2test-libreadline causes the <b>-lreadline</b> option to be
added to the <b>pcre2test</b> build. In many operating environments with a
sytem-installed readline library this is sufficient. However, in some
environments (e.g. if an unmodified distribution version of readline is in
use), some extra configuration may be necessary. The INSTALL file for
<b>libreadline</b> says this:
<pre>
"Readline uses the termcap functions, but does not link with
the termcap or curses library itself, allowing applications
which link with readline the to choose an appropriate library."
</pre>
If your environment has not been set up so that an appropriate library is
automatically included, you may need to add something like
<pre>
LIBS="-ncurses"
</pre>
immediately before the <b>configure</b> command.
</P>
<br><a name="SEC17" href="#TOC1">INCLUDING DEBUGGING CODE</a><br>
<P>
If you add
<pre>
--enable-debug
</pre>
to the <b>configure</b> command, additional debugging code is included in the
build. This feature is intended for use by the PCRE2 maintainers.
</P>
<br><a name="SEC18" href="#TOC1">DEBUGGING WITH VALGRIND SUPPORT</a><br>
<P>
If you add
<pre>
--enable-valgrind
</pre>
to the <b>configure</b> command, PCRE2 will use valgrind annotations to mark
certain memory regions as unaddressable. This allows it to detect invalid
memory accesses, and is mostly useful for debugging PCRE2 itself.
</P>
<br><a name="SEC19" href="#TOC1">CODE COVERAGE REPORTING</a><br>
<P>
If your C compiler is gcc, you can build a version of PCRE2 that can generate a
code coverage report for its test suite. To enable this, you must install
<b>lcov</b> version 1.6 or above. Then specify
<pre>
--enable-coverage
</pre>
to the <b>configure</b> command and build PCRE2 in the usual way.
</P>
<P>
Note that using <b>ccache</b> (a caching C compiler) is incompatible with code
coverage reporting. If you have configured <b>ccache</b> to run automatically
on your system, you must set the environment variable
<pre>
CCACHE_DISABLE=1
</pre>
before running <b>make</b> to build PCRE2, so that <b>ccache</b> is not used.
</P>
<P>
When --enable-coverage is used, the following addition targets are added to the
<i>Makefile</i>:
<pre>
make coverage
</pre>
This creates a fresh coverage report for the PCRE2 test suite. It is equivalent
to running "make coverage-reset", "make coverage-baseline", "make check", and
then "make coverage-report".
<pre>
make coverage-reset
</pre>
This zeroes the coverage counters, but does nothing else.
<pre>
make coverage-baseline
</pre>
This captures baseline coverage information.
<pre>
make coverage-report
</pre>
This creates the coverage report.
<pre>
make coverage-clean-report
</pre>
This removes the generated coverage report without cleaning the coverage data
itself.
<pre>
make coverage-clean-data
</pre>
This removes the captured coverage data without removing the coverage files
created at compile time (*.gcno).
<pre>
make coverage-clean
</pre>
This cleans all coverage data including the generated coverage report. For more
information about code coverage, see the <b>gcov</b> and <b>lcov</b>
documentation.
</P>
<br><a name="SEC20" href="#TOC1">SEE ALSO</a><br>
<P>
<b>pcre2api</b>(3), <b>pcre2-config</b>(3).
</P>
<br><a name="SEC21" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel
<br>
University Computing Service
<br>
Cambridge, England.
<br>
</P>
<br><a name="SEC22" href="#TOC1">REVISION</a><br>
<P>
Last updated: 24 April 2015
<br>
Copyright &copy; 1997-2015 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,408 @@
<html>
<head>
<title>pcre2callout specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2callout man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<ul>
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
<li><a name="TOC3" href="#SEC3">MISSING CALLOUTS</a>
<li><a name="TOC4" href="#SEC4">THE CALLOUT INTERFACE</a>
<li><a name="TOC5" href="#SEC5">RETURN VALUES FROM CALLOUTS</a>
<li><a name="TOC6" href="#SEC6">CALLOUT ENUMERATION</a>
<li><a name="TOC7" href="#SEC7">AUTHOR</a>
<li><a name="TOC8" href="#SEC8">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int (*pcre2_callout)(pcre2_callout_block *, void *);</b>
<br>
<br>
<b>int pcre2_callout_enumerate(const pcre2_code *<i>code</i>,</b>
<b> int (*<i>callback</i>)(pcre2_callout_enumerate_block *, void *),</b>
<b> void *<i>user_data</i>);</b>
</P>
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
<P>
PCRE2 provides a feature called "callout", which is a means of temporarily
passing control to the caller of PCRE2 in the middle of pattern matching. The
caller of PCRE2 provides an external function by putting its entry point in
a match context (see <b>pcre2_set_callout()</b> in the
<a href="pcre2api.html"><b>pcre2api</b></a>
documentation).
</P>
<P>
Within a regular expression, (?C&#60;arg&#62;) indicates a point at which the external
function is to be called. Different callout points can be identified by putting
a number less than 256 after the letter C. The default value is zero.
Alternatively, the argument may be a delimited string. The starting delimiter
must be one of ` ' " ^ % # $ { and the ending delimiter is the same as the
start, except for {, where the ending delimiter is }. If the ending delimiter
is needed within the string, it must be doubled. For example, this pattern has
two callout points:
<pre>
(?C1)abc(?C"some ""arbitrary"" text")def
</pre>
If the PCRE2_AUTO_CALLOUT option bit is set when a pattern is compiled, PCRE2
automatically inserts callouts, all with number 255, before each item in the
pattern. For example, if PCRE2_AUTO_CALLOUT is used with the pattern
<pre>
A(\d{2}|--)
</pre>
it is processed as if it were
<br>
<br>
(?C255)A(?C255)((?C255)\d{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255)
<br>
<br>
Notice that there is a callout before and after each parenthesis and
alternation bar. If the pattern contains a conditional group whose condition is
an assertion, an automatic callout is inserted immediately before the
condition. Such a callout may also be inserted explicitly, for example:
<pre>
(?(?C9)(?=a)ab|de) (?(?C%text%)(?!=d)ab|de)
</pre>
This applies only to assertion conditions (because they are themselves
independent groups).
</P>
<P>
Callouts can be useful for tracking the progress of pattern matching. The
<a href="pcre2test.html"><b>pcre2test</b></a>
program has a pattern qualifier (/auto_callout) that sets automatic callouts.
When any callouts are present, the output from <b>pcre2test</b> indicates how
the pattern is being matched. This is useful information when you are trying to
optimize the performance of a particular pattern.
</P>
<br><a name="SEC3" href="#TOC1">MISSING CALLOUTS</a><br>
<P>
You should be aware that, because of optimizations in the way PCRE2 compiles
and matches patterns, callouts sometimes do not happen exactly as you might
expect.
</P>
<br><b>
Auto-possessification
</b><br>
<P>
At compile time, PCRE2 "auto-possessifies" repeated items when it knows that
what follows cannot be part of the repeat. For example, a+[bc] is compiled as
if it were a++[bc]. The <b>pcre2test</b> output when this pattern is compiled
with PCRE2_ANCHORED and PCRE2_AUTO_CALLOUT and then applied to the string
"aaaa" is:
<pre>
---&#62;aaaa
+0 ^ a+
+2 ^ ^ [bc]
No match
</pre>
This indicates that when matching [bc] fails, there is no backtracking into a+
and therefore the callouts that would be taken for the backtracks do not occur.
You can disable the auto-possessify feature by passing PCRE2_NO_AUTO_POSSESS to
<b>pcre2_compile()</b>, or starting the pattern with (*NO_AUTO_POSSESS). In this
case, the output changes to this:
<pre>
---&#62;aaaa
+0 ^ a+
+2 ^ ^ [bc]
+2 ^ ^ [bc]
+2 ^ ^ [bc]
+2 ^^ [bc]
No match
</pre>
This time, when matching [bc] fails, the matcher backtracks into a+ and tries
again, repeatedly, until a+ itself fails.
</P>
<br><b>
Automatic .* anchoring
</b><br>
<P>
By default, an optimization is applied when .* is the first significant item in
a pattern. If PCRE2_DOTALL is set, so that the dot can match any character, the
pattern is automatically anchored. If PCRE2_DOTALL is not set, a match can
start only after an internal newline or at the beginning of the subject, and
<b>pcre2_compile()</b> remembers this. This optimization is disabled, however,
if .* is in an atomic group or if there is a back reference to the capturing
group in which it appears. It is also disabled if the pattern contains (*PRUNE)
or (*SKIP). However, the presence of callouts does not affect it.
</P>
<P>
For example, if the pattern .*\d is compiled with PCRE2_AUTO_CALLOUT and
applied to the string "aa", the <b>pcre2test</b> output is:
<pre>
---&#62;aa
+0 ^ .*
+2 ^ ^ \d
+2 ^^ \d
+2 ^ \d
No match
</pre>
This shows that all match attempts start at the beginning of the subject. In
other words, the pattern is anchored. You can disable this optimization by
passing PCRE2_NO_DOTSTAR_ANCHOR to <b>pcre2_compile()</b>, or starting the
pattern with (*NO_DOTSTAR_ANCHOR). In this case, the output changes to:
<pre>
---&#62;aa
+0 ^ .*
+2 ^ ^ \d
+2 ^^ \d
+2 ^ \d
+0 ^ .*
+2 ^^ \d
+2 ^ \d
No match
</pre>
This shows more match attempts, starting at the second subject character.
Another optimization, described in the next section, means that there is no
subsequent attempt to match with an empty subject.
</P>
<P>
If a pattern has more than one top-level branch, automatic anchoring occurs if
all branches are anchorable.
</P>
<br><b>
Other optimizations
</b><br>
<P>
Other optimizations that provide fast "no match" results also affect callouts.
For example, if the pattern is
<pre>
ab(?C4)cd
</pre>
PCRE2 knows that any matching string must contain the letter "d". If the
subject string is "abyz", the lack of "d" means that matching doesn't ever
start, and the callout is never reached. However, with "abyd", though the
result is still no match, the callout is obeyed.
</P>
<P>
PCRE2 also knows the minimum length of a matching string, and will immediately
give a "no match" return without actually running a match if the subject is not
long enough, or, for unanchored patterns, if it has been scanned far enough.
</P>
<P>
You can disable these optimizations by passing the PCRE2_NO_START_OPTIMIZE
option to <b>pcre2_compile()</b>, or by starting the pattern with
(*NO_START_OPT). This slows down the matching process, but does ensure that
callouts such as the example above are obeyed.
<a name="calloutinterface"></a></P>
<br><a name="SEC4" href="#TOC1">THE CALLOUT INTERFACE</a><br>
<P>
During matching, when PCRE2 reaches a callout point, if an external function is
set in the match context, it is called. This applies to both normal and DFA
matching. The first argument to the callout function is a pointer to a
<b>pcre2_callout</b> block. The second argument is the void * callout data that
was supplied when the callout was set up by calling <b>pcre2_set_callout()</b>
(see the
<a href="pcre2api.html"><b>pcre2api</b></a>
documentation). The callout block structure contains the following fields:
<pre>
uint32_t <i>version</i>;
uint32_t <i>callout_number</i>;
uint32_t <i>capture_top</i>;
uint32_t <i>capture_last</i>;
PCRE2_SIZE *<i>offset_vector</i>;
PCRE2_SPTR <i>mark</i>;
PCRE2_SPTR <i>subject</i>;
PCRE2_SIZE <i>subject_length</i>;
PCRE2_SIZE <i>start_match</i>;
PCRE2_SIZE <i>current_position</i>;
PCRE2_SIZE <i>pattern_position</i>;
PCRE2_SIZE <i>next_item_length</i>;
PCRE2_SIZE <i>callout_string_offset</i>;
PCRE2_SIZE <i>callout_string_length</i>;
PCRE2_SPTR <i>callout_string</i>;
</pre>
The <i>version</i> field contains the version number of the block format. The
current version is 1; the three callout string fields were added for this
version. If you are writing an application that might use an earlier release of
PCRE2, you should check the version number before accessing any of these
fields. The version number will increase in future if more fields are added,
but the intention is never to remove any of the existing fields.
</P>
<br><b>
Fields for numerical callouts
</b><br>
<P>
For a numerical callout, <i>callout_string</i> is NULL, and <i>callout_number</i>
contains the number of the callout, in the range 0-255. This is the number
that follows (?C for manual callouts; it is 255 for automatically generated
callouts.
</P>
<br><b>
Fields for string callouts
</b><br>
<P>
For callouts with string arguments, <i>callout_number</i> is always zero, and
<i>callout_string</i> points to the string that is contained within the compiled
pattern. Its length is given by <i>callout_string_length</i>. Duplicated ending
delimiters that were present in the original pattern string have been turned
into single characters, but there is no other processing of the callout string
argument. An additional code unit containing binary zero is present after the
string, but is not included in the length. The delimiter that was used to start
the string is also stored within the pattern, immediately before the string
itself. You can access this delimiter as <i>callout_string</i>[-1] if you need
it.
</P>
<P>
The <i>callout_string_offset</i> field is the code unit offset to the start of
the callout argument string within the original pattern string. This is
provided for the benefit of applications such as script languages that might
need to report errors in the callout string within the pattern.
</P>
<br><b>
Fields for all callouts
</b><br>
<P>
The remaining fields in the callout block are the same for both kinds of
callout.
</P>
<P>
The <i>offset_vector</i> field is a pointer to the vector of capturing offsets
(the "ovector") that was passed to the matching function in the match data
block. When <b>pcre2_match()</b> is used, the contents can be inspected in
order to extract substrings that have been matched so far, in the same way as
for extracting substrings after a match has completed. For the DFA matching
function, this field is not useful.
</P>
<P>
The <i>subject</i> and <i>subject_length</i> fields contain copies of the values
that were passed to the matching function.
</P>
<P>
The <i>start_match</i> field normally contains the offset within the subject at
which the current match attempt started. However, if the escape sequence \K
has been encountered, this value is changed to reflect the modified starting
point. If the pattern is not anchored, the callout function may be called
several times from the same point in the pattern for different starting points
in the subject.
</P>
<P>
The <i>current_position</i> field contains the offset within the subject of the
current match pointer.
</P>
<P>
When the <b>pcre2_match()</b> is used, the <i>capture_top</i> field contains one
more than the number of the highest numbered captured substring so far. If no
substrings have been captured, the value of <i>capture_top</i> is one. This is
always the case when the DFA functions are used, because they do not support
captured substrings.
</P>
<P>
The <i>capture_last</i> field contains the number of the most recently captured
substring. However, when a recursion exits, the value reverts to what it was
outside the recursion, as do the values of all captured substrings. If no
substrings have been captured, the value of <i>capture_last</i> is 0. This is
always the case for the DFA matching functions.
</P>
<P>
The <i>pattern_position</i> field contains the offset in the pattern string to
the next item to be matched.
</P>
<P>
The <i>next_item_length</i> field contains the length of the next item to be
matched in the pattern string. When the callout immediately precedes an
alternation bar, a closing parenthesis, or the end of the pattern, the length
is zero. When the callout precedes an opening parenthesis, the length is that
of the entire subpattern.
</P>
<P>
The <i>pattern_position</i> and <i>next_item_length</i> fields are intended to
help in distinguishing between different automatic callouts, which all have the
same callout number. However, they are set for all callouts, and are used by
<b>pcre2test</b> to show the next item to be matched when displaying callout
information.
</P>
<P>
In callouts from <b>pcre2_match()</b> the <i>mark</i> field contains a pointer to
the zero-terminated name of the most recently passed (*MARK), (*PRUNE), or
(*THEN) item in the match, or NULL if no such items have been passed. Instances
of (*PRUNE) or (*THEN) without a name do not obliterate a previous (*MARK). In
callouts from the DFA matching function this field always contains NULL.
</P>
<br><a name="SEC5" href="#TOC1">RETURN VALUES FROM CALLOUTS</a><br>
<P>
The external callout function returns an integer to PCRE2. If the value is
zero, matching proceeds as normal. If the value is greater than zero, matching
fails at the current point, but the testing of other matching possibilities
goes ahead, just as if a lookahead assertion had failed. If the value is less
than zero, the match is abandoned, and the matching function returns the
negative value.
</P>
<P>
Negative values should normally be chosen from the set of PCRE2_ERROR_xxx
values. In particular, PCRE2_ERROR_NOMATCH forces a standard "no match"
failure. The error number PCRE2_ERROR_CALLOUT is reserved for use by callout
functions; it will never be used by PCRE2 itself.
</P>
<br><a name="SEC6" href="#TOC1">CALLOUT ENUMERATION</a><br>
<P>
<b>int pcre2_callout_enumerate(const pcre2_code *<i>code</i>,</b>
<b> int (*<i>callback</i>)(pcre2_callout_enumerate_block *, void *),</b>
<b> void *<i>user_data</i>);</b>
<br>
<br>
A script language that supports the use of string arguments in callouts might
like to scan all the callouts in a pattern before running the match. This can
be done by calling <b>pcre2_callout_enumerate()</b>. The first argument is a
pointer to a compiled pattern, the second points to a callback function, and
the third is arbitrary user data. The callback function is called for every
callout in the pattern in the order in which they appear. Its first argument is
a pointer to a callout enumeration block, and its second argument is the
<i>user_data</i> value that was passed to <b>pcre2_callout_enumerate()</b>. The
data block contains the following fields:
<pre>
<i>version</i> Block version number
<i>pattern_position</i> Offset to next item in pattern
<i>next_item_length</i> Length of next item in pattern
<i>callout_number</i> Number for numbered callouts
<i>callout_string_offset</i> Offset to string within pattern
<i>callout_string_length</i> Length of callout string
<i>callout_string</i> Points to callout string or is NULL
</pre>
The version number is currently 0. It will increase if new fields are ever
added to the block. The remaining fields are the same as their namesakes in the
<b>pcre2_callout</b> block that is used for callouts during matching, as
described
<a href="#calloutinterface">above.</a>
</P>
<P>
Note that the value of <i>pattern_position</i> is unique for each callout.
However, if a callout occurs inside a group that is quantified with a non-zero
minimum or a fixed maximum, the group is replicated inside the compiled
pattern. For example, a pattern such as /(a){2}/ is compiled as if it were
/(a)(a)/. This means that the callout will be enumerated more than once, but
with the same value for <i>pattern_position</i> in each case.
</P>
<P>
The callback function should normally return zero. If it returns a non-zero
value, scanning the pattern stops, and that value is returned from
<b>pcre2_callout_enumerate()</b>.
</P>
<br><a name="SEC7" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel
<br>
University Computing Service
<br>
Cambridge, England.
<br>
</P>
<br><a name="SEC8" href="#TOC1">REVISION</a><br>
<P>
Last updated: 23 March 2015
<br>
Copyright &copy; 1997-2015 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,223 @@
<html>
<head>
<title>pcre2compat specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2compat man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
DIFFERENCES BETWEEN PCRE2 AND PERL
</b><br>
<P>
This document describes the differences in the ways that PCRE2 and Perl handle
regular expressions. The differences described here are with respect to Perl
versions 5.10 and above.
</P>
<P>
1. PCRE2 has only a subset of Perl's Unicode support. Details of what it does
have are given in the
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
page.
</P>
<P>
2. PCRE2 allows repeat quantifiers only on parenthesized assertions, but they
do not mean what you might think. For example, (?!a){3} does not assert that
the next three characters are not "a". It just asserts that the next character
is not "a" three times (in principle: PCRE2 optimizes this to run the assertion
just once). Perl allows repeat quantifiers on other assertions such as \b, but
these do not seem to have any use.
</P>
<P>
3. Capturing subpatterns that occur inside negative lookahead assertions are
counted, but their entries in the offsets vector are never set. Perl sometimes
(but not always) sets its numerical variables from inside negative assertions.
</P>
<P>
4. The following Perl escape sequences are not supported: \l, \u, \L,
\U, and \N when followed by a character name or Unicode value. (\N on its
own, matching a non-newline character, is supported.) In fact these are
implemented by Perl's general string-handling and are not part of its pattern
matching engine. If any of these are encountered by PCRE2, an error is
generated by default. However, if the PCRE2_ALT_BSUX option is set,
\U and \u are interpreted as ECMAScript interprets them.
</P>
<P>
5. The Perl escape sequences \p, \P, and \X are supported only if PCRE2 is
built with Unicode support. The properties that can be tested with \p and \P
are limited to the general category properties such as Lu and Nd, script names
such as Greek or Han, and the derived properties Any and L&. PCRE2 does support
the Cs (surrogate) property, which Perl does not; the Perl documentation says
"Because Perl hides the need for the user to understand the internal
representation of Unicode characters, there is no need to implement the
somewhat messy concept of surrogates."
</P>
<P>
6. PCRE2 does support the \Q...\E escape for quoting substrings. Characters
in between are treated as literals. This is slightly different from Perl in
that $ and @ are also handled as literals inside the quotes. In Perl, they
cause variable interpolation (but of course PCRE2 does not have variables).
Note the following examples:
<pre>
Pattern PCRE2 matches Perl matches
\Qabc$xyz\E abc$xyz abc followed by the contents of $xyz
\Qabc\$xyz\E abc\$xyz abc\$xyz
\Qabc\E\$\Qxyz\E abc$xyz abc$xyz
</pre>
The \Q...\E sequence is recognized both inside and outside character classes.
</P>
<P>
7. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code})
constructions. However, there is support for recursive patterns. This is not
available in Perl 5.8, but it is in Perl 5.10. Also, the PCRE2 "callout"
feature allows an external function to be called during pattern matching. See
the
<a href="pcre2callout.html"><b>pcre2callout</b></a>
documentation for details.
</P>
<P>
8. Subroutine calls (whether recursive or not) are treated as atomic groups.
Atomic recursion is like Python, but unlike Perl. Captured values that are set
outside a subroutine call can be referenced from inside in PCRE2, but not in
Perl. There is a discussion that explains these differences in more detail in
the
<a href="pcre2pattern.html#recursiondifference">section on recursion differences from Perl</a>
in the
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
page.
</P>
<P>
9. If any of the backtracking control verbs are used in a subpattern that is
called as a subroutine (whether or not recursively), their effect is confined
to that subpattern; it does not extend to the surrounding pattern. This is not
always the case in Perl. In particular, if (*THEN) is present in a group that
is called as a subroutine, its action is limited to that group, even if the
group does not contain any | characters. Note that such subpatterns are
processed as anchored at the point where they are tested.
</P>
<P>
10. If a pattern contains more than one backtracking control verb, the first
one that is backtracked onto acts. For example, in the pattern
A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C
triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the
same as PCRE2, but there are examples where it differs.
</P>
<P>
11. Most backtracking verbs in assertions have their normal actions. They are
not confined to the assertion.
</P>
<P>
12. There are some differences that are concerned with the settings of captured
strings when part of a pattern is repeated. For example, matching "aba" against
the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE2 it is set to
"b".
</P>
<P>
13. PCRE2's handling of duplicate subpattern numbers and duplicate subpattern
names is not as general as Perl's. This is a consequence of the fact the PCRE2
works internally just with numbers, using an external table to translate
between numbers and names. In particular, a pattern such as (?|(?&#60;a&#62;A)|(?&#60;b)B),
where the two capturing parentheses have the same number but different names,
is not supported, and causes an error at compile time. If it were allowed, it
would not be possible to distinguish which parentheses matched, because both
names map to capturing subpattern number 1. To avoid this confusing situation,
an error is given at compile time.
</P>
<P>
14. Perl recognizes comments in some places that PCRE2 does not, for example,
between the ( and ? at the start of a subpattern. If the /x modifier is set,
Perl allows white space between ( and ? (though current Perls warn that this is
deprecated) but PCRE2 never does, even if the PCRE2_EXTENDED option is set.
</P>
<P>
15. Perl, when in warning mode, gives warnings for character classes such as
[A-\d] or [a-[:digit:]]. It then treats the hyphens as literals. PCRE2 has no
warning features, so it gives an error in these cases because they are almost
certainly user mistakes.
</P>
<P>
16. In PCRE2, the upper/lower case character properties Lu and Ll are not
affected when case-independent matching is specified. For example, \p{Lu}
always matches an upper case letter. I think Perl has changed in this respect;
in the release at the time of writing (5.16), \p{Lu} and \p{Ll} match all
letters, regardless of case, when case independence is specified.
</P>
<P>
17. PCRE2 provides some extensions to the Perl regular expression facilities.
Perl 5.10 includes new features that are not in earlier versions of Perl, some
of which (such as named parentheses) have been in PCRE2 for some time. This
list is with respect to Perl 5.10:
<br>
<br>
(a) Although lookbehind assertions in PCRE2 must match fixed length strings,
each alternative branch of a lookbehind assertion can match a different length
of string. Perl requires them all to have the same length.
<br>
<br>
(b) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the $
meta-character matches only at the very end of the string.
<br>
<br>
(c) A backslash followed by a letter with no special meaning is faulted. (Perl
can be made to issue a warning.)
<br>
<br>
(d) If PCRE2_UNGREEDY is set, the greediness of the repetition quantifiers is
inverted, that is, by default they are not greedy, but if followed by a
question mark they are.
<br>
<br>
(e) PCRE2_ANCHORED can be used at matching time to force a pattern to be tried
only at the first matching position in the subject string.
<br>
<br>
(f) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, and
PCRE2_NO_AUTO_CAPTURE options have no Perl equivalents.
<br>
<br>
(g) The \R escape sequence can be restricted to match only CR, LF, or CRLF
by the PCRE2_BSR_ANYCRLF option.
<br>
<br>
(h) The callout facility is PCRE2-specific.
<br>
<br>
(i) The partial matching facility is PCRE2-specific.
<br>
<br>
(j) The alternative matching function (<b>pcre2_dfa_match()</b> matches in a
different way and is not Perl-compatible.
<br>
<br>
(k) PCRE2 recognizes some special sequences such as (*CR) at the start of
a pattern that set overall options that cannot be changed within the pattern.
</P>
<br><b>
AUTHOR
</b><br>
<P>
Philip Hazel
<br>
University Computing Service
<br>
Cambridge, England.
<br>
</P>
<br><b>
REVISION
</b><br>
<P>
Last updated: 15 March 2015
<br>
Copyright &copy; 1997-2015 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,443 @@
<html>
<head>
<title>pcre2demo specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2demo man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<ul>
</ul>
<PRE>
/*************************************************
* PCRE2 DEMONSTRATION PROGRAM *
*************************************************/
/* This is a demonstration program to illustrate a straightforward way of
calling the PCRE2 regular expression library from a C program. See the
pcre2sample documentation for a short discussion ("man pcre2sample" if you have
the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
incompatible with the original PCRE API.
There are actually three libraries, each supporting a different code unit
width. This demonstration program uses the 8-bit library.
In Unix-like environments, if PCRE2 is installed in your standard system
libraries, you should be able to compile this program using this command:
gcc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo
If PCRE2 is not installed in a standard place, it is likely to be installed
with support for the pkg-config mechanism. If you have pkg-config, you can
compile this program using this command:
gcc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo
If you do not have pkg-config, you may have to use this:
gcc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \
-R/usr/local/lib -lpcre2-8 -o pcre2demo
Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
library files for PCRE2 are installed on your system. Only some operating
systems (Solaris is one) use the -R option.
Building under Windows:
If you want to statically link this program against a non-dll .a file, you must
define PCRE2_STATIC before including pcre2.h, so in this environment, uncomment
the following line. */
/* #define PCRE2_STATIC */
/* This macro must be defined before including pcre2.h. For a program that uses
only one code unit width, it makes it possible to use generic function names
such as pcre2_compile(). */
#define PCRE2_CODE_UNIT_WIDTH 8
#include &lt;stdio.h&gt;
#include &lt;string.h&gt;
#include &lt;pcre2.h&gt;
/**************************************************************************
* Here is the program. The API includes the concept of "contexts" for *
* setting up unusual interface requirements for compiling and matching, *
* such as custom memory managers and non-standard newline definitions. *
* This program does not do any of this, so it makes no use of contexts, *
* always passing NULL where a context could be given. *
**************************************************************************/
int main(int argc, char **argv)
{
pcre2_code *re;
PCRE2_SPTR pattern; /* PCRE2_SPTR is a pointer to unsigned code units of */
PCRE2_SPTR subject; /* the appropriate width (8, 16, or 32 bits). */
PCRE2_SPTR name_table;
int crlf_is_newline;
int errornumber;
int find_all;
int i;
int namecount;
int name_entry_size;
int rc;
int utf8;
uint32_t option_bits;
uint32_t newline;
PCRE2_SIZE erroroffset;
PCRE2_SIZE *ovector;
size_t subject_length;
pcre2_match_data *match_data;
/**************************************************************************
* First, sort out the command line. There is only one possible option at *
* the moment, "-g" to request repeated matching to find all occurrences, *
* like Perl's /g option. We set the variable find_all to a non-zero value *
* if the -g option is present. Apart from that, there must be exactly two *
* arguments. *
**************************************************************************/
find_all = 0;
for (i = 1; i &lt; argc; i++)
{
if (strcmp(argv[i], "-g") == 0) find_all = 1;
else break;
}
/* After the options, we require exactly two arguments, which are the pattern,
and the subject string. */
if (argc - i != 2)
{
printf("Two arguments required: a regex and a subject string\n");
return 1;
}
/* As pattern and subject are char arguments, they can be straightforwardly
cast to PCRE2_SPTR as we are working in 8-bit code units. */
pattern = (PCRE2_SPTR)argv[i];
subject = (PCRE2_SPTR)argv[i+1];
subject_length = strlen((char *)subject);
/*************************************************************************
* Now we are going to compile the regular expression pattern, and handle *
* any errors that are detected. *
*************************************************************************/
re = pcre2_compile(
pattern, /* the pattern */
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
0, /* default options */
&amp;errornumber, /* for error number */
&amp;erroroffset, /* for error offset */
NULL); /* use default compile context */
/* Compilation failed: print the error message and exit. */
if (re == NULL)
{
PCRE2_UCHAR buffer[256];
pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset,
buffer);
return 1;
}
/*************************************************************************
* If the compilation succeeded, we call PCRE again, in order to do a *
* pattern match against the subject string. This does just ONE match. If *
* further matching is needed, it will be done below. Before running the *
* match we must set up a match_data block for holding the result. *
*************************************************************************/
/* Using this function ensures that the block is exactly the right size for
the number of capturing parentheses in the pattern. */
match_data = pcre2_match_data_create_from_pattern(re, NULL);
rc = pcre2_match(
re, /* the compiled pattern */
subject, /* the subject string */
subject_length, /* the length of the subject */
0, /* start at offset 0 in the subject */
0, /* default options */
match_data, /* block for storing the result */
NULL); /* use default match context */
/* Matching failed: handle error cases */
if (rc &lt; 0)
{
switch(rc)
{
case PCRE2_ERROR_NOMATCH: printf("No match\n"); break;
/*
Handle other special cases if you like
*/
default: printf("Matching error %d\n", rc); break;
}
pcre2_match_data_free(match_data); /* Release memory used for the match */
pcre2_code_free(re); /* data and the compiled pattern. */
return 1;
}
/* Match succeded. Get a pointer to the output vector, where string offsets are
stored. */
ovector = pcre2_get_ovector_pointer(match_data);
printf("\nMatch succeeded at offset %d\n", (int)ovector[0]);
/*************************************************************************
* We have found the first match within the subject string. If the output *
* vector wasn't big enough, say so. Then output any substrings that were *
* captured. *
*************************************************************************/
/* The output vector wasn't big enough. This should not happen, because we used
pcre2_match_data_create_from_pattern() above. */
if (rc == 0)
printf("ovector was not big enough for all the captured substrings\n");
/* Show substrings stored in the output vector by number. Obviously, in a real
application you might want to do things other than print them. */
for (i = 0; i &lt; rc; i++)
{
PCRE2_SPTR substring_start = subject + ovector[2*i];
size_t substring_length = ovector[2*i+1] - ovector[2*i];
printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start);
}
/**************************************************************************
* That concludes the basic part of this demonstration program. We have *
* compiled a pattern, and performed a single match. The code that follows *
* shows first how to access named substrings, and then how to code for *
* repeated matches on the same subject. *
**************************************************************************/
/* See if there are any named substrings, and if so, show them by name. First
we have to extract the count of named parentheses from the pattern. */
(void)pcre2_pattern_info(
re, /* the compiled pattern */
PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */
&amp;namecount); /* where to put the answer */
if (namecount &lt;= 0) printf("No named substrings\n"); else
{
PCRE2_SPTR tabptr;
printf("Named substrings\n");
/* Before we can access the substrings, we must extract the table for
translating names to numbers, and the size of each entry in the table. */
(void)pcre2_pattern_info(
re, /* the compiled pattern */
PCRE2_INFO_NAMETABLE, /* address of the table */
&amp;name_table); /* where to put the answer */
(void)pcre2_pattern_info(
re, /* the compiled pattern */
PCRE2_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
&amp;name_entry_size); /* where to put the answer */
/* Now we can scan the table and, for each entry, print the number, the name,
and the substring itself. In the 8-bit library the number is held in two
bytes, most significant first. */
tabptr = name_table;
for (i = 0; i &lt; namecount; i++)
{
int n = (tabptr[0] &lt;&lt; 8) | tabptr[1];
printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
(int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]);
tabptr += name_entry_size;
}
}
/*************************************************************************
* If the "-g" option was given on the command line, we want to continue *
* to search for additional matches in the subject string, in a similar *
* way to the /g option in Perl. This turns out to be trickier than you *
* might think because of the possibility of matching an empty string. *
* What happens is as follows: *
* *
* If the previous match was NOT for an empty string, we can just start *
* the next match at the end of the previous one. *
* *
* If the previous match WAS for an empty string, we can't do that, as it *
* would lead to an infinite loop. Instead, a call of pcre2_match() is *
* made with the PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set. The *
* first of these tells PCRE2 that an empty string at the start of the *
* subject is not a valid match; other possibilities must be tried. The *
* second flag restricts PCRE2 to one match attempt at the initial string *
* position. If this match succeeds, an alternative to the empty string *
* match has been found, and we can print it and proceed round the loop, *
* advancing by the length of whatever was found. If this match does not *
* succeed, we still stay in the loop, advancing by just one character. *
* In UTF-8 mode, which can be set by (*UTF) in the pattern, this may be *
* more than one byte. *
* *
* However, there is a complication concerned with newlines. When the *
* newline convention is such that CRLF is a valid newline, we must *
* advance by two characters rather than one. The newline convention can *
* be set in the regex by (*CR), etc.; if not, we must find the default. *
*************************************************************************/
if (!find_all) /* Check for -g */
{
pcre2_match_data_free(match_data); /* Release the memory that was used */
pcre2_code_free(re); /* for the match data and the pattern. */
return 0; /* Exit the program. */
}
/* Before running the loop, check for UTF-8 and whether CRLF is a valid newline
sequence. First, find the options with which the regex was compiled and extract
the UTF state. */
(void)pcre2_pattern_info(re, PCRE2_INFO_ALLOPTIONS, &amp;option_bits);
utf8 = (option_bits &amp; PCRE2_UTF) != 0;
/* Now find the newline convention and see whether CRLF is a valid newline
sequence. */
(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &amp;newline);
crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
newline == PCRE2_NEWLINE_CRLF ||
newline == PCRE2_NEWLINE_ANYCRLF;
/* Loop for second and subsequent matches */
for (;;)
{
uint32_t options = 0; /* Normally no options */
PCRE2_SIZE start_offset = ovector[1]; /* Start at end of previous match */
/* If the previous match was for an empty string, we are finished if we are
at the end of the subject. Otherwise, arrange to run another match at the
same point to see if a non-empty match can be found. */
if (ovector[0] == ovector[1])
{
if (ovector[0] == subject_length) break;
options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
}
/* Run the next matching operation */
rc = pcre2_match(
re, /* the compiled pattern */
subject, /* the subject string */
subject_length, /* the length of the subject */
start_offset, /* starting offset in the subject */
options, /* options */
match_data, /* block for storing the result */
NULL); /* use default match context */
/* This time, a result of NOMATCH isn't an error. If the value in "options"
is zero, it just means we have found all possible matches, so the loop ends.
Otherwise, it means we have failed to find a non-empty-string match at a
point where there was a previous empty-string match. In this case, we do what
Perl does: advance the matching position by one character, and continue. We
do this by setting the "end of previous match" offset, because that is picked
up at the top of the loop as the point at which to start again.
There are two complications: (a) When CRLF is a valid newline sequence, and
the current position is just before it, advance by an extra byte. (b)
Otherwise we must ensure that we skip an entire UTF character if we are in
UTF mode. */
if (rc == PCRE2_ERROR_NOMATCH)
{
if (options == 0) break; /* All matches found */
ovector[1] = start_offset + 1; /* Advance one code unit */
if (crlf_is_newline &amp;&amp; /* If CRLF is newline &amp; */
start_offset &lt; subject_length - 1 &amp;&amp; /* we are at CRLF, */
subject[start_offset] == '\r' &amp;&amp;
subject[start_offset + 1] == '\n')
ovector[1] += 1; /* Advance by one more. */
else if (utf8) /* Otherwise, ensure we */
{ /* advance a whole UTF-8 */
while (ovector[1] &lt; subject_length) /* character. */
{
if ((subject[ovector[1]] &amp; 0xc0) != 0x80) break;
ovector[1] += 1;
}
}
continue; /* Go round the loop again */
}
/* Other matching errors are not recoverable. */
if (rc &lt; 0)
{
printf("Matching error %d\n", rc);
pcre2_match_data_free(match_data);
pcre2_code_free(re);
return 1;
}
/* Match succeded */
printf("\nMatch succeeded again at offset %d\n", (int)ovector[0]);
/* The match succeeded, but the output vector wasn't big enough. This
should not happen. */
if (rc == 0)
printf("ovector was not big enough for all the captured substrings\n");
/* As before, show substrings stored in the output vector by number, and then
also any named substrings. */
for (i = 0; i &lt; rc; i++)
{
PCRE2_SPTR substring_start = subject + ovector[2*i];
size_t substring_length = ovector[2*i+1] - ovector[2*i];
printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start);
}
if (namecount &lt;= 0) printf("No named substrings\n"); else
{
PCRE2_SPTR tabptr = name_table;
printf("Named substrings\n");
for (i = 0; i &lt; namecount; i++)
{
int n = (tabptr[0] &lt;&lt; 8) | tabptr[1];
printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
(int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]);
tabptr += name_entry_size;
}
}
} /* End of loop to find second and subsequent matches */
printf("\n");
pcre2_match_data_free(match_data);
pcre2_code_free(re);
return 0;
}
/* End of pcre2demo.c */
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,783 @@
<html>
<head>
<title>pcre2grep specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2grep man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<ul>
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
<li><a name="TOC3" href="#SEC3">SUPPORT FOR COMPRESSED FILES</a>
<li><a name="TOC4" href="#SEC4">BINARY FILES</a>
<li><a name="TOC5" href="#SEC5">OPTIONS</a>
<li><a name="TOC6" href="#SEC6">ENVIRONMENT VARIABLES</a>
<li><a name="TOC7" href="#SEC7">NEWLINES</a>
<li><a name="TOC8" href="#SEC8">OPTIONS COMPATIBILITY</a>
<li><a name="TOC9" href="#SEC9">OPTIONS WITH DATA</a>
<li><a name="TOC10" href="#SEC10">MATCHING ERRORS</a>
<li><a name="TOC11" href="#SEC11">DIAGNOSTICS</a>
<li><a name="TOC12" href="#SEC12">SEE ALSO</a>
<li><a name="TOC13" href="#SEC13">AUTHOR</a>
<li><a name="TOC14" href="#SEC14">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
<P>
<b>pcre2grep [options] [long options] [pattern] [path1 path2 ...]</b>
</P>
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
<P>
<b>pcre2grep</b> searches files for character patterns, in the same way as other
grep commands do, but it uses the PCRE2 regular expression library to support
patterns that are compatible with the regular expressions of Perl 5. See
<a href="pcre2syntax.html"><b>pcre2syntax</b>(3)</a>
for a quick-reference summary of pattern syntax, or
<a href="pcre2pattern.html"><b>pcre2pattern</b>(3)</a>
for a full description of the syntax and semantics of the regular expressions
that PCRE2 supports.
</P>
<P>
Patterns, whether supplied on the command line or in a separate file, are given
without delimiters. For example:
<pre>
pcre2grep Thursday /etc/motd
</pre>
If you attempt to use delimiters (for example, by surrounding a pattern with
slashes, as is common in Perl scripts), they are interpreted as part of the
pattern. Quotes can of course be used to delimit patterns on the command line
because they are interpreted by the shell, and indeed quotes are required if a
pattern contains white space or shell metacharacters.
</P>
<P>
The first argument that follows any option settings is treated as the single
pattern to be matched when neither <b>-e</b> nor <b>-f</b> is present.
Conversely, when one or both of these options are used to specify patterns, all
arguments are treated as path names. At least one of <b>-e</b>, <b>-f</b>, or an
argument pattern must be provided.
</P>
<P>
If no files are specified, <b>pcre2grep</b> reads the standard input. The
standard input can also be referenced by a name consisting of a single hyphen.
For example:
<pre>
pcre2grep some-pattern file1 - file3
</pre>
Input files are searched line by line. By default, each line that matches a
pattern is copied to the standard output, and if there is more than one file,
the file name is output at the start of each line, followed by a colon.
However, there are options that can change how <b>pcre2grep</b> behaves. In
particular, the <b>-M</b> option makes it possible to search for strings that
span line boundaries. What defines a line boundary is controlled by the
<b>-N</b> (<b>--newline</b>) option.
</P>
<P>
The amount of memory used for buffering files that are being scanned is
controlled by a parameter that can be set by the <b>--buffer-size</b> option.
The default value for this parameter is specified when <b>pcre2grep</b> is
built, with the default default being 20K. A block of memory three times this
size is used (to allow for buffering "before" and "after" lines). An error
occurs if a line overflows the buffer.
</P>
<P>
Patterns can be no longer than 8K or BUFSIZ bytes, whichever is the greater.
BUFSIZ is defined in <b>&#60;stdio.h&#62;</b>. When there is more than one pattern
(specified by the use of <b>-e</b> and/or <b>-f</b>), each pattern is applied to
each line in the order in which they are defined, except that all the <b>-e</b>
patterns are tried before the <b>-f</b> patterns.
</P>
<P>
By default, as soon as one pattern matches a line, no further patterns are
considered. However, if <b>--colour</b> (or <b>--color</b>) is used to colour the
matching substrings, or if <b>--only-matching</b>, <b>--file-offsets</b>, or
<b>--line-offsets</b> is used to output only the part of the line that matched
(either shown literally, or as an offset), scanning resumes immediately
following the match, so that further matches on the same line can be found. If
there are multiple patterns, they are all tried on the remainder of the line,
but patterns that follow the one that matched are not tried on the earlier part
of the line.
</P>
<P>
This behaviour means that the order in which multiple patterns are specified
can affect the output when one of the above options is used. This is no longer
the same behaviour as GNU grep, which now manages to display earlier matches
for later patterns (as long as there is no overlap).
</P>
<P>
Patterns that can match an empty string are accepted, but empty string
matches are never recognized. An example is the pattern "(super)?(man)?", in
which all components are optional. This pattern finds all occurrences of both
"super" and "man"; the output differs from matching with "super|man" when only
the matching substrings are being shown.
</P>
<P>
If the <b>LC_ALL</b> or <b>LC_CTYPE</b> environment variable is set,
<b>pcre2grep</b> uses the value to set a locale when calling the PCRE2 library.
The <b>--locale</b> option can be used to override this.
</P>
<br><a name="SEC3" href="#TOC1">SUPPORT FOR COMPRESSED FILES</a><br>
<P>
It is possible to compile <b>pcre2grep</b> so that it uses <b>libz</b> or
<b>libbz2</b> to read files whose names end in <b>.gz</b> or <b>.bz2</b>,
respectively. You can find out whether your binary has support for one or both
of these file types by running it with the <b>--help</b> option. If the
appropriate support is not present, files are treated as plain text. The
standard input is always so treated.
</P>
<br><a name="SEC4" href="#TOC1">BINARY FILES</a><br>
<P>
By default, a file that contains a binary zero byte within the first 1024 bytes
is identified as a binary file, and is processed specially. (GNU grep also
identifies binary files in this manner.) See the <b>--binary-files</b> option
for a means of changing the way binary files are handled.
</P>
<br><a name="SEC5" href="#TOC1">OPTIONS</a><br>
<P>
The order in which some of the options appear can affect the output. For
example, both the <b>-h</b> and <b>-l</b> options affect the printing of file
names. Whichever comes later in the command line will be the one that takes
effect. Similarly, except where noted below, if an option is given twice, the
later setting is used. Numerical values for options may be followed by K or M,
to signify multiplication by 1024 or 1024*1024 respectively.
</P>
<P>
<b>--</b>
This terminates the list of options. It is useful if the next item on the
command line starts with a hyphen but is not an option. This allows for the
processing of patterns and file names that start with hyphens.
</P>
<P>
<b>-A</b> <i>number</i>, <b>--after-context=</b><i>number</i>
Output <i>number</i> lines of context after each matching line. If file names
and/or line numbers are being output, a hyphen separator is used instead of a
colon for the context lines. A line containing "--" is output between each
group of lines, unless they are in fact contiguous in the input file. The value
of <i>number</i> is expected to be relatively small. However, <b>pcre2grep</b>
guarantees to have up to 8K of following text available for context output.
</P>
<P>
<b>-a</b>, <b>--text</b>
Treat binary files as text. This is equivalent to
<b>--binary-files</b>=<i>text</i>.
</P>
<P>
<b>-B</b> <i>number</i>, <b>--before-context=</b><i>number</i>
Output <i>number</i> lines of context before each matching line. If file names
and/or line numbers are being output, a hyphen separator is used instead of a
colon for the context lines. A line containing "--" is output between each
group of lines, unless they are in fact contiguous in the input file. The value
of <i>number</i> is expected to be relatively small. However, <b>pcre2grep</b>
guarantees to have up to 8K of preceding text available for context output.
</P>
<P>
<b>--binary-files=</b><i>word</i>
Specify how binary files are to be processed. If the word is "binary" (the
default), pattern matching is performed on binary files, but the only output is
"Binary file &#60;name&#62; matches" when a match succeeds. If the word is "text",
which is equivalent to the <b>-a</b> or <b>--text</b> option, binary files are
processed in the same way as any other file. In this case, when a match
succeeds, the output may be binary garbage, which can have nasty effects if
sent to a terminal. If the word is "without-match", which is equivalent to the
<b>-I</b> option, binary files are not processed at all; they are assumed not to
be of interest and are skipped without causing any output or affecting the
return code.
</P>
<P>
<b>--buffer-size=</b><i>number</i>
Set the parameter that controls how much memory is used for buffering files
that are being scanned.
</P>
<P>
<b>-C</b> <i>number</i>, <b>--context=</b><i>number</i>
Output <i>number</i> lines of context both before and after each matching line.
This is equivalent to setting both <b>-A</b> and <b>-B</b> to the same value.
</P>
<P>
<b>-c</b>, <b>--count</b>
Do not output lines from the files that are being scanned; instead output the
number of matches (or non-matches if <b>-v</b> is used) that would otherwise
have caused lines to be shown. By default, this count is the same as the number
of suppressed lines, but if the <b>-M</b> (multiline) option is used (without
<b>-v</b>), there may be more suppressed lines than the number of matches.
<br>
<br>
If no lines are selected, the number zero is output. If several files are are
being scanned, a count is output for each of them. However, if the
<b>--files-with-matches</b> option is also used, only those files whose counts
are greater than zero are listed. When <b>-c</b> is used, the <b>-A</b>,
<b>-B</b>, and <b>-C</b> options are ignored.
</P>
<P>
<b>--colour</b>, <b>--color</b>
If this option is given without any data, it is equivalent to "--colour=auto".
If data is required, it must be given in the same shell item, separated by an
equals sign.
</P>
<P>
<b>--colour=</b><i>value</i>, <b>--color=</b><i>value</i>
This option specifies under what circumstances the parts of a line that matched
a pattern should be coloured in the output. By default, the output is not
coloured. The value (which is optional, see above) may be "never", "always", or
"auto". In the latter case, colouring happens only if the standard output is
connected to a terminal. More resources are used when colouring is enabled,
because <b>pcre2grep</b> has to search for all possible matches in a line, not
just one, in order to colour them all.
<br>
<br>
The colour that is used can be specified by setting the environment variable
PCRE2GREP_COLOUR or PCRE2GREP_COLOR. The value of this variable should be a
string of two numbers, separated by a semicolon. They are copied directly into
the control string for setting colour on a terminal, so it is your
responsibility to ensure that they make sense. If neither of the environment
variables is set, the default is "1;31", which gives red.
</P>
<P>
<b>-D</b> <i>action</i>, <b>--devices=</b><i>action</i>
If an input path is not a regular file or a directory, "action" specifies how
it is to be processed. Valid values are "read" (the default) or "skip"
(silently skip the path).
</P>
<P>
<b>-d</b> <i>action</i>, <b>--directories=</b><i>action</i>
If an input path is a directory, "action" specifies how it is to be processed.
Valid values are "read" (the default in non-Windows environments, for
compatibility with GNU grep), "recurse" (equivalent to the <b>-r</b> option), or
"skip" (silently skip the path, the default in Windows environments). In the
"read" case, directories are read as if they were ordinary files. In some
operating systems the effect of reading a directory like this is an immediate
end-of-file; in others it may provoke an error.
</P>
<P>
<b>-e</b> <i>pattern</i>, <b>--regex=</b><i>pattern</i>, <b>--regexp=</b><i>pattern</i>
Specify a pattern to be matched. This option can be used multiple times in
order to specify several patterns. It can also be used as a way of specifying a
single pattern that starts with a hyphen. When <b>-e</b> is used, no argument
pattern is taken from the command line; all arguments are treated as file
names. There is no limit to the number of patterns. They are applied to each
line in the order in which they are defined until one matches.
<br>
<br>
If <b>-f</b> is used with <b>-e</b>, the command line patterns are matched first,
followed by the patterns from the file(s), independent of the order in which
these options are specified. Note that multiple use of <b>-e</b> is not the same
as a single pattern with alternatives. For example, X|Y finds the first
character in a line that is X or Y, whereas if the two patterns are given
separately, with X first, <b>pcre2grep</b> finds X if it is present, even if it
follows Y in the line. It finds Y only if there is no X in the line. This
matters only if you are using <b>-o</b> or <b>--colo(u)r</b> to show the part(s)
of the line that matched.
</P>
<P>
<b>--exclude</b>=<i>pattern</i>
Files (but not directories) whose names match the pattern are skipped without
being processed. This applies to all files, whether listed on the command line,
obtained from <b>--file-list</b>, or by scanning a directory. The pattern is a
PCRE2 regular expression, and is matched against the final component of the
file name, not the entire path. The <b>-F</b>, <b>-w</b>, and <b>-x</b> options do
not apply to this pattern. The option may be given any number of times in order
to specify multiple patterns. If a file name matches both an <b>--include</b>
and an <b>--exclude</b> pattern, it is excluded. There is no short form for this
option.
</P>
<P>
<b>--exclude-from=</b><i>filename</i>
Treat each non-empty line of the file as the data for an <b>--exclude</b>
option. What constitutes a newline when reading the file is the operating
system's default. The <b>--newline</b> option has no effect on this option. This
option may be given more than once in order to specify a number of files to
read.
</P>
<P>
<b>--exclude-dir</b>=<i>pattern</i>
Directories whose names match the pattern are skipped without being processed,
whatever the setting of the <b>--recursive</b> option. This applies to all
directories, whether listed on the command line, obtained from
<b>--file-list</b>, or by scanning a parent directory. The pattern is a PCRE2
regular expression, and is matched against the final component of the directory
name, not the entire path. The <b>-F</b>, <b>-w</b>, and <b>-x</b> options do not
apply to this pattern. The option may be given any number of times in order to
specify more than one pattern. If a directory matches both <b>--include-dir</b>
and <b>--exclude-dir</b>, it is excluded. There is no short form for this
option.
</P>
<P>
<b>-F</b>, <b>--fixed-strings</b>
Interpret each data-matching pattern as a list of fixed strings, separated by
newlines, instead of as a regular expression. What constitutes a newline for
this purpose is controlled by the <b>--newline</b> option. The <b>-w</b> (match
as a word) and <b>-x</b> (match whole line) options can be used with <b>-F</b>.
They apply to each of the fixed strings. A line is selected if any of the fixed
strings are found in it (subject to <b>-w</b> or <b>-x</b>, if present). This
option applies only to the patterns that are matched against the contents of
files; it does not apply to patterns specified by any of the <b>--include</b> or
<b>--exclude</b> options.
</P>
<P>
<b>-f</b> <i>filename</i>, <b>--file=</b><i>filename</i>
Read patterns from the file, one per line, and match them against
each line of input. What constitutes a newline when reading the file is the
operating system's default. The <b>--newline</b> option has no effect on this
option. Trailing white space is removed from each line, and blank lines are
ignored. An empty file contains no patterns and therefore matches nothing. See
also the comments about multiple patterns versus a single pattern with
alternatives in the description of <b>-e</b> above.
<br>
<br>
If this option is given more than once, all the specified files are
read. A data line is output if any of the patterns match it. A file name can
be given as "-" to refer to the standard input. When <b>-f</b> is used, patterns
specified on the command line using <b>-e</b> may also be present; they are
tested before the file's patterns. However, no other pattern is taken from the
command line; all arguments are treated as the names of paths to be searched.
</P>
<P>
<b>--file-list</b>=<i>filename</i>
Read a list of files and/or directories that are to be scanned from the given
file, one per line. Trailing white space is removed from each line, and blank
lines are ignored. These paths are processed before any that are listed on the
command line. The file name can be given as "-" to refer to the standard input.
If <b>--file</b> and <b>--file-list</b> are both specified as "-", patterns are
read first. This is useful only when the standard input is a terminal, from
which further lines (the list of files) can be read after an end-of-file
indication. If this option is given more than once, all the specified files are
read.
</P>
<P>
<b>--file-offsets</b>
Instead of showing lines or parts of lines that match, show each match as an
offset from the start of the file and a length, separated by a comma. In this
mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b>
options are ignored. If there is more than one match in a line, each of them is
shown separately. This option is mutually exclusive with <b>--line-offsets</b>
and <b>--only-matching</b>.
</P>
<P>
<b>-H</b>, <b>--with-filename</b>
Force the inclusion of the file name at the start of output lines when
searching a single file. By default, the file name is not shown in this case.
For matching lines, the file name is followed by a colon; for context lines, a
hyphen separator is used. If a line number is also being output, it follows the
file name. When the <b>-M</b> option causes a pattern to match more than one
line, only the first is preceded by the file name.
</P>
<P>
<b>-h</b>, <b>--no-filename</b>
Suppress the output file names when searching multiple files. By default,
file names are shown when multiple files are searched. For matching lines, the
file name is followed by a colon; for context lines, a hyphen separator is used.
If a line number is also being output, it follows the file name.
</P>
<P>
<b>--help</b>
Output a help message, giving brief details of the command options and file
type support, and then exit. Anything else on the command line is
ignored.
</P>
<P>
<b>-I</b>
Ignore binary files. This is equivalent to
<b>--binary-files</b>=<i>without-match</i>.
</P>
<P>
<b>-i</b>, <b>--ignore-case</b>
Ignore upper/lower case distinctions during comparisons.
</P>
<P>
<b>--include</b>=<i>pattern</i>
If any <b>--include</b> patterns are specified, the only files that are
processed are those that match one of the patterns (and do not match an
<b>--exclude</b> pattern). This option does not affect directories, but it
applies to all files, whether listed on the command line, obtained from
<b>--file-list</b>, or by scanning a directory. The pattern is a PCRE2 regular
expression, and is matched against the final component of the file name, not
the entire path. The <b>-F</b>, <b>-w</b>, and <b>-x</b> options do not apply to
this pattern. The option may be given any number of times. If a file name
matches both an <b>--include</b> and an <b>--exclude</b> pattern, it is excluded.
There is no short form for this option.
</P>
<P>
<b>--include-from=</b><i>filename</i>
Treat each non-empty line of the file as the data for an <b>--include</b>
option. What constitutes a newline for this purpose is the operating system's
default. The <b>--newline</b> option has no effect on this option. This option
may be given any number of times; all the files are read.
</P>
<P>
<b>--include-dir</b>=<i>pattern</i>
If any <b>--include-dir</b> patterns are specified, the only directories that
are processed are those that match one of the patterns (and do not match an
<b>--exclude-dir</b> pattern). This applies to all directories, whether listed
on the command line, obtained from <b>--file-list</b>, or by scanning a parent
directory. The pattern is a PCRE2 regular expression, and is matched against
the final component of the directory name, not the entire path. The <b>-F</b>,
<b>-w</b>, and <b>-x</b> options do not apply to this pattern. The option may be
given any number of times. If a directory matches both <b>--include-dir</b> and
<b>--exclude-dir</b>, it is excluded. There is no short form for this option.
</P>
<P>
<b>-L</b>, <b>--files-without-match</b>
Instead of outputting lines from the files, just output the names of the files
that do not contain any lines that would have been output. Each file name is
output once, on a separate line.
</P>
<P>
<b>-l</b>, <b>--files-with-matches</b>
Instead of outputting lines from the files, just output the names of the files
containing lines that would have been output. Each file name is output
once, on a separate line. Searching normally stops as soon as a matching line
is found in a file. However, if the <b>-c</b> (count) option is also used,
matching continues in order to obtain the correct count, and those files that
have at least one match are listed along with their counts. Using this option
with <b>-c</b> is a way of suppressing the listing of files with no matches.
</P>
<P>
<b>--label</b>=<i>name</i>
This option supplies a name to be used for the standard input when file names
are being output. If not supplied, "(standard input)" is used. There is no
short form for this option.
</P>
<P>
<b>--line-buffered</b>
When this option is given, input is read and processed line by line, and the
output is flushed after each write. By default, input is read in large chunks,
unless <b>pcre2grep</b> can determine that it is reading from a terminal (which
is currently possible only in Unix-like environments). Output to terminal is
normally automatically flushed by the operating system. This option can be
useful when the input or output is attached to a pipe and you do not want
<b>pcre2grep</b> to buffer up large amounts of data. However, its use will
affect performance, and the <b>-M</b> (multiline) option ceases to work.
</P>
<P>
<b>--line-offsets</b>
Instead of showing lines or parts of lines that match, show each match as a
line number, the offset from the start of the line, and a length. The line
number is terminated by a colon (as usual; see the <b>-n</b> option), and the
offset and length are separated by a comma. In this mode, no context is shown.
That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b> options are ignored. If there is
more than one match in a line, each of them is shown separately. This option is
mutually exclusive with <b>--file-offsets</b> and <b>--only-matching</b>.
</P>
<P>
<b>--locale</b>=<i>locale-name</i>
This option specifies a locale to be used for pattern matching. It overrides
the value in the <b>LC_ALL</b> or <b>LC_CTYPE</b> environment variables. If no
locale is specified, the PCRE2 library's default (usually the "C" locale) is
used. There is no short form for this option.
</P>
<P>
<b>--match-limit</b>=<i>number</i>
Processing some regular expression patterns can require a very large amount of
memory, leading in some cases to a program crash if not enough is available.
Other patterns may take a very long time to search for all possible matching
strings. The <b>pcre2_match()</b> function that is called by <b>pcre2grep</b> to
do the matching has two parameters that can limit the resources that it uses.
<br>
<br>
The <b>--match-limit</b> option provides a means of limiting resource usage
when processing patterns that are not going to match, but which have a very
large number of possibilities in their search trees. The classic example is a
pattern that uses nested unlimited repeats. Internally, PCRE2 uses a function
called <b>match()</b> which it calls repeatedly (sometimes recursively). The
limit set by <b>--match-limit</b> is imposed on the number of times this
function is called during a match, which has the effect of limiting the amount
of backtracking that can take place.
<br>
<br>
The <b>--recursion-limit</b> option is similar to <b>--match-limit</b>, but
instead of limiting the total number of times that <b>match()</b> is called, it
limits the depth of recursive calls, which in turn limits the amount of memory
that can be used. The recursion depth is a smaller number than the total number
of calls, because not all calls to <b>match()</b> are recursive. This limit is
of use only if it is set smaller than <b>--match-limit</b>.
<br>
<br>
There are no short forms for these options. The default settings are specified
when the PCRE2 library is compiled, with the default default being 10 million.
</P>
<P>
<b>-M</b>, <b>--multiline</b>
Allow patterns to match more than one line. When this option is given, patterns
may usefully contain literal newline characters and internal occurrences of ^
and $ characters. The output for a successful match may consist of more than
one line. The first is the line in which the match started, and the last is the
line in which the match ended. If the matched string ends with a newline
sequence the output ends at the end of that line.
<br>
<br>
When this option is set, the PCRE2 library is called in "multiline" mode.
However, <b>pcre2grep</b> still processes the input line by line. The difference
is that a matched string may extend past the end of a line and continue on
one or more subsequent lines. The newline sequence must be matched as part of
the pattern. For example, to find the phrase "regular expression" in a file
where "regular" might be at the end of a line and "expression" at the start of
the next line, you could use this command:
<pre>
pcre2grep -M 'regular\s+expression' &#60;file&#62;
</pre>
The \s escape sequence matches any white space character, including newlines,
and is followed by + so as to match trailing white space on the first line as
well as possibly handling a two-character newline sequence.
<br>
<br>
There is a limit to the number of lines that can be matched, imposed by the way
that <b>pcre2grep</b> buffers the input file as it scans it. However,
<b>pcre2grep</b> ensures that at least 8K characters or the rest of the file
(whichever is the shorter) are available for forward matching, and similarly
the previous 8K characters (or all the previous characters, if fewer than 8K)
are guaranteed to be available for lookbehind assertions. The <b>-M</b> option
does not work when input is read line by line (see \fP--line-buffered\fP.)
</P>
<P>
<b>-N</b> <i>newline-type</i>, <b>--newline</b>=<i>newline-type</i>
The PCRE2 library supports five different conventions for indicating
the ends of lines. They are the single-character sequences CR (carriage return)
and LF (linefeed), the two-character sequence CRLF, an "anycrlf" convention,
which recognizes any of the preceding three types, and an "any" convention, in
which any Unicode line ending sequence is assumed to end a line. The Unicode
sequences are the three just mentioned, plus VT (vertical tab, U+000B), FF
(form feed, U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and
PS (paragraph separator, U+2029).
<br>
<br>
When the PCRE2 library is built, a default line-ending sequence is specified.
This is normally the standard sequence for the operating system. Unless
otherwise specified by this option, <b>pcre2grep</b> uses the library's default.
The possible values for this option are CR, LF, CRLF, ANYCRLF, or ANY. This
makes it possible to use <b>pcre2grep</b> to scan files that have come from
other environments without having to modify their line endings. If the data
that is being scanned does not agree with the convention set by this option,
<b>pcre2grep</b> may behave in strange ways. Note that this option does not
apply to files specified by the <b>-f</b>, <b>--exclude-from</b>, or
<b>--include-from</b> options, which are expected to use the operating system's
standard newline sequence.
</P>
<P>
<b>-n</b>, <b>--line-number</b>
Precede each output line by its line number in the file, followed by a colon
for matching lines or a hyphen for context lines. If the file name is also
being output, it precedes the line number. When the <b>-M</b> option causes a
pattern to match more than one line, only the first is preceded by its line
number. This option is forced if <b>--line-offsets</b> is used.
</P>
<P>
<b>--no-jit</b>
If the PCRE2 library is built with support for just-in-time compiling (which
speeds up matching), <b>pcre2grep</b> automatically makes use of this, unless it
was explicitly disabled at build time. This option can be used to disable the
use of JIT at run time. It is provided for testing and working round problems.
It should never be needed in normal use.
</P>
<P>
<b>-o</b>, <b>--only-matching</b>
Show only the part of the line that matched a pattern instead of the whole
line. In this mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and
<b>-C</b> options are ignored. If there is more than one match in a line, each
of them is shown separately. If <b>-o</b> is combined with <b>-v</b> (invert the
sense of the match to find non-matching lines), no output is generated, but the
return code is set appropriately. If the matched portion of the line is empty,
nothing is output unless the file name or line number are being printed, in
which case they are shown on an otherwise empty line. This option is mutually
exclusive with <b>--file-offsets</b> and <b>--line-offsets</b>.
</P>
<P>
<b>-o</b><i>number</i>, <b>--only-matching</b>=<i>number</i>
Show only the part of the line that matched the capturing parentheses of the
given number. Up to 32 capturing parentheses are supported, and -o0 is
equivalent to <b>-o</b> without a number. Because these options can be given
without an argument (see above), if an argument is present, it must be given in
the same shell item, for example, -o3 or --only-matching=2. The comments given
for the non-argument case above also apply to this case. If the specified
capturing parentheses do not exist in the pattern, or were not set in the
match, nothing is output unless the file name or line number are being output.
<br>
<br>
If this option is given multiple times, multiple substrings are output, in the
order the options are given. For example, -o3 -o1 -o3 causes the substrings
matched by capturing parentheses 3 and 1 and then 3 again to be output. By
default, there is no separator (but see the next option).
</P>
<P>
<b>--om-separator</b>=<i>text</i>
Specify a separating string for multiple occurrences of <b>-o</b>. The default
is an empty string. Separating strings are never coloured.
</P>
<P>
<b>-q</b>, <b>--quiet</b>
Work quietly, that is, display nothing except error messages. The exit
status indicates whether or not any matches were found.
</P>
<P>
<b>-r</b>, <b>--recursive</b>
If any given path is a directory, recursively scan the files it contains,
taking note of any <b>--include</b> and <b>--exclude</b> settings. By default, a
directory is read as a normal file; in some operating systems this gives an
immediate end-of-file. This option is a shorthand for setting the <b>-d</b>
option to "recurse".
</P>
<P>
<b>--recursion-limit</b>=<i>number</i>
See <b>--match-limit</b> above.
</P>
<P>
<b>-s</b>, <b>--no-messages</b>
Suppress error messages about non-existent or unreadable files. Such files are
quietly skipped. However, the return code is still 2, even if matches were
found in other files.
</P>
<P>
<b>-u</b>, <b>--utf-8</b>
Operate in UTF-8 mode. This option is available only if PCRE2 has been compiled
with UTF-8 support. All patterns (including those for any <b>--exclude</b> and
<b>--include</b> options) and all subject lines that are scanned must be valid
strings of UTF-8 characters.
</P>
<P>
<b>-V</b>, <b>--version</b>
Write the version numbers of <b>pcre2grep</b> and the PCRE2 library to the
standard output and then exit. Anything else on the command line is
ignored.
</P>
<P>
<b>-v</b>, <b>--invert-match</b>
Invert the sense of the match, so that lines which do <i>not</i> match any of
the patterns are the ones that are found.
</P>
<P>
<b>-w</b>, <b>--word-regex</b>, <b>--word-regexp</b>
Force the patterns to match only whole words. This is equivalent to having \b
at the start and end of the pattern. This option applies only to the patterns
that are matched against the contents of files; it does not apply to patterns
specified by any of the <b>--include</b> or <b>--exclude</b> options.
</P>
<P>
<b>-x</b>, <b>--line-regex</b>, <b>--line-regexp</b>
Force the patterns to be anchored (each must start matching at the beginning of
a line) and in addition, require them to match entire lines. This is equivalent
to having ^ and $ characters at the start and end of each alternative top-level
branch in every pattern. This option applies only to the patterns that are
matched against the contents of files; it does not apply to patterns specified
by any of the <b>--include</b> or <b>--exclude</b> options.
</P>
<br><a name="SEC6" href="#TOC1">ENVIRONMENT VARIABLES</a><br>
<P>
The environment variables <b>LC_ALL</b> and <b>LC_CTYPE</b> are examined, in that
order, for a locale. The first one that is set is used. This can be overridden
by the <b>--locale</b> option. If no locale is set, the PCRE2 library's default
(usually the "C" locale) is used.
</P>
<br><a name="SEC7" href="#TOC1">NEWLINES</a><br>
<P>
The <b>-N</b> (<b>--newline</b>) option allows <b>pcre2grep</b> to scan files with
different newline conventions from the default. Any parts of the input files
that are written to the standard output are copied identically, with whatever
newline sequences they have in the input. However, the setting of this option
does not affect the interpretation of files specified by the <b>-f</b>,
<b>--exclude-from</b>, or <b>--include-from</b> options, which are assumed to use
the operating system's standard newline sequence, nor does it affect the way in
which <b>pcre2grep</b> writes informational messages to the standard error and
output streams. For these it uses the string "\n" to indicate newlines,
relying on the C I/O library to convert this to an appropriate sequence.
</P>
<br><a name="SEC8" href="#TOC1">OPTIONS COMPATIBILITY</a><br>
<P>
Many of the short and long forms of <b>pcre2grep</b>'s options are the same
as in the GNU <b>grep</b> program. Any long option of the form
<b>--xxx-regexp</b> (GNU terminology) is also available as <b>--xxx-regex</b>
(PCRE2 terminology). However, the <b>--file-list</b>, <b>--file-offsets</b>,
<b>--include-dir</b>, <b>--line-offsets</b>, <b>--locale</b>, <b>--match-limit</b>,
<b>-M</b>, <b>--multiline</b>, <b>-N</b>, <b>--newline</b>, <b>--om-separator</b>,
<b>--recursion-limit</b>, <b>-u</b>, and <b>--utf-8</b> options are specific to
<b>pcre2grep</b>, as is the use of the <b>--only-matching</b> option with a
capturing parentheses number.
</P>
<P>
Although most of the common options work the same way, a few are different in
<b>pcre2grep</b>. For example, the <b>--include</b> option's argument is a glob
for GNU <b>grep</b>, but a regular expression for <b>pcre2grep</b>. If both the
<b>-c</b> and <b>-l</b> options are given, GNU grep lists only file names,
without counts, but <b>pcre2grep</b> gives the counts as well.
</P>
<br><a name="SEC9" href="#TOC1">OPTIONS WITH DATA</a><br>
<P>
There are four different ways in which an option with data can be specified.
If a short form option is used, the data may follow immediately, or (with one
exception) in the next command line item. For example:
<pre>
-f/some/file
-f /some/file
</pre>
The exception is the <b>-o</b> option, which may appear with or without data.
Because of this, if data is present, it must follow immediately in the same
item, for example -o3.
</P>
<P>
If a long form option is used, the data may appear in the same command line
item, separated by an equals character, or (with two exceptions) it may appear
in the next command line item. For example:
<pre>
--file=/some/file
--file /some/file
</pre>
Note, however, that if you want to supply a file name beginning with ~ as data
in a shell command, and have the shell expand ~ to a home directory, you must
separate the file name from the option, because the shell does not treat ~
specially unless it is at the start of an item.
</P>
<P>
The exceptions to the above are the <b>--colour</b> (or <b>--color</b>) and
<b>--only-matching</b> options, for which the data is optional. If one of these
options does have data, it must be given in the first form, using an equals
character. Otherwise <b>pcre2grep</b> will assume that it has no data.
</P>
<br><a name="SEC10" href="#TOC1">MATCHING ERRORS</a><br>
<P>
It is possible to supply a regular expression that takes a very long time to
fail to match certain lines. Such patterns normally involve nested indefinite
repeats, for example: (a+)*\d when matched against a line of a's with no final
digit. The PCRE2 matching function has a resource limit that causes it to abort
in these circumstances. If this happens, <b>pcre2grep</b> outputs an error
message and the line that caused the problem to the standard error stream. If
there are more than 20 such errors, <b>pcre2grep</b> gives up.
</P>
<P>
The <b>--match-limit</b> option of <b>pcre2grep</b> can be used to set the
overall resource limit; there is a second option called <b>--recursion-limit</b>
that sets a limit on the amount of memory (usually stack) that is used (see the
discussion of these options above).
</P>
<br><a name="SEC11" href="#TOC1">DIAGNOSTICS</a><br>
<P>
Exit status is 0 if any matches were found, 1 if no matches were found, and 2
for syntax errors, overlong lines, non-existent or inaccessible files (even if
matches were found in other files) or too many matching errors. Using the
<b>-s</b> option to suppress error messages about inaccessible files does not
affect the return code.
</P>
<br><a name="SEC12" href="#TOC1">SEE ALSO</a><br>
<P>
<b>pcre2pattern</b>(3), <b>pcre2syntax</b>(3).
</P>
<br><a name="SEC13" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel
<br>
University Computing Service
<br>
Cambridge, England.
<br>
</P>
<br><a name="SEC14" href="#TOC1">REVISION</a><br>
<P>
Last updated: 03 January 2015
<br>
Copyright &copy; 1997-2015 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,428 @@
<html>
<head>
<title>pcre2jit specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2jit man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<ul>
<li><a name="TOC1" href="#SEC1">PCRE2 JUST-IN-TIME COMPILER SUPPORT</a>
<li><a name="TOC2" href="#SEC2">AVAILABILITY OF JIT SUPPORT</a>
<li><a name="TOC3" href="#SEC3">SIMPLE USE OF JIT</a>
<li><a name="TOC4" href="#SEC4">UNSUPPORTED OPTIONS AND PATTERN ITEMS</a>
<li><a name="TOC5" href="#SEC5">RETURN VALUES FROM JIT MATCHING</a>
<li><a name="TOC6" href="#SEC6">CONTROLLING THE JIT STACK</a>
<li><a name="TOC7" href="#SEC7">JIT STACK FAQ</a>
<li><a name="TOC8" href="#SEC8">FREEING JIT SPECULATIVE MEMORY</a>
<li><a name="TOC9" href="#SEC9">EXAMPLE CODE</a>
<li><a name="TOC10" href="#SEC10">JIT FAST PATH API</a>
<li><a name="TOC11" href="#SEC11">SEE ALSO</a>
<li><a name="TOC12" href="#SEC12">AUTHOR</a>
<li><a name="TOC13" href="#SEC13">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">PCRE2 JUST-IN-TIME COMPILER SUPPORT</a><br>
<P>
Just-in-time compiling is a heavyweight optimization that can greatly speed up
pattern matching. However, it comes at the cost of extra processing before the
match is performed, so it is of most benefit when the same pattern is going to
be matched many times. This does not necessarily mean many calls of a matching
function; if the pattern is not anchored, matching attempts may take place many
times at various positions in the subject, even for a single call. Therefore,
if the subject string is very long, it may still pay to use JIT even for
one-off matches. JIT support is available for all of the 8-bit, 16-bit and
32-bit PCRE2 libraries.
</P>
<P>
JIT support applies only to the traditional Perl-compatible matching function.
It does not apply when the DFA matching function is being used. The code for
this support was written by Zoltan Herczeg.
</P>
<br><a name="SEC2" href="#TOC1">AVAILABILITY OF JIT SUPPORT</a><br>
<P>
JIT support is an optional feature of PCRE2. The "configure" option
--enable-jit (or equivalent CMake option) must be set when PCRE2 is built if
you want to use JIT. The support is limited to the following hardware
platforms:
<pre>
ARM 32-bit (v5, v7, and Thumb2)
ARM 64-bit
Intel x86 32-bit and 64-bit
MIPS 32-bit and 64-bit
Power PC 32-bit and 64-bit
SPARC 32-bit
</pre>
If --enable-jit is set on an unsupported platform, compilation fails.
</P>
<P>
A program can tell if JIT support is available by calling <b>pcre2_config()</b>
with the PCRE2_CONFIG_JIT option. The result is 1 when JIT is available, and 0
otherwise. However, a simple program does not need to check this in order to
use JIT. The API is implemented in a way that falls back to the interpretive
code if JIT is not available. For programs that need the best possible
performance, there is also a "fast path" API that is JIT-specific.
</P>
<br><a name="SEC3" href="#TOC1">SIMPLE USE OF JIT</a><br>
<P>
To make use of the JIT support in the simplest way, all you have to do is to
call <b>pcre2_jit_compile()</b> after successfully compiling a pattern with
<b>pcre2_compile()</b>. This function has two arguments: the first is the
compiled pattern pointer that was returned by <b>pcre2_compile()</b>, and the
second is zero or more of the following option bits: PCRE2_JIT_COMPLETE,
PCRE2_JIT_PARTIAL_HARD, or PCRE2_JIT_PARTIAL_SOFT.
</P>
<P>
If JIT support is not available, a call to <b>pcre2_jit_compile()</b> does
nothing and returns PCRE2_ERROR_JIT_BADOPTION. Otherwise, the compiled pattern
is passed to the JIT compiler, which turns it into machine code that executes
much faster than the normal interpretive code, but yields exactly the same
results. The returned value from <b>pcre2_jit_compile()</b> is zero on success,
or a negative error code.
</P>
<P>
PCRE2_JIT_COMPLETE requests the JIT compiler to generate code for complete
matches. If you want to run partial matches using the PCRE2_PARTIAL_HARD or
PCRE2_PARTIAL_SOFT options of <b>pcre2_match()</b>, you should set one or both
of the other options as well as, or instead of PCRE2_JIT_COMPLETE. The JIT
compiler generates different optimized code for each of the three modes
(normal, soft partial, hard partial). When <b>pcre2_match()</b> is called, the
appropriate code is run if it is available. Otherwise, the pattern is matched
using interpretive code.
</P>
<P>
You can call <b>pcre2_jit_compile()</b> multiple times for the same compiled
pattern. It does nothing if it has previously compiled code for any of the
option bits. For example, you can call it once with PCRE2_JIT_COMPLETE and
(perhaps later, when you find you need partial matching) again with
PCRE2_JIT_COMPLETE and PCRE2_JIT_PARTIAL_HARD. This time it will ignore
PCRE2_JIT_COMPLETE and just compile code for partial matching. If
<b>pcre2_jit_compile()</b> is called with no option bits set, it immediately
returns zero. This is an alternative way of testing whether JIT is available.
</P>
<P>
At present, it is not possible to free JIT compiled code except when the entire
compiled pattern is freed by calling <b>pcre2_code_free()</b>.
</P>
<P>
In some circumstances you may need to call additional functions. These are
described in the section entitled
<a href="#stackcontrol">"Controlling the JIT stack"</a>
below.
</P>
<P>
There are some <b>pcre2_match()</b> options that are not supported by JIT, and
there are also some pattern items that JIT cannot handle. Details are given
below. In both cases, matching automatically falls back to the interpretive
code. If you want to know whether JIT was actually used for a particular match,
you should arrange for a JIT callback function to be set up as described in the
section entitled
<a href="#stackcontrol">"Controlling the JIT stack"</a>
below, even if you do not need to supply a non-default JIT stack. Such a
callback function is called whenever JIT code is about to be obeyed. If the
match-time options are not right for JIT execution, the callback function is
not obeyed.
</P>
<P>
If the JIT compiler finds an unsupported item, no JIT data is generated. You
can find out if JIT matching is available after compiling a pattern by calling
<b>pcre2_pattern_info()</b> with the PCRE2_INFO_JITSIZE option. A non-zero
result means that JIT compilation was successful. A result of 0 means that JIT
support is not available, or the pattern was not processed by
<b>pcre2_jit_compile()</b>, or the JIT compiler was not able to handle the
pattern.
</P>
<br><a name="SEC4" href="#TOC1">UNSUPPORTED OPTIONS AND PATTERN ITEMS</a><br>
<P>
The <b>pcre2_match()</b> options that are supported for JIT matching are
PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. The
PCRE2_ANCHORED option is not supported at match time.
</P>
<P>
The only unsupported pattern items are \C (match a single data unit) when
running in a UTF mode, and a callout immediately before an assertion condition
in a conditional group.
</P>
<br><a name="SEC5" href="#TOC1">RETURN VALUES FROM JIT MATCHING</a><br>
<P>
When a pattern is matched using JIT matching, the return values are the same
as those given by the interpretive <b>pcre2_match()</b> code, with the addition
of one new error code: PCRE2_ERROR_JIT_STACKLIMIT. This means that the memory
used for the JIT stack was insufficient. See
<a href="#stackcontrol">"Controlling the JIT stack"</a>
below for a discussion of JIT stack usage.
</P>
<P>
The error code PCRE2_ERROR_MATCHLIMIT is returned by the JIT code if searching
a very large pattern tree goes on for too long, as it is in the same
circumstance when JIT is not used, but the details of exactly what is counted
are not the same. The PCRE2_ERROR_RECURSIONLIMIT error code is never returned
when JIT matching is used.
<a name="stackcontrol"></a></P>
<br><a name="SEC6" href="#TOC1">CONTROLLING THE JIT STACK</a><br>
<P>
When the compiled JIT code runs, it needs a block of memory to use as a stack.
By default, it uses 32K on the machine stack. However, some large or
complicated patterns need more than this. The error PCRE2_ERROR_JIT_STACKLIMIT
is given when there is not enough stack. Three functions are provided for
managing blocks of memory for use as JIT stacks. There is further discussion
about the use of JIT stacks in the section entitled
<a href="#stackfaq">"JIT stack FAQ"</a>
below.
</P>
<P>
The <b>pcre2_jit_stack_create()</b> function creates a JIT stack. Its arguments
are a starting size, a maximum size, and a general context (for memory
allocation functions, or NULL for standard memory allocation). It returns a
pointer to an opaque structure of type <b>pcre2_jit_stack</b>, or NULL if there
is an error. The <b>pcre2_jit_stack_free()</b> function is used to free a stack
that is no longer needed. (For the technically minded: the address space is
allocated by mmap or VirtualAlloc.)
</P>
<P>
JIT uses far less memory for recursion than the interpretive code,
and a maximum stack size of 512K to 1M should be more than enough for any
pattern.
</P>
<P>
The <b>pcre2_jit_stack_assign()</b> function specifies which stack JIT code
should use. Its arguments are as follows:
<pre>
pcre2_match_context *mcontext
pcre2_jit_callback callback
void *data
</pre>
The first argument is a pointer to a match context. When this is subsequently
passed to a matching function, its information determines which JIT stack is
used. There are three cases for the values of the other two options:
<pre>
(1) If <i>callback</i> is NULL and <i>data</i> is NULL, an internal 32K block
on the machine stack is used. This is the default when a match
context is created.
(2) If <i>callback</i> is NULL and <i>data</i> is not NULL, <i>data</i> must be
a pointer to a valid JIT stack, the result of calling
<b>pcre2_jit_stack_create()</b>.
(3) If <i>callback</i> is not NULL, it must point to a function that is
called with <i>data</i> as an argument at the start of matching, in
order to set up a JIT stack. If the return from the callback
function is NULL, the internal 32K stack is used; otherwise the
return value must be a valid JIT stack, the result of calling
<b>pcre2_jit_stack_create()</b>.
</pre>
A callback function is obeyed whenever JIT code is about to be run; it is not
obeyed when <b>pcre2_match()</b> is called with options that are incompatible
for JIT matching. A callback function can therefore be used to determine
whether a match operation was executed by JIT or by the interpreter.
</P>
<P>
You may safely use the same JIT stack for more than one pattern (either by
assigning directly or by callback), as long as the patterns are all matched
sequentially in the same thread. In a multithread application, if you do not
specify a JIT stack, or if you assign or pass back NULL from a callback, that
is thread-safe, because each thread has its own machine stack. However, if you
assign or pass back a non-NULL JIT stack, this must be a different stack for
each thread so that the application is thread-safe.
</P>
<P>
Strictly speaking, even more is allowed. You can assign the same non-NULL stack
to a match context that is used by any number of patterns, as long as they are
not used for matching by multiple threads at the same time. For example, you
could use the same stack in all compiled patterns, with a global mutex in the
callback to wait until the stack is available for use. However, this is an
inefficient solution, and not recommended.
</P>
<P>
This is a suggestion for how a multithreaded program that needs to set up
non-default JIT stacks might operate:
<pre>
During thread initalization
thread_local_var = pcre2_jit_stack_create(...)
During thread exit
pcre2_jit_stack_free(thread_local_var)
Use a one-line callback function
return thread_local_var
</pre>
All the functions described in this section do nothing if JIT is not available.
<a name="stackfaq"></a></P>
<br><a name="SEC7" href="#TOC1">JIT STACK FAQ</a><br>
<P>
(1) Why do we need JIT stacks?
<br>
<br>
PCRE2 (and JIT) is a recursive, depth-first engine, so it needs a stack where
the local data of the current node is pushed before checking its child nodes.
Allocating real machine stack on some platforms is difficult. For example, the
stack chain needs to be updated every time if we extend the stack on PowerPC.
Although it is possible, its updating time overhead decreases performance. So
we do the recursion in memory.
</P>
<P>
(2) Why don't we simply allocate blocks of memory with <b>malloc()</b>?
<br>
<br>
Modern operating systems have a nice feature: they can reserve an address space
instead of allocating memory. We can safely allocate memory pages inside this
address space, so the stack could grow without moving memory data (this is
important because of pointers). Thus we can allocate 1M address space, and use
only a single memory page (usually 4K) if that is enough. However, we can still
grow up to 1M anytime if needed.
</P>
<P>
(3) Who "owns" a JIT stack?
<br>
<br>
The owner of the stack is the user program, not the JIT studied pattern or
anything else. The user program must ensure that if a stack is being used by
<b>pcre2_match()</b>, (that is, it is assigned to a match context that is passed
to the pattern currently running), that stack must not be used by any other
threads (to avoid overwriting the same memory area). The best practice for
multithreaded programs is to allocate a stack for each thread, and return this
stack through the JIT callback function.
</P>
<P>
(4) When should a JIT stack be freed?
<br>
<br>
You can free a JIT stack at any time, as long as it will not be used by
<b>pcre2_match()</b> again. When you assign the stack to a match context, only a
pointer is set. There is no reference counting or any other magic. You can free
compiled patterns, contexts, and stacks in any order, anytime. Just \fIdo
not\fP call <b>pcre2_match()</b> with a match context pointing to an already
freed stack, as that will cause SEGFAULT. (Also, do not free a stack currently
used by <b>pcre2_match()</b> in another thread). You can also replace the stack
in a context at any time when it is not in use. You should free the previous
stack before assigning a replacement.
</P>
<P>
(5) Should I allocate/free a stack every time before/after calling
<b>pcre2_match()</b>?
<br>
<br>
No, because this is too costly in terms of resources. However, you could
implement some clever idea which release the stack if it is not used in let's
say two minutes. The JIT callback can help to achieve this without keeping a
list of patterns.
</P>
<P>
(6) OK, the stack is for long term memory allocation. But what happens if a
pattern causes stack overflow with a stack of 1M? Is that 1M kept until the
stack is freed?
<br>
<br>
Especially on embedded sytems, it might be a good idea to release memory
sometimes without freeing the stack. There is no API for this at the moment.
Probably a function call which returns with the currently allocated memory for
any stack and another which allows releasing memory (shrinking the stack) would
be a good idea if someone needs this.
</P>
<P>
(7) This is too much of a headache. Isn't there any better solution for JIT
stack handling?
<br>
<br>
No, thanks to Windows. If POSIX threads were used everywhere, we could throw
out this complicated API.
</P>
<br><a name="SEC8" href="#TOC1">FREEING JIT SPECULATIVE MEMORY</a><br>
<P>
<b>void pcre2_jit_free_unused_memory(pcre2_general_context *<i>gcontext</i>);</b>
</P>
<P>
The JIT executable allocator does not free all memory when it is possible.
It expects new allocations, and keeps some free memory around to improve
allocation speed. However, in low memory conditions, it might be better to free
all possible memory. You can cause this to happen by calling
pcre2_jit_free_unused_memory(). Its argument is a general context, for custom
memory management, or NULL for standard memory management.
</P>
<br><a name="SEC9" href="#TOC1">EXAMPLE CODE</a><br>
<P>
This is a single-threaded example that specifies a JIT stack without using a
callback. A real program should include error checking after all the function
calls.
<pre>
int rc;
pcre2_code *re;
pcre2_match_data *match_data;
pcre2_match_context *mcontext;
pcre2_jit_stack *jit_stack;
re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0,
&errornumber, &erroffset, NULL);
rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
mcontext = pcre2_match_context_create(NULL);
jit_stack = pcre2_jit_stack_create(32*1024, 512*1024, NULL);
pcre2_jit_stack_assign(mcontext, NULL, jit_stack);
match_data = pcre2_match_data_create(re, 10);
rc = pcre2_match(re, subject, length, 0, 0, match_data, mcontext);
/* Process result */
pcre2_code_free(re);
pcre2_match_data_free(match_data);
pcre2_match_context_free(mcontext);
pcre2_jit_stack_free(jit_stack);
</PRE>
</P>
<br><a name="SEC10" href="#TOC1">JIT FAST PATH API</a><br>
<P>
Because the API described above falls back to interpreted matching when JIT is
not available, it is convenient for programs that are written for general use
in many environments. However, calling JIT via <b>pcre2_match()</b> does have a
performance impact. Programs that are written for use where JIT is known to be
available, and which need the best possible performance, can instead use a
"fast path" API to call JIT matching directly instead of calling
<b>pcre2_match()</b> (obviously only for patterns that have been successfully
processed by <b>pcre2_jit_compile()</b>).
</P>
<P>
The fast path function is called <b>pcre2_jit_match()</b>, and it takes exactly
the same arguments as <b>pcre2_match()</b>. The return values are also the same,
plus PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or complete) is
requested that was not compiled. Unsupported option bits (for example,
PCRE2_ANCHORED) are ignored.
</P>
<P>
When you call <b>pcre2_match()</b>, as well as testing for invalid options, a
number of other sanity checks are performed on the arguments. For example, if
the subject pointer is NULL, an immediate error is given. Also, unless
PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested for validity. In the
interests of speed, these checks do not happen on the JIT fast path, and if
invalid data is passed, the result is undefined.
</P>
<P>
Bypassing the sanity checks and the <b>pcre2_match()</b> wrapping can give
speedups of more than 10%.
</P>
<br><a name="SEC11" href="#TOC1">SEE ALSO</a><br>
<P>
<b>pcre2api</b>(3)
</P>
<br><a name="SEC12" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel (FAQ by Zoltan Herczeg)
<br>
University Computing Service
<br>
Cambridge, England.
<br>
</P>
<br><a name="SEC13" href="#TOC1">REVISION</a><br>
<P>
Last updated: 27 November 2014
<br>
Copyright &copy; 1997-2014 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

Some files were not shown because too many files have changed in this diff Show More