diff --git a/CMakeLists.txt b/CMakeLists.txt index 11a55902e..85e9f8229 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,15 +46,11 @@ find_package(Avro) find_package(GSSAPI) find_package(SQLite) -# Find or build PCRE2 +# Build PCRE2 so we always know the version # Read BuildPCRE2 for details about how to add pcre2 as a dependency to a target -find_package(PCRE2) -if(NOT PCRE2_FOUND) - message(STATUS "Using bundled PCRE2 library") - include(cmake/BuildPCRE2.cmake) -endif() +include(cmake/BuildPCRE2.cmake) -include_directories(${PCRE2_INCLUDE_DIRS}) +include_directories(BEFORE ${PCRE2_INCLUDE_DIRS}) # If the connector was not found, download and build it from source if(NOT MARIADB_CONNECTOR_FOUND) diff --git a/COPYRIGHT b/COPYRIGHT index ddfc55378..906404381 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/Documentation/Filters/Masking.md b/Documentation/Filters/Masking.md index 6122bd4d6..d15eea215 100644 --- a/Documentation/Filters/Masking.md +++ b/Documentation/Filters/Masking.md @@ -198,23 +198,21 @@ specified name. #### `with` -The value of this key is an object that specifies what the value -of the matched column should be replaced with. Currently, the object -is expected to contain either the key `value` or the key `fill`. The -value of both must be a string. If both keys are specified, then -`value` takes presedence. +The value of this key is an object that specifies what the value of the matched +column should be replaced with. Currently, the object is expected to contain +either the key `value` or the key `fill`. The value of both must be a string +with length greater than zero. 
If both keys are specified, `value` takes +precedence. If `fill` is not specified, the default `X` is used as its value. -If `value` is specified, then its value is used to replace the actual -value verbatim and the length of the specified value must match the -actual returned value (from the server) exactly. If the lengths do -not match, then if `fill` is specified its value will be used to -mask the actual value. Otherwise an error is logged and the value -is *not* masked. +If `value` is specified, then its value is used to replace the actual value +verbatim and the length of the specified value must match the actual returned +value (from the server) exactly. If the lengths do not match, the value of +`fill` is used to mask the actual value. -If `fill` is specified, then its value will be used for masking the -value; as such if the lenghts match, by cutting it if the actual value -is shorter, and by repeating it, fully or partially, the necessary -amount of times, if the actual value is longer. +When the value of `fill` (fill-value) is used for masking the returned value, +the fill-value is repeated as many times as necessary to match the length of the +returned value. If required, only a part of the fill-value may be used at the end +of the mask value to get the lengths to match. ``` { "rules": [ diff --git a/Documentation/Filters/Tee-Filter.md b/Documentation/Filters/Tee-Filter.md index 8e1155351..72e7f4424 100644 --- a/Documentation/Filters/Tee-Filter.md +++ b/Documentation/Filters/Tee-Filter.md @@ -6,12 +6,15 @@ The tee filter is a "plumbing" fitting in the MariaDB MaxScale filter toolkit. It can be used in a filter pipeline of a service to make copies of requests from the client and send the copies to another service within MariaDB MaxScale. +**Please Note:** Starting with MaxScale 2.2.0, any client that connects to a + service which uses a tee filter will require a grant for the loopback address, + i.e. `127.0.0.1`. 
+ ## Configuration The configuration block for the TEE filter requires the minimal filter parameters in its section within the MaxScale configuration file. The service to -send the duplicates to must be defined. Currently the tee filter does not -support multi-statements. +send the duplicates to must be defined. ``` [DataMartFilter] diff --git a/Documentation/REST-API/API.md b/Documentation/REST-API/API.md index 1d3986aa9..c203e0f55 100644 --- a/Documentation/REST-API/API.md +++ b/Documentation/REST-API/API.md @@ -111,10 +111,16 @@ Credentials for authentication. #### Content-Type All PUT and POST requests must use the `Content-Type: application/json` media -type and the request body must be a valid JSON representation of a resource. All -PATCH requests must use the `Content-Type: application/json` media type and the -request body must be a JSON document containing a partial definition of the -original resource. +type and the request body must be a complete and valid JSON representation of a +resource. All PATCH requests must use the `Content-Type: application/json` media +type and the request body must be a JSON document containing a partial +definition of the original resource. + +The current version of the API supports PATCH-like PUT requests with +partial definitions of resources in the request body. This is discouraged +as it goes against the intended use of the PUT method. Future versions of +the MaxScale REST API may remove this support, so this functionality +should be treated as deprecated. 
#### Host diff --git a/Documentation/REST-API/Resources-MaxScale.md b/Documentation/REST-API/Resources-MaxScale.md index 6a19a52ab..5e6756e00 100644 --- a/Documentation/REST-API/Resources-MaxScale.md +++ b/Documentation/REST-API/Resources-MaxScale.md @@ -304,89 +304,69 @@ GET /v1/maxscale/modules "self": "http://localhost:8989/v1/maxscale/modules/" }, "data": { - "id": "readwritesplit", + "id": "dbfwfilter", "type": "module", "attributes": { - "module_type": "Router", - "version": "V1.1.0", - "description": "A Read/Write splitting router for enhancement read scalability", - "api": "router", + "module_type": "Filter", + "version": "V1.2.0", + "description": "Firewall Filter", + "api": "filter", "status": "GA", + "commands": [ + { + "id": "rules/reload", + "type": "module_command", + "links": { + "self": "http://localhost:8989/v1/modules/dbfwfilter/rules/reload" + }, + "attributes": { + "method": "POST", + "arg_min": 1, + "arg_max": 2, + "parameters": [ + { + "description": "Filter to reload", + "type": "FILTER", + "required": true + }, + { + "description": "Path to rule file", + "type": "[STRING]", + "required": false + } + ] + } + } + ], "parameters": [ { - "name": "use_sql_variables_in", - "type": "enum", - "default_value": "all", - "enum_values": [ - "all", - "master" - ] + "name": "rules", + "type": "path" }, { - "name": "slave_selection_criteria", - "type": "enum", - "default_value": "LEAST_CURRENT_OPERATIONS", - "enum_values": [ - "LEAST_GLOBAL_CONNECTIONS", - "LEAST_ROUTER_CONNECTIONS", - "LEAST_BEHIND_MASTER", - "LEAST_CURRENT_OPERATIONS" - ] - }, - { - "name": "master_failure_mode", - "type": "enum", - "default_value": "fail_instantly", - "enum_values": [ - "fail_instantly", - "fail_on_write", - "error_on_write" - ] - }, - { - "name": "max_slave_replication_lag", - "type": "int", - "default_value": "-1" - }, - { - "name": "max_slave_connections", - "type": "string", - "default_value": "255" - }, - { - "name": "retry_failed_reads", - "type": "bool", - 
"default_value": "true" - }, - { - "name": "disable_sescmd_history", - "type": "bool", - "default_value": "true" - }, - { - "name": "max_sescmd_history", - "type": "count", - "default_value": "0" - }, - { - "name": "strict_multi_stmt", - "type": "bool", - "default_value": "true" - }, - { - "name": "master_accept_reads", + "name": "log_match", "type": "bool", "default_value": "false" }, { - "name": "connection_keepalive", - "type": "count", - "default_value": "0" + "name": "log_no_match", + "type": "bool", + "default_value": "false" + }, + { + "name": "action", + "type": "enum", + "default_value": "block", + "enum_values": [ + "allow", + "block", + "ignore" + ] } ] }, "links": { - "self": "http://localhost:8989/v1/modules/readwritesplit" + "self": "http://localhost:8989/v1/modules/dbfwfilter" } } } diff --git a/Documentation/REST-API/Resources-Monitor.md b/Documentation/REST-API/Resources-Monitor.md index cb7a70f6e..832577c74 100644 --- a/Documentation/REST-API/Resources-Monitor.md +++ b/Documentation/REST-API/Resources-Monitor.md @@ -267,7 +267,7 @@ The :name in the URI must map to a monitor name with all whitespace replaced wit hyphens. The request body must be a valid JSON document representing the modified monitor. ``` -PUT /v1/monitor/:name +PATCH /v1/monitor/:name ``` ### Modifiable Fields diff --git a/Documentation/REST-API/Resources-Server.md b/Documentation/REST-API/Resources-Server.md index 8254fa896..c50f186ca 100644 --- a/Documentation/REST-API/Resources-Server.md +++ b/Documentation/REST-API/Resources-Server.md @@ -312,7 +312,7 @@ Status: 403 Forbidden ### Update a server ``` -PUT /v1/servers/:name +PATCH /v1/servers/:name ``` The _:name_ in the URI must map to a server name with all whitespace replaced @@ -443,12 +443,12 @@ Request for `PUT /v1/server/server1`: } ``` -The current implementation accepts both PUT and PATCH requests with partially -defined resources as request body. If parts of the resource are not defined -(e.g. 
the `attributes` field in the above example), those parts of the resource -are not modified. All parts that are defined are interpreted as the new -definition of those part of the resource. In the above example, the -`relationships` of the resource are completely redefined. +The current implementation accepts PATCH requests with partially defined +resources as the request body. If parts of the resource are not defined (e.g. the +`attributes` field in the above example), those parts of the resource are not +modified. All parts that are defined are interpreted as the new definition of +those parts of the resource. In the above example, the `relationships` of the +resource are completely redefined. #### Response diff --git a/Documentation/REST-API/Resources-Service.md b/Documentation/REST-API/Resources-Service.md index 0f34f550b..e43c585b8 100644 --- a/Documentation/REST-API/Resources-Service.md +++ b/Documentation/REST-API/Resources-Service.md @@ -265,7 +265,7 @@ The _:name_ in the URI must map to a service name and the request body must be a valid JSON Patch document which is applied to the resource. ``` -PUT /v1/services/:name +PATCH /v1/services/:name ``` The following standard service parameters can be modified. diff --git a/Documentation/Release-Notes/MaxScale-2.1.2-Release-Notes.md b/Documentation/Release-Notes/MaxScale-2.1.2-Release-Notes.md index da7a52163..121ab76b9 100644 --- a/Documentation/Release-Notes/MaxScale-2.1.2-Release-Notes.md +++ b/Documentation/Release-Notes/MaxScale-2.1.2-Release-Notes.md @@ -1,4 +1,4 @@ -# MariaDB MaxScale 2.1.2 Release Notes +# MariaDB MaxScale 2.1.2 Release Notes -- 2017-04-03 Release 2.1.2 is a Beta release. 
diff --git a/Documentation/Release-Notes/MaxScale-2.1.3-Release-Notes.md b/Documentation/Release-Notes/MaxScale-2.1.3-Release-Notes.md index 31071b8d9..02abaab50 100644 --- a/Documentation/Release-Notes/MaxScale-2.1.3-Release-Notes.md +++ b/Documentation/Release-Notes/MaxScale-2.1.3-Release-Notes.md @@ -1,4 +1,4 @@ -# MariaDB MaxScale 2.1.3 Release Notes +# MariaDB MaxScale 2.1.3 Release Notes -- 2017-05-23 Release 2.1.3 is a GA release. diff --git a/Documentation/Release-Notes/MaxScale-2.2.0-Release-Notes.md b/Documentation/Release-Notes/MaxScale-2.2.0-Release-Notes.md index 781f7776f..3d4640569 100644 --- a/Documentation/Release-Notes/MaxScale-2.2.0-Release-Notes.md +++ b/Documentation/Release-Notes/MaxScale-2.2.0-Release-Notes.md @@ -23,6 +23,18 @@ This filter now uses the PCRE2-libarary to match queries. Previously, it used the POSIX-version of PCRE2. The filter also accepts multiple match-server pairs. Please see the NamedServerFilter documentation for details. +### Tee Filter + +The `tee` filter has been rewritten to better suit the way MaxScale now +functions. The filter requires that the service where the branched session is +created has at least one network listener. The users must also be able to +connect from the local MaxScale host. Usually this means that an extra grant for +the loopback address is required (e.g. `myuser@127.0.0.1`). + +In addition to the aforementioned requirements, a failure to create a branched +session no longer causes the actual client session to be closed. In most cases, +this is desired behavior. + ## Dropped Features ### MaxAdmin diff --git a/Documentation/check_links.sh b/Documentation/check_links.sh index 7b97efb64..5f949c782 100755 --- a/Documentation/check_links.sh +++ b/Documentation/check_links.sh @@ -5,7 +5,7 @@ # Use of this software is governed by the Business Source License included # in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
# -# Change Date: 2019-07-01 +# Change Date: 2020-01-01 # # On the date above, in accordance with the Business Source License, use # of this software will be governed by version 2 or later of the General diff --git a/LICENSE.TXT b/LICENSE.TXT index d781d6f9a..c2f9a61c7 100644 --- a/LICENSE.TXT +++ b/LICENSE.TXT @@ -4,13 +4,13 @@ License text copyright (c) 2017 MariaDB Corporation Ab, All Rights Reserved. Parameters Licensor: MariaDB Corporation Ab -Licensed Work: MariaDB MaxScale (TM) v.2.1. +Licensed Work: MariaDB MaxScale (TM) v.2.2. The Licensed Work is (c) 2017 MariaDB Corporation Ab Additional Use Grant: You may use the Licensed Work when your application uses the Licensed Work with a total of less than three server instances for any purpose. -Change Date: 2019-07-01 +Change Date: 2020-01-01 Change License: Version 2 or later of the GNU General Public License as published by the Free Software Foundation. diff --git a/avro/maxavro.c b/avro/maxavro.c index 8717ee19d..c013a3b4e 100644 --- a/avro/maxavro.c +++ b/avro/maxavro.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/avro/maxavro.h b/avro/maxavro.h index 00a9649b9..f4b103354 100644 --- a/avro/maxavro.h +++ b/avro/maxavro.h @@ -6,7 +6,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/avro/maxavro_datablock.c b/avro/maxavro_datablock.c index 1b21d2615..c4800d56d 100644 --- a/avro/maxavro_datablock.c +++ b/avro/maxavro_datablock.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/avro/maxavro_file.c b/avro/maxavro_file.c index 2a1a98213..df5292099 100644 --- a/avro/maxavro_file.c +++ b/avro/maxavro_file.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/avro/maxavro_internal.h b/avro/maxavro_internal.h index 8c4f4e1a5..476085be1 100644 --- a/avro/maxavro_internal.h +++ b/avro/maxavro_internal.h @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/avro/maxavro_record.c b/avro/maxavro_record.c index bc7bea243..0cccafc9e 100644 --- a/avro/maxavro_record.c +++ b/avro/maxavro_record.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/avro/maxavro_schema.c b/avro/maxavro_schema.c index 5bb6fa4a4..29215151f 100644 --- a/avro/maxavro_schema.c +++ b/avro/maxavro_schema.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/avro/maxavro_write.c b/avro/maxavro_write.c index 1ea2abc4a..1a3fb17fe 100644 --- a/avro/maxavro_write.c +++ b/avro/maxavro_write.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/avro/maxavrocheck.c b/avro/maxavrocheck.c index 191802515..5cc552ac9 100644 --- a/avro/maxavrocheck.c +++ b/avro/maxavrocheck.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/avro/test/test_values.c b/avro/test/test_values.c index 6798d7cbe..578821b4f 100644 --- a/avro/test/test_values.c +++ b/avro/test/test_values.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/client/maxadmin.c b/client/maxadmin.c index 3a27083c0..f6ad531b8 100644 --- a/client/maxadmin.c +++ b/client/maxadmin.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -36,6 +36,7 @@ #include #include #include +#include #include @@ -253,6 +254,11 @@ main(int argc, char **argv) if ((so = connectUsingInetSocket(hostname, port, user, passwd)) == -1) { + if (access(MAXADMIN_DEFAULT_SOCKET, R_OK) == 0) + { + fprintf(stderr, "Found default MaxAdmin socket in: %s\n", MAXADMIN_DEFAULT_SOCKET); + fprintf(stderr, "Try connecting with:\n\n\tmaxadmin -S %s\n\n", MAXADMIN_DEFAULT_SOCKET); + } exit(EXIT_FAILURE); } } @@ -597,7 +603,13 @@ authUnixSocket(int so) if (!authenticated) { - fprintf(stderr, "Could connect to MaxScale, but was not authorized.\n"); + uid_t id = geteuid(); + struct passwd* pw = getpwuid(id); + fprintf(stderr, "Could connect to MaxScale, but was not authorized.\n" + "Check that the current user is added to the list of allowed users.\n" + 
"To add this user to the list, execute:\n\n" + "\tsudo maxadmin enable account %s\n\n" + "This assumes that the root user account is enabled in MaxScale.\n", pw->pw_name); } return authenticated; diff --git a/examples/roundrobinrouter.cpp b/examples/roundrobinrouter.cpp index c7a1c5fbe..53dd34e69 100644 --- a/examples/roundrobinrouter.cpp +++ b/examples/roundrobinrouter.cpp @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -673,7 +673,8 @@ static MXS_ROUTER* createInstance(SERVICE* service, char** options) * the pointer. */ /* Register a custom command */ - if (!modulecmd_register_command("rrrouter", "test_command", custom_cmd_example, + if (!modulecmd_register_command("rrrouter", "test_command", + MODULECMD_TYPE_ACTIVE, custom_cmd_example, 2, custom_cmd_args)) { MXS_ERROR("Module command registration failed."); diff --git a/examples/testfilter.c b/examples/testfilter.c index 8b2a2ab3d..ce919831f 100644 --- a/examples/testfilter.c +++ b/examples/testfilter.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/examples/testprotocol.c b/examples/testprotocol.c index 2687bc0bf..396825cc7 100644 --- a/examples/testprotocol.c +++ b/examples/testprotocol.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/examples/testroute.c b/examples/testroute.c index 1cbdfa469..91254eaa4 100644 --- a/examples/testroute.c +++ b/examples/testroute.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/adminusers.h.in b/include/maxscale/adminusers.h.in index 9cc53f589..79a6157c4 100644 --- a/include/maxscale/adminusers.h.in +++ b/include/maxscale/adminusers.h.in @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/alloc.h b/include/maxscale/alloc.h index fb9ed2a55..a30a6c56f 100644 --- a/include/maxscale/alloc.h +++ b/include/maxscale/alloc.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/atomic.h b/include/maxscale/atomic.h index e571f2d11..509509103 100644 --- a/include/maxscale/atomic.h +++ b/include/maxscale/atomic.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/authenticator.h b/include/maxscale/authenticator.h index 4da3bada3..92ae1f09c 100644 --- a/include/maxscale/authenticator.h +++ b/include/maxscale/authenticator.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/buffer.h b/include/maxscale/buffer.h index 139d6e2a8..be21992a5 100644 --- a/include/maxscale/buffer.h +++ b/include/maxscale/buffer.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/buffer.hh b/include/maxscale/buffer.hh index 979f2d278..3f8d4effc 100644 --- a/include/maxscale/buffer.hh +++ b/include/maxscale/buffer.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/cdefs.h b/include/maxscale/cdefs.h index 21263b5dc..89819b78a 100644 --- a/include/maxscale/cdefs.h +++ b/include/maxscale/cdefs.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/config.h b/include/maxscale/config.h index 392513b1f..8d3973b6d 100644 --- a/include/maxscale/config.h +++ b/include/maxscale/config.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -24,6 +24,7 @@ #include #include +#include MXS_BEGIN_DECLS @@ -108,6 +109,7 @@ extern const char CN_LOG_THROTTLING[]; extern const char CN_MAXSCALE[]; extern const char CN_MAX_CONNECTIONS[]; extern const char CN_MAX_RETRY_INTERVAL[]; +extern const char CN_METHOD[]; extern const char CN_MODULE[]; extern const char CN_MODULES[]; extern const char CN_MODULE_COMMAND[]; @@ -351,12 +353,40 @@ struct server* config_get_server(const MXS_CONFIG_PARAMETER *params, const char int config_get_server_list(const MXS_CONFIG_PARAMETER *params, const char *key, struct server*** output); +/** + * Get a compiled regular expression. The returned @c pcre2_code should be freed + * by the caller. + * + * @param params List of configuration parameters + * @param key Parameter name + * @param options PCRE2 compilation options + * @return The compiled PCRE2 code, or NULL on error + */ +pcre2_code* config_get_compiled_regex(const MXS_CONFIG_PARAMETER *params, const char *key, + uint32_t options); + +/** + * Get a compiled regular expression and the capture count of the pattern. The + * @c pcre2_code should be freed by the caller. + * + * @param params List of configuration parameters + * @param key Parameter name + * @param options PCRE2 compilation options + * @param output_code Output for compilation result + * @param output_capcount Output for capture count + * @return True on success, false otherwise + */ +bool config_get_compiled_regex_capcount(const MXS_CONFIG_PARAMETER *params, + const char *key, uint32_t options, + pcre2_code** output_code, + uint32_t* output_capcount); + /** * Parse a list of server names and write the results in an array of strings * with one server name in each. The output array and its elements should be * deallocated by the caller. 
The server names are not checked to be actual * configured servers. - * + * * The output array may contain more elements than the the value returned, but these * extra elements are null and in the end of the array. If no server names were * parsed or if an error occurs, nothing is written to the output parameter. diff --git a/include/maxscale/cppdefs.hh b/include/maxscale/cppdefs.hh index 7f4056fe5..c53e0c907 100644 --- a/include/maxscale/cppdefs.hh +++ b/include/maxscale/cppdefs.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/dcb.h b/include/maxscale/dcb.h index 27cf49228..1487237c5 100644 --- a/include/maxscale/dcb.h +++ b/include/maxscale/dcb.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -274,7 +274,6 @@ DCB *dcb_alloc(dcb_role_t, struct servlistener *); void dcb_free(DCB *); void dcb_free_all_memory(DCB *dcb); DCB *dcb_connect(struct server *, struct session *, const char *); -DCB *dcb_clone(DCB *); int dcb_read(DCB *, GWBUF **, int); int dcb_drain_writeq(DCB *); void dcb_close(DCB *); @@ -363,11 +362,9 @@ int dcb_get_port(const DCB *dcb); /** * DCB flags values */ -#define DCBF_CLONE 0x0001 /*< DCB is a clone */ #define DCBF_HUNG 0x0002 /*< Hangup has been dispatched */ #define DCBF_REPLIED 0x0004 /*< DCB was written to */ -#define DCB_IS_CLONE(d) ((d)->flags & DCBF_CLONE) #define DCB_REPLIED(d) ((d)->flags & DCBF_REPLIED) MXS_END_DECLS diff --git a/include/maxscale/debug.h b/include/maxscale/debug.h index 9732e35f3..cdd45aedd 100644 --- a/include/maxscale/debug.h +++ b/include/maxscale/debug.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/filter.h b/include/maxscale/filter.h index 1952b1331..3aa6fdfde 100644 --- a/include/maxscale/filter.h +++ b/include/maxscale/filter.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/filter.hh b/include/maxscale/filter.hh index d66051b0a..278e39b0f 100644 --- a/include/maxscale/filter.hh +++ b/include/maxscale/filter.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -183,7 +183,7 @@ protected: * class MyFilter : public maxscale::Filter * { * public: - * static MyFilter* create(const char* zName, char** pzOptions, FILTER_PARAMETER** ppParams); + * static MyFilter* create(const char* zName, char** pzOptions, MXS_CONFIG_PARAMETER* ppParams); * * MyFilterSession* newSession(MXS_SESSION* pSession); * diff --git a/include/maxscale/hashtable.h b/include/maxscale/hashtable.h index 8ff4b2473..eeb594fd3 100644 --- a/include/maxscale/hashtable.h +++ b/include/maxscale/hashtable.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/hint.h b/include/maxscale/hint.h index 8a331020a..640944ba9 100644 --- a/include/maxscale/hint.h +++ b/include/maxscale/hint.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/hk_heartbeat.h b/include/maxscale/hk_heartbeat.h index da7176e83..580671a79 100644 --- a/include/maxscale/hk_heartbeat.h +++ b/include/maxscale/hk_heartbeat.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/housekeeper.h b/include/maxscale/housekeeper.h index bfd11be17..8dcbd9cc1 100644 --- a/include/maxscale/housekeeper.h +++ b/include/maxscale/housekeeper.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/http.hh b/include/maxscale/http.hh index 1632827be..152f85d0c 100644 --- a/include/maxscale/http.hh +++ b/include/maxscale/http.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/jansson.h b/include/maxscale/jansson.h index 64b0499b4..9b29acb30 100644 --- a/include/maxscale/jansson.h +++ b/include/maxscale/jansson.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/jansson.hh b/include/maxscale/jansson.hh index 6bab1e32a..ebc2d5313 100644 --- a/include/maxscale/jansson.hh +++ b/include/maxscale/jansson.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/json_api.h b/include/maxscale/json_api.h index 0b85b780c..b532885a0 100644 --- a/include/maxscale/json_api.h +++ b/include/maxscale/json_api.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/limits.h b/include/maxscale/limits.h index 71e5e15f9..c167bb044 100644 --- a/include/maxscale/limits.h +++ b/include/maxscale/limits.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/listener.h b/include/maxscale/listener.h index b7b56db6c..7b040117e 100644 --- a/include/maxscale/listener.h +++ b/include/maxscale/listener.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/log_manager.h b/include/maxscale/log_manager.h index fe39cc049..e926599a0 100644 --- a/include/maxscale/log_manager.h +++ b/include/maxscale/log_manager.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/maxadmin.h b/include/maxscale/maxadmin.h index bb619c219..2c07ef458 100644 --- a/include/maxscale/maxadmin.h +++ b/include/maxscale/maxadmin.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/maxscale.h b/include/maxscale/maxscale.h index 08902512f..90deb4707 100644 --- a/include/maxscale/maxscale.h +++ b/include/maxscale/maxscale.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/modinfo.h b/include/maxscale/modinfo.h index 0e7abf9cc..738fd1bf5 100644 --- a/include/maxscale/modinfo.h +++ b/include/maxscale/modinfo.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -84,7 +84,8 @@ enum mxs_module_param_type MXS_MODULE_PARAM_PATH, /**< Path to a file or a directory */ MXS_MODULE_PARAM_SERVICE, /**< Service name */ MXS_MODULE_PARAM_SERVER, /**< Server name */ - MXS_MODULE_PARAM_SERVERLIST /**< List of server names, separated by ',' */ + MXS_MODULE_PARAM_SERVERLIST, /**< List of server names, separated by ',' */ + MXS_MODULE_PARAM_REGEX /**< A regex string enclosed in '/' */ }; /** Maximum and minimum values for integer types */ @@ -246,6 +247,8 @@ static inline const char* mxs_module_param_type_to_string(enum mxs_module_param_ return "server"; case MXS_MODULE_PARAM_SERVERLIST: return "serverlist"; + case MXS_MODULE_PARAM_REGEX: + return "regular expression"; default: ss_dassert(!true); return "unknown"; diff --git a/include/maxscale/modulecmd.h b/include/maxscale/modulecmd.h index 42c023d52..bc7f69282 100644 --- a/include/maxscale/modulecmd.h +++ b/include/maxscale/modulecmd.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -65,6 +65,13 @@ typedef struct This should always be the first argument if the function requires an output DCB. */ +/** What type of an action does the command perform? 
*/ +enum modulecmd_type +{ + MODULECMD_TYPE_PASSIVE, /**< Command only displays data */ + MODULECMD_TYPE_ACTIVE /**< Command can modify data */ +}; + /** * Options for arguments, bits 9 through 32 */ @@ -123,6 +130,7 @@ typedef struct modulecmd { char *identifier; /**< Unique identifier */ char *domain; /**< Command domain */ + enum modulecmd_type type; /**< Command type, either active or passive */ MODULECMDFN func; /**< The registered function */ int arg_count_min; /**< Minimum number of arguments */ int arg_count_max; /**< Maximum number of arguments */ @@ -130,6 +138,9 @@ typedef struct modulecmd struct modulecmd *next; /**< Next command */ } MODULECMD; +/** Check if the module command can modify the data/state of the module */ +#define MODULECMD_MODIFIES_DATA(t) (t->type == MODULECMD_TYPE_ACTIVE) + /** * @brief Register a new command * @@ -143,7 +154,8 @@ typedef struct modulecmd * @return True if the module was successfully registered, false on error */ bool modulecmd_register_command(const char *domain, const char *identifier, - MODULECMDFN entry_point, int argc, modulecmd_arg_type_t *argv); + enum modulecmd_type type, MODULECMDFN entry_point, + int argc, modulecmd_arg_type_t *argv); /** * @brief Find a registered command @@ -196,6 +208,15 @@ void modulecmd_arg_free(MODULECMD_ARG *arg); */ bool modulecmd_arg_is_present(const MODULECMD_ARG *arg, int idx); +/** + * @brief Check if module command requires an output DCB + * + * @param cmd Command to check + * + * @return True if module requires a DCB for printing output + */ +bool modulecmd_requires_output_dcb(const MODULECMD* cmd); + /** * @brief Call a registered command * diff --git a/include/maxscale/modutil.h b/include/maxscale/modutil.h index da09495d7..ad6a9c17e 100644 --- a/include/maxscale/modutil.h +++ b/include/maxscale/modutil.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/monitor.h b/include/maxscale/monitor.h index 44e100fb4..06fb6bb80 100644 --- a/include/maxscale/monitor.h +++ b/include/maxscale/monitor.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/mysql_binlog.h b/include/maxscale/mysql_binlog.h index 6ff0ab6b3..2bb36c265 100644 --- a/include/maxscale/mysql_binlog.h +++ b/include/maxscale/mysql_binlog.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/mysql_utils.h b/include/maxscale/mysql_utils.h index bf89bd6b6..4eac68fba 100644 --- a/include/maxscale/mysql_utils.h +++ b/include/maxscale/mysql_utils.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/notification.h b/include/maxscale/notification.h index 509bc7943..051e80a44 100644 --- a/include/maxscale/notification.h +++ b/include/maxscale/notification.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/paths.h.in b/include/maxscale/paths.h.in index c30620a1e..7bf08bcd3 100644 --- a/include/maxscale/paths.h.in +++ b/include/maxscale/paths.h.in @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/pcre2.h b/include/maxscale/pcre2.h index e9f990185..0696a3f4c 100644 --- a/include/maxscale/pcre2.h +++ b/include/maxscale/pcre2.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/pcre2.hh b/include/maxscale/pcre2.hh index da191dd05..159e2578f 100644 --- a/include/maxscale/pcre2.hh +++ b/include/maxscale/pcre2.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/platform.h b/include/maxscale/platform.h index 1d488d565..3b6dc33c8 100644 --- a/include/maxscale/platform.h +++ b/include/maxscale/platform.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/poll.h b/include/maxscale/poll.h index 360dc657a..c3ec08cab 100644 --- a/include/maxscale/poll.h +++ b/include/maxscale/poll.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/poll_core.h b/include/maxscale/poll_core.h index c0eeb4828..e8df31b24 100644 --- a/include/maxscale/poll_core.h +++ b/include/maxscale/poll_core.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -98,11 +98,4 @@ bool poll_add_fd_to_worker(int wid, int fd, uint32_t events, MXS_POLL_DATA* data */ bool poll_remove_fd_from_worker(int wid, int fd); -/** - * Check whether there are cross-thread messages for current thread. - * - * @attention Only to be called by the system. - */ -void poll_check_message(void); - MXS_END_DECLS diff --git a/include/maxscale/poll_core.hh b/include/maxscale/poll_core.hh index 3f2a4d39b..b68ddcc03 100644 --- a/include/maxscale/poll_core.hh +++ b/include/maxscale/poll_core.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/protocol.h b/include/maxscale/protocol.h index 249501829..37e4a40cf 100644 --- a/include/maxscale/protocol.h +++ b/include/maxscale/protocol.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/protocol/mysql.h b/include/maxscale/protocol/mysql.h index 2dd77a44a..18760dc9b 100644 --- a/include/maxscale/protocol/mysql.h +++ b/include/maxscale/protocol/mysql.h @@ -5,36 +5,13 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General * Public License. */ -/* - * Revision History - * - * Date Who Description - * 01-06-2013 Mark Riddoch Initial implementation - * 14-06-2013 Massimiliano Pinto Added specific data - * for MySQL session - * 04-07-2013 Massimiliano Pinto Added new MySQL protocol status for asynchronous connection - * Added authentication reply status - * 12-07-2013 Massimiliano Pinto Added routines for change_user - * 14-02-2014 Massimiliano Pinto setipaddress returns int - * 25-02-2014 Massimiliano Pinto Added dcb parameter to gw_find_mysql_user_password_sha1() - * and repository to gw_check_mysql_scramble_data() - * It's now possible to specify a different users' table than - * dcb->service->users default - * 26-02-2014 Massimiliano Pinto Removed previously added parameters to gw_check_mysql_scramble_data() and - * gw_find_mysql_user_password_sha1() - * 28-02-2014 Massimiliano Pinto MYSQL_DATABASE_MAXLEN,MYSQL_USER_MAXLEN moved to dbusers.h - * 07-02-2016 Martin Brampton Extend MYSQL_session type; add MYSQL_AUTH_SUCCEEDED - * 17-05-2016 Martin Brampton Moved gw_find_mysql_user_password_sha1 to mysql_auth.c - * - */ - #include #include #include @@ -431,6 +408,30 @@ void init_response_status(GWBUF* buf, uint8_t cmd, int* npackets, size_t* 
nbytes bool read_complete_packet(DCB *dcb, GWBUF **readbuf); bool gw_get_shared_session_auth_info(DCB* dcb, MYSQL_session* session); +/** + * Decode server handshake + * + * @param conn The MySQLProtocol structure + * @param payload The handshake payload without the network header + * + * @return 0 on success, -1 on failure + * + */ +int gw_decode_mysql_server_handshake(MySQLProtocol *conn, uint8_t *payload); + +/** + * Create a response to the server handshake + * + * @param session Session object + * @param conn MySQL Protocol object for this connection + * @param with_ssl Whether to create an SSL response or a normal response packet + * @param ssl_established Set to true if the SSL response has been sent + * + * @return Generated response packet + */ +GWBUF* gw_generate_auth_response(MXS_SESSION* session, MySQLProtocol *conn, + bool with_ssl, bool ssl_established); + /** Read the backend server's handshake */ bool gw_read_backend_handshake(DCB *dcb, GWBUF *buffer); diff --git a/include/maxscale/query_classifier.h b/include/maxscale/query_classifier.h index d886208d5..f10ff0213 100644 --- a/include/maxscale/query_classifier.h +++ b/include/maxscale/query_classifier.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/queuemanager.h b/include/maxscale/queuemanager.h index d0b4f30a1..57f33c7f3 100644 --- a/include/maxscale/queuemanager.h +++ b/include/maxscale/queuemanager.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/random_jkiss.h b/include/maxscale/random_jkiss.h index 157864c0f..167e7ade3 100644 --- a/include/maxscale/random_jkiss.h +++ b/include/maxscale/random_jkiss.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/resultset.h b/include/maxscale/resultset.h index 69042a63e..2baec34a6 100644 --- a/include/maxscale/resultset.h +++ b/include/maxscale/resultset.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/router.h b/include/maxscale/router.h index cc6aa08cf..0f27b5a0a 100644 --- a/include/maxscale/router.h +++ b/include/maxscale/router.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/router.hh b/include/maxscale/router.hh index b3800ba50..13674d176 100644 --- a/include/maxscale/router.hh +++ b/include/maxscale/router.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/routing.h b/include/maxscale/routing.h index 0d8c43ec3..9a381e927 100644 --- a/include/maxscale/routing.h +++ b/include/maxscale/routing.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/secrets.h b/include/maxscale/secrets.h index ab30baeec..1e94b8825 100644 --- a/include/maxscale/secrets.h +++ b/include/maxscale/secrets.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/semaphore.h b/include/maxscale/semaphore.h index f6a62a481..e17f3caf4 100644 --- a/include/maxscale/semaphore.h +++ b/include/maxscale/semaphore.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/semaphore.hh b/include/maxscale/semaphore.hh index c24a7125a..91b00d119 100644 --- a/include/maxscale/semaphore.hh +++ b/include/maxscale/semaphore.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/server.h b/include/maxscale/server.h index a6f5fbb6b..1409213f0 100644 --- a/include/maxscale/server.h +++ b/include/maxscale/server.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/service.h b/include/maxscale/service.h index fbc01c090..b641ebe2d 100644 --- a/include/maxscale/service.h +++ b/include/maxscale/service.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/session.h b/include/maxscale/session.h index 240d5990e..58cc5c779 100644 --- a/include/maxscale/session.h +++ b/include/maxscale/session.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -144,7 +144,6 @@ typedef struct session MXS_DOWNSTREAM head; /*< Head of the filter chain */ MXS_UPSTREAM tail; /*< The tail of the filter chain */ int refcount; /*< Reference count on the session */ - bool ses_is_child; /*< this is a child session */ mxs_session_trx_state_t trx_state; /*< The current transaction state. */ bool autocommit; /*< Whether autocommit is on. */ struct diff --git a/include/maxscale/spinlock.h b/include/maxscale/spinlock.h index 15f1677cd..b38f276e7 100644 --- a/include/maxscale/spinlock.h +++ b/include/maxscale/spinlock.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/spinlock.hh b/include/maxscale/spinlock.hh index 8579a1911..4b0fa511d 100644 --- a/include/maxscale/spinlock.hh +++ b/include/maxscale/spinlock.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/sqlite3.h b/include/maxscale/sqlite3.h index e7a9bcfa5..0c2b23287 100644 --- a/include/maxscale/sqlite3.h +++ b/include/maxscale/sqlite3.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/ssl.h b/include/maxscale/ssl.h index bbc6cb70e..42816c213 100644 --- a/include/maxscale/ssl.h +++ b/include/maxscale/ssl.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/statistics.h b/include/maxscale/statistics.h index 8593c74a4..77c7ab976 100644 --- a/include/maxscale/statistics.h +++ b/include/maxscale/statistics.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/thread.h b/include/maxscale/thread.h index 0e6415390..e2addc302 100644 --- a/include/maxscale/thread.h +++ b/include/maxscale/thread.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/users.h b/include/maxscale/users.h index ab4accc59..c7d20a403 100644 --- a/include/maxscale/users.h +++ b/include/maxscale/users.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/utils.h b/include/maxscale/utils.h index 2c51e68a2..ea5d7ef46 100644 --- a/include/maxscale/utils.h +++ b/include/maxscale/utils.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/utils.hh b/include/maxscale/utils.hh index 631522dcf..9dafe95f7 100644 --- a/include/maxscale/utils.hh +++ b/include/maxscale/utils.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/include/maxscale/worker.h b/include/maxscale/worker.h index 484abd4f4..b3ccec58b 100644 --- a/include/maxscale/worker.h +++ b/include/maxscale/worker.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/maxscale-system-test/CMakeLists.txt b/maxscale-system-test/CMakeLists.txt index ea935739e..5c343efeb 100644 --- a/maxscale-system-test/CMakeLists.txt +++ b/maxscale-system-test/CMakeLists.txt @@ -121,7 +121,7 @@ add_test_executable(bug547.cpp bug547 replication LABELS readwritesplit REPL_BAC add_test_executable(bug681.cpp bug681 galera.bug681 LABELS readwritesplit GALERA_BACKEND) # Regression case for the bug "crash with tee filter" -add_test_executable(bug643.cpp bug643 bug643 LABELS tee REPL_BACKEND) +#add_test_executable(bug643.cpp bug643 bug643 LABELS tee REPL_BACKEND) # Regression case for the bug ""Different error messages from MariaDB and Maxscale" add_test_script(bug561.sh bug561.sh replication LABELS MySQLAuth REPL_BACKEND) @@ -166,13 +166,13 @@ add_test_executable(bug626.cpp bug626 replication LABELS MySQLAuth MySQLProtocol add_test_executable(bug634.cpp bug634 replication LABELS readwritesplit REPL_BACKEND) # Regression cases for several TEE filter hangs -add_test_executable(bug645.cpp bug645 bug645 LABELS tee REPL_BACKEND) -add_test_executable(bug645_1.cpp bug645_1 bug645_1 LABELS tee REPL_BACKEND) -add_test_executable(bug649.cpp bug649 bug645 LABELS tee) -add_test_executable(bug650.cpp bug650 bug650 LABELS tee REPL_BACKEND) +#add_test_executable(bug645.cpp bug645 bug645 LABELS tee REPL_BACKEND) +#add_test_executable(bug645_1.cpp bug645_1 bug645_1 LABELS tee REPL_BACKEND) +#add_test_executable(bug649.cpp bug649 bug645 LABELS tee) +#add_test_executable(bug650.cpp bug650 bug650 LABELS tee REPL_BACKEND) # Heavy test for TEE filter -add_test_script(bug648 sql_queries bug648 LABELS tee UNSTABLE HEAVY REPL_BACKEND) +#add_test_script(bug648 sql_queries bug648 LABELS tee UNSTABLE HEAVY REPL_BACKEND) # Crash when host name for some user in 
mysql.user is very long add_test_executable(bug653.cpp bug653 replication LABELS MySQLAuth MySQLProtocol REPL_BACKEND) @@ -181,7 +181,7 @@ add_test_executable(bug653.cpp bug653 replication LABELS MySQLAuth MySQLProtocol add_test_executable(bug654.cpp bug654 replication LABELS maxscale REPL_BACKEND) # Regression case for the bug "Tee filter: closing child session causes MaxScale to fail" -add_test_executable(bug657.cpp bug657 bug657 LABELS tee REPL_BACKEND) +#add_test_executable(bug657.cpp bug657 bug657 LABELS tee REPL_BACKEND) # Block backends (master or all slaves) and tries to connect Maxscale add_test_executable(bug658.cpp bug658 replication LABELS readwritesplit readconnroute maxscale REPL_BACKEND) @@ -193,7 +193,7 @@ add_test_executable(bug662.cpp bug662 replication LABELS readwritesplit readconn add_test_executable(bug664.cpp bug664 bug664 LABELS MySQLAuth MySQLProtocol) # TEE fileter: execute long sequence of queries ans session commands in the loop -add_test_executable(bug670.cpp bug670 bug670 LABELS tee REPL_BACKEND) +#add_test_executable(bug670.cpp bug670 bug670 LABELS tee REPL_BACKEND) # Regression case for the bug "MaxScale crashes if "Users table data" is empty and "show dbusers" is executed in maxadmin" add_test_executable(bug673.cpp bug673 bug673 LABELS MySQLAuth REPL_BACKEND) @@ -327,7 +327,7 @@ add_test_executable(mm.cpp mm mm LABELS mmmon BREAKS_REPL) add_test_executable(mm_mysqlmon.cpp mm_mysqlmon mm_mysqlmon LABELS mysqlmon REPL_BACKEND BREAKS_REPL) # MySQL Monitor crash safety -add_test_executable(mysqlmon_backup.cpp mysqlmon_backup mysqlmon_backup LABELS mysqlmon REPL_BACKEND) +#add_test_executable(mysqlmon_backup.cpp mysqlmon_backup mysqlmon_backup LABELS mysqlmon REPL_BACKEND) # Regression case for the bug "Two monitors loaded at the same time result into not working installation" add_test_executable(mxs118.cpp mxs118 mxs118 LABELS maxscale LIGHT REPL_BACKEND) @@ -366,7 +366,7 @@ add_test_executable(mxs431.cpp mxs431 sharding LABELS 
schemarouter REPL_BACKEND add_test_executable(mxs47.cpp mxs47 replication LABELS MySQLProtocol LIGHT REPL_BACKEND) # Regression case for the bug "USE hangs when Tee filter uses matching" -add_test_executable(mxs501_tee_usedb.cpp mxs501_tee_usedb mxs501 LABELS tee REPL_BACKEND) +#add_test_executable(mxs501_tee_usedb.cpp mxs501_tee_usedb mxs501 LABELS tee REPL_BACKEND) # Open connection, execute 'change user', close connection in the loop add_test_executable(mxs548_short_session_change_user.cpp mxs548_short_session_change_user mxs548 LABELS MySQLProtocol REPL_BACKEND) @@ -421,7 +421,6 @@ add_test_executable(mxs812_2.cpp mxs812_2 longblob LABELS readwritesplit REPL_BA add_test_executable(mxs822_maxpasswd.cpp mxs822_maxpasswd maxpasswd LABELS maxscale REPL_BACKEND) # Do only SELECTS during time > wait_timeout and then do INSERT -# This test will fail because the functionality hasn't been implemented add_test_executable(mxs827_write_timeout.cpp mxs827_write_timeout mxs827_write_timeout LABELS readwritesplit REPL_BACKEND) # Block and unblock first and second slaves and check that they are recovered @@ -536,8 +535,8 @@ add_test_executable(rwsplit_read_only_trx.cpp rwsplit_read_only_trx rwsplit_read # Test replication-manager with MaxScale add_test_executable(replication_manager.cpp replication_manager replication_manager LABELS maxscale REPL_BACKEND) -add_test_executable_notest(replication_manager_2nodes.cpp replication_manager_2nodes replication_manager_2nodes LABELS maxscale REPL_BACKEND) -add_test_executable_notest(replication_manager_3nodes.cpp replication_manager_3nodes replication_manager_3nodes LABELS maxscale REPL_BACKEND) +#add_test_executable_notest(replication_manager_2nodes.cpp replication_manager_2nodes replication_manager_2nodes LABELS maxscale REPL_BACKEND) +#add_test_executable_notest(replication_manager_3nodes.cpp replication_manager_3nodes replication_manager_3nodes LABELS maxscale REPL_BACKEND) # Schemarouter duplicate database detection test: create DB 
on all nodes and then try query againt schema router add_test_executable(schemarouter_duplicate_db.cpp schemarouter_duplicate_db schemarouter_duplicate_db LABELS schemarouter REPL_BACKEND) @@ -584,11 +583,15 @@ add_test_script(sql_queries_pers10 sql_queries sql_queries_pers10 LABELS maxscal # Execute queries of different size, check data is the same when accessing via Maxscale and directly to backend, client ssl is ON add_test_script(ssl sql_queries ssl LABELS maxscale readwritesplit REPL_BACKEND) + # Check load balancing, client ssl is ON -add_test_script(ssl_load load_balancing ssl_load LABELS maxscale readwritesplit REPL_BACKEND) + +# Disabled due to some strangeness in Connector-C 3.0 TLS connections which +# cause uneven distribution of connections. +#add_test_script(ssl_load load_balancing ssl_load LABELS maxscale readwritesplit REPL_BACKEND) # Check load balancing, client ssl is ON, Galera backend -add_test_script(ssl_load_galera load_balancing_galera ssl_load_galera LABELS maxscale readwritesplit GALERA_BACKEND) +#add_test_script(ssl_load_galera load_balancing_galera ssl_load_galera LABELS maxscale readwritesplit GALERA_BACKEND) # Testing slaves who have lost their master and how MaxScale works with them add_test_executable(stale_slaves.cpp stale_slaves replication LABELS mysqlmon REPL_BACKEND) @@ -606,7 +609,7 @@ add_test_executable(test_hints.cpp test_hints hints2 LABELS hintfilter LIGHT REP add_test_executable(avro.cpp avro avro LABELS avrorouter binlogrouter LIGHT BREAKS_REPL) # Test avrorouter file compression -add_test_script(avro_compression avro avro_compression LABELS avrorouter binlogrouter LIGHT BREAKS_REPL) +#add_test_script(avro_compression avro avro_compression LABELS avrorouter binlogrouter LIGHT BREAKS_REPL) # In the binlog router setup stop Master and promote one of the Slaves to be new Master add_test_executable(binlog_change_master.cpp binlog_change_master setup_binlog_tx_safe LABELS binlogrouter BREAKS_REPL) @@ -680,7 +683,7 @@ 
add_test_executable(kerberos_setup.cpp kerberos_setup kerberos LABELS HEAVY gssa #add_test_executable(bad_pers.cpp bad_pers bad_pers LABELS REPL_BACKEND) # Test Aurora RDS monitor -add_test_executable(auroramon.cpp auroramon auroramon LABELS HEAVY EXTERNAL_BACKEND) +#add_test_executable(auroramon.cpp auroramon auroramon LABELS HEAVY EXTERNAL_BACKEND) # Disabled for the time being # add_test_executable(gatekeeper.cpp gatekeeper gatekeeper LABELS gatekeeper) diff --git a/maxscale-system-test/avro.cpp b/maxscale-system-test/avro.cpp index 7dbd365b2..fa80f5019 100644 --- a/maxscale-system-test/avro.cpp +++ b/maxscale-system-test/avro.cpp @@ -25,45 +25,33 @@ using std::endl; int main(int argc, char *argv[]) { - TestConnections * Test = new TestConnections(argc, argv); - Test->set_timeout(600); - Test->stop_maxscale(); - Test->ssh_maxscale(true, (char *) "rm -rf /var/lib/maxscale/avro"); + TestConnections test(argc, argv); + test.set_timeout(600); + test.ssh_maxscale(true, (char *) "rm -rf /var/lib/maxscale/avro"); - Test->repl->connect(); - execute_query(Test->repl->nodes[0], "DROP TABLE IF EXISTS t1"); - Test->repl->close_connections(); - sleep(5); + /** Start master to binlogrouter replication */ + if (!test.replicate_from_master()) + { + return 1; + } + test.set_timeout(120); + test.repl->connect(); - Test->start_binlog(); + create_t1(test.repl->nodes[0]); + insert_into_t1(test.repl->nodes[0], 3); + execute_query(test.repl->nodes[0], "FLUSH LOGS"); - Test->set_timeout(120); - - Test->stop_maxscale(); - - Test->ssh_maxscale(true, "rm -rf /var/lib/maxscale/avro"); - - Test->set_timeout(120); - - Test->start_maxscale(); - - Test->set_timeout(60); - - Test->repl->connect(); - create_t1(Test->repl->nodes[0]); - insert_into_t1(Test->repl->nodes[0], 3); - execute_query(Test->repl->nodes[0], "FLUSH LOGS"); - - Test->repl->close_connections(); - - Test->set_timeout(120); + test.repl->close_connections(); + /** Give avrorouter some time to process the events */ + 
test.stop_timeout(); sleep(10); + test.set_timeout(120); - char * avro_check = Test->ssh_maxscale_output(true, - "maxavrocheck -vv /var/lib/maxscale/avro/test.t1.000001.avro | grep \"{\""); - char * output = Test->ssh_maxscale_output(true, "maxavrocheck -d /var/lib/maxscale/avro/test.t1.000001.avro"); + char * avro_check = test.ssh_maxscale_output(true, + "maxavrocheck -vv /var/lib/maxscale/avro/test.t1.000001.avro | grep \"{\""); + char * output = test.ssh_maxscale_output(true, "maxavrocheck -d /var/lib/maxscale/avro/test.t1.000001.avro"); std::istringstream iss; iss.str(output); @@ -74,13 +62,13 @@ int main(int argc, char *argv[]) for (std::string line; std::getline(iss, line);) { long long int x1, fl; - Test->set_timeout(20); + test.set_timeout(20); get_x_fl_from_json((char*)line.c_str(), &x1, &fl); if (x1 != x1_exp || fl != fl_exp) { - Test->add_result(1, "Output:x1 %lld, fl %lld, Expected: x1 %d, fl %d", - x1, fl, x1_exp, fl_exp); + test.add_result(1, "Output:x1 %lld, fl %lld, Expected: x1 %d, fl %d", + x1, fl, x1_exp, fl_exp); break; } @@ -89,19 +77,17 @@ int main(int argc, char *argv[]) x1_exp = 0; x = x * 16; fl_exp++; - Test->tprintf("fl = %d", fl_exp); + test.tprintf("fl = %d", fl_exp); } } if (fl_exp != 3) { - Test->add_result(1, "not enough lines in avrocheck output\n"); + test.add_result(1, "not enough lines in avrocheck output"); } - Test->set_timeout(120); + execute_query(test.repl->nodes[0], "DROP TABLE test.t1;RESET MASTER"); + test.repl->fix_replication(); - int rval = Test->global_result; - delete Test; - return rval; + return test.global_result; } - diff --git a/maxscale-system-test/backend_auth_fail.cpp b/maxscale-system-test/backend_auth_fail.cpp index adc0ed3b1..e1504821d 100644 --- a/maxscale-system-test/backend_auth_fail.cpp +++ b/maxscale-system-test/backend_auth_fail.cpp @@ -31,7 +31,10 @@ int main(int argc, char** argv) } } + // Wait for the connections to clean up Test->stop_timeout(); + sleep(5); + Test->check_maxscale_alive(); int 
rval = Test->global_result; delete Test; diff --git a/maxscale-system-test/binlog_change_master.cpp b/maxscale-system-test/binlog_change_master.cpp index ef59392a7..ae6ed077f 100644 --- a/maxscale-system-test/binlog_change_master.cpp +++ b/maxscale-system-test/binlog_change_master.cpp @@ -25,6 +25,7 @@ TestConnections * Test ; int exit_flag; int master = 0; int i_trans = 0; +const int trans_max = 100; int failed_transaction_num = 0; /** The amount of rows each transaction inserts */ @@ -294,7 +295,7 @@ void *transaction_thread( void *ptr ) Test->add_result(mysql_errno(conn), "Error connecting to Binlog router, error: %s\n", mysql_error(conn)); create_t1(conn); - while ((exit_flag == 0)) + while ((exit_flag == 0) && i_trans < trans_max) { Test->tprintf("Transaction %d\n", i_trans); trans_result = transaction(conn, i_trans); diff --git a/maxscale-system-test/bug547.cpp b/maxscale-system-test/bug547.cpp index 8649f14a2..001fa1cc4 100644 --- a/maxscale-system-test/bug547.cpp +++ b/maxscale-system-test/bug547.cpp @@ -43,12 +43,12 @@ int main(int argc, char *argv[]) Test->set_timeout(30); Test->tprintf("Trying some queries, expecting failure, but not a crash\n"); - execute_query(Test->conn_rwsplit, (char *) "DROP TABLE IF EXISTS t1"); - execute_query(Test->conn_rwsplit, (char *) "CREATE TABLE t1 (x INT)"); - execute_query(Test->conn_rwsplit, (char *) "INSERT INTO t1 (x) VALUES (1)"); - execute_query(Test->conn_rwsplit, (char *) "select * from t1"); - execute_query(Test->conn_master, (char *) "select * from t1"); - execute_query(Test->conn_slave, (char *) "select * from t1"); + execute_query(Test->conn_rwsplit, "DROP TABLE IF EXISTS t1"); + execute_query(Test->conn_rwsplit, "CREATE TABLE t1 (x INT)"); + execute_query(Test->conn_rwsplit, "INSERT INTO t1 (x) VALUES (1)"); + execute_query(Test->conn_rwsplit, "select * from t1"); + execute_query(Test->conn_master, "select * from t1"); + execute_query(Test->conn_slave, "select * from t1"); Test->set_timeout(10); 
Test->close_maxscale_connections(); @@ -58,12 +58,8 @@ int main(int argc, char *argv[]) Test->stop_timeout(); sleep(15); - Test->check_log_err((char *) "fatal signal 11", false); - Test->check_log_err((char *) "Failed to create new router session for service 'RW-Split-Router'", true); - Test->check_log_err((char *) - "Failed to create new router session for service 'Read-Connection-Router-Master'", true); - Test->check_log_err((char *) "Failed to create new router session for service 'Read-Connection-Router-Slave'", - true); + Test->check_log_err("fatal signal 11", false); + Test->check_log_err("Failed to create new router session for service", true); int rval = Test->global_result; delete Test; diff --git a/maxscale-system-test/bug643.cpp b/maxscale-system-test/bug643.cpp index 6a3974882..acccc97c3 100644 --- a/maxscale-system-test/bug643.cpp +++ b/maxscale-system-test/bug643.cpp @@ -22,16 +22,6 @@ service=RW Split Router * - check warnig in the log "RW Split Router: Recursive use of tee filter in service" */ -/* -Mark Riddoch 2014-12-11 11:59:19 UTC -There is a recursive use of the tee filter in the configuration. - -The "RW Split Router" uses the"duplicate" filter that will then duplicate all traffic to the original destination and another copy of the "RW Split Router", which again will duplicate all traffic to the original destination and another copy of the "RW Split Router"... - -Really this needs to be trapped as a configuration error. 
-*/ - - #include #include "testconnections.h" @@ -56,7 +46,7 @@ int main(int argc, char *argv[]) Test->add_result(1, "FAIL: Query to broken service succeeded!\n"); } Test->close_maxscale_connections(); - Test->check_log_err((char *) "RW-Split-Router: Recursive use of tee filter in service", true); + Test->check_log_err("Recursive use of tee filter in service", true); int rval = Test->global_result; delete Test; diff --git a/maxscale-system-test/bug645_1.cpp b/maxscale-system-test/bug645_1.cpp index 26d1a56bc..b5a1b6068 100644 --- a/maxscale-system-test/bug645_1.cpp +++ b/maxscale-system-test/bug645_1.cpp @@ -1,104 +1,17 @@ /** * @file bug643.cpp regression case for bugs 645 ("Tee filter with readwritesplit service hangs MaxScale") - * - setup RWSplit in the following way - * @verbatim -[RW_Router] -type=service -router=readconnroute -servers=server1 -user=skysql -passwd=skysql -version_string=5.1-OLD-Bored-Mysql -filters=DuplicaFilter - -[RW_Split] -type=service -router=readwritesplit -servers=server3,server2 -user=skysql -passwd=skysql - -[DuplicaFilter] -type=filter -module=tee -service=RW_Split - -[RW_Listener] -type=listener -service=RW_Router -protocol=MySQLClient -port=4006 - -[RW_Split_list] -type=listener -service=RW_Split -protocol=MySQLClient -port=4016 - -[Read Connection Router Slave] -type=service -router=readconnroute -router_options= slave -servers=server1,server2,server3,server4 -user=skysql -passwd=skysql -filters=QLA - -[Read Connection Router Master] -type=service -router=readconnroute -router_options=master -servers=server1,server2,server3,server4 -user=skysql -passwd=skysql -filters=QLA - -[Read Connection Listener Slave] -type=listener -service=Read Connection Router Slave -protocol=MySQLClient -port=4009 - -[Read Connection Listener Master] -type=listener -service=Read Connection Router Master -protocol=MySQLClient -port=4008 - - - @endverbatim - * - try to connect to all services except 4016 - * - try simple query - * - check ReadConn is ok 
- * - check log for presens of "Couldn't find suitable Master from 2 candidates" errors + * + * - Try to connect to all services except 4016 + * - Try simple query on all services + * - Check log for presence of "Couldn't find suitable Master from 2 candidates" errors */ - -#include #include "testconnections.h" int main(int argc, char *argv[]) { - TestConnections * Test = new TestConnections(argc, argv); - Test->set_timeout(10); - - Test->connect_maxscale(); - Test->tprintf("trying query to RWSplit, expecting failure\n"); - if (execute_query(Test->conn_rwsplit, (char *) "show processlist") == 0) - { - Test->add_result(1, "Query is ok, but failue is expected\n"); - } - Test->tprintf("Trying query to ReadConn router master\n"); - Test->try_query(Test->conn_master, (char *) "show processlist"); - Test->tprintf("Trying query to ReadConn router slave\n"); - Test->try_query(Test->conn_slave, (char *) "show processlist"); - - Test->close_maxscale_connections(); - - Test->check_log_err((char *) "Couldn't find suitable Master from 2 candidates", true); - Test->check_log_err((char *) "Creating client session for Tee filter failed. 
Terminating session.", true); - - int rval = Test->global_result; - delete Test; - return rval; + TestConnections test(argc, argv); + test.check_maxscale_alive(); + test.check_log_err( "Couldn't find suitable Master from 2 candidates", true); + return test.global_result; } diff --git a/maxscale-system-test/bug649.cpp b/maxscale-system-test/bug649.cpp index bfd9bca3f..a414a6ea7 100644 --- a/maxscale-system-test/bug649.cpp +++ b/maxscale-system-test/bug649.cpp @@ -124,6 +124,11 @@ int main(int argc, char *argv[]) Test->try_query(Test->conn_rwsplit, (char *) "show processlist;"); Test->close_rwsplit(); + /** Clean up */ + Test->repl->connect(); + execute_query(Test->repl->nodes[0], "DROP DATABASE test"); + execute_query(Test->repl->nodes[0], "CREATE DATABASE test"); + int rval = Test->global_result; delete Test; return rval; diff --git a/maxscale-system-test/bug650.cpp b/maxscale-system-test/bug650.cpp index 94262277b..d7cfce143 100644 --- a/maxscale-system-test/bug650.cpp +++ b/maxscale-system-test/bug650.cpp @@ -1,84 +1,16 @@ /** * @file bug650.cpp regression case for bug 650 ("Hints, RWSplit: MaxScale goes into infinite loop and crashes") and bug645 - * - setup RWSplit in the following way - * @verbatim -[RW_Router] -type=service -router=readconnroute -servers=server1 -user=skysql -passwd=skysql -version_string=5.1-OLD-Bored-Mysql -filters=DuplicaFilter - -[RW_Split] -type=service -router=readwritesplit -servers=server3,server2 -user=skysql -passwd=skysql - -[DuplicaFilter] -type=filter -module=tee -service=RW_Split - -[RW_Listener] -type=listener -service=RW_Router -protocol=MySQLClient -port=4006 - -[RW_Split_list] -type=listener -service=RW_Split -protocol=MySQLClient -port=4016 - - @endverbatim - * - try to connect - * - try simple query using ReadConn router (both, master and slave) - * - check errors in the log - @verbatim - Couldn't find suitable Master from 2 candidates - Failed to create RW_Split session. - Creating client session for Tee filter failed. 
Terminating session. - Failed to create filter 'DuplicaFilter' for service 'RW_Router' - Setting up filters failed. Terminating session RW_Router - @endverbatim + * - try simple query using all services + * - check for errors in the log */ - -#include #include "testconnections.h" int main(int argc, char *argv[]) { - TestConnections * Test = new TestConnections(argc, argv); - Test->set_timeout(20); - - Test->connect_maxscale(); - Test->tprintf("Trying query to ReadConn master\n"); - Test->try_query(Test->conn_master, (char *) "show processlist"); - Test->tprintf("Trying query to ReadConn slave\n"); - Test->try_query(Test->conn_slave, (char *) "show processlist"); - Test->tprintf("Trying query to RWSplit, expecting failure\n"); - if (execute_query(Test->conn_rwsplit, (char *) "show processlist") == 0) - { - Test->add_result(1, "Query is ok, but failure is expected\n"); - } - Test->close_maxscale_connections(); - - Test->tprintf("Checking logs\n"); - - Test->check_log_err((char *) "Couldn't find suitable Master from 2 candidates", true); - Test->check_log_err((char *) "Failed to create new router session for service 'RW_Split'", true); - Test->check_log_err((char *) "Creating client session for Tee filter failed. Terminating session.", true); - Test->check_log_err((char *) "Failed to create filter 'DuplicaFilter' for service 'RW_Router'", true); - Test->check_log_err((char *) "Setting up filters failed. 
Terminating session RW_Router", true); - - int rval = Test->global_result; - delete Test; - return rval; + TestConnections test(argc, argv); + test.check_maxscale_alive(); + test.check_log_err( "Couldn't find suitable Master from 2 candidates", true); + test.check_log_err( "Failed to create new router session for service 'RW_Split'", true); + return test.global_result; } - diff --git a/maxscale-system-test/bug657.cpp b/maxscale-system-test/bug657.cpp index e6c9ec72f..cf41028ad 100644 --- a/maxscale-system-test/bug657.cpp +++ b/maxscale-system-test/bug657.cpp @@ -40,42 +40,6 @@ service=RW Split Router * - Reconnect readconnrouter */ -/* -Vilho Raatikka 2014-12-22 08:35:52 UTC -How to reproduce: -1. Configure readconnrouter with tee filter and tee filter with a readwritesplit as a child service. -2. Start MaxScale -3. Connect readconnrouter -4. Fail the master node -5. Reconnect readconnrouter - -As a consequence, next routeQuery will be duplicated to closed readwritesplit router and eventually fred memory will be accessed which causes SEGFAULT. - -Reason for this is that situation where child (=branch -) session is closed as a consequence of node failure, is not handled in tee filter. Tee filter handles the case where client closes the session. -Comment 1 Vilho Raatikka 2014-12-22 09:14:13 UTC -Background: client session may be closed for different reasons. If client actively closes it by sending COM_QUIT packet, it happens from top to bottom: packet is identified and client DCB is closed. Client's DCB close routine also closes the client router session. - -If backend fails and monitor detects it, then every DCB that isn't running or isn't master, slave, joined (Galera) nor ndb calls its hangup function. If the failed node was master then client session gets state SESSION_STATE_STOPPING which triggers first closing the client DCB and as a part of it, the whole session. 
- -In tee filter, the first issue is the client DCB's close routine which doesn't trigger closing the session. The other issue is that if child session gets closed there's no mechanism that would prevent future queries being routed to tee's child service. As a consequence, future calls to routeQuery will access closed child session including freed memory etc. -Comment 2 Vilho Raatikka 2014-12-22 22:32:25 UTC -session.c:session_free:if session is child of another service (tee in this case), it is the parent which releases child's allocated memory back to the system. This now also includes the child router session. - dcb.h: Added DCB_IS_CLONE macro - tee.c:freeSession:if parent session triggered closing of tee, then child session may not be closed yet. In that case free the child session first and only then free child router session and release child session's memory back to system. - tee.c:routeQuery: only route if child session is ready for routing. Log if session is not ready for routing and set tee session inactive - mysql_client.c:gw_client_close:if DCB is cloned one don't close the protocol because they it is shared with the original DCB. -Comment 3 Vilho Raatikka 2014-12-23 10:04:11 UTC -If monitor haven't yet changed the status for failed backend, even the fixed won't notice the failure, and the client is left waiting for reply until some lower level timeout exceeds and closes the socket. - -The solution is to register a callback function to readconnrouter's backend DCB in the same way that it is done in readwritesplit. Callback needs to be implemented and tests added. -By using this mechanism the client must wait at most one monitor interval before the session is closed. - -Vilho Raatikka 2014-12-31 23:19:41 UTC -filter.c:filter_free:if filter parameter is NULL, return. 
- tee.c:freeSession: if my_session->dummy_filterdef is NULL, don't try to release the memory -*/ - - #include #include "testconnections.h" #include "sql_t1.h" diff --git a/maxscale-system-test/bug664.cpp b/maxscale-system-test/bug664.cpp index ef7c22801..f1937ca0d 100644 --- a/maxscale-system-test/bug664.cpp +++ b/maxscale-system-test/bug664.cpp @@ -1,316 +1,17 @@ /** - * @file bug664.cpp bug664 regression case ("Core: Access of freed memory in gw_send_authentication_to_backend") + * @file bug664.cpp Tee filter branch session failure test * - * - Maxscale.cnf contains: - * @verbatim -[RW_Router] -type=service -router=readconnroute -servers=server1 -user=maxuser -passwd=maxpwd -version_string=5.1-OLD-Bored-Mysql -filters=DuplicaFilter - -[RW_Split] -type=service -router=readwritesplit -servers=server3,server2 -user=maxuser -passwd=maxpwd - -[DuplicaFilter] -type=filter -module=tee -service=RW_Split - -[RW_Listener] -type=listener -service=RW_Router -protocol=MySQLClient -port=4006 - -[RW_Split_list] -type=listener -service=RW_Split -protocol=MySQLClient -port=4016 - -[Read Connection Router Slave] -type=service -router=readconnroute -router_options= slave -servers=server1,server2,server3,server4 -user=maxuser -passwd=maxpwd -filters=QLA - -[Read Connection Router Master] -type=service -router=readconnroute -router_options=master -servers=server1,server2,server3,server4 -user=maxuser -passwd=maxpwd -filters=QLA - -[Read Connection Listener Slave] -type=listener -service=Read Connection Router Slave -protocol=MySQLClient -port=4009 - -[Read Connection Listener Master] -type=listener -service=Read Connection Router Master -protocol=MySQLClient -port=4008 - - @endverbatim - * - warning is expected in the log, but not an error. All Maxscale services should be alive. 
- * - Check MaxScale is alive + * - Configure MaxScale so that the branched session will always fail + * - Execute query on the main service and check that MaxScale is alive + * - An error should be logged about the failed branch session */ -/* -Vilho Raatikka 2014-12-29 18:12:23 UTC -All these cases are due to accessing freed dcb->data (MYSQL_session *): - -==12419== Invalid read of size 1 -==12419== at 0x1B1434BA: gw_send_authentication_to_backend (mysql_common.c:544) -==12419== by 0x1B13F90E: gw_read_backend_event (mysql_backend.c:228) -==12419== by 0x588CA2: process_pollq (poll.c:858) -==12419== by 0x58854B: poll_waitevents (poll.c:608) -==12419== by 0x57C11B: main (gateway.c:1792) -==12419== Address 0x18690285 is 149 bytes inside a block of size 278 free'd -==12419== at 0x4C2AF6C: free (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) -==12419== by 0x57D806: dcb_final_free (dcb.c:406) -==12419== by 0x57DDE6: dcb_process_zombies (dcb.c:603) -==12419== by 0x588598: poll_waitevents (poll.c:613) -==12419== by 0x57C11B: main (gateway.c:1792) -==12419== -==12419== Invalid read of size 1 -==12419== at 0x1B1434D6: gw_send_authentication_to_backend (mysql_common.c:547) -==12419== by 0x1B13F90E: gw_read_backend_event (mysql_backend.c:228) -==12419== by 0x588CA2: process_pollq (poll.c:858) -==12419== by 0x58854B: poll_waitevents (poll.c:608) -==12419== by 0x57C11B: main (gateway.c:1792) -==12419== Address 0x186901f0 is 0 bytes inside a block of size 278 free'd -==12419== at 0x4C2AF6C: free (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) -==12419== by 0x57D806: dcb_final_free (dcb.c:406) -==12419== by 0x57DDE6: dcb_process_zombies (dcb.c:603) -==12419== by 0x588598: poll_waitevents (poll.c:613) -==12419== by 0x57C11B: main (gateway.c:1792) -==12419== -==12419== Invalid read of size 8 -==12419== at 0x1B1435FC: gw_send_authentication_to_backend (mysql_common.c:572) -==12419== by 0x1B13F90E: gw_read_backend_event (mysql_backend.c:228) -==12419== by 
0x588CA2: process_pollq (poll.c:858) -==12419== by 0x58854B: poll_waitevents (poll.c:608) -==12419== by 0x57C11B: main (gateway.c:1792) -==12419== Address 0x186901f0 is 0 bytes inside a block of size 278 free'd -==12419== at 0x4C2AF6C: free (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) -==12419== by 0x57D806: dcb_final_free (dcb.c:406) -==12419== by 0x57DDE6: dcb_process_zombies (dcb.c:603) -==12419== by 0x588598: poll_waitevents (poll.c:613) -==12419== by 0x57C11B: main (gateway.c:1792) -==12419== -==12419== Invalid read of size 8 -==12419== at 0x1B143606: gw_send_authentication_to_backend (mysql_common.c:572) -==12419== by 0x1B13F90E: gw_read_backend_event (mysql_backend.c:228) -==12419== by 0x588CA2: process_pollq (poll.c:858) -==12419== by 0x58854B: poll_waitevents (poll.c:608) -==12419== by 0x57C11B: main (gateway.c:1792) -==12419== Address 0x186901f8 is 8 bytes inside a block of size 278 free'd -==12419== at 0x4C2AF6C: free (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) -==12419== by 0x57D806: dcb_final_free (dcb.c:406) -==12419== by 0x57DDE6: dcb_process_zombies (dcb.c:603) -==12419== by 0x588598: poll_waitevents (poll.c:613) -==12419== by 0x57C11B: main (gateway.c:1792) -==12419== -==12419== Invalid read of size 4 -==12419== at 0x1B143611: gw_send_authentication_to_backend (mysql_common.c:572) -==12419== by 0x1B13F90E: gw_read_backend_event (mysql_backend.c:228) -==12419== by 0x588CA2: process_pollq (poll.c:858) -==12419== by 0x58854B: poll_waitevents (poll.c:608) -==12419== by 0x57C11B: main (gateway.c:1792) -==12419== Address 0x18690200 is 16 bytes inside a block of size 278 free'd -==12419== at 0x4C2AF6C: free (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) -==12419== by 0x57D806: dcb_final_free (dcb.c:406) -==12419== by 0x57DDE6: dcb_process_zombies (dcb.c:603) -==12419== by 0x588598: poll_waitevents (poll.c:613) -==12419== by 0x57C11B: main (gateway.c:1792) -==12419== -==12419== Invalid read of size 1 -==12419== at 
0x4C2CCA2: strlen (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) -==12419== by 0x1B143719: gw_send_authentication_to_backend (mysql_common.c:604) -==12419== by 0x1B13F90E: gw_read_backend_event (mysql_backend.c:228) -==12419== by 0x588CA2: process_pollq (poll.c:858) -==12419== by 0x58854B: poll_waitevents (poll.c:608) -==12419== by 0x57C11B: main (gateway.c:1792) -==12419== Address 0x18690204 is 20 bytes inside a block of size 278 free'd -==12419== at 0x4C2AF6C: free (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) -==12419== by 0x57D806: dcb_final_free (dcb.c:406) -==12419== by 0x57DDE6: dcb_process_zombies (dcb.c:603) -==12419== by 0x588598: poll_waitevents (poll.c:613) -==12419== by 0x57C11B: main (gateway.c:1792) -==12419== -==12419== Invalid read of size 1 -==12419== at 0x4C2CCB4: strlen (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) -==12419== by 0x1B143719: gw_send_authentication_to_backend (mysql_common.c:604) -==12419== by 0x1B13F90E: gw_read_backend_event (mysql_backend.c:228) -==12419== by 0x588CA2: process_pollq (poll.c:858) -==12419== by 0x58854B: poll_waitevents (poll.c:608) -==12419== by 0x57C11B: main (gateway.c:1792) -==12419== Address 0x18690205 is 21 bytes inside a block of size 278 free'd -==12419== at 0x4C2AF6C: free (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) -==12419== by 0x57D806: dcb_final_free (dcb.c:406) -==12419== by 0x57DDE6: dcb_process_zombies (dcb.c:603) -==12419== by 0x588598: poll_waitevents (poll.c:613) -==12419== by 0x57C11B: main (gateway.c:1792) -==12419== -==12419== Invalid read of size 1 -==12419== at 0x4C2CCA2: strlen (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) -==12419== by 0x1B143893: gw_send_authentication_to_backend (mysql_common.c:660) -==12419== by 0x1B13F90E: gw_read_backend_event (mysql_backend.c:228) -==12419== by 0x588CA2: process_pollq (poll.c:858) -==12419== by 0x58854B: poll_waitevents (poll.c:608) -==12419== by 0x57C11B: main (gateway.c:1792) 
-==12419== Address 0x18690204 is 20 bytes inside a block of size 278 free'd -==12419== at 0x4C2AF6C: free (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) -==12419== by 0x57D806: dcb_final_free (dcb.c:406) -==12419== by 0x57DDE6: dcb_process_zombies (dcb.c:603) -==12419== by 0x588598: poll_waitevents (poll.c:613) -==12419== by 0x57C11B: main (gateway.c:1792) -==12419== -==12419== Invalid read of size 1 -==12419== at 0x4C2CCB4: strlen (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) -==12419== by 0x1B143893: gw_send_authentication_to_backend (mysql_common.c:660) -==12419== by 0x1B13F90E: gw_read_backend_event (mysql_backend.c:228) -==12419== by 0x588CA2: process_pollq (poll.c:858) -==12419== by 0x58854B: poll_waitevents (poll.c:608) -==12419== by 0x57C11B: main (gateway.c:1792) -==12419== Address 0x18690205 is 21 bytes inside a block of size 278 free'd -==12419== at 0x4C2AF6C: free (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) -==12419== by 0x57D806: dcb_final_free (dcb.c:406) -==12419== by 0x57DDE6: dcb_process_zombies (dcb.c:603) -==12419== by 0x588598: poll_waitevents (poll.c:613) -==12419== by 0x57C11B: main (gateway.c:1792) -==12419== -==12419== Invalid read of size 1 -==12419== at 0x4C2DE21: memcpy@@GLIBC_2.14 (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) -==12419== by 0x1B1438AF: gw_send_authentication_to_backend (mysql_common.c:660) -==12419== by 0x1B13F90E: gw_read_backend_event (mysql_backend.c:228) -==12419== by 0x588CA2: process_pollq (poll.c:858) -==12419== by 0x58854B: poll_waitevents (poll.c:608) -==12419== by 0x57C11B: main (gateway.c:1792) -==12419== Address 0x1869020a is 26 bytes inside a block of size 278 free'd -==12419== at 0x4C2AF6C: free (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) -==12419== by 0x57D806: dcb_final_free (dcb.c:406) -==12419== by 0x57DDE6: dcb_process_zombies (dcb.c:603) -==12419== by 0x588598: poll_waitevents (poll.c:613) -==12419== by 0x57C11B: main (gateway.c:1792) 
-==12419== -==12419== Invalid read of size 2 -==12419== at 0x4C2DEA0: memcpy@@GLIBC_2.14 (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) -==12419== by 0x1B1438AF: gw_send_authentication_to_backend (mysql_common.c:660) -==12419== by 0x1B13F90E: gw_read_backend_event (mysql_backend.c:228) -==12419== by 0x588CA2: process_pollq (poll.c:858) -==12419== by 0x58854B: poll_waitevents (poll.c:608) -==12419== by 0x57C11B: main (gateway.c:1792) -==12419== Address 0x18690206 is 22 bytes inside a block of size 278 free'd -==12419== at 0x4C2AF6C: free (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) -==12419== by 0x57D806: dcb_final_free (dcb.c:406) -==12419== by 0x57DDE6: dcb_process_zombies (dcb.c:603) -==12419== by 0x588598: poll_waitevents (poll.c:613) -==12419== by 0x57C11B: main (gateway.c:1792) -==12419== -==12419== Invalid read of size 1 -==12419== at 0x4C2CCA2: strlen (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) -==12419== by 0x1B1438BE: gw_send_authentication_to_backend (mysql_common.c:661) -==12419== by 0x1B13F90E: gw_read_backend_event (mysql_backend.c:228) -==12419== by 0x588CA2: process_pollq (poll.c:858) -==12419== by 0x58854B: poll_waitevents (poll.c:608) -==12419== by 0x57C11B: main (gateway.c:1792) -==12419== Address 0x18690204 is 20 bytes inside a block of size 278 free'd -==12419== at 0x4C2AF6C: free (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) -==12419== by 0x57D806: dcb_final_free (dcb.c:406) -==12419== by 0x57DDE6: dcb_process_zombies (dcb.c:603) -==12419== by 0x588598: poll_waitevents (poll.c:613) -==12419== by 0x57C11B: main (gateway.c:1792) -==12419== -==12419== Invalid read of size 1 -==12419== at 0x4C2CCB4: strlen (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) -==12419== by 0x1B1438BE: gw_send_authentication_to_backend (mysql_common.c:661) -==12419== by 0x1B13F90E: gw_read_backend_event (mysql_backend.c:228) -==12419== by 0x588CA2: process_pollq (poll.c:858) -==12419== by 0x58854B: 
poll_waitevents (poll.c:608) -==12419== by 0x57C11B: main (gateway.c:1792) -==12419== Address 0x18690205 is 21 bytes inside a block of size 278 free'd -==12419== at 0x4C2AF6C: free (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) -==12419== by 0x57D806: dcb_final_free (dcb.c:406) -==12419== by 0x57DDE6: dcb_process_zombies (dcb.c:603) -==12419== by 0x588598: poll_waitevents (poll.c:613) -==12419== by 0x57C11B: main (gateway.c:1792) -Comment 1 Vilho Raatikka 2014-12-29 18:29:11 UTC - dcb_final_free:don't free dcb->data, it is either freed in session_alloc if session creation fails or in session_free only. - mysql_client.c:gw_mysql_do_authentication:if anything fails, and session_alloc won't be called, free dcb->data. - mysql_common.c:gw_send_authentication_to_backend:if session is already closing then return with error. -Comment 2 Markus Mäkelä 2014-12-30 08:58:59 UTC -Created attachment 170 [details] -failing configuration - -The attached configuration currently crashes into a debug assert in handleError in readconnroute.c when connecting to port 4006. If this is removed, the next point of failure is in dcb_final_free when the session->data object is freed. -Comment 3 Vilho Raatikka 2014-12-30 10:14:43 UTC -(In reply to comment #2) -> Created attachment 170 [details] -> failing configuration -> -> The attached configuration currently crashes into a debug assert in -> handleError in readconnroute.c when connecting to port 4006. If this is -> removed, the next point of failure is in dcb_final_free when the -> session->data object is freed. - -Should this be open or closed based on the information provided? -Comment 4 Markus Mäkelä 2014-12-30 10:17:55 UTC -My apologies, I thought I did reopen it. -Comment 5 Vilho Raatikka 2014-12-30 10:27:32 UTC -Fixed double freeing dcb->data if authentication phase fails. -Comment 6 Vilho Raatikka 2014-12-30 10:30:12 UTC -Reopen due to crash. Another double free somewhere. 
-Comment 7 Vilho Raatikka 2014-12-30 11:36:38 UTC -Cloned session was freeing the shared 'data' dcb->data/session->data. Now only session_free for the non-clone session is allowed to free the data. - -*/ - - - -#include -#include #include "testconnections.h" -using namespace std; - int main(int argc, char *argv[]) { - - TestConnections * Test = new TestConnections(argc, argv); - Test->set_timeout(20); - - Test->connect_maxscale(); - - Test->tprintf("Trying query to ReadConn master\n"); - fflush(stdout); - Test->try_query(Test->conn_master, "show processlist;"); - Test->tprintf("Trying query to ReadConn slave\n"); - Test->try_query(Test->conn_slave, "show processlist;"); - - Test->close_maxscale_connections(); - - Test->check_log_err((char *) "Creating client session for Tee filter failed. Terminating session.", true); - Test->check_log_err((char *) "Failed to create filter 'DuplicaFilter' for service 'RW_Router'", true); - - int rval = Test->global_result; - delete Test; - return rval; + TestConnections test(argc, argv); + test.check_maxscale_alive(); + test.check_log_err("Failed to create new router session for service 'RW_Split'", true); + return test.global_result; } - diff --git a/maxscale-system-test/bug673.cpp b/maxscale-system-test/bug673.cpp index 2977bb2aa..860281c78 100644 --- a/maxscale-system-test/bug673.cpp +++ b/maxscale-system-test/bug673.cpp @@ -1,9 +1,10 @@ /** * @file bug673.cpp regression case for bug673 ("MaxScale crashes if "Users table data" is empty and "show dbusers" is executed in maxadmin") * - * - configure wrong IP for all backends - * - execute maxadmin command show dbusers "RW Split Router" - * - check MaxScale is alive by executing maxadmin again + * - Configure wrong IP for all backends + * - Execute maxadmin command show dbusers "RW Split Router" + * - Check MaxScale is alive by executing maxadmin again + * - Check that only new style object names in maxadmin commands are accepted */ #include "testconnections.h" @@ -11,20 +12,17 @@ 
int main(int argc, char *argv[]) { - char result[1024]; - TestConnections * Test = new TestConnections(argc, argv); - - Test->set_timeout(20); + TestConnections test(argc, argv); + test.set_timeout(60); for (int i = 0; i < 2; i++) { - Test->tprintf("Trying show dbusers \"RW Split Router\"\n"); - Test->add_result(Test->get_maxadmin_param((char *) "show dbusers \"RW Split Router\"", (char *) "User names:", - result), "Maxadmin failed\n"); - Test->tprintf("result %s\n", result); + char result[1024]; + test.add_result(test.get_maxadmin_param("show dbusers \"RW Split Router\"", "User names:", result) == 0, + "Old style objects in maxadmin commands should fail"); + test.add_result(test.get_maxadmin_param("show dbusers RW-Split-Router", "User names:", result), + "New style objects in maxadmin commands should succeed"); } - int rval = Test->global_result; - delete Test; - return rval; + return test.global_result; } diff --git a/maxscale-system-test/bug681.cpp b/maxscale-system-test/bug681.cpp index 53350a3de..519ff660b 100644 --- a/maxscale-system-test/bug681.cpp +++ b/maxscale-system-test/bug681.cpp @@ -44,8 +44,7 @@ int main(int argc, char *argv[]) Test->close_maxscale_connections(); - Test->check_log_err((char *) - "Unable to start RW-Split-Router service. 
There are too few backend servers configured in", true); + Test->check_log_err("There are too few backend servers configured in", true); int rval = Test->global_result; delete Test; diff --git a/maxscale-system-test/cdc_connector.cpp b/maxscale-system-test/cdc_connector.cpp index 0800d698e..b811c2297 100644 --- a/maxscale-system-test/cdc_connector.cpp +++ b/maxscale-system-test/cdc_connector.cpp @@ -204,6 +204,14 @@ bool Connection::readRow(std::string& dest) else { dest += buf; + + if (dest[0] == 'E' && dest[1] == 'R' && dest[2] == 'R') + { + m_error = "Server responded with an error: "; + m_error += dest; + rval = false; + break; + } } } diff --git a/maxscale-system-test/cdc_datatypes/cdc_datatypes.cpp b/maxscale-system-test/cdc_datatypes/cdc_datatypes.cpp index 97b5191a2..f9a29f379 100644 --- a/maxscale-system-test/cdc_datatypes/cdc_datatypes.cpp +++ b/maxscale-system-test/cdc_datatypes/cdc_datatypes.cpp @@ -143,6 +143,7 @@ bool run_test(TestConnections& test) { bool rval = true; + test.tprintf("Inserting data"); for (int x = 0; test_set[x].types; x++) { for (int i = 0; test_set[x].types[i]; i++) @@ -152,6 +153,7 @@ bool run_test(TestConnections& test) } } + test.tprintf("Waiting for avrorouter to process data"); test.repl->connect(); execute_query(test.repl->nodes[0], "FLUSH LOGS"); test.repl->close_connections(); @@ -196,6 +198,7 @@ bool run_test(TestConnections& test) std::string err = conn.getError(); test.tprintf("Failed to request data: %s", err.c_str()); rval = false; + break; } test.stop_timeout(); } @@ -209,8 +212,7 @@ int main(int argc, char *argv[]) TestConnections::check_nodes(false); TestConnections test(argc, argv); - test.start_binlog(); - test.restart_maxscale(); + test.replicate_from_master(); if (!run_test(test)) { diff --git a/maxscale-system-test/cdc_datatypes/cdc_result.h b/maxscale-system-test/cdc_datatypes/cdc_result.h index 3ef96982a..2475230d6 100644 --- a/maxscale-system-test/cdc_datatypes/cdc_result.h +++
b/maxscale-system-test/cdc_datatypes/cdc_result.h @@ -38,7 +38,9 @@ public: bool operator ==(const TestOutput& output) const { - return m_value == output.getValue(); + return m_value == output.getValue() || + (m_type.find("BLOB") != std::string::npos && + output.getValue().length() == 0); } bool operator !=(const TestOutput& output) const diff --git a/maxscale-system-test/cnf/maxscale.cnf.template.bug657 b/maxscale-system-test/cnf/maxscale.cnf.template.bug657 new file mode 100755 index 000000000..8a58dd91b --- /dev/null +++ b/maxscale-system-test/cnf/maxscale.cnf.template.bug657 @@ -0,0 +1,97 @@ +[maxscale] +threads=###threads### +log_warning=1 + +[MySQL Monitor] +type=monitor +module=mysqlmon +###repl51### +servers= server1, server2,server3 ,server4 +user=maxskysql +passwd= skysql +monitor_interval=1000 + +[RW Split Router] +type=service +router= readwritesplit +servers=server1, server2, server3,server4 +user=maxskysql +passwd=skysql + +[Read Connection Router Slave] +type=service +router=readconnroute +router_options= slave +servers=server1,server2,server3,server4 +user=maxskysql +passwd=skysql +filters=TEE + +[Read Connection Router Master] +type=service +router=readconnroute +router_options=master +servers=server1,server2,server3,server4 +user=maxskysql +passwd=skysql +filters=TEE + +[TEE] +type=filter +module=tee +service=RW Split Router + +[RW Split Listener] +type=listener +service=RW Split Router +protocol=MySQLClient +port=4006 +#socket=/tmp/rwsplit.sock + +[Read Connection Listener Slave] +type=listener +service=Read Connection Router Slave +protocol=MySQLClient +port=4009 + +[Read Connection Listener Master] +type=listener +service=Read Connection Router Master +protocol=MySQLClient +port=4008 + +[CLI] +type=service +router=cli + +[CLI Listener] +type=listener +service=CLI +protocol=maxscaled + +socket=default + +[server1] +type=server +address=###node_server_IP_1### +port=###node_server_port_1### +protocol=MySQLBackend + +[server2] +type=server 
+address=###node_server_IP_2### +port=###node_server_port_2### +protocol=MySQLBackend + +[server3] +type=server +address=###node_server_IP_3### +port=###node_server_port_3### +protocol=MySQLBackend + +[server4] +type=server +address=###node_server_IP_4### +port=###node_server_port_4### +protocol=MySQLBackend + diff --git a/maxscale-system-test/cnf/maxscale.cnf.template.galera b/maxscale-system-test/cnf/maxscale.cnf.template.galera index cddd792c5..d8654e688 100755 --- a/maxscale-system-test/cnf/maxscale.cnf.template.galera +++ b/maxscale-system-test/cnf/maxscale.cnf.template.galera @@ -1,6 +1,5 @@ [maxscale] threads=###threads### -log_warning=1 [Galera Monitor] type=monitor @@ -17,11 +16,13 @@ router=readwritesplit servers=server1,server2,server3,server4 user=maxskysql passwd=skysql +router_options=slave_selection_criteria=LEAST_GLOBAL_CONNECTIONS +max_slave_connections=1 [Read Connection Router Slave] type=service router=readconnroute -router_options= slave +router_options=slave servers=server1,server2,server3,server4 user=maxskysql passwd=skysql @@ -39,7 +40,6 @@ type=listener service=RW Split Router protocol=MySQLClient port=4006 -#socket=/tmp/rwsplit.sock [Read Connection Listener Slave] type=listener @@ -53,6 +53,16 @@ service=Read Connection Router Master protocol=MySQLClient port=4008 +[CLI] +type=service +router=cli + +[CLI Listener] +type=listener +service=CLI +protocol=maxscaled +socket=default + [server1] type=server address=###galera_server_IP_1### @@ -76,14 +86,3 @@ type=server address=###galera_server_IP_4### port=###galera_server_port_4### protocol=MySQLBackend - -[CLI] -type=service -router=cli - -[CLI Listener] -type=listener -service=CLI -protocol=maxscaled -#address=localhost -socket=default diff --git a/maxscale-system-test/cnf/maxscale.cnf.template.galera.weight b/maxscale-system-test/cnf/maxscale.cnf.template.galera.weight index 6e782421d..773433c4d 100755 --- a/maxscale-system-test/cnf/maxscale.cnf.template.galera.weight +++ 
b/maxscale-system-test/cnf/maxscale.cnf.template.galera.weight @@ -44,6 +44,16 @@ protocol=MySQLClient port=4008 #socket=/tmp/readconn.sock +[CLI] +type=service +router=cli + +[CLI Listener] +type=listener +service=CLI +protocol=maxscaled +socket=default + [server1] type=server address=###galera_server_IP_1### diff --git a/maxscale-system-test/cnf/maxscale.cnf.template.regexfilter1 b/maxscale-system-test/cnf/maxscale.cnf.template.regexfilter1 index d04e388c3..0cdbf351f 100644 --- a/maxscale-system-test/cnf/maxscale.cnf.template.regexfilter1 +++ b/maxscale-system-test/cnf/maxscale.cnf.template.regexfilter1 @@ -101,3 +101,13 @@ type=server address=###node_server_IP_4### port=###node_server_port_4### protocol=MySQLBackend + +[CLI] +type=service +router=cli + +[CLI Listener] +type=listener +service=CLI +protocol=maxscaled +socket=default diff --git a/maxscale-system-test/cnf/maxscale.cnf.template.replication b/maxscale-system-test/cnf/maxscale.cnf.template.replication index bd3d9c0a2..1d96d19e3 100755 --- a/maxscale-system-test/cnf/maxscale.cnf.template.replication +++ b/maxscale-system-test/cnf/maxscale.cnf.template.replication @@ -87,4 +87,3 @@ type=server address=###node_server_IP_4### port=###node_server_port_4### protocol=MySQLBackend - diff --git a/maxscale-system-test/cnf/maxscale.cnf.template.replication.one_slave b/maxscale-system-test/cnf/maxscale.cnf.template.replication.one_slave index e84463849..8b8adc4a3 100755 --- a/maxscale-system-test/cnf/maxscale.cnf.template.replication.one_slave +++ b/maxscale-system-test/cnf/maxscale.cnf.template.replication.one_slave @@ -17,6 +17,7 @@ max_slave_connections=1 servers=server1,server2,server3,server4 user=maxskysql passwd=skysql +router_options=disable_sescmd_history=false [Read Connection Router Slave] type=service @@ -53,6 +54,16 @@ service=Read Connection Router Master protocol=MySQLClient port=4008 +[CLI] +type=service +router=cli + +[CLI Listener] +type=listener +service=CLI +protocol=maxscaled +socket=default + 
[server1] type=server address=###node_server_IP_1### diff --git a/maxscale-system-test/copy_logs.sh b/maxscale-system-test/copy_logs.sh index 198157bb4..c573d3c98 100755 --- a/maxscale-system-test/copy_logs.sh +++ b/maxscale-system-test/copy_logs.sh @@ -2,7 +2,6 @@ # $1 - test name # $2 - time mark (in case of periodic logs copying) -set -x if [ -z $1 ]; then echo "Test name missing" diff --git a/maxscale-system-test/fwf_reload.cpp b/maxscale-system-test/fwf_reload.cpp index a820caf59..ebec2dfa8 100644 --- a/maxscale-system-test/fwf_reload.cpp +++ b/maxscale-system-test/fwf_reload.cpp @@ -36,7 +36,7 @@ int main(int argc, char *argv[]) sprintf(str, "rules%d", i); Test->set_timeout(180); copy_rules(Test, str, rules_dir); - Test->ssh_maxscale(true, "maxadmin call command dbfwfilter rules/reload Database-Firewall"); + Test->ssh_maxscale(true, "maxadmin call command dbfwfilter rules/reload \"Database Firewall\""); int local_result = 0; sprintf(pass_file, "%s/fw/pass%d", test_dir, i); @@ -100,7 +100,7 @@ int main(int argc, char *argv[]) copy_rules(Test, (char *) "rules_syntax_error", rules_dir); char *output = Test->ssh_maxscale_output(true, - "maxadmin call command dbfwfilter rules/reload \"Database Firewall\""); + "maxadmin call command dbfwfilter rules/reload Database-Firewall"); Test->add_result(strcasestr(output, "Failed") == NULL, "Reloading rules should fail with syntax errors"); Test->check_maxscale_processes(1); diff --git a/maxscale-system-test/mariadb_func.cpp b/maxscale-system-test/mariadb_func.cpp index 8a83fb711..0a1ef7882 100644 --- a/maxscale-system-test/mariadb_func.cpp +++ b/maxscale-system-test/mariadb_func.cpp @@ -461,14 +461,14 @@ int get_conn_num(MYSQL *conn, char * ip, char *hostname, char * db) MYSQL_RES *res; MYSQL_ROW row; unsigned long long int num_fields; - //unsigned long long int row_i=0; unsigned long long int rows; unsigned long long int i; unsigned int conn_num = 0; - char * hostname_internal; + const char * hostname_internal; + if 
(strcmp(ip, "127.0.0.1") == 0) { - hostname_internal = (char*) "localhost"; + hostname_internal = "localhost"; } else { @@ -516,9 +516,9 @@ int get_conn_num(MYSQL *conn, char * ip, char *hostname, char * db) } if (strcmp(ip, "127.0.0.1") == 0) { - // one extra connection i svisible in the processlist + // one extra connection is visible in the process list // output in case of local test - // (when maxscale is on the same machine as backends) + // (when MaxScale is on the same machine as backends) conn_num--; } return conn_num; diff --git a/maxscale-system-test/maxscale/java/CMakeLists.txt b/maxscale-system-test/maxscale/java/CMakeLists.txt index e97461f6d..704823c13 100644 --- a/maxscale-system-test/maxscale/java/CMakeLists.txt +++ b/maxscale-system-test/maxscale/java/CMakeLists.txt @@ -18,13 +18,22 @@ function(add_java_test name src entry_point template) endfunction() # Some constants that make changing the connector easier -set(JDBC_JAR ${CMAKE_CURRENT_SOURCE_DIR}/mariadb-java-client-1.5.4.jar CACHE INTERNAL "") +set(JDBC_JAR_NAME "mariadb-java-client-1.5.9.jar") +set(JDBC_JAR ${CMAKE_CURRENT_BINARY_DIR}/${JDBC_JAR_NAME} CACHE INTERNAL "") set(MXS_JAR ${CMAKE_CURRENT_BINARY_DIR}/maxscale_java.jar CACHE INTERNAL "") set(TEST_JARPATH "${MXS_JAR}:${JDBC_JAR}" CACHE INTERNAL "") +# If we don't have the JDBC driver, download it +if(NOT EXISTS ${JDBC_JAR}) + message(STATUS "Downloading MariaDB Connector-J: ${JDBC_JAR_NAME}") + file(DOWNLOAD https://downloads.mariadb.com/Connectors/java/connector-java-1.5.9/mariadb-java-client-1.5.9.jar + ${CMAKE_CURRENT_BINARY_DIR}/${JDBC_JAR_NAME} + SHOW_PROGRESS) +endif() + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/MaxScaleConfiguration.java.in ${CMAKE_CURRENT_BINARY_DIR}/MaxScaleConfiguration.java @ONLY) add_jar(maxscale_java SOURCES MaxScaleConnection.java MaxScaleConfiguration.java - INCLUDE_JARS mariadb-java-client-1.5.4.jar) + INCLUDE_JARS ${JDBC_JAR_NAME}) add_subdirectory(test1) add_subdirectory(prep_stmt) 
add_subdirectory(batch) diff --git a/maxscale-system-test/maxscale/java/mariadb-java-client-1.5.4.jar b/maxscale-system-test/maxscale/java/mariadb-java-client-1.5.4.jar deleted file mode 100644 index 5f138e467..000000000 Binary files a/maxscale-system-test/maxscale/java/mariadb-java-client-1.5.4.jar and /dev/null differ diff --git a/maxscale-system-test/mm_mysqlmon.cpp b/maxscale-system-test/mm_mysqlmon.cpp index e5e251808..0c2677c31 100644 --- a/maxscale-system-test/mm_mysqlmon.cpp +++ b/maxscale-system-test/mm_mysqlmon.cpp @@ -42,7 +42,7 @@ void check_status(TestConnections *Test, const char *server, const char *status) void check_group(TestConnections *Test, const char *server, const char *group) { - char *output = Test->ssh_maxscale_output(true, "maxadmin show monitor MySQL-Monitor"); + char *output = Test->ssh_maxscale_output(true, "maxadmin show monitor \"MySQL Monitor\""); if (output == NULL) { diff --git a/maxscale-system-test/mxs722.cpp b/maxscale-system-test/mxs722.cpp index 9a94ea9c7..332f4d35c 100644 --- a/maxscale-system-test/mxs722.cpp +++ b/maxscale-system-test/mxs722.cpp @@ -34,9 +34,10 @@ int main(int argc, char *argv[]) test->ssh_maxscale(true, "cp /etc/maxscale.cnf.backup /etc/maxscale.cnf"); /** Set router_options to a bad value */ - test->ssh_maxscale(true, "sed -i -e 's/router_options.*/router_options=bad_option=true/' /etc/maxscale.cnf"); - test->add_result(baseline == test->ssh_maxscale(true, "maxscale -c --user=maxscale"), - "Bad router_options should be detected.\n"); + // Disabled for 2.0 + //test->ssh_maxscale(true, "sed -i -e 's/router_options.*/router_options=bad_option=true/' /etc/maxscale.cnf"); + //test->add_result(baseline == test->ssh_maxscale(true, "maxscale -c --user=maxscale"), + // "Bad router_options should be detected.\n"); test->ssh_maxscale(true, "cp /etc/maxscale.cnf.backup /etc/maxscale.cnf"); diff --git a/maxscale-system-test/mxs822_maxpasswd.cpp b/maxscale-system-test/mxs822_maxpasswd.cpp index 0d3d1c9ef..483fab4b7 
100644 --- a/maxscale-system-test/mxs822_maxpasswd.cpp +++ b/maxscale-system-test/mxs822_maxpasswd.cpp @@ -31,7 +31,7 @@ void try_password(TestConnections* Test, char * pass) */ Test->tprintf("Encrypting password: %s", pass); Test->set_timeout(30); - int rc = Test->ssh_maxscale(true, "maxpasswd '%s' | tr -dc '[:xdigit:]' > /tmp/pw.txt && " + int rc = Test->ssh_maxscale(true, "maxpasswd /var/lib/maxscale/ '%s' | tr -dc '[:xdigit:]' > /tmp/pw.txt && " "sed -i 's/user=.*/user=test/' /etc/maxscale.cnf && " "sed -i \"s/passwd=.*/passwd=$(cat /tmp/pw.txt)/\" /etc/maxscale.cnf && " "service maxscale restart && " diff --git a/maxscale-system-test/mxs874_slave_recovery.cpp b/maxscale-system-test/mxs874_slave_recovery.cpp index f3ebe8856..0e6d60805 100644 --- a/maxscale-system-test/mxs874_slave_recovery.cpp +++ b/maxscale-system-test/mxs874_slave_recovery.cpp @@ -18,37 +18,37 @@ using namespace std; int main(int argc, char *argv[]) { - TestConnections * Test = new TestConnections(argc, argv); - Test->set_timeout(10); + TestConnections test(argc, argv); + test.set_timeout(10); - Test->connect_maxscale(); + test.connect_maxscale(); - Test->set_timeout(10); - Test->try_query(Test->conn_rwsplit, (char *) "SET @a=1"); - Test->stop_timeout(); + test.set_timeout(10); + test.try_query(test.conn_rwsplit, (char *) "SET @a=1"); + test.stop_timeout(); sleep(1); - Test->set_timeout(20); - Test->tprintf("Blocking first slave\n"); - Test->repl->block_node(1); - Test->stop_timeout(); + test.set_timeout(20); + test.tprintf("Blocking first slave\n"); + test.repl->block_node(1); + test.stop_timeout(); sleep(5); - Test->set_timeout(10); - Test->tprintf("Unblocking first slave and blocking second slave\n"); + test.set_timeout(10); + test.tprintf("Unblocking first slave and blocking second slave\n"); - Test->repl->unblock_node(1); - Test->stop_timeout(); + test.repl->unblock_node(1); + test.stop_timeout(); sleep(5); - Test->repl->block_node(2); - Test->stop_timeout(); + test.repl->block_node(2); 
+ test.stop_timeout(); sleep(5); - Test->set_timeout(20); + test.set_timeout(20); int retries; for (retries = 0; retries < 10; retries++) { char server1_status[256]; - Test->get_maxadmin_param((char *) "show server server2", (char *) "Status", server1_status); + test.get_maxadmin_param((char *) "show server server2", (char *) "Status", server1_status); if (strstr(server1_status, "Running")) { break; @@ -56,28 +56,26 @@ int main(int argc, char *argv[]) sleep(1); } - Test->add_result(retries == 10, "Slave is not recovered, slave status is not Running\n"); + test.add_result(retries == 10, "Slave is not recovered, slave status is not Running\n"); - Test->repl->connect(); - int real_id = Test->repl->get_server_id(1); + test.repl->connect(); + int real_id = test.repl->get_server_id(1); char server_id[200] = ""; - find_field(Test->conn_rwsplit, "SELECT @@server_id", "@@server_id", server_id); + find_field(test.conn_rwsplit, "SELECT @@server_id", "@@server_id", server_id); int queried_id = atoi(server_id); - Test->add_result(queried_id != real_id, "The query server ID '%d' does not match the one from server '%d'. " + test.add_result(queried_id != real_id, "The query server ID '%d' does not match the one from server '%d'. 
" "Slave was not recovered.", queried_id, real_id); char userval[200] = ""; - find_field(Test->conn_rwsplit, "SELECT @a", "@a", userval); + find_field(test.conn_rwsplit, "SELECT @a", "@a", userval); - Test->add_result(atoi(userval) != 1, "User variable @a is not 1, it is '%s'", userval); + test.add_result(atoi(userval) != 1, "User variable @a is not 1, it is '%s'", userval); - Test->tprintf("Unblocking second slave\n"); - Test->repl->unblock_node(2); + test.tprintf("Unblocking second slave\n"); + test.repl->unblock_node(2); - Test->check_maxscale_alive(); - int rval = Test->global_result; - delete Test; - return rval; + test.check_maxscale_alive(); + return test.global_result; } diff --git a/maxscale-system-test/rw_select_insert.cpp b/maxscale-system-test/rw_select_insert.cpp index fd31594a6..11de0eb1c 100644 --- a/maxscale-system-test/rw_select_insert.cpp +++ b/maxscale-system-test/rw_select_insert.cpp @@ -132,7 +132,7 @@ int main(int argc, char *argv[]) Test->tprintf("Connecting to RWSplit %s\n", Test->maxscale_IP); Test->connect_rwsplit(); - Test->execute_maxadmin_command((char *) "shutdown monitor MySQL-Monitor"); + Test->execute_maxadmin_command((char *) "shutdown monitor \"MySQL Monitor\""); get_global_status_allnodes(&selects[0], &inserts[0], Test->repl, silent); diff --git a/maxscale-system-test/ses_bigmem.cpp b/maxscale-system-test/ses_bigmem.cpp index 85fd69316..560a42196 100644 --- a/maxscale-system-test/ses_bigmem.cpp +++ b/maxscale-system-test/ses_bigmem.cpp @@ -30,7 +30,7 @@ int main(int argc, char *argv[]) Test->try_query(Test->routers[j], (char*) "select 1;"); Test->try_query(Test->routers[j], (char*) "set autocommit=1;"); Test->try_query(Test->routers[j], (char*) "select 2;"); - if ((i / 100) * 100 == i) + if ((i / 1000) * 1000 == i) { Test->tprintf("i=%d\n", i); } diff --git a/maxscale-system-test/slave_failover.cpp b/maxscale-system-test/slave_failover.cpp index ecad48b09..901bc7c05 100644 --- a/maxscale-system-test/slave_failover.cpp +++ 
b/maxscale-system-test/slave_failover.cpp @@ -16,59 +16,37 @@ int main(int argc, char *argv[]) { - TestConnections * Test = new TestConnections(argc, argv); - Test->set_timeout(20); + TestConnections test(argc, argv); + printf("Connecting to RWSplit"); + test.set_timeout(60); + test.add_result(test.connect_rwsplit(), "Error connection to RWSplit! Exiting"); + sleep(5); + + test.tprintf("Checking current slave"); int res = 0; + int old_slave = test.find_connected_slave(&res); + test.add_result(res, "no current slave"); - unsigned int current_slave; - unsigned int old_slave; + test.tprintf("Setup firewall to block mysql on old slave (oldslave is node %d)", old_slave); - printf("Connecting to RWSplit %s\n", Test->maxscale_IP); - if (Test->connect_rwsplit() != 0) - { - Test->add_result(1, "Error connection to RWSplit! Exiting\n"); - } - else - { + test.add_result((old_slave < 0) || (old_slave >= test.repl->N), "Active slave is not found"); + test.repl->block_node(old_slave); - Test->tprintf("Checking current slave\n"); - old_slave = Test->find_connected_slave( &res); + test.tprintf("Waiting for MaxScale to find a new slave"); + test.stop_timeout(); + sleep(10); - Test->add_result(res, "no current slave\n"); + test.set_timeout(20); + int current_slave = test.find_connected_slave(&res); + test.add_result((current_slave == old_slave) || (current_slave < 0), "No failover happened"); - Test->tprintf("Setup firewall to block mysql on old slave (oldslave is node %d)\n", old_slave); - if ((old_slave < 0) || (old_slave >= Test->repl->N)) - { - Test->add_result(1, "Active slave is not found\n"); - } - else - { - Test->repl->block_node(old_slave); + test.tprintf("Unblock old node"); + test.repl->unblock_node(old_slave); + test.close_rwsplit(); - Test->tprintf("Sleeping 60 seconds to let MaxScale to find new slave\n"); - Test->stop_timeout(); - sleep(60); - Test->set_timeout(20); + test.check_maxscale_alive(); + test.stop_timeout(); + test.repl->fix_replication(); - current_slave 
= Test->find_connected_slave(&res); - if ((current_slave == old_slave) || (current_slave < 0)) - { - Test->add_result(1, "No failover happened\n"); - } - - Test->tprintf("Setup firewall back to allow mysql\n"); - Test->repl->unblock_node(old_slave); - - Test->close_rwsplit(); - - Test->check_maxscale_alive(); - Test->set_timeout(20); - } - Test->set_timeout(200); - Test->repl->start_replication(); - } - - int rval = Test->global_result; - delete Test; - return rval; + return test.global_result; } diff --git a/maxscale-system-test/testconnections.cpp b/maxscale-system-test/testconnections.cpp index a151c1c41..9d6a2e446 100644 --- a/maxscale-system-test/testconnections.cpp +++ b/maxscale-system-test/testconnections.cpp @@ -954,6 +954,44 @@ int TestConnections::start_binlog() return global_result; } +bool TestConnections::replicate_from_master() +{ + bool rval = true; + + /** Stop the binlogrouter */ + MYSQL* conn = open_conn_no_db(binlog_port, maxscale_IP, repl->user_name, repl->password, ssl); + + if (execute_query(conn, "stop slave")) + { + rval = false; + } + mysql_close(conn); + + /** Clean up MaxScale directories */ + prepare_binlog(); + ssh_maxscale(true, "service maxscale restart"); + + char log_file[256] = ""; + char log_pos[256] = "4"; + + repl->execute_query_all_nodes("STOP SLAVE"); + repl->connect(); + execute_query(repl->nodes[0], "RESET MASTER"); + + conn = open_conn_no_db(binlog_port, maxscale_IP, repl->user_name, repl->password, ssl); + + if (find_field(repl->nodes[0], "show master status", "File", log_file) || + repl->set_slave(conn, repl->IP[0], repl->port[0], log_file, log_pos) || + execute_query(conn, "start slave")) + { + rval = false; + } + + mysql_close(conn); + + return rval; +} + int TestConnections::start_mm() { int i; diff --git a/maxscale-system-test/testconnections.h b/maxscale-system-test/testconnections.h index b5e171859..a79c01a93 100644 --- a/maxscale-system-test/testconnections.h +++ b/maxscale-system-test/testconnections.h @@ -442,6 
+442,11 @@ public: */ int start_binlog(); + /** + * @brief Start binlogrouter replication from master + */ + bool replicate_from_master(); + /** * @brief prepare_binlog clean up binlog directory, set proper access rights to it * @return 0 diff --git a/pcre2/132html b/pcre2/132html index b06259848..3a16a59e0 100755 --- a/pcre2/132html +++ b/pcre2/132html @@ -148,7 +148,7 @@ while () printf("
  • $title\n", $ref, $ref); printf TEMP ("
    $title
    \n", - $ref, $ref); + $ref); $ref++; } else diff --git a/pcre2/AUTHORS b/pcre2/AUTHORS index 14a1a19fd..e056ad686 100644 --- a/pcre2/AUTHORS +++ b/pcre2/AUTHORS @@ -8,7 +8,7 @@ Email domain: cam.ac.uk University of Cambridge Computing Service, Cambridge, England. -Copyright (c) 1997-2015 University of Cambridge +Copyright (c) 1997-2017 University of Cambridge All rights reserved @@ -19,7 +19,7 @@ Written by: Zoltan Herczeg Email local part: hzmester Emain domain: freemail.hu -Copyright(c) 2010-2015 Zoltan Herczeg +Copyright(c) 2010-2017 Zoltan Herczeg All rights reserved. @@ -30,7 +30,7 @@ Written by: Zoltan Herczeg Email local part: hzmester Emain domain: freemail.hu -Copyright(c) 2009-2015 Zoltan Herczeg +Copyright(c) 2009-2017 Zoltan Herczeg All rights reserved. #### diff --git a/pcre2/CMakeLists.txt b/pcre2/CMakeLists.txt index b625873cc..883e947b0 100644 --- a/pcre2/CMakeLists.txt +++ b/pcre2/CMakeLists.txt @@ -67,7 +67,16 @@ # 2013-10-08 PH got rid of the "source" command, which is a bash-ism (use ".") # 2013-11-05 PH added support for PARENS_NEST_LIMIT # 2014-08-29 PH converted the file for PCRE2 (which has no C++). -# 2015-04024 PH added support for PCRE2_DEBUG +# 2015-04-24 PH added support for PCRE2_DEBUG +# 2015-07-16 PH updated for new pcre2_find_bracket source module +# 2015-08-24 PH correct C_FLAGS setting (patch from Roy Ivy III) +# 2015-10=16 PH added support for never-backslash-C +# 2016-03-01 PH applied Chris Wilson's patch for MSVC static +# 2016-06-24 PH applied Chris Wilson's second patch, putting the first under +# a new option instead of being unconditional. 
+# 2016-10-05 PH fixed a typo (PCRE should be PCRE2) in above patch +# fix by David Gaussmann +# 2016-10-07 PH added PCREGREP_MAX_BUFSIZE PROJECT(PCRE2 C) @@ -79,7 +88,7 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0) SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) # for FindReadline.cmake -SET(CMAKE_C_FLAGS "-I${PROJECT_SOURCE_DIR}/src ${CMAKE_C_FLAGS}") +SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -I${PROJECT_SOURCE_DIR}/src") # external packages FIND_PACKAGE( BZip2 ) @@ -140,7 +149,10 @@ SET(PCRE2_MATCH_LIMIT_RECURSION "MATCH_LIMIT" CACHE STRING "Default limit on internal recursion. See MATCH_LIMIT_RECURSION in config.h.in for details.") SET(PCRE2GREP_BUFSIZE "20480" CACHE STRING - "Buffer size parameter for pcre2grep. See PCRE2GREP_BUFSIZE in config.h.in for details.") + "Buffer starting size parameter for pcre2grep. See PCRE2GREP_BUFSIZE in config.h.in for details.") + +SET(PCRE2GREP_MAX_BUFSIZE "1048576" CACHE STRING + "Buffer maximum size parameter for pcre2grep. See PCRE2GREP_MAX_BUFSIZE in config.h.in for details.") SET(PCRE2_NEWLINE "LF" CACHE STRING "What to recognize as a newline (one of CR, LF, CRLF, ANY, ANYCRLF).") @@ -154,12 +166,18 @@ SET(PCRE2_SUPPORT_JIT OFF CACHE BOOL SET(PCRE2_SUPPORT_PCRE2GREP_JIT ON CACHE BOOL "Enable use of Just-in-time compiling in pcre2grep.") +SET(PCRE2_SUPPORT_PCRE2GREP_CALLOUT ON CACHE BOOL + "Enable callout string support in pcre2grep.") + SET(PCRE2_SUPPORT_UNICODE ON CACHE BOOL "Enable support for Unicode and UTF-8/UTF-16/UTF-32 encoding.") SET(PCRE2_SUPPORT_BSR_ANYCRLF OFF CACHE BOOL "ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks") +SET(PCRE2_NEVER_BACKSLASH_C OFF CACHE BOOL + "If ON, backslash-C (upper case C) is locked out.") + SET(PCRE2_SUPPORT_VALGRIND OFF CACHE BOOL "Enable Valgrind support.") @@ -178,6 +196,9 @@ IF (MINGW) ENDIF(MINGW) IF(MSVC) + OPTION(PCRE2_STATIC_RUNTIME + "ON=Compile against the static runtime (/MT)." 
+ OFF) OPTION(INSTALL_MSVC_PDB "ON=Install .pdb files built by MSVC, if generated" OFF) @@ -250,6 +271,10 @@ IF(PCRE2_SUPPORT_BSR_ANYCRLF) SET(BSR_ANYCRLF 1) ENDIF(PCRE2_SUPPORT_BSR_ANYCRLF) +IF(PCRE2_NEVER_BACKSLASH_C) + SET(NEVER_BACKSLASH_C 1) +ENDIF(PCRE2_NEVER_BACKSLASH_C) + IF(PCRE2_SUPPORT_UNICODE) SET(SUPPORT_UNICODE 1) ENDIF(PCRE2_SUPPORT_UNICODE) @@ -262,6 +287,10 @@ IF(PCRE2_SUPPORT_PCRE2GREP_JIT) SET(SUPPORT_PCRE2GREP_JIT 1) ENDIF(PCRE2_SUPPORT_PCRE2GREP_JIT) +IF(PCRE2_SUPPORT_PCRE2GREP_CALLOUT) + SET(SUPPORT_PCRE2GREP_CALLOUT 1) +ENDIF(PCRE2_SUPPORT_PCRE2GREP_CALLOUT) + IF(PCRE2_SUPPORT_VALGRIND) SET(SUPPORT_VALGRIND 1) ENDIF(PCRE2_SUPPORT_VALGRIND) @@ -390,6 +419,7 @@ SET(PCRE2_SOURCES src/pcre2_context.c src/pcre2_dfa_match.c src/pcre2_error.c + src/pcre2_find_bracket.c src/pcre2_jit_compile.c src/pcre2_maketables.c src/pcre2_match.c @@ -445,6 +475,18 @@ SET(PCRE2POSIX_SOURCES ENDIF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc) ENDIF(MSVC AND NOT PCRE2_STATIC) +# Fix static compilation with MSVC: https://bugs.exim.org/show_bug.cgi?id=1681 +# This code was taken from the CMake wiki, not from WebM. 
+ +IF(MSVC AND PCRE2_STATIC_RUNTIME) + MESSAGE(STATUS "** MSVC and PCRE2_STATIC_RUNTIME: modifying compiler flags to use static runtime library") + foreach(flag_var + CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE + CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) + string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") + endforeach() +ENDIF(MSVC AND PCRE2_STATIC_RUNTIME) + # Build setup ADD_DEFINITIONS(-DHAVE_CONFIG_H) @@ -468,21 +510,19 @@ IF(PCRE2_BUILD_PCRE2_8) ADD_LIBRARY(pcre2-8 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) SET_PROPERTY(TARGET pcre2-8 PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8) -SET_PROPERTY(TARGET pcre2-8 - PROPERTY VERSION 1.0.0) SET(targets ${targets} pcre2-8) -ADD_LIBRARY(pcre2posix ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES}) -SET_PROPERTY(TARGET pcre2posix +ADD_LIBRARY(pcre2-posix ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES}) +SET_PROPERTY(TARGET pcre2-posix PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8) -SET(targets ${targets} pcre2posix) -TARGET_LINK_LIBRARIES(pcre2posix pcre2-8) +SET(targets ${targets} pcre2-posix) +TARGET_LINK_LIBRARIES(pcre2-posix pcre2-8) IF(MINGW AND NOT PCRE2_STATIC) IF(NON_STANDARD_LIB_PREFIX) - SET_TARGET_PROPERTIES(pcre2-8 pcre2posix PROPERTIES PREFIX "") + SET_TARGET_PROPERTIES(pcre2-8 pcre2-posix PROPERTIES PREFIX "") ENDIF(NON_STANDARD_LIB_PREFIX) IF(NON_STANDARD_LIB_SUFFIX) - SET_TARGET_PROPERTIES(pcre2-8 pcre2posix PROPERTIES SUFFIX "-0.dll") + SET_TARGET_PROPERTIES(pcre2-8 pcre2-posix PROPERTIES SUFFIX "-0.dll") ENDIF(NON_STANDARD_LIB_SUFFIX) ENDIF(MINGW AND NOT PCRE2_STATIC) ENDIF(PCRE2_BUILD_PCRE2_8) @@ -530,7 +570,7 @@ IF(PCRE2_BUILD_PCRE2GREP) SET_PROPERTY(TARGET pcre2grep PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8) SET(targets ${targets} pcre2grep) - TARGET_LINK_LIBRARIES(pcre2grep pcre2posix ${PCRE2GREP_LIBS}) + TARGET_LINK_LIBRARIES(pcre2grep pcre2-posix ${PCRE2GREP_LIBS}) ENDIF(PCRE2_BUILD_PCRE2GREP) # Testing @@ -543,7 +583,7 
@@ IF(PCRE2_BUILD_TESTS) ADD_EXECUTABLE(pcre2test ${PCRE2TEST_SOURCES}) SET(targets ${targets} pcre2test) IF(PCRE2_BUILD_PCRE2_8) - LIST(APPEND PCRE2TEST_LIBS pcre2posix pcre2-8) + LIST(APPEND PCRE2TEST_LIBS pcre2-posix pcre2-8) ENDIF(PCRE2_BUILD_PCRE2_8) IF(PCRE2_BUILD_PCRE2_16) LIST(APPEND PCRE2TEST_LIBS pcre2-16) @@ -718,6 +758,7 @@ IF(PCRE2_SHOW_REPORT) MESSAGE(STATUS " Enable Unicode support .......... : ${PCRE2_SUPPORT_UNICODE}") MESSAGE(STATUS " Newline char/sequence ........... : ${PCRE2_NEWLINE}") MESSAGE(STATUS " \\R matches only ANYCRLF ......... : ${PCRE2_SUPPORT_BSR_ANYCRLF}") + MESSAGE(STATUS " \\C is disabled .................. : ${PCRE2_NEVER_BACKSLASH_C}") MESSAGE(STATUS " EBCDIC coding ................... : ${PCRE2_EBCDIC}") MESSAGE(STATUS " EBCDIC coding with NL=0x25 ...... : ${PCRE2_EBCDIC_NL25}") MESSAGE(STATUS " Rebuild char tables ............. : ${PCRE2_REBUILD_CHARTABLES}") @@ -730,6 +771,7 @@ IF(PCRE2_SHOW_REPORT) MESSAGE(STATUS " Build static libs ............... : ${BUILD_STATIC_LIBS}") MESSAGE(STATUS " Build pcre2grep ................. : ${PCRE2_BUILD_PCRE2GREP}") MESSAGE(STATUS " Enable JIT in pcre2grep ......... : ${PCRE2_SUPPORT_PCRE2GREP_JIT}") + MESSAGE(STATUS " Enable callouts in pcre2grep .... : ${PCRE2_SUPPORT_PCRE2GREP_CALLOUT}") MESSAGE(STATUS " Buffer size for pcre2grep ....... : ${PCRE2GREP_BUFSIZE}") MESSAGE(STATUS " Build tests (implies pcre2test . : ${PCRE2_BUILD_TESTS}") MESSAGE(STATUS " and pcre2grep)") diff --git a/pcre2/COPYING b/pcre2/COPYING index 94a9ed024..c233950f6 100644 --- a/pcre2/COPYING +++ b/pcre2/COPYING @@ -1,674 +1,5 @@ - GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 +PCRE2 LICENCE - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. +Please see the file LICENCE in the PCRE2 distribution for licensing details. 
- Preamble - - The GNU General Public License is a free, copyleft license for -software and other kinds of works. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -the GNU General Public License is intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights. - - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. - - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. 
For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. - - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. - - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. To prevent this, the GPL assures that -patents cannot be used to render the program non-free. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. 
The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. 
- - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. 
The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. 
- - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. 
- - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. - - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. 
This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. - - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. 
For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. - - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. 
Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. 
- - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. 
- - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. - - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. 
Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. 
- - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. 
- - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single -combined work, and to convey the resulting work. 
The terms of this -License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. 
- - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - -Also add information on how to contact you by electronic and paper mail. - - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - <program> Copyright (C) <year> <name of author> - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an "about box". - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -<http://www.gnu.org/licenses/>. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -<http://www.gnu.org/licenses/why-not-lgpl.html>.
+End diff --git a/pcre2/ChangeLog b/pcre2/ChangeLog index 196e95320..70ecb9dea 100644 --- a/pcre2/ChangeLog +++ b/pcre2/ChangeLog @@ -1,6 +1,933 @@ Change Log for PCRE2 -------------------- + +Version 10.23 14-February-2017 +------------------------------ + +1. Extended pcre2test with the utf8_input modifier so that it is able to +generate all possible 16-bit and 32-bit code unit values in non-UTF modes. + +2. In any wide-character mode (8-bit UTF or any 16-bit or 32-bit mode), without +PCRE2_UCP set, a negative character type such as \D in a positive class should +cause all characters greater than 255 to match, whatever else is in the class. +There was a bug that caused this not to happen if a Unicode property item was +added to such a class, for example [\D\P{Nd}] or [\W\pL]. + +3. There has been a major re-factoring of the pcre2_compile.c file. Most syntax +checking is now done in the pre-pass that identifies capturing groups. This has +reduced the amount of duplication and made the code tidier. While doing this, +some minor bugs and Perl incompatibilities were fixed, including: + + (a) \Q\E in the middle of a quantifier such as A+\Q\E+ is now ignored instead + of giving an invalid quantifier error. + + (b) {0} can now be used after a group in a lookbehind assertion; previously + this caused an "assertion is not fixed length" error. + + (c) Perl always treats (?(DEFINE) as a "define" group, even if a group with + the name "DEFINE" exists. PCRE2 now does likewise. + + (d) A recursion condition test such as (?(R2)...) must now refer to an + existing subpattern. + + (e) A conditional recursion test such as (?(R)...) misbehaved if there was a + group whose name began with "R". + + (f) When testing zero-terminated patterns under valgrind, the terminating + zero is now marked "no access". This catches bugs that would otherwise + show up only with non-zero-terminated patterns. 
+ + (g) A hyphen appearing immediately after a POSIX character class (for example + /[[:ascii:]-z]/) now generates an error. Perl does accept this as a + literal, but gives a warning, so it seems best to fail it in PCRE. + + (h) An empty \Q\E sequence may appear after a callout that precedes an + assertion condition (it is, of course, ignored). + +One effect of the refactoring is that some error numbers and messages have +changed, and the pattern offset given for compiling errors is not always the +right-most character that has been read. In particular, for a variable-length +lookbehind assertion it now points to the start of the assertion. Another +change is that when a callout appears before a group, the "length of next +pattern item" that is passed now just gives the length of the opening +parenthesis item, not the length of the whole group. A length of zero is now +given only for a callout at the end of the pattern. Automatic callouts are no +longer inserted before and after explicit callouts in the pattern. + +A number of bugs in the refactored code were subsequently fixed during testing +before release, but after the code was made available in the repository. Many +of the bugs were discovered by fuzzing testing. Several of them were related to +the change from assuming a zero-terminated pattern (which previously had +required non-zero terminated strings to be copied). These bugs were never in +fully released code, but are noted here for the record. + + (a) An overall recursion such as (?0) inside a lookbehind assertion was not + being diagnosed as an error. + + (b) In utf mode, the length of a *MARK (or other verb) name was being checked + in characters instead of code units, which could lead to bad code being + compiled, leading to unpredictable behaviour. + + (c) In extended /x mode, characters whose code was greater than 255 caused + a lookup outside one of the global tables. A similar bug existed for wide + characters in *VERB names. 
+ + (d) The amount of memory needed for a compiled pattern was miscalculated if a + lookbehind contained more than one toplevel branch and the first branch + was of length zero. + + (e) In UTF-8 or UTF-16 modes with PCRE2_EXTENDED (/x) set and a non-zero- + terminated pattern, if a # comment ran on to the end of the pattern, one + or more code units past the end were being read. + + (f) An unterminated repeat at the end of a non-zero-terminated pattern (e.g. + "{2,2") could cause reading beyond the pattern. + + (g) When reading a callout string, if the end delimiter was at the end of the + pattern one further code unit was read. + + (h) An unterminated number after \g' could cause reading beyond the pattern. + + (i) An insufficient memory size was being computed for compiling with + PCRE2_AUTO_CALLOUT. + + (j) A conditional group with an assertion condition used more memory than was + allowed for it during parsing, so too many of them could therefore + overrun a buffer. + + (k) If parsing a pattern exactly filled the buffer, the internal test for + overrun did not check when the final META_END item was added. + + (l) If a lookbehind contained a subroutine call, and the called group + contained an option setting such as (?s), and the PCRE2_ANCHORED option + was set, unpredictable behaviour could occur. The underlying bug was + incorrect code and insufficient checking while searching for the end of + the called subroutine in the parsed pattern. + + (m) Quantifiers following (*VERB)s were not being diagnosed as errors. + + (n) The use of \Q...\E in a (*VERB) name when PCRE2_ALT_VERBNAMES and + PCRE2_AUTO_CALLOUT were both specified caused undetermined behaviour. + + (o) If \Q was preceded by a quantified item, and the following \E was + followed by '?' or '+', and there was at least one literal character + between them, an internal error "unexpected repeat" occurred (example: + /.+\QX\E+/). 
+ + (p) A buffer overflow could occur while sorting the names in the group name + list (depending on the order in which the names were seen). + + (q) A conditional group that started with a callout was not doing the right + check for a following assertion, leading to compiling bad code. Example: + /(?(C'XX))?!XX/ + + (r) If a character whose code point was greater than 0xffff appeared within + a lookbehind that was within another lookbehind, the calculation of the + lookbehind length went wrong and could provoke an internal error. + + (t) The sequence \E- or \Q\E- after a POSIX class in a character class caused + an internal error. Now the hyphen is treated as a literal. + +4. Back references are now permitted in lookbehind assertions when there are +no duplicated group numbers (that is, (?| has not been used), and, if the +reference is by name, there is only one group of that name. The referenced +group must, of course, be of fixed length. + +5. pcre2test has been upgraded so that, when run under valgrind with valgrind +support enabled, reading past the end of the pattern is detected, both when +compiling and during callout processing. + +6. \g{+<number>} (e.g. \g{+2} ) is now supported. It is a "forward back +reference" and can be useful in repetitions (compare \g{-<number>} ). Perl does +not recognize this syntax. + +7. Automatic callouts are no longer generated before and after callouts in the +pattern. + +8. When pcre2test was outputting information from a callout, the caret indicator +for the current position in the subject line was incorrect if it was after an +escape sequence for a character whose code point was greater than \x{ff}. + +9. Change 19 for 10.22 had a typo (PCRE_STATIC_RUNTIME should be +PCRE2_STATIC_RUNTIME). Fix from David Gaussmann. + +10. Added --max-buffer-size to pcre2grep, to allow for automatic buffer +expansion when long lines are encountered. Original patch by Dmitry +Cherniachenko. + +11.
If pcre2grep was compiled with JIT support, but the library was compiled +without it (something that neither ./configure nor CMake allow, but it can be +done by editing config.h), pcre2grep was giving a JIT error. Now it detects +this situation and does not try to use JIT. + +12. Added some "const" qualifiers to variables in pcre2grep. + +13. Added Dmitry Cherniachenko's patch for colouring output in Windows +(untested by me). Also, look for GREP_COLOUR or GREP_COLOR if the environment +variables PCRE2GREP_COLOUR and PCRE2GREP_COLOR are not found. + +14. Add the -t (grand total) option to pcre2grep. + +15. A number of bugs have been mended relating to match start-up optimizations +when the first thing in a pattern is a positive lookahead. These all applied +only when PCRE2_NO_START_OPTIMIZE was *not* set: + + (a) A pattern such as (?=.*X)X$ was incorrectly optimized as if it needed + both an initial 'X' and a following 'X'. + (b) Some patterns starting with an assertion that started with .* were + incorrectly optimized as having to match at the start of the subject or + after a newline. There are cases where this is not true, for example, + (?=.*[A-Z])(?=.{8,16})(?!.*[\s]) matches after the start in lines that + start with spaces. Starting .* in an assertion is no longer taken as an + indication of matching at the start (or after a newline). + +16. The "offset" modifier in pcre2test was not being ignored (as documented) +when the POSIX API was in use. + +17. Added --enable-fuzz-support to "configure", causing a non-installed +library containing a test function that can be called by fuzzers to be +compiled. A non-installed binary to run the test function locally, called +pcre2fuzzcheck is also compiled. + +18. A pattern with PCRE2_DOTALL (/s) set but not PCRE2_NO_DOTSTAR_ANCHOR, and +which started with .* inside a positive lookahead was incorrectly being +compiled as implicitly anchored. + +19.
Removed all instances of "register" declarations, as they are considered +obsolete these days and in any case had become very haphazard. + +20. Add strerror() to pcre2test for failed file opening. + +21. Make pcre2test -C list valgrind support when it is enabled. + +22. Add the use_length modifier to pcre2test. + +23. Fix an off-by-one bug in pcre2test for the list of names for 'get' and +'copy' modifiers. + +24. Add PCRE2_CALL_CONVENTION into the prototype declarations in pcre2.h as it +is apparently needed there as well as in the function definitions. (Why did +nobody ask for this in PCRE1?) + +25. Change the _PCRE2_H and _PCRE2_UCP_H guard macros in the header files to +PCRE2_H_IDEMPOTENT_GUARD and PCRE2_UCP_H_IDEMPOTENT_GUARD to be more standard +compliant and unique. + +26. pcre2-config --libs-posix was listing -lpcre2posix instead of +-lpcre2-posix. Also, the CMake build process was building the library with the +wrong name. + +27. In pcre2test, give some offset information for errors in hex patterns. +This uses the C99 formatting sequence %td, except for MSVC which doesn't +support it - %lu is used instead. + +28. Implemented pcre2_code_copy_with_tables(), and added pushtablescopy to +pcre2test for testing it. + +29. Fix small memory leak in pcre2test. + +30. Fix out-of-bounds read for partial matching of /./ against an empty string +when the newline type is CRLF. + +31. Fix a bug in pcre2test that caused a crash when a locale was set either in +the current pattern or a previous one and a wide character was matched. + +32. The appearance of \p, \P, or \X in a substitution string when +PCRE2_SUBSTITUTE_EXTENDED was set caused a segmentation fault (NULL +dereference). + +33. If the starting offset was specified as greater than the subject length in +a call to pcre2_substitute() an out-of-bounds memory reference could occur. + +34. 
When PCRE2 was compiled to use the heap instead of the stack for recursive +calls to match(), a repeated minimizing caseless back reference, or a +maximizing one where the two cases had different numbers of code units, +followed by a caseful back reference, could lose the caselessness of the first +repeated back reference (example: /(Z)(a)\2{1,2}?(?-i)\1X/i should match ZaAAZX +but didn't). + +35. When a pattern is too complicated, PCRE2 gives up trying to find a minimum +matching length and just records zero. Typically this happens when there are +too many nested or recursive back references. If the limit was reached in +certain recursive cases it failed to be triggered and an internal error could +be the result. + +36. The pcre2_dfa_match() function now takes note of the recursion limit for +the internal recursive calls that are used for lookrounds and recursions within +the pattern. + +37. More refactoring has got rid of the internal could_be_empty_branch() +function (around 400 lines of code, including comments) by keeping track of +could-be-emptiness as the pattern is compiled instead of scanning compiled +groups. (This would have been much harder before the refactoring of #3 above.) +This lifts a restriction on the number of branches in a group (more than about +1100 would give "pattern is too complicated"). + +38. Add the "-ac" command line option to pcre2test as a synonym for "-pattern +auto_callout". + +39. In a library with Unicode support, incorrect data was compiled for a +pattern with PCRE2_UCP set without PCRE2_UTF if a class required all wide +characters to match (for example, /[\s[:^ascii:]]/). + +40. The callout_error modifier has been added to pcre2test to make it possible +to return PCRE2_ERROR_CALLOUT from a callout. + +41. A minor change to pcre2grep: colour reset is now "<esc>[0m" instead of +"<esc>[00m". + +42.
The limit in the auto-possessification code that was intended to catch +overly-complicated patterns and not spend too much time auto-possessifying was +being reset too often, resulting in very long compile times for some patterns. +Now such patterns are no longer completely auto-possessified. + +43. Applied Jason Hood's revised patch for RunTest.bat. + +44. Added a new Windows script RunGrepTest.bat, courtesy of Jason Hood. + +45. Minor cosmetic fix to pcre2test: move a variable that is not used under +Windows into the "not Windows" code. + +46. Applied Jason Hood's patches to upgrade pcre2grep under Windows and tidy +some of the code: + + * normalised the Windows condition by ensuring WIN32 is defined; + * enables the callout feature under Windows; + * adds globbing (Microsoft's implementation expands quoted args), + using a tweaked opendirectory; + * implements the is_*_tty functions for Windows; + * --color=always will write the ANSI sequences to file; + * add sequences 4 (underline works on Win10) and 5 (blink as bright + background, relatively standard on DOS/Win); + * remove the (char *) casts for the now-const strings; + * remove GREP_COLOUR (grep's command line allowed the 'u', but not + the environment), parsing GREP_COLORS instead; + * uses the current colour if not set, rather than black; + * add print_match for the undefined case; + * fixes a typo. + +In addition, colour settings containing anything other than digits and +semicolon are ignored, and the colour controls are no longer output for empty +strings. + +47. Detecting patterns that are too large inside the length-measuring loop +saves processing ridiculously long patterns to their end. + +48. Ignore PCRE2_CASELESS when processing \h, \H, \v, and \V in classes as it +just wastes time. In the UTF case it can also produce redundant entries in +XCLASS lists caused by characters with multiple other cases and pairs of +characters in the same "not-x" sublists. + +49. 
A pattern such as /(?=(a\K))/ can report the end of the match being before +its start; pcre2test was not handling this correctly when using the POSIX +interface (it was OK with the native interface). + +50. In pcre2grep, ignore all JIT compile errors. This means that pcre2grep will +continue to work, falling back to interpretation if anything goes wrong with +JIT. + +51. Applied patches from Christian Persch to configure.ac to make use of the +AC_USE_SYSTEM_EXTENSIONS macro and to test for functions used by the JIT +modules. + +52. Minor fixes to pcre2grep from Jason Hood: + * fixed some spacing; + * Windows doesn't usually use single quotes, so I've added a define + to use appropriate quotes [in an example]; + * LC_ALL was displayed as "LCC_ALL"; + * numbers 11, 12 & 13 should end in "th"; + * use double quotes in usage message. + +53. When autopossessifying, skip empty branches without recursion, to reduce +stack usage for the benefit of clang with -fsanitize-address, which uses huge +stack frames. Example pattern: /X?(R||){3335}/. Fixes oss-fuzz issue 553. + +54. A pattern with very many explicit back references to a group that is a long +way from the start of the pattern could take a long time to compile because +searching for the referenced group in order to find the minimum length was +being done repeatedly. Now up to 128 group minimum lengths are cached and the +attempt to find a minimum length is abandoned if there is a back reference to a +group whose number is greater than 128. (In that case, the pattern is so +complicated that this optimization probably isn't worth it.) This fixes +oss-fuzz issue 557. + +55. Issue 32 for 10.22 below was not correctly fixed. If pcre2grep in multiline +mode with --only-matching matched several lines, it restarted scanning at the +next line instead of moving on to the end of the matched string, which can be +several lines after the start. + +56. 
Applied Jason Hood's new patch for RunGrepTest.bat that updates it in line +with updates to the non-Windows version. + + + +Version 10.22 29-July-2016 +-------------------------- + +1. Applied Jason Hood's patches to RunTest.bat and testdata/wintestoutput3 +to fix problems with running the tests under Windows. + +2. Implemented a facility for quoting literal characters within hexadecimal +patterns in pcre2test, to make it easier to create patterns with just a few +non-printing characters. + +3. Binary zeros are not supported in pcre2test input files. It now detects them +and gives an error. + +4. Updated the valgrind parameters in RunTest: (a) changed smc-check=all to +smc-check=all-non-file; (b) changed obj:* in the suppression file to obj:??? so +that it matches only unknown objects. + +5. Updated the maintenance script maint/ManyConfigTests to make it easier to +select individual groups of tests. + +6. When the POSIX wrapper function regcomp() is called, the REG_NOSUB option +used to set PCRE2_NO_AUTO_CAPTURE when calling pcre2_compile(). However, this +disables the use of back references (and subroutine calls), which are supported +by other implementations of regcomp() with RE_NOSUB. Therefore, REG_NOSUB no +longer causes PCRE2_NO_AUTO_CAPTURE to be set, though it still ignores nmatch +and pmatch when regexec() is called. + +7. Because of 6 above, pcre2test has been modified with a new modifier called +posix_nosub, to call regcomp() with REG_NOSUB. Previously the no_auto_capture +modifier had this effect. That option is now ignored when the POSIX API is in +use. + +8. Minor tidies to the pcre2demo.c sample program, including more comments +about its 8-bit-ness. + +9. Detect unmatched closing parentheses and give the error in the pre-scan +instead of later. Previously the pre-scan carried on and could give a +misleading incorrect error message. For example, /(?J)(?'a'))(?'a')/ gave a +message about invalid duplicate group names. + +10. 
It has happened that pcre2test was accidentally linked with another POSIX +regex library instead of libpcre2-posix. In this situation, a call to regcomp() +(in the other library) may succeed, returning zero, but of course putting its +own data into the regex_t block. In one example the re_pcre2_code field was +left as NULL, which made pcre2test think it had not got a compiled POSIX regex, +so it treated the next line as another pattern line, resulting in a confusing +error message. A check has been added to pcre2test to see if the data returned +from a successful call of regcomp() are valid for PCRE2's regcomp(). If they +are not, an error message is output and the pcre2test run is abandoned. The +message points out the possibility of a mis-linking. Hopefully this will avoid +some head-scratching the next time this happens. + +11. A pattern such as /(?<=((?C)0))/, which has a callout inside a lookbehind +assertion, caused pcre2test to output a very large number of spaces when the +callout was taken, making the program appear to loop. + +12. A pattern that included (*ACCEPT) in the middle of a sufficiently deeply +nested set of parentheses of sufficient size caused an overflow of the +compiling workspace (which was diagnosed, but of course is not desirable). + +13. Detect missing closing parentheses during the pre-pass for group +identification. + +14. Changed some integer variable types and put in a number of casts, following +a report of compiler warnings from Visual Studio 2013 and a few tests with +gcc's -Wconversion (which still throws up a lot). + +15. Implemented pcre2_code_copy(), and added pushcopy and #popcopy to pcre2test +for testing it. + +16. Change 66 for 10.21 introduced the use of snprintf() in PCRE2's version of +regerror(). When the error buffer is too small, my version of snprintf() puts a +binary zero in the final byte.
Bug #1801 seems to show that other versions do +not do this, leading to bad output from pcre2test when it was checking for +buffer overflow. It no longer assumes a binary zero at the end of a too-small +regerror() buffer. + +17. Fixed typo ("&&" for "&") in pcre2_study(). Fortunately, this could not +actually affect anything, by sheer luck. + +18. Two minor fixes for MSVC compilation: (a) removal of apparently incorrect +"const" qualifiers in pcre2test and (b) defining snprintf as _snprintf for +older MSVC compilers. This has been done both in src/pcre2_internal.h for most +of the library, and also in src/pcre2posix.c, which no longer includes +pcre2_internal.h (see 24 below). + +19. Applied Chris Wilson's patch (Bugzilla #1681) to CMakeLists.txt for MSVC +static compilation. Subsequently applied Chris Wilson's second patch, putting +the first patch under a new option instead of being unconditional when +PCRE_STATIC is set. + +20. Updated pcre2grep to set stdout as binary when run under Windows, so as not +to convert \r\n at the ends of reflected lines into \r\r\n. This required +ensuring that other output that is written to stdout (e.g. file names) uses the +appropriate line terminator: \r\n for Windows, \n otherwise. + +21. When a line is too long for pcre2grep's internal buffer, show the maximum +length in the error message. + +22. Added support for string callouts to pcre2grep (Zoltan's patch with PH +additions). + +23. RunTest.bat was missing a "set type" line for test 22. + +24. The pcre2posix.c file was including pcre2_internal.h, and using some +"private" knowledge of the data structures. This is unnecessary; the code has +been re-factored and no longer includes pcre2_internal.h. + +25. A racing condition is fixed in JIT reported by Mozilla. + +26. Minor code refactor to avoid "array subscript is below array bounds" +compiler warning. + +27. Minor code refactor to avoid "left shift of negative number" warning. + +28. 
Add a bit more sanity checking to pcre2_serialize_decode() and document +that it expects trusted data. + +29. Fix typo in pcre2_jit_test.c + +30. Due to an oversight, pcre2grep was not making use of JIT when available. +This is now fixed. + +31. The RunGrepTest script is updated to use the valgrind suppressions file +when testing with JIT under valgrind (compare 10.21/51 below). The suppressions +file is updated so that is now the same as for PCRE1: it suppresses the +Memcheck warnings Addr16 and Cond in unknown objects (that is, JIT-compiled +code). Also changed smc-check=all to smc-check=all-non-file as was done for +RunTest (see 4 above). + +32. Implemented the PCRE2_NO_JIT option for pcre2_match(). + +33. Fix typo that gave a compiler error when JIT not supported. + +34. Fix comment describing the returns from find_fixedlength(). + +35. Fix potential negative index in pcre2test. + +36. Calls to pcre2_get_error_message() with error numbers that are never +returned by PCRE2 functions were returning empty strings. Now the error code +PCRE2_ERROR_BADDATA is returned. A facility has been added to pcre2test to +show the texts for given error numbers (i.e. to call pcre2_get_error_message() +and display what it returns) and a few representative error codes are now +checked in RunTest. + +37. Added "&& !defined(__INTEL_COMPILER)" to the test for __GNUC__ in +pcre2_match.c, in anticipation that this is needed for the same reason it was +recently added to pcrecpp.cc in PCRE1. + +38. Using -o with -M in pcre2grep could cause unnecessary repeated output when +the match extended over a line boundary, as it tried to find more matches "on +the same line" - but it was already over the end. + +39. Allow \C in lookbehinds and DFA matching in UTF-32 mode (by converting it +to the same code as '.' when PCRE2_DOTALL is set). + +40. Fix two clang compiler warnings in pcre2test when only one code unit width +is supported. + +41. 
Upgrade RunTest to automatically re-run test 2 with a large (64M) stack if +it fails when running the interpreter with a 16M stack (and if changing the +stack size via pcre2test is possible). This avoids having to manually set a +large stack size when testing with clang. + +42. Fix register overwrite in JIT when SSE2 acceleration is enabled. + +43. Detect integer overflow in pcre2test pattern and data repetition counts. + +44. In pcre2test, ignore "allcaptures" after DFA matching. + +45. Fix unaligned accesses on x86. Patch by Marc Mutz. + +46. Fix some more clang compiler warnings. + + +Version 10.21 12-January-2016 +----------------------------- + +1. Improve matching speed of patterns starting with + or * in JIT. + +2. Use memchr() to find the first character in an unanchored match in 8-bit +mode in the interpreter. This gives a significant speed improvement. + +3. Removed a redundant copy of the opcode_possessify table in the +pcre2_auto_possessify.c source. + +4. Fix typos in dftables.c for z/OS. + +5. Change 36 for 10.20 broke the handling of [[:>:]] and [[:<:]] in that +processing them could involve a buffer overflow if the following character was +an opening parenthesis. + +6. Change 36 for 10.20 also introduced a bug in processing this pattern: +/((?x)(*:0))#(?'/. Specifically: if a setting of (?x) was followed by a (*MARK) +setting (which (*:0) is), then (?x) did not get unset at the end of its group +during the scan for named groups, and hence the external # was incorrectly +treated as a comment and the invalid (?' at the end of the pattern was not +diagnosed. This caused a buffer overflow during the real compile. This bug was +discovered by Karl Skomski with the LLVM fuzzer. + +7. Moved the pcre2_find_bracket() function from src/pcre2_compile.c into its +own source module to avoid a circular dependency between src/pcre2_compile.c +and src/pcre2_study.c + +8.
A callout with a string argument containing an opening square bracket, for +example /(?C$[$)(?<]/, was incorrectly processed and could provoke a buffer +overflow. This bug was discovered by Karl Skomski with the LLVM fuzzer. + +9. The handling of callouts during the pre-pass for named group identification +has been tightened up. + +10. The quantifier {1} can be ignored, whether greedy, non-greedy, or +possessive. This is a very minor optimization. + +11. A possessively repeated conditional group that could match an empty string, +for example, /(?(R))*+/, was incorrectly compiled. + +12. The Unicode tables have been updated to Unicode 8.0.0 (thanks to Christian +Persch). + +13. An empty comment (?#) in a pattern was incorrectly processed and could +provoke a buffer overflow. This bug was discovered by Karl Skomski with the +LLVM fuzzer. + +14. Fix infinite recursion in the JIT compiler when certain patterns such as +/(?:|a|){100}x/ are analysed. + +15. Some patterns with character classes involving [: and \\ were incorrectly +compiled and could cause reading from uninitialized memory or an incorrect +error diagnosis. Examples are: /[[:\\](?<[::]/ and /[[:\\](?'abc')[a:]. The +first of these bugs was discovered by Karl Skomski with the LLVM fuzzer. + +16. Pathological patterns containing many nested occurrences of [: caused +pcre2_compile() to run for a very long time. This bug was found by the LLVM +fuzzer. + +17. A missing closing parenthesis for a callout with a string argument was not +being diagnosed, possibly leading to a buffer overflow. This bug was found by +the LLVM fuzzer. + +18. A conditional group with only one branch has an implicit empty alternative +branch and must therefore be treated as potentially matching an empty string. + +19. If (?R was followed by - or + incorrect behaviour happened instead of a +diagnostic. This bug was discovered by Karl Skomski with the LLVM fuzzer. + +20. 
Another bug that was introduced by change 36 for 10.20: conditional groups +whose condition was an assertion preceded by an explicit callout with a string +argument might be incorrectly processed, especially if the string contained \Q. +This bug was discovered by Karl Skomski with the LLVM fuzzer. + +21. Compiling PCRE2 with the sanitize options of clang showed up a number of +very pedantic coding infelicities and a buffer overflow while checking a UTF-8 +string if the final multi-byte UTF-8 character was truncated. + +22. For Perl compatibility in EBCDIC environments, ranges such as a-z in a +class, where both values are literal letters in the same case, omit the +non-letter EBCDIC code points within the range. + +23. Finding the minimum matching length of complex patterns with back +references and/or recursions can take a long time. There is now a cut-off that +gives up trying to find a minimum length when things get too complex. + +24. An optimization has been added that speeds up finding the minimum matching +length for patterns containing repeated capturing groups or recursions. + +25. If a pattern contained a back reference to a group whose number was +duplicated as a result of appearing in a (?|...) group, the computation of the +minimum matching length gave a wrong result, which could cause incorrect "no +match" errors. For such patterns, a minimum matching length cannot at present +be computed. + +26. Added a check for integer overflow in conditions (?(<digits>) and +(?(R<digits>). This omission was discovered by Karl Skomski with the LLVM +fuzzer. + +27. Fixed an issue when \p{Any} inside an xclass did not read the current +character. + +28. If pcre2grep was given the -q option with -c or -l, or when handling a +binary file, it incorrectly wrote output to stdout. + +29. The JIT compiler did not restore the control verb head in case of *THEN +control verbs. This issue was found by Karl Skomski with a custom LLVM fuzzer. + +30.
The way recursive references such as (?3) are compiled has been re-written +because the old way was the cause of many issues. Now, conversion of the group +number into a pattern offset does not happen until the pattern has been +completely compiled. This does mean that detection of all infinitely looping +recursions is postponed till match time. In the past, some easy ones were +detected at compile time. This re-writing was done in response to yet another +bug found by the LLVM fuzzer. + +31. A test for a back reference to a non-existent group was missing for items +such as \987. This caused incorrect code to be compiled. This issue was found +by Karl Skomski with a custom LLVM fuzzer. + +32. Error messages for syntax errors following \g and \k were giving inaccurate +offsets in the pattern. + +33. Improve the performance of starting single character repetitions in JIT. + +34. (*LIMIT_MATCH=) now gives an error instead of setting the value to 0. + +35. Error messages for syntax errors in *LIMIT_MATCH and *LIMIT_RECURSION now +give the right offset instead of zero. + +36. The JIT compiler should not check repeats after a {0,1} repeat byte code. +This issue was found by Karl Skomski with a custom LLVM fuzzer. + +37. The JIT compiler should restore the control chain for empty possessive +repeats. This issue was found by Karl Skomski with a custom LLVM fuzzer. + +38. A bug which was introduced by the single character repetition optimization +was fixed. + +39. Match limit check added to recursion. This issue was found by Karl Skomski +with a custom LLVM fuzzer. + +40. Arrange for the UTF check in pcre2_match() and pcre2_dfa_match() to look +only at the part of the subject that is relevant when the starting offset is +non-zero. + +41. Improve first character match in JIT with SSE2 on x86. + +42. Fix two assertion fails in JIT. These issues were found by Karl Skomski +with a custom LLVM fuzzer. + +43. 
Correct the setting of CMAKE_C_FLAGS in CMakeLists.txt (patch from Roy Ivy +III). + +44. Fix bug in RunTest.bat for new test 14, and adjust the script for the added +test (there are now 20 in total). + +45. Fixed a corner case of range optimization in JIT. + +46. Add the ${*MARK} facility to pcre2_substitute(). + +47. Modifier lists in pcre2test were splitting at spaces without the required +commas. + +48. Implemented PCRE2_ALT_VERBNAMES. + +49. Fixed two issues in JIT. These were found by Karl Skomski with a custom +LLVM fuzzer. + +50. The pcre2test program has been extended by adding the #newline_default +command. This has made it possible to run the standard tests when PCRE2 is +compiled with either CR or CRLF as the default newline convention. As part of +this work, the new command was added to several test files and the testing +scripts were modified. The pcre2grep tests can now also be run when there is no +LF in the default newline convention. + +51. The RunTest script has been modified so that, when JIT is used and valgrind +is specified, a valgrind suppressions file is set up to ignore "Invalid read of +size 16" errors because these are false positives when the hardware supports +the SSE2 instruction set. + +52. It is now possible to have comment lines amid the subject strings in +pcre2test (and perltest.sh) input. + +53. Implemented PCRE2_USE_OFFSET_LIMIT and pcre2_set_offset_limit(). + +54. Add the null_context modifier to pcre2test so that calling pcre2_compile() +and the matching functions with NULL contexts can be tested. + +55. Implemented PCRE2_SUBSTITUTE_EXTENDED. + +56. In a character class such as [\W\p{Any}] where both a negative-type escape +("not a word character") and a property escape were present, the property +escape was being ignored. + +57. Fixed integer overflow for patterns whose minimum matching length is very, +very large. + +58. Implemented --never-backslash-C. + +59. 
Change 55 above introduced a bug by which certain patterns provoked the +erroneous error "\ at end of pattern". + +60. The special sequences [[:<:]] and [[:>:]] gave rise to incorrect compiling +errors or other strange effects if compiled in UCP mode. Found with libFuzzer +and AddressSanitizer. + +61. Whitespace at the end of a pcre2test pattern line caused a spurious error +message if there were only single-character modifiers. It should be ignored. + +62. The use of PCRE2_NO_AUTO_CAPTURE could cause incorrect compilation results +or segmentation errors for some patterns. Found with libFuzzer and +AddressSanitizer. + +63. Very long names in (*MARK) or (*THEN) etc. items could provoke a buffer +overflow. + +64. Improve error message for overly-complicated patterns. + +65. Implemented an optional replication feature for patterns in pcre2test, to +make it easier to test long repetitive patterns. The tests for 63 above are +converted to use the new feature. + +66. In the POSIX wrapper, if regerror() was given too small a buffer, it could +misbehave. + +67. In pcre2_substitute() in UTF mode, the UTF validity check on the +replacement string was happening before the length setting when the replacement +string was zero-terminated. + +68. In pcre2_substitute() in UTF mode, PCRE2_NO_UTF_CHECK can be set for the +second and subsequent calls to pcre2_match(). + +69. There was no check for integer overflow for a replacement group number in +pcre2_substitute(). An added check for a number greater than the largest group +number in the pattern means this is not now needed. + +70. The PCRE2-specific VERSION condition didn't work correctly if only one +digit was given after the decimal point, or if more than two digits were given. +It now works with one or two digits, and gives a compile time error if more are +given. + +71. In pcre2_substitute() there was the possibility of reading one code unit +beyond the end of the replacement string. + +72. 
The code for checking a subject's UTF-32 validity for a pattern with a +lookbehind involved an out-of-bounds pointer, which could potentially cause +trouble in some environments. + +73. The maximum lookbehind length was incorrectly calculated for patterns such +as /(?<=(a)(?-1))x/ which have a recursion within a backreference. + +74. Give an error if a lookbehind assertion is longer than 65535 code units. + +75. Give an error in pcre2_substitute() if a match ends before it starts (as a +result of the use of \K). + +76. Check the length of subpattern names and the names in (*MARK:xx) etc. +dynamically to avoid the possibility of integer overflow. + +77. Implement pcre2_set_max_pattern_length() so that programs can restrict the +size of patterns that they are prepared to handle. + +78. (*NO_AUTO_POSSESS) was not working. + +79. Adding group information caching improves the speed of compiling when +checking whether a group has a fixed length and/or could match an empty string, +especially when recursion or subroutine calls are involved. However, this +cannot be used when (?| is present in the pattern because the same number may +be used for groups of different sizes. To catch runaway patterns in this +situation, counts have been introduced to the functions that scan for empty +branches or compute fixed lengths. + +80. Allow for the possibility of the size of the nest_save structure not being +a factor of the size of the compiling workspace (it currently is). + +81. Check for integer overflow in minimum length calculation and cap it at +65535. + +82. Small optimizations in code for finding the minimum matching length. + +83. Lock out configuring for EBCDIC with non-8-bit libraries. + +84. Test for error code <= 0 in regerror(). + +85. Check for too many replacements (more than INT_MAX) in pcre2_substitute(). + +86. Avoid the possibility of computing with an out-of-bounds pointer (though +not dereferencing it) while handling lookbehind assertions. + +87. 
Failure to get memory for the match data in regcomp() is now given as a +regcomp() error instead of waiting for regexec() to pick it up. + +88. In pcre2_substitute(), ensure that CRLF is not split when it is a valid +newline sequence. + +89. Paranoid check in regcomp() for bad error code from pcre2_compile(). + +90. Run test 8 (internal offsets and code sizes) for link sizes 3 and 4 as well +as for link size 2. + +91. Document that JIT has a limit on pattern size, and give more information +about JIT compile failures in pcre2test. + +92. Implement PCRE2_INFO_HASBACKSLASHC. + +93. Re-arrange valgrind support code in pcre2test to avoid spurious reports +with JIT (possibly caused by SSE2?). + +94. Support offset_limit in JIT. + +95. A sequence such as [[:punct:]b] that is, a POSIX character class followed +by a single ASCII character in a class item, was incorrectly compiled in UCP +mode. The POSIX class got lost, but only if the single character followed it. + +96. [:punct:] in UCP mode was matching some characters in the range 128-255 +that should not have been matched. + +97. If [:^ascii:] or [:^xdigit:] are present in a non-negated class, all +characters with code points greater than 255 are in the class. When a Unicode +property was also in the class (if PCRE2_UCP is set, escapes such as \w are +turned into Unicode properties), wide characters were not correctly handled, +and could fail to match. + +98. In pcre2test, make the "startoffset" modifier a synonym of "offset", +because it sets the "startoffset" parameter for pcre2_match(). + +99. If PCRE2_AUTO_CALLOUT was set on a pattern that had a (?# comment between +an item and its qualifier (for example, A(?#comment)?B) pcre2_compile() +misbehaved. This bug was found by the LLVM fuzzer. + +100. The error for an invalid UTF pattern string always gave the code unit +offset as zero instead of where the invalidity was found. + +101. 
Further to 97 above, negated classes such as [^[:^ascii:]\d] were also not +working correctly in UCP mode. + +102. Similar to 99 above, if an isolated \E was present between an item and its +qualifier when PCRE2_AUTO_CALLOUT was set, pcre2_compile() misbehaved. This bug +was found by the LLVM fuzzer. + +103. The POSIX wrapper function regexec() crashed if the option REG_STARTEND +was set when the pmatch argument was NULL. It now returns REG_INVARG. + +104. Allow for up to 32-bit numbers in the ordin() function in pcre2grep. + +105. An empty \Q\E sequence between an item and its qualifier caused +pcre2_compile() to misbehave when auto callouts were enabled. This bug +was found by the LLVM fuzzer. + +106. If both PCRE2_ALT_VERBNAMES and PCRE2_EXTENDED were set, and a (*MARK) or +other verb "name" ended with whitespace immediately before the closing +parenthesis, pcre2_compile() misbehaved. Example: /(*:abc )/, but only when +both those options were set. + +107. In a number of places pcre2_compile() was not handling NULL characters +correctly, and pcre2test with the "bincode" modifier was not always correctly +displaying fields containing NULLS: + + (a) Within /x extended #-comments + (b) Within the "name" part of (*MARK) and other *verbs + (c) Within the text argument of a callout + +108. If a pattern that was compiled with PCRE2_EXTENDED started with white +space or a #-type comment that was followed by (?-x), which turns off +PCRE2_EXTENDED, and there was no subsequent (?x) to turn it on again, +pcre2_compile() assumed that (?-x) applied to the whole pattern and +consequently mis-compiled it. This bug was found by the LLVM fuzzer. The fix +for this bug means that a setting of any of the (?imsxJU) options at the start +of a pattern is no longer transferred to the options that are returned by +PCRE2_INFO_ALLOPTIONS. In fact, this was an anachronism that should have +changed when the effects of those options were all moved to compile time. + +109. 
An escaped closing parenthesis in the "name" part of a (*verb) when +PCRE2_ALT_VERBNAMES was set caused pcre2_compile() to malfunction. This bug +was found by the LLVM fuzzer. + +110. Implemented PCRE2_SUBSTITUTE_UNSET_EMPTY, and updated pcre2test to make it +possible to test it. + +111. "Harden" pcre2test against ridiculously large values in modifiers and +command line arguments. + +112. Implemented PCRE2_SUBSTITUTE_UNKNOWN_UNSET and PCRE2_SUBSTITUTE_OVERFLOW_ +LENGTH. + +113. Fix printing of *MARK names that contain binary zeroes in pcre2test. + + Version 10.20 30-June-2015 -------------------------- diff --git a/pcre2/HACKING b/pcre2/HACKING index 051520c28..5c77601bc 100644 --- a/pcre2/HACKING +++ b/pcre2/HACKING @@ -7,8 +7,8 @@ but with a revised (and incompatible) API. To avoid confusion, the original library is referred to as PCRE1 below. For information about testing PCRE2, see the pcre2test documentation and the comment at the head of the RunTest file. -PCRE1 releases were up to 8.3x when PCRE2 was developed. The 8.xx series will -continue for bugfixes if necessary. PCRE2 releases started at 10.00 to avoid +PCRE1 releases were up to 8.3x when PCRE2 was developed, and later bug fix +releases remain in the 8.xx series. PCRE2 releases started at 10.00 to avoid confusion with PCRE1. @@ -16,19 +16,20 @@ Historical note 1 ----------------- Many years ago I implemented some regular expression functions to an algorithm -suggested by Martin Richards. These were not Unix-like in form, and were quite -restricted in what they could do by comparison with Perl. The interesting part -about the algorithm was that the amount of space required to hold the compiled -form of an expression was known in advance. 
The code to apply an expression did -not operate by backtracking, as the original Henry Spencer code and current -PCRE2 and Perl code does, but instead checked all possibilities simultaneously -by keeping a list of current states and checking all of them as it advanced -through the subject string. In the terminology of Jeffrey Friedl's book, it was -a "DFA algorithm", though it was not a traditional Finite State Machine (FSM). -When the pattern was all used up, all remaining states were possible matches, -and the one matching the longest subset of the subject string was chosen. This -did not necessarily maximize the individual wild portions of the pattern, as is -expected in Unix and Perl-style regular expressions. +suggested by Martin Richards. The rather simple patterns were not Unix-like in +form, and were quite restricted in what they could do by comparison with Perl. +The interesting part about the algorithm was that the amount of space required +to hold the compiled form of an expression was known in advance. The code to +apply an expression did not operate by backtracking, as the original Henry +Spencer code and current PCRE2 and Perl code does, but instead checked all +possibilities simultaneously by keeping a list of current states and checking +all of them as it advanced through the subject string. In the terminology of +Jeffrey Friedl's book, it was a "DFA algorithm", though it was not a +traditional Finite State Machine (FSM). When the pattern was all used up, all +remaining states were possible matches, and the one matching the longest subset +of the subject string was chosen. This did not necessarily maximize the +individual wild portions of the pattern, as is expected in Unix and Perl-style +regular expressions. Historical note 2 @@ -85,7 +86,7 @@ had become very complicated and hard to maintain. Indeed one of the early things I did for 6.8 was to fix Yet Another Bug in the memory computation. 
Then I had a flash of inspiration as to how I could run the real compile function in a "fake" mode that enables it to compute how much memory it would need, while -actually only ever using a few hundred bytes of working memory, and without too +in most cases only ever using a small amount of working memory, and without too many tests of the mode that might slow it down. So I refactored the compiling functions to work this way. This got rid of about 600 lines of source. It should make future maintenance and development easier. As this was such a major @@ -104,20 +105,208 @@ system stack used by the compile function, which uses recursive function calls for nested parenthesized groups. This is a safety feature for environments with small stacks where the patterns are provided by users. -History repeated itself for release 10.20. A number of bugs relating to named -subpatterns had been discovered by fuzzers. Most of these were related to the -handling of forward references when it was not known if the named pattern was + +Yet another pattern scan +------------------------ + +History repeated itself for PCRE2 release 10.20. A number of bugs relating to +named subpatterns had been discovered by fuzzers. Most of these were related to +the handling of forward references when it was not known if the named group was unique. (References to non-unique names use a different opcode and more memory.) The use of duplicate group numbers (the (?| facility) also caused -issues. +issues. -To get around these problems I adopted a new approach by adding a third pass, -really a "pre-pass", over the pattern, which does nothing other than identify -all the named subpatterns and their corresponding group numbers. This means -that the actual compile (both pre-pass and real compile) have full knowledge of -group names and numbers throughout. Several dozen lines of messy code were -eliminated, though the new pre-pass is not short (skipping over [] classes is -complicated). 
+To get around these problems I adopted a new approach by adding a third pass +over the pattern (really a "pre-pass"), which did nothing other than identify +all the named subpatterns and their corresponding group numbers. This means +that the actual compile (both the memory-computing dummy run and the real +compile) has full knowledge of group names and numbers throughout. Several +dozen lines of messy code were eliminated, though the new pre-pass was not +short. In particular, parsing and skipping over [] classes is complicated. + +While working on 10.22 I realized that I could simplify yet again by moving +more of the parsing into the pre-pass, thus avoiding doing it in two places, so +after 10.22 was released, the code underwent yet another big refactoring. This +is how it is from 10.23 onwards: + +The function called parse_regex() scans the pattern characters, parsing them +into literal data and meta characters. It converts escapes such as \x{123} +into literals, handles \Q...\E, and skips over comments and non-significant +white space. The result of the scanning is put into a vector of 32-bit unsigned +integers. Values less than 0x80000000 are literal data. Higher values represent +meta-characters. The top 16-bits of such values identify the meta-character, +and these are given names such as META_CAPTURE. The lower 16-bits are available +for data, for example, the capturing group number. The only situation in which +literal data values greater than 0x7fffffff can appear is when the 32-bit +library is running in non-UTF mode. This is handled by having a special +meta-character that is followed by the 32-bit data value. + +The size of the parsed pattern vector, when auto-callouts are not enabled, is +bounded by the length of the pattern (with one exception). The code is written +so that each item in the pattern uses no more vector elements than the number +of code units in the item itself. The exception is the aforementioned large +32-bit number handling. 
For this reason, 32-bit non-UTF patterns are scanned in +advance to check for such values. When auto-callouts are enabled, the generous +assumption is made that there will be a callout for each pattern code unit +(which of course is only actually true if all code units are literals) plus one +at the end. There is a default parsed pattern vector on the stack, but if this +is not big enough, heap memory is used. + +As before, the actual compiling function is run twice, the first time to +determine the amount of memory needed for the final compiled pattern. It +now processes the parsed pattern vector, not the pattern itself, although some +of the parsed items refer to strings in the pattern - for example, group +names. As escapes and comments have already been processed, the code is a bit +simpler than before. + +Most errors can be diagnosed during the parsing scan. For those that cannot +(for example, "lookbehind assertion is not fixed length"), the parsed code +contains offsets into the pattern so that the actual compiling code can +identify where errors occur. + + +The elements of the parsed pattern vector +----------------------------------------- + +The word "offset" below means a code unit offset into the pattern. When +PCRE2_SIZE (which is usually size_t) is no bigger than uint32_t, an offset is +stored in a single parsed pattern element. Otherwise (typically on 64-bit +systems) it occupies two elements. The following meta items occupy just one +element, with no data: + +META_ACCEPT (*ACCEPT) +META_ASTERISK * +META_ASTERISK_PLUS *+ +META_ASTERISK_QUERY *? 
+META_ATOMIC (?> start of atomic group +META_CIRCUMFLEX ^ metacharacter +META_CLASS [ start of non-empty class +META_CLASS_EMPTY [] empty class - only with PCRE2_ALLOW_EMPTY_CLASS +META_CLASS_EMPTY_NOT [^] negative empty class - ditto +META_CLASS_END ] end of non-empty class +META_CLASS_NOT [^ start non-empty negative class +META_COMMIT (*COMMIT) +META_COND_ASSERT (?(?assertion) +META_DOLLAR $ metacharacter +META_DOT . metacharacter +META_END End of pattern (this value is 0x80000000) +META_FAIL (*FAIL) +META_KET ) closing parenthesis +META_LOOKAHEAD (?= start of lookahead +META_LOOKAHEADNOT (?! start of negative lookahead +META_NOCAPTURE (?: no capture parens +META_PLUS + +META_PLUS_PLUS ++ +META_PLUS_QUERY +? +META_PRUNE (*PRUNE) - no argument +META_QUERY ? +META_QUERY_PLUS ?+ +META_QUERY_QUERY ?? +META_RANGE_ESCAPED hyphen in class range with at least one escape +META_RANGE_LITERAL hyphen in class range defined literally +META_SKIP (*SKIP) - no argument +META_THEN (*THEN) - no argument + +The two RANGE values occur only in character classes. They are positioned +between two literals that define the start and end of the range. In an EBCDIC +environment it is necessary to know whether either of the range values was +specified as an escape. In an ASCII/Unicode environment the distinction is not +relevant. + +The following have data in the lower 16 bits, and may be followed by other data +elements: + +META_ALT | alternation +META_BACKREF +META_CAPTURE +META_ESCAPE +META_RECURSE + +If the data for META_ALT is non-zero, it is inside a lookbehind, and the data +is the length of its branch, for which OP_REVERSE must be generated. + +META_BACKREF, META_CAPTURE, and META_RECURSE have the capture group number as +their data in the lower 16 bits of the element. + +META_BACKREF is followed by an offset if the back reference group number is 10 +or more.
The offsets of the first occurrences of references to groups whose +numbers are less than 10 are put in cb->small_ref_offset[] (only the first +occurrence is useful). On 64-bit systems this avoids using more than two parsed +pattern elements for items such as \3. The offset is used when an error is +given for a reference to a non-existent group. + +META_RECURSE is always followed by an offset, for use in error messages. + +META_ESCAPE has an ESC_xxx value as its data. For ESC_P and ESC_p, the next +element contains the 16-bit type and data property values, packed together. +ESC_g and ESC_k are used only for named references - numerical ones are turned +into META_RECURSE or META_BACKREF as appropriate. ESC_g and ESC_k are followed +by a length and an offset into the pattern to specify the name. + +The following have one data item that follows in the next vector element: + +META_BIGVALUE Next is a literal >= META_END +META_OPTIONS (?i) and friends (data is new option bits) +META_POSIX POSIX class item (data identifies the class) +META_POSIX_NEG negative POSIX class item (ditto) + +The following are followed by a length element, then a number of character code +values (which should match with the length): + +META_MARK (*MARK:xxxx) +META_PRUNE_ARG (*PRUNE:xxx) +META_SKIP_ARG (*SKIP:xxxx) +META_THEN_ARG (*THEN:xxxx) + +The following are followed by a length element, then an offset in the pattern +that identifies the name: + +META_COND_NAME (?(<name>) or (?('name') or (?(name) +META_COND_RNAME (?(R&name) +META_COND_RNUMBER (?(Rdigits) +META_RECURSE_BYNAME (?&name) +META_BACKREF_BYNAME \k'name' + +META_COND_RNUMBER is used for names that start with R and continue with digits, +because this is an ambiguous case. It could be a back reference to a group with +that name, or it could be a recursion test on a numbered group.
+ +This one is followed by an offset, for use in error messages, then a number: + +META_COND_NUMBER (?([+-]digits) + +The following is followed just by an offset, for use in error messages: + +META_COND_DEFINE (?(DEFINE) + +The following are also followed just by an offset, but also the lower 16 bits +of the main word contain the length of the first branch of the lookbehind +group; this is used when generating OP_REVERSE for that branch. + +META_LOOKBEHIND (?<= +META_LOOKBEHINDNOT (?<! + +This one is followed by three elements. The first is 0 for '>' and 1 for '>='; +the next two are the major and minor numbers: + +META_COND_VERSION (?(VERSION<op>x.y) + +Callouts are converted into one of two items: + +META_CALLOUT_NUMBER (?C with numerical argument +META_CALLOUT_STRING (?C with string argument + +In both cases, the next two elements contain the offset and length of the next +item in the pattern. Then there is either one callout number, or a length and +an offset for the string argument. The length includes both delimiters. Traditional matching function @@ -225,9 +414,14 @@ These items are all just one unit long OP_THEN ) OP_ASSERT_ACCEPT is used when (*ACCEPT) is encountered within an assertion. -This ends the assertion, not the entire pattern match. The assertion (?!) is +This ends the assertion, not the entire pattern match. The assertion (?!) is always optimized to OP_FAIL. +OP_ALLANY is used for '.' when PCRE2_DOTALL is set. It is also used for \C in +non-UTF modes and in UTF-32 mode (since one code unit still equals one +character). Another use is for [^] when empty classes are permitted +(PCRE2_ALLOW_EMPTY_CLASS is set). + Backtracking control verbs with optional data --------------------------------------------- @@ -545,8 +739,8 @@ immediately before the assertion. It is also possible to insert a manual callout at this point. Only assertion conditions may have callouts preceding the condition. -A condition that is the negative assertion (?!) 
is optimized to OP_FAIL in all -parts of the pattern, so this is another opcode that may appear as a condition. +A condition that is the negative assertion (?!) is optimized to OP_FAIL in all +parts of the pattern, so this is another opcode that may appear as a condition. It is treated the same as OP_FALSE. @@ -588,10 +782,10 @@ the actual string is passed, but the delimiter can be accessed as string[-1] if the application needs it. In the 8-bit library, the callout in /X(?C'abc')Y/ is compiled as the following bytes (decimal numbers represent binary values): - [OP_CALLOUT] [0] [10] [0] [1] [0] [14] [0] [5] ['] [a] [b] [c] [0] - -------- ------- -------- ------- - | | | | - ------- LINK_SIZE items ------ + [OP_CALLOUT_STR] [0] [10] [0] [1] [0] [14] [0] [5] ['] [a] [b] [c] [0] + -------- ------- -------- ------- + | | | | + ------- LINK_SIZE items ------ Opcode table checking --------------------- @@ -601,4 +795,4 @@ not a real opcode, but is used to check that tables indexed by opcode are the correct length, in order to catch updating errors. Philip Hazel -June 2015 +November 2016 diff --git a/pcre2/LICENCE b/pcre2/LICENCE index 30416d843..402fe2435 100644 --- a/pcre2/LICENCE +++ b/pcre2/LICENCE @@ -25,7 +25,7 @@ Email domain: cam.ac.uk University of Cambridge Computing Service, Cambridge, England. -Copyright (c) 1997-2015 University of Cambridge +Copyright (c) 1997-2017 University of Cambridge All rights reserved. @@ -36,7 +36,7 @@ Written by: Zoltan Herczeg Email local part: hzmester Emain domain: freemail.hu -Copyright(c) 2010-2015 Zoltan Herczeg +Copyright(c) 2010-2017 Zoltan Herczeg All rights reserved. @@ -47,7 +47,7 @@ Written by: Zoltan Herczeg Email local part: hzmester Emain domain: freemail.hu -Copyright(c) 2009-2015 Zoltan Herczeg +Copyright(c) 2009-2017 Zoltan Herczeg All rights reserved. 
diff --git a/pcre2/Makefile.am b/pcre2/Makefile.am index 56f93db2e..c7e446e4b 100644 --- a/pcre2/Makefile.am +++ b/pcre2/Makefile.am @@ -2,7 +2,10 @@ AUTOMAKE_OPTIONS = subdir-objects ACLOCAL_AMFLAGS = -I m4 -AM_CPPFLAGS = -I$(builddir)/src -I$(srcdir)/src + +## This seems to have become necessary for building in non-source directory. + +AM_CPPFLAGS="-I$(srcdir)/src" ## Specify the documentation files that are distributed. @@ -25,6 +28,8 @@ dist_html_DATA = \ doc/html/pcre2-config.html \ doc/html/pcre2.html \ doc/html/pcre2_callout_enumerate.html \ + doc/html/pcre2_code_copy.html \ + doc/html/pcre2_code_copy_with_tables.html \ doc/html/pcre2_code_free.html \ doc/html/pcre2_compile.html \ doc/html/pcre2_compile_context_copy.html \ @@ -64,6 +69,8 @@ dist_html_DATA = \ doc/html/pcre2_set_character_tables.html \ doc/html/pcre2_set_compile_recursion_guard.html \ doc/html/pcre2_set_match_limit.html \ + doc/html/pcre2_set_max_pattern_length.html \ + doc/html/pcre2_set_offset_limit.html \ doc/html/pcre2_set_newline.html \ doc/html/pcre2_set_parens_nest_limit.html \ doc/html/pcre2_set_recursion_limit.html \ @@ -104,6 +111,8 @@ dist_man_MANS = \ doc/pcre2-config.1 \ doc/pcre2.3 \ doc/pcre2_callout_enumerate.3 \ + doc/pcre2_code_copy.3 \ + doc/pcre2_code_copy_with_tables.3 \ doc/pcre2_code_free.3 \ doc/pcre2_compile.3 \ doc/pcre2_compile_context_copy.3 \ @@ -143,6 +152,8 @@ dist_man_MANS = \ doc/pcre2_set_character_tables.3 \ doc/pcre2_set_compile_recursion_guard.3 \ doc/pcre2_set_match_limit.3 \ + doc/pcre2_set_max_pattern_length.3 \ + doc/pcre2_set_offset_limit.3 \ doc/pcre2_set_newline.3 \ doc/pcre2_set_parens_nest_limit.3 \ doc/pcre2_set_recursion_limit.3 \ @@ -319,6 +330,7 @@ COMMON_SOURCES = \ src/pcre2_context.c \ src/pcre2_dfa_match.c \ src/pcre2_error.c \ + src/pcre2_find_bracket.c \ src/pcre2_internal.h \ src/pcre2_intmodedep.h \ src/pcre2_jit_compile.c \ @@ -466,7 +478,7 @@ libpcre2_posix_la_CFLAGS += $(GCOV_CFLAGS) endif # WITH_GCOV endif # WITH_PCRE2_8 -## Build 
pcre2grep if the 8-bit library is enabled +## Build pcre2grep and optional fuzzer stuff if the 8-bit library is enabled if WITH_PCRE2_8 bin_PROGRAMS += pcre2grep @@ -478,6 +490,22 @@ if WITH_GCOV pcre2grep_CFLAGS += $(GCOV_CFLAGS) pcre2grep_LDADD += $(GCOV_LIBS) endif # WITH_GCOV + +## If fuzzer support is enabled, build a non-distributed library containing the +## fuzzing function. Also build the standalone checking binary from the same +## source but using -DSTANDALONE. + +if WITH_FUZZ_SUPPORT +noinst_LIBRARIES = .libs/libpcre2-fuzzsupport.a +_libs_libpcre2_fuzzsupport_a_SOURCES = src/pcre2_fuzzsupport.c +_libs_libpcre2_fuzzsupport_a_CFLAGS = $(AM_CFLAGS) +_libs_libpcre2_fuzzsupport_a_LIBADD = + +noinst_PROGRAMS += pcre2fuzzcheck +pcre2fuzzcheck_SOURCES = src/pcre2_fuzzsupport.c +pcre2fuzzcheck_CFLAGS = -DSTANDALONE $(AM_CFLAGS) +pcre2fuzzcheck_LDADD = libpcre2-8.la +endif # WITH FUZZ_SUPPORT endif # WITH_PCRE2_8 ## -------- Testing ---------- @@ -538,17 +566,17 @@ endif # WITH_GCOV ## The main library tests. Each test is a binary plus a script that runs that ## binary in various ways. We install these test binaries in case folks find it -## helpful. +## helpful. The two .bat files are for running the tests under Windows. TESTS += RunTest -dist_noinst_SCRIPTS += RunTest - EXTRA_DIST += RunTest.bat +dist_noinst_SCRIPTS += RunTest ## When the 8-bit library is configured, pcre2grep will have been built. 
if WITH_PCRE2_8 TESTS += RunGrepTest +EXTRA_DIST += RunGrepTest.bat dist_noinst_SCRIPTS += RunGrepTest endif # WITH_PCRE2_8 @@ -565,6 +593,7 @@ EXTRA_DIST += \ testdata/greplist \ testdata/grepoutput \ testdata/grepoutput8 \ + testdata/grepoutputC \ testdata/grepoutputN \ testdata/greppatN4 \ testdata/testinput1 \ @@ -586,6 +615,10 @@ EXTRA_DIST += \ testdata/testinput17 \ testdata/testinput18 \ testdata/testinput19 \ + testdata/testinput20 \ + testdata/testinput21 \ + testdata/testinput22 \ + testdata/testinput23 \ testdata/testinputEBC \ testdata/testoutput1 \ testdata/testoutput2 \ @@ -596,9 +629,15 @@ EXTRA_DIST += \ testdata/testoutput5 \ testdata/testoutput6 \ testdata/testoutput7 \ - testdata/testoutput8-16 \ - testdata/testoutput8-32 \ - testdata/testoutput8-8 \ + testdata/testoutput8-16-2 \ + testdata/testoutput8-16-3 \ + testdata/testoutput8-16-3 \ + testdata/testoutput8-32-2 \ + testdata/testoutput8-32-3 \ + testdata/testoutput8-32-4 \ + testdata/testoutput8-8-2 \ + testdata/testoutput8-8-3 \ + testdata/testoutput8-8-4 \ testdata/testoutput9 \ testdata/testoutput10 \ testdata/testoutput11-16 \ @@ -606,13 +645,22 @@ EXTRA_DIST += \ testdata/testoutput12-16 \ testdata/testoutput12-32 \ testdata/testoutput13 \ - testdata/testoutput14 \ + testdata/testoutput14-16 \ + testdata/testoutput14-32 \ + testdata/testoutput14-8 \ testdata/testoutput15 \ testdata/testoutput16 \ testdata/testoutput17 \ testdata/testoutput18 \ testdata/testoutput19 \ + testdata/testoutput20 \ + testdata/testoutput21 \ + testdata/testoutput22-16 \ + testdata/testoutput22-32 \ + testdata/testoutput22-8 \ + testdata/testoutput23 \ testdata/testoutputEBC \ + testdata/valgrind-jit.supp \ testdata/wintestinput3 \ testdata/wintestoutput3 \ perltest.sh diff --git a/pcre2/Makefile.in b/pcre2/Makefile.in index 9c24e5a2e..b1367f8f0 100644 --- a/pcre2/Makefile.in +++ b/pcre2/Makefile.in @@ -18,6 +18,7 @@ + VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ @@ -92,9 +93,9 @@ 
PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ -TESTS = $(am__EXEEXT_3) RunTest $(am__append_29) +TESTS = $(am__EXEEXT_4) RunTest $(am__append_30) bin_PROGRAMS = $(am__EXEEXT_1) pcre2test$(EXEEXT) -noinst_PROGRAMS = $(am__EXEEXT_2) $(am__EXEEXT_3) +noinst_PROGRAMS = $(am__EXEEXT_2) $(am__EXEEXT_3) $(am__EXEEXT_4) @WITH_REBUILD_CHARTABLES_TRUE@am__append_1 = dftables @WITH_PCRE2_8_TRUE@am__append_2 = libpcre2-8.la @WITH_PCRE2_16_TRUE@am__append_3 = libpcre2-16.la @@ -110,25 +111,27 @@ noinst_PROGRAMS = $(am__EXEEXT_2) $(am__EXEEXT_3) @WITH_PCRE2_8_TRUE@am__append_13 = pcre2grep @WITH_GCOV_TRUE@@WITH_PCRE2_8_TRUE@am__append_14 = $(GCOV_CFLAGS) @WITH_GCOV_TRUE@@WITH_PCRE2_8_TRUE@am__append_15 = $(GCOV_LIBS) -@WITH_JIT_TRUE@am__append_16 = pcre2_jit_test +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@am__append_16 = pcre2fuzzcheck @WITH_JIT_TRUE@am__append_17 = pcre2_jit_test -@WITH_JIT_TRUE@@WITH_PCRE2_8_TRUE@am__append_18 = libpcre2-8.la -@WITH_JIT_TRUE@@WITH_PCRE2_16_TRUE@am__append_19 = libpcre2-16.la -@WITH_JIT_TRUE@@WITH_PCRE2_32_TRUE@am__append_20 = libpcre2-32.la -@WITH_GCOV_TRUE@@WITH_JIT_TRUE@am__append_21 = $(GCOV_CFLAGS) -@WITH_GCOV_TRUE@@WITH_JIT_TRUE@am__append_22 = $(GCOV_LIBS) -@WITH_PCRE2_8_TRUE@am__append_23 = libpcre2-8.la libpcre2-posix.la -@WITH_PCRE2_16_TRUE@am__append_24 = libpcre2-16.la -@WITH_PCRE2_32_TRUE@am__append_25 = libpcre2-32.la -@WITH_VALGRIND_TRUE@am__append_26 = $(VALGRIND_CFLAGS) -@WITH_GCOV_TRUE@am__append_27 = $(GCOV_CFLAGS) -@WITH_GCOV_TRUE@am__append_28 = $(GCOV_LIBS) -@WITH_PCRE2_8_TRUE@am__append_29 = RunGrepTest +@WITH_JIT_TRUE@am__append_18 = pcre2_jit_test +@WITH_JIT_TRUE@@WITH_PCRE2_8_TRUE@am__append_19 = libpcre2-8.la +@WITH_JIT_TRUE@@WITH_PCRE2_16_TRUE@am__append_20 = libpcre2-16.la +@WITH_JIT_TRUE@@WITH_PCRE2_32_TRUE@am__append_21 = libpcre2-32.la +@WITH_GCOV_TRUE@@WITH_JIT_TRUE@am__append_22 = $(GCOV_CFLAGS) +@WITH_GCOV_TRUE@@WITH_JIT_TRUE@am__append_23 = $(GCOV_LIBS) 
+@WITH_PCRE2_8_TRUE@am__append_24 = libpcre2-8.la libpcre2-posix.la +@WITH_PCRE2_16_TRUE@am__append_25 = libpcre2-16.la +@WITH_PCRE2_32_TRUE@am__append_26 = libpcre2-32.la +@WITH_VALGRIND_TRUE@am__append_27 = $(VALGRIND_CFLAGS) +@WITH_GCOV_TRUE@am__append_28 = $(GCOV_CFLAGS) +@WITH_GCOV_TRUE@am__append_29 = $(GCOV_LIBS) @WITH_PCRE2_8_TRUE@am__append_30 = RunGrepTest -@WITH_PCRE2_8_TRUE@am__append_31 = libpcre2-8.pc libpcre2-posix.pc -@WITH_PCRE2_16_TRUE@am__append_32 = libpcre2-16.pc -@WITH_PCRE2_32_TRUE@am__append_33 = libpcre2-32.pc -@WITH_GCOV_FALSE@am__append_34 = src/*.gcda src/*.gcno +@WITH_PCRE2_8_TRUE@am__append_31 = RunGrepTest.bat +@WITH_PCRE2_8_TRUE@am__append_32 = RunGrepTest +@WITH_PCRE2_8_TRUE@am__append_33 = libpcre2-8.pc libpcre2-posix.pc +@WITH_PCRE2_16_TRUE@am__append_34 = libpcre2-16.pc +@WITH_PCRE2_32_TRUE@am__append_35 = libpcre2-32.pc +@WITH_GCOV_FALSE@am__append_36 = src/*.gcda src/*.gcno subdir = . ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/ax_pthread.m4 \ @@ -150,6 +153,20 @@ CONFIG_HEADER = $(top_builddir)/src/config.h CONFIG_CLEAN_FILES = libpcre2-8.pc libpcre2-16.pc libpcre2-32.pc \ libpcre2-posix.pc pcre2-config src/pcre2.h CONFIG_CLEAN_VPATH_FILES = +LIBRARIES = $(noinst_LIBRARIES) +ARFLAGS = cru +AM_V_AR = $(am__v_AR_@AM_V@) +am__v_AR_ = $(am__v_AR_@AM_DEFAULT_V@) +am__v_AR_0 = @echo " AR " $@; +am__v_AR_1 = +_libs_libpcre2_fuzzsupport_a_AR = $(AR) $(ARFLAGS) +_libs_libpcre2_fuzzsupport_a_DEPENDENCIES = +am___libs_libpcre2_fuzzsupport_a_SOURCES_DIST = \ + src/pcre2_fuzzsupport.c +am__dirstamp = $(am__leading_dot)dirstamp +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@am__libs_libpcre2_fuzzsupport_a_OBJECTS = src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.$(OBJEXT) +_libs_libpcre2_fuzzsupport_a_OBJECTS = \ + $(am__libs_libpcre2_fuzzsupport_a_OBJECTS) am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed 
"s|^$$srcdirstrip/||"`;; \ @@ -186,7 +203,8 @@ LTLIBRARIES = $(lib_LTLIBRARIES) libpcre2_16_la_DEPENDENCIES = am__libpcre2_16_la_SOURCES_DIST = src/pcre2_auto_possess.c \ src/pcre2_compile.c src/pcre2_config.c src/pcre2_context.c \ - src/pcre2_dfa_match.c src/pcre2_error.c src/pcre2_internal.h \ + src/pcre2_dfa_match.c src/pcre2_error.c \ + src/pcre2_find_bracket.c src/pcre2_internal.h \ src/pcre2_intmodedep.h src/pcre2_jit_compile.c \ src/pcre2_maketables.c src/pcre2_match.c \ src/pcre2_match_data.c src/pcre2_newline.c src/pcre2_ord2utf.c \ @@ -195,13 +213,13 @@ am__libpcre2_16_la_SOURCES_DIST = src/pcre2_auto_possess.c \ src/pcre2_substitute.c src/pcre2_substring.c \ src/pcre2_tables.c src/pcre2_ucd.c src/pcre2_ucp.h \ src/pcre2_valid_utf.c src/pcre2_xclass.c -am__dirstamp = $(am__leading_dot)dirstamp am__objects_1 = src/libpcre2_16_la-pcre2_auto_possess.lo \ src/libpcre2_16_la-pcre2_compile.lo \ src/libpcre2_16_la-pcre2_config.lo \ src/libpcre2_16_la-pcre2_context.lo \ src/libpcre2_16_la-pcre2_dfa_match.lo \ src/libpcre2_16_la-pcre2_error.lo \ + src/libpcre2_16_la-pcre2_find_bracket.lo \ src/libpcre2_16_la-pcre2_jit_compile.lo \ src/libpcre2_16_la-pcre2_maketables.lo \ src/libpcre2_16_la-pcre2_match.lo \ @@ -235,7 +253,8 @@ libpcre2_16_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ libpcre2_32_la_DEPENDENCIES = am__libpcre2_32_la_SOURCES_DIST = src/pcre2_auto_possess.c \ src/pcre2_compile.c src/pcre2_config.c src/pcre2_context.c \ - src/pcre2_dfa_match.c src/pcre2_error.c src/pcre2_internal.h \ + src/pcre2_dfa_match.c src/pcre2_error.c \ + src/pcre2_find_bracket.c src/pcre2_internal.h \ src/pcre2_intmodedep.h src/pcre2_jit_compile.c \ src/pcre2_maketables.c src/pcre2_match.c \ src/pcre2_match_data.c src/pcre2_newline.c src/pcre2_ord2utf.c \ @@ -250,6 +269,7 @@ am__objects_3 = src/libpcre2_32_la-pcre2_auto_possess.lo \ src/libpcre2_32_la-pcre2_context.lo \ src/libpcre2_32_la-pcre2_dfa_match.lo \ src/libpcre2_32_la-pcre2_error.lo \ + 
src/libpcre2_32_la-pcre2_find_bracket.lo \ src/libpcre2_32_la-pcre2_jit_compile.lo \ src/libpcre2_32_la-pcre2_maketables.lo \ src/libpcre2_32_la-pcre2_match.lo \ @@ -279,7 +299,8 @@ libpcre2_32_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ libpcre2_8_la_DEPENDENCIES = am__libpcre2_8_la_SOURCES_DIST = src/pcre2_auto_possess.c \ src/pcre2_compile.c src/pcre2_config.c src/pcre2_context.c \ - src/pcre2_dfa_match.c src/pcre2_error.c src/pcre2_internal.h \ + src/pcre2_dfa_match.c src/pcre2_error.c \ + src/pcre2_find_bracket.c src/pcre2_internal.h \ src/pcre2_intmodedep.h src/pcre2_jit_compile.c \ src/pcre2_maketables.c src/pcre2_match.c \ src/pcre2_match_data.c src/pcre2_newline.c src/pcre2_ord2utf.c \ @@ -294,6 +315,7 @@ am__objects_5 = src/libpcre2_8_la-pcre2_auto_possess.lo \ src/libpcre2_8_la-pcre2_context.lo \ src/libpcre2_8_la-pcre2_dfa_match.lo \ src/libpcre2_8_la-pcre2_error.lo \ + src/libpcre2_8_la-pcre2_find_bracket.lo \ src/libpcre2_8_la-pcre2_jit_compile.lo \ src/libpcre2_8_la-pcre2_maketables.lo \ src/libpcre2_8_la-pcre2_match.lo \ @@ -331,7 +353,8 @@ libpcre2_posix_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ @WITH_PCRE2_8_TRUE@am_libpcre2_posix_la_rpath = -rpath $(libdir) @WITH_PCRE2_8_TRUE@am__EXEEXT_1 = pcre2grep$(EXEEXT) @WITH_REBUILD_CHARTABLES_TRUE@am__EXEEXT_2 = dftables$(EXEEXT) -@WITH_JIT_TRUE@am__EXEEXT_3 = pcre2_jit_test$(EXEEXT) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@am__EXEEXT_3 = pcre2fuzzcheck$(EXEEXT) +@WITH_JIT_TRUE@am__EXEEXT_4 = pcre2_jit_test$(EXEEXT) PROGRAMS = $(bin_PROGRAMS) $(noinst_PROGRAMS) am__dftables_SOURCES_DIST = src/dftables.c @WITH_REBUILD_CHARTABLES_TRUE@am_dftables_OBJECTS = \ @@ -345,13 +368,22 @@ pcre2_jit_test_OBJECTS = $(am_pcre2_jit_test_OBJECTS) am__DEPENDENCIES_1 = @WITH_GCOV_TRUE@@WITH_JIT_TRUE@am__DEPENDENCIES_2 = \ @WITH_GCOV_TRUE@@WITH_JIT_TRUE@ $(am__DEPENDENCIES_1) -@WITH_JIT_TRUE@pcre2_jit_test_DEPENDENCIES = $(am__append_18) \ -@WITH_JIT_TRUE@ $(am__append_19) $(am__append_20) \ 
+@WITH_JIT_TRUE@pcre2_jit_test_DEPENDENCIES = $(am__append_19) \ +@WITH_JIT_TRUE@ $(am__append_20) $(am__append_21) \ @WITH_JIT_TRUE@ $(am__DEPENDENCIES_2) pcre2_jit_test_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ $(pcre2_jit_test_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o \ $@ +am__pcre2fuzzcheck_SOURCES_DIST = src/pcre2_fuzzsupport.c +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@am_pcre2fuzzcheck_OBJECTS = src/pcre2fuzzcheck-pcre2_fuzzsupport.$(OBJEXT) +pcre2fuzzcheck_OBJECTS = $(am_pcre2fuzzcheck_OBJECTS) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@pcre2fuzzcheck_DEPENDENCIES = \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@ libpcre2-8.la +pcre2fuzzcheck_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(pcre2fuzzcheck_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o \ + $@ am__pcre2grep_SOURCES_DIST = src/pcre2grep.c @WITH_PCRE2_8_TRUE@am_pcre2grep_OBJECTS = \ @WITH_PCRE2_8_TRUE@ src/pcre2grep-pcre2grep.$(OBJEXT) @@ -367,8 +399,8 @@ pcre2grep_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ am_pcre2test_OBJECTS = src/pcre2test-pcre2test.$(OBJEXT) pcre2test_OBJECTS = $(am_pcre2test_OBJECTS) @WITH_GCOV_TRUE@am__DEPENDENCIES_4 = $(am__DEPENDENCIES_1) -pcre2test_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__append_23) \ - $(am__append_24) $(am__append_25) $(am__DEPENDENCIES_4) +pcre2test_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__append_24) \ + $(am__append_25) $(am__append_26) $(am__DEPENDENCIES_4) pcre2test_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(pcre2test_CFLAGS) \ $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ @@ -408,18 +440,21 @@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = -SOURCES = $(libpcre2_16_la_SOURCES) $(nodist_libpcre2_16_la_SOURCES) \ +SOURCES = $(_libs_libpcre2_fuzzsupport_a_SOURCES) \ + 
$(libpcre2_16_la_SOURCES) $(nodist_libpcre2_16_la_SOURCES) \ $(libpcre2_32_la_SOURCES) $(nodist_libpcre2_32_la_SOURCES) \ $(libpcre2_8_la_SOURCES) $(nodist_libpcre2_8_la_SOURCES) \ $(libpcre2_posix_la_SOURCES) $(dftables_SOURCES) \ - $(pcre2_jit_test_SOURCES) $(pcre2grep_SOURCES) \ - $(pcre2test_SOURCES) -DIST_SOURCES = $(am__libpcre2_16_la_SOURCES_DIST) \ + $(pcre2_jit_test_SOURCES) $(pcre2fuzzcheck_SOURCES) \ + $(pcre2grep_SOURCES) $(pcre2test_SOURCES) +DIST_SOURCES = $(am___libs_libpcre2_fuzzsupport_a_SOURCES_DIST) \ + $(am__libpcre2_16_la_SOURCES_DIST) \ $(am__libpcre2_32_la_SOURCES_DIST) \ $(am__libpcre2_8_la_SOURCES_DIST) \ $(am__libpcre2_posix_la_SOURCES_DIST) \ $(am__dftables_SOURCES_DIST) \ $(am__pcre2_jit_test_SOURCES_DIST) \ + $(am__pcre2fuzzcheck_SOURCES_DIST) \ $(am__pcre2grep_SOURCES_DIST) $(pcre2test_SOURCES) am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ @@ -805,7 +840,7 @@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = subdir-objects ACLOCAL_AMFLAGS = -I m4 -AM_CPPFLAGS = -I$(builddir)/src -I$(srcdir)/src +AM_CPPFLAGS = "-I$(srcdir)/src" dist_doc_DATA = \ AUTHORS \ COPYING \ @@ -825,6 +860,8 @@ dist_html_DATA = \ doc/html/pcre2-config.html \ doc/html/pcre2.html \ doc/html/pcre2_callout_enumerate.html \ + doc/html/pcre2_code_copy.html \ + doc/html/pcre2_code_copy_with_tables.html \ doc/html/pcre2_code_free.html \ doc/html/pcre2_compile.html \ doc/html/pcre2_compile_context_copy.html \ @@ -864,6 +901,8 @@ dist_html_DATA = \ doc/html/pcre2_set_character_tables.html \ doc/html/pcre2_set_compile_recursion_guard.html \ doc/html/pcre2_set_match_limit.html \ + doc/html/pcre2_set_max_pattern_length.html \ + doc/html/pcre2_set_offset_limit.html \ doc/html/pcre2_set_newline.html \ doc/html/pcre2_set_parens_nest_limit.html \ doc/html/pcre2_set_recursion_limit.html \ @@ -904,6 +943,8 @@ dist_man_MANS = \ doc/pcre2-config.1 \ doc/pcre2.3 \ doc/pcre2_callout_enumerate.3 \ + doc/pcre2_code_copy.3 \ + 
doc/pcre2_code_copy_with_tables.3 \ doc/pcre2_code_free.3 \ doc/pcre2_compile.3 \ doc/pcre2_compile_context_copy.3 \ @@ -943,6 +984,8 @@ dist_man_MANS = \ doc/pcre2_set_character_tables.3 \ doc/pcre2_set_compile_recursion_guard.3 \ doc/pcre2_set_match_limit.3 \ + doc/pcre2_set_max_pattern_length.3 \ + doc/pcre2_set_offset_limit.3 \ doc/pcre2_set_newline.3 \ doc/pcre2_set_parens_nest_limit.3 \ doc/pcre2_set_recursion_limit.3 \ @@ -984,7 +1027,7 @@ dist_man_MANS = \ lib_LTLIBRARIES = $(am__append_2) $(am__append_3) $(am__append_4) \ $(am__append_11) check_SCRIPTS = -dist_noinst_SCRIPTS = RunTest $(am__append_30) +dist_noinst_SCRIPTS = RunTest $(am__append_32) # Additional files to delete on 'make clean', 'make distclean', # and 'make maintainer-clean'. @@ -995,7 +1038,7 @@ CLEANFILES = src/pcre2_chartables.c testSinput test3input test3output \ test3outputA test3outputB testtry teststdout teststderr \ teststderrgrep testtemp1grep testtemp2grep testtrygrep \ testNinputgrep -DISTCLEANFILES = src/config.h.in~ config.h $(am__append_34) +DISTCLEANFILES = src/config.h.in~ config.h $(am__append_36) MAINTAINERCLEANFILES = src/pcre2.h.generic src/config.h.generic # Additional files to bundle with the distribution, over and above what @@ -1040,32 +1083,42 @@ EXTRA_DIST = m4/ax_pthread.m4 m4/pcre2_visibility.m4 \ src/sljit/sljitNativeX86_64.c \ src/sljit/sljitNativeX86_common.c src/sljit/sljitUtils.c \ src/pcre2_jit_match.c src/pcre2_jit_misc.c \ - src/pcre2_printint.c RunTest.bat testdata/grepbinary \ - testdata/grepfilelist testdata/grepinput testdata/grepinput3 \ - testdata/grepinput8 testdata/grepinputv testdata/grepinputx \ - testdata/greplist testdata/grepoutput testdata/grepoutput8 \ - testdata/grepoutputN testdata/greppatN4 testdata/testinput1 \ - testdata/testinput2 testdata/testinput3 testdata/testinput4 \ - testdata/testinput5 testdata/testinput6 testdata/testinput7 \ - testdata/testinput8 testdata/testinput9 testdata/testinput10 \ - testdata/testinput11 
testdata/testinput12 testdata/testinput13 \ - testdata/testinput14 testdata/testinput15 testdata/testinput16 \ - testdata/testinput17 testdata/testinput18 testdata/testinput19 \ + src/pcre2_printint.c RunTest.bat $(am__append_31) \ + testdata/grepbinary testdata/grepfilelist testdata/grepinput \ + testdata/grepinput3 testdata/grepinput8 testdata/grepinputv \ + testdata/grepinputx testdata/greplist testdata/grepoutput \ + testdata/grepoutput8 testdata/grepoutputC testdata/grepoutputN \ + testdata/greppatN4 testdata/testinput1 testdata/testinput2 \ + testdata/testinput3 testdata/testinput4 testdata/testinput5 \ + testdata/testinput6 testdata/testinput7 testdata/testinput8 \ + testdata/testinput9 testdata/testinput10 testdata/testinput11 \ + testdata/testinput12 testdata/testinput13 testdata/testinput14 \ + testdata/testinput15 testdata/testinput16 testdata/testinput17 \ + testdata/testinput18 testdata/testinput19 testdata/testinput20 \ + testdata/testinput21 testdata/testinput22 testdata/testinput23 \ testdata/testinputEBC testdata/testoutput1 \ testdata/testoutput2 testdata/testoutput3 \ testdata/testoutput3A testdata/testoutput3B \ testdata/testoutput4 testdata/testoutput5 testdata/testoutput6 \ - testdata/testoutput7 testdata/testoutput8-16 \ - testdata/testoutput8-32 testdata/testoutput8-8 \ + testdata/testoutput7 testdata/testoutput8-16-2 \ + testdata/testoutput8-16-3 testdata/testoutput8-16-3 \ + testdata/testoutput8-32-2 testdata/testoutput8-32-3 \ + testdata/testoutput8-32-4 testdata/testoutput8-8-2 \ + testdata/testoutput8-8-3 testdata/testoutput8-8-4 \ testdata/testoutput9 testdata/testoutput10 \ testdata/testoutput11-16 testdata/testoutput11-32 \ testdata/testoutput12-16 testdata/testoutput12-32 \ - testdata/testoutput13 testdata/testoutput14 \ + testdata/testoutput13 testdata/testoutput14-16 \ + testdata/testoutput14-32 testdata/testoutput14-8 \ testdata/testoutput15 testdata/testoutput16 \ testdata/testoutput17 testdata/testoutput18 \ - 
testdata/testoutput19 testdata/testoutputEBC \ - testdata/wintestinput3 testdata/wintestoutput3 perltest.sh \ - src/pcre2demo.c cmake/COPYING-CMAKE-SCRIPTS \ + testdata/testoutput19 testdata/testoutput20 \ + testdata/testoutput21 testdata/testoutput22-16 \ + testdata/testoutput22-32 testdata/testoutput22-8 \ + testdata/testoutput23 testdata/testoutputEBC \ + testdata/valgrind-jit.supp testdata/wintestinput3 \ + testdata/wintestoutput3 perltest.sh src/pcre2demo.c \ + cmake/COPYING-CMAKE-SCRIPTS \ cmake/FindPackageHandleStandardArgs.cmake \ cmake/FindReadline.cmake cmake/FindEditline.cmake \ CMakeLists.txt config-cmake.h.in @@ -1087,6 +1140,7 @@ COMMON_SOURCES = \ src/pcre2_context.c \ src/pcre2_dfa_match.c \ src/pcre2_error.c \ + src/pcre2_find_bracket.c \ src/pcre2_internal.h \ src/pcre2_intmodedep.h \ src/pcre2_jit_compile.c \ @@ -1153,19 +1207,26 @@ COMMON_SOURCES = \ @WITH_PCRE2_8_TRUE@pcre2grep_CFLAGS = $(AM_CFLAGS) $(am__append_14) @WITH_PCRE2_8_TRUE@pcre2grep_LDADD = $(LIBZ) $(LIBBZ2) libpcre2-8.la \ @WITH_PCRE2_8_TRUE@ $(am__append_15) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@noinst_LIBRARIES = .libs/libpcre2-fuzzsupport.a +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@_libs_libpcre2_fuzzsupport_a_SOURCES = src/pcre2_fuzzsupport.c +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@_libs_libpcre2_fuzzsupport_a_CFLAGS = $(AM_CFLAGS) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@_libs_libpcre2_fuzzsupport_a_LIBADD = +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@pcre2fuzzcheck_SOURCES = src/pcre2_fuzzsupport.c +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@pcre2fuzzcheck_CFLAGS = -DSTANDALONE $(AM_CFLAGS) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@pcre2fuzzcheck_LDADD = libpcre2-8.la @WITH_JIT_TRUE@pcre2_jit_test_SOURCES = src/pcre2_jit_test.c -@WITH_JIT_TRUE@pcre2_jit_test_CFLAGS = $(AM_CFLAGS) $(am__append_21) -@WITH_JIT_TRUE@pcre2_jit_test_LDADD = $(am__append_18) \ -@WITH_JIT_TRUE@ $(am__append_19) $(am__append_20) \ -@WITH_JIT_TRUE@ $(am__append_22) 
+@WITH_JIT_TRUE@pcre2_jit_test_CFLAGS = $(AM_CFLAGS) $(am__append_22) +@WITH_JIT_TRUE@pcre2_jit_test_LDADD = $(am__append_19) \ +@WITH_JIT_TRUE@ $(am__append_20) $(am__append_21) \ +@WITH_JIT_TRUE@ $(am__append_23) pcre2test_SOURCES = src/pcre2test.c -pcre2test_CFLAGS = $(AM_CFLAGS) $(am__append_26) $(am__append_27) -pcre2test_LDADD = $(LIBREADLINE) $(am__append_23) $(am__append_24) \ - $(am__append_25) $(am__append_28) +pcre2test_CFLAGS = $(AM_CFLAGS) $(am__append_27) $(am__append_28) +pcre2test_LDADD = $(LIBREADLINE) $(am__append_24) $(am__append_25) \ + $(am__append_26) $(am__append_29) # We have .pc files for pkg-config users. pkgconfigdir = $(libdir)/pkgconfig -pkgconfig_DATA = $(am__append_31) $(am__append_32) $(am__append_33) +pkgconfig_DATA = $(am__append_33) $(am__append_34) $(am__append_35) # gcov/lcov code coverage reporting # @@ -1256,6 +1317,25 @@ pcre2-config: $(top_builddir)/config.status $(srcdir)/pcre2-config.in src/pcre2.h: $(top_builddir)/config.status $(top_srcdir)/src/pcre2.h.in cd $(top_builddir) && $(SHELL) ./config.status $@ +clean-noinstLIBRARIES: + -test -z "$(noinst_LIBRARIES)" || rm -f $(noinst_LIBRARIES) +src/$(am__dirstamp): + @$(MKDIR_P) src + @: > src/$(am__dirstamp) +src/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) src/$(DEPDIR) + @: > src/$(DEPDIR)/$(am__dirstamp) +src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.$(OBJEXT): \ + src/$(am__dirstamp) src/$(DEPDIR)/$(am__dirstamp) +.libs/$(am__dirstamp): + @$(MKDIR_P) .libs + @: > .libs/$(am__dirstamp) + +.libs/libpcre2-fuzzsupport.a: $(_libs_libpcre2_fuzzsupport_a_OBJECTS) $(_libs_libpcre2_fuzzsupport_a_DEPENDENCIES) $(EXTRA__libs_libpcre2_fuzzsupport_a_DEPENDENCIES) .libs/$(am__dirstamp) + $(AM_V_at)-rm -f .libs/libpcre2-fuzzsupport.a + $(AM_V_AR)$(_libs_libpcre2_fuzzsupport_a_AR) .libs/libpcre2-fuzzsupport.a $(_libs_libpcre2_fuzzsupport_a_OBJECTS) $(_libs_libpcre2_fuzzsupport_a_LIBADD) + $(AM_V_at)$(RANLIB) .libs/libpcre2-fuzzsupport.a + install-libLTLIBRARIES: $(lib_LTLIBRARIES) 
@$(NORMAL_INSTALL) @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ @@ -1290,12 +1370,6 @@ clean-libLTLIBRARIES: echo rm -f $${locs}; \ rm -f $${locs}; \ } -src/$(am__dirstamp): - @$(MKDIR_P) src - @: > src/$(am__dirstamp) -src/$(DEPDIR)/$(am__dirstamp): - @$(MKDIR_P) src/$(DEPDIR) - @: > src/$(DEPDIR)/$(am__dirstamp) src/libpcre2_16_la-pcre2_auto_possess.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/libpcre2_16_la-pcre2_compile.lo: src/$(am__dirstamp) \ @@ -1308,6 +1382,8 @@ src/libpcre2_16_la-pcre2_dfa_match.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/libpcre2_16_la-pcre2_error.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_find_bracket.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) src/libpcre2_16_la-pcre2_jit_compile.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/libpcre2_16_la-pcre2_maketables.lo: src/$(am__dirstamp) \ @@ -1357,6 +1433,8 @@ src/libpcre2_32_la-pcre2_dfa_match.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/libpcre2_32_la-pcre2_error.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_find_bracket.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) src/libpcre2_32_la-pcre2_jit_compile.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/libpcre2_32_la-pcre2_maketables.lo: src/$(am__dirstamp) \ @@ -1406,6 +1484,8 @@ src/libpcre2_8_la-pcre2_dfa_match.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/libpcre2_8_la-pcre2_error.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_find_bracket.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) src/libpcre2_8_la-pcre2_jit_compile.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/libpcre2_8_la-pcre2_maketables.lo: src/$(am__dirstamp) \ @@ -1518,6 +1598,12 @@ src/pcre2_jit_test-pcre2_jit_test.$(OBJEXT): src/$(am__dirstamp) \ pcre2_jit_test$(EXEEXT): $(pcre2_jit_test_OBJECTS) 
$(pcre2_jit_test_DEPENDENCIES) $(EXTRA_pcre2_jit_test_DEPENDENCIES) @rm -f pcre2_jit_test$(EXEEXT) $(AM_V_CCLD)$(pcre2_jit_test_LINK) $(pcre2_jit_test_OBJECTS) $(pcre2_jit_test_LDADD) $(LIBS) +src/pcre2fuzzcheck-pcre2_fuzzsupport.$(OBJEXT): src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) + +pcre2fuzzcheck$(EXEEXT): $(pcre2fuzzcheck_OBJECTS) $(pcre2fuzzcheck_DEPENDENCIES) $(EXTRA_pcre2fuzzcheck_DEPENDENCIES) + @rm -f pcre2fuzzcheck$(EXEEXT) + $(AM_V_CCLD)$(pcre2fuzzcheck_LINK) $(pcre2fuzzcheck_OBJECTS) $(pcre2fuzzcheck_LDADD) $(LIBS) src/pcre2grep-pcre2grep.$(OBJEXT): src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) @@ -1574,6 +1660,7 @@ mostlyclean-compile: distclean-compile: -rm -f *.tab.c +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/dftables.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_auto_possess.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Plo@am__quote@ @@ -1582,6 +1669,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_context.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_dfa_match.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_error.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_find_bracket.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_jit_compile.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_maketables.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_match.Plo@am__quote@ @@ -1605,6 +1693,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_context.Plo@am__quote@ @AMDEP_TRUE@@am__include@ 
@am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_dfa_match.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_error.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_find_bracket.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_jit_compile.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_maketables.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_match.Plo@am__quote@ @@ -1628,6 +1717,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_context.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_dfa_match.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_error.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_find_bracket.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_jit_compile.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_maketables.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_match.Plo@am__quote@ @@ -1646,6 +1736,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_xclass.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_posix_la-pcre2posix.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2fuzzcheck-pcre2_fuzzsupport.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2grep-pcre2grep.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2test-pcre2test.Po@am__quote@ @@ -1673,6 +1764,20 @@ distclean-compile: @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< +src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.o: src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(_libs_libpcre2_fuzzsupport_a_CFLAGS) $(CFLAGS) -MT src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.o -MD -MP -MF src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Tpo -c -o src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.o `test -f 'src/pcre2_fuzzsupport.c' || echo '$(srcdir)/'`src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Tpo src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_fuzzsupport.c' object='src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(_libs_libpcre2_fuzzsupport_a_CFLAGS) $(CFLAGS) -c -o src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.o `test -f 'src/pcre2_fuzzsupport.c' || echo '$(srcdir)/'`src/pcre2_fuzzsupport.c + +src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.obj: src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(_libs_libpcre2_fuzzsupport_a_CFLAGS) $(CFLAGS) -MT src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.obj -MD -MP -MF src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Tpo -c -o src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.obj `if test -f 'src/pcre2_fuzzsupport.c'; then $(CYGPATH_W) 'src/pcre2_fuzzsupport.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_fuzzsupport.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Tpo 
src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_fuzzsupport.c' object='src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(_libs_libpcre2_fuzzsupport_a_CFLAGS) $(CFLAGS) -c -o src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.obj `if test -f 'src/pcre2_fuzzsupport.c'; then $(CYGPATH_W) 'src/pcre2_fuzzsupport.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_fuzzsupport.c'; fi` + src/libpcre2_16_la-pcre2_auto_possess.lo: src/pcre2_auto_possess.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_auto_possess.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_auto_possess.Tpo -c -o src/libpcre2_16_la-pcre2_auto_possess.lo `test -f 'src/pcre2_auto_possess.c' || echo '$(srcdir)/'`src/pcre2_auto_possess.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_auto_possess.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_auto_possess.Plo @@ -1715,6 +1820,13 @@ src/libpcre2_16_la-pcre2_error.lo: src/pcre2_error.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_error.lo `test -f 'src/pcre2_error.c' || echo '$(srcdir)/'`src/pcre2_error.c +src/libpcre2_16_la-pcre2_find_bracket.lo: src/pcre2_find_bracket.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) 
--tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_find_bracket.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_find_bracket.Tpo -c -o src/libpcre2_16_la-pcre2_find_bracket.lo `test -f 'src/pcre2_find_bracket.c' || echo '$(srcdir)/'`src/pcre2_find_bracket.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_find_bracket.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_find_bracket.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_find_bracket.c' object='src/libpcre2_16_la-pcre2_find_bracket.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_find_bracket.lo `test -f 'src/pcre2_find_bracket.c' || echo '$(srcdir)/'`src/pcre2_find_bracket.c + src/libpcre2_16_la-pcre2_jit_compile.lo: src/pcre2_jit_compile.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_jit_compile.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_jit_compile.Tpo -c -o src/libpcre2_16_la-pcre2_jit_compile.lo `test -f 'src/pcre2_jit_compile.c' || echo '$(srcdir)/'`src/pcre2_jit_compile.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_jit_compile.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_jit_compile.Plo @@ -1876,6 +1988,13 @@ src/libpcre2_32_la-pcre2_error.lo: src/pcre2_error.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_error.lo `test -f 'src/pcre2_error.c' || echo '$(srcdir)/'`src/pcre2_error.c +src/libpcre2_32_la-pcre2_find_bracket.lo: src/pcre2_find_bracket.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_find_bracket.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_find_bracket.Tpo -c -o src/libpcre2_32_la-pcre2_find_bracket.lo `test -f 'src/pcre2_find_bracket.c' || echo '$(srcdir)/'`src/pcre2_find_bracket.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_find_bracket.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_find_bracket.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_find_bracket.c' object='src/libpcre2_32_la-pcre2_find_bracket.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_find_bracket.lo `test -f 'src/pcre2_find_bracket.c' || echo '$(srcdir)/'`src/pcre2_find_bracket.c + src/libpcre2_32_la-pcre2_jit_compile.lo: src/pcre2_jit_compile.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_jit_compile.lo -MD -MP -MF 
src/$(DEPDIR)/libpcre2_32_la-pcre2_jit_compile.Tpo -c -o src/libpcre2_32_la-pcre2_jit_compile.lo `test -f 'src/pcre2_jit_compile.c' || echo '$(srcdir)/'`src/pcre2_jit_compile.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_jit_compile.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_jit_compile.Plo @@ -2037,6 +2156,13 @@ src/libpcre2_8_la-pcre2_error.lo: src/pcre2_error.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_error.lo `test -f 'src/pcre2_error.c' || echo '$(srcdir)/'`src/pcre2_error.c +src/libpcre2_8_la-pcre2_find_bracket.lo: src/pcre2_find_bracket.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_find_bracket.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_find_bracket.Tpo -c -o src/libpcre2_8_la-pcre2_find_bracket.lo `test -f 'src/pcre2_find_bracket.c' || echo '$(srcdir)/'`src/pcre2_find_bracket.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_find_bracket.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_find_bracket.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_find_bracket.c' object='src/libpcre2_8_la-pcre2_find_bracket.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o 
src/libpcre2_8_la-pcre2_find_bracket.lo `test -f 'src/pcre2_find_bracket.c' || echo '$(srcdir)/'`src/pcre2_find_bracket.c + src/libpcre2_8_la-pcre2_jit_compile.lo: src/pcre2_jit_compile.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_jit_compile.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_jit_compile.Tpo -c -o src/libpcre2_8_la-pcre2_jit_compile.lo `test -f 'src/pcre2_jit_compile.c' || echo '$(srcdir)/'`src/pcre2_jit_compile.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_jit_compile.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_jit_compile.Plo @@ -2177,6 +2303,20 @@ src/pcre2_jit_test-pcre2_jit_test.obj: src/pcre2_jit_test.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2_jit_test_CFLAGS) $(CFLAGS) -c -o src/pcre2_jit_test-pcre2_jit_test.obj `if test -f 'src/pcre2_jit_test.c'; then $(CYGPATH_W) 'src/pcre2_jit_test.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_jit_test.c'; fi` +src/pcre2fuzzcheck-pcre2_fuzzsupport.o: src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2fuzzcheck_CFLAGS) $(CFLAGS) -MT src/pcre2fuzzcheck-pcre2_fuzzsupport.o -MD -MP -MF src/$(DEPDIR)/pcre2fuzzcheck-pcre2_fuzzsupport.Tpo -c -o src/pcre2fuzzcheck-pcre2_fuzzsupport.o `test -f 'src/pcre2_fuzzsupport.c' || echo '$(srcdir)/'`src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2fuzzcheck-pcre2_fuzzsupport.Tpo src/$(DEPDIR)/pcre2fuzzcheck-pcre2_fuzzsupport.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_fuzzsupport.c' object='src/pcre2fuzzcheck-pcre2_fuzzsupport.o' libtool=no 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2fuzzcheck_CFLAGS) $(CFLAGS) -c -o src/pcre2fuzzcheck-pcre2_fuzzsupport.o `test -f 'src/pcre2_fuzzsupport.c' || echo '$(srcdir)/'`src/pcre2_fuzzsupport.c + +src/pcre2fuzzcheck-pcre2_fuzzsupport.obj: src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2fuzzcheck_CFLAGS) $(CFLAGS) -MT src/pcre2fuzzcheck-pcre2_fuzzsupport.obj -MD -MP -MF src/$(DEPDIR)/pcre2fuzzcheck-pcre2_fuzzsupport.Tpo -c -o src/pcre2fuzzcheck-pcre2_fuzzsupport.obj `if test -f 'src/pcre2_fuzzsupport.c'; then $(CYGPATH_W) 'src/pcre2_fuzzsupport.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_fuzzsupport.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2fuzzcheck-pcre2_fuzzsupport.Tpo src/$(DEPDIR)/pcre2fuzzcheck-pcre2_fuzzsupport.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_fuzzsupport.c' object='src/pcre2fuzzcheck-pcre2_fuzzsupport.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2fuzzcheck_CFLAGS) $(CFLAGS) -c -o src/pcre2fuzzcheck-pcre2_fuzzsupport.obj `if test -f 'src/pcre2_fuzzsupport.c'; then $(CYGPATH_W) 'src/pcre2_fuzzsupport.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_fuzzsupport.c'; fi` + src/pcre2grep-pcre2grep.o: src/pcre2grep.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2grep_CFLAGS) $(CFLAGS) -MT src/pcre2grep-pcre2grep.o -MD -MP -MF src/$(DEPDIR)/pcre2grep-pcre2grep.Tpo -c -o src/pcre2grep-pcre2grep.o `test -f 'src/pcre2grep.c' || echo '$(srcdir)/'`src/pcre2grep.c 
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2grep-pcre2grep.Tpo src/$(DEPDIR)/pcre2grep-pcre2grep.Po @@ -2682,7 +2822,7 @@ distdir: $(DISTFILES) ! -type d ! -perm -444 -exec $(install_sh) -c -m a+r {} {} \; \ || chmod -R a+r "$(distdir)" dist-gzip: distdir - tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz + tardir=$(distdir) && $(am__tar) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).tar.gz $(am__post_remove_distdir) dist-bzip2: distdir tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2 @@ -2707,7 +2847,7 @@ dist-shar: distdir @echo WARNING: "Support for shar distribution archives is" \ "deprecated." >&2 @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 - shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz + shar $(distdir) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).shar.gz $(am__post_remove_distdir) dist-zip: distdir -rm -f $(distdir).zip @@ -2724,7 +2864,7 @@ dist dist-all: distcheck: dist case '$(DIST_ARCHIVES)' in \ *.tar.gz*) \ - GZIP=$(GZIP_ENV) gzip -dc $(distdir).tar.gz | $(am__untar) ;;\ + eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).tar.gz | $(am__untar) ;;\ *.tar.bz2*) \ bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\ *.tar.lz*) \ @@ -2734,7 +2874,7 @@ distcheck: dist *.tar.Z*) \ uncompress -c $(distdir).tar.Z | $(am__untar) ;;\ *.shar.gz*) \ - GZIP=$(GZIP_ENV) gzip -dc $(distdir).shar.gz | unshar ;;\ + eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).shar.gz | unshar ;;\ *.zip*) \ unzip $(distdir).zip ;;\ esac @@ -2808,8 +2948,8 @@ check-am: all-am $(MAKE) $(AM_MAKEFLAGS) check-TESTS check: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) check-am -all-am: Makefile $(LTLIBRARIES) $(PROGRAMS) $(SCRIPTS) $(MANS) $(DATA) \ - $(HEADERS) +all-am: Makefile $(LIBRARIES) $(LTLIBRARIES) $(PROGRAMS) $(SCRIPTS) \ + $(MANS) $(DATA) $(HEADERS) install-binPROGRAMS: install-libLTLIBRARIES installdirs: @@ -2847,6 +2987,7 @@ clean-generic: distclean-generic: -test -z 
"$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f .libs/$(am__dirstamp) -rm -f src/$(DEPDIR)/$(am__dirstamp) -rm -f src/$(am__dirstamp) -test -z "$(DISTCLEANFILES)" || rm -f $(DISTCLEANFILES) @@ -2860,7 +3001,8 @@ maintainer-clean-generic: clean: clean-am clean-am: clean-binPROGRAMS clean-generic clean-libLTLIBRARIES \ - clean-libtool clean-local clean-noinstPROGRAMS mostlyclean-am + clean-libtool clean-local clean-noinstLIBRARIES \ + clean-noinstPROGRAMS mostlyclean-am distclean: distclean-am -rm -f $(am__CONFIG_DISTCLEAN_FILES) @@ -2945,29 +3087,30 @@ uninstall-man: uninstall-man1 uninstall-man3 .PHONY: CTAGS GTAGS TAGS all all-am am--refresh check check-TESTS \ check-am clean clean-binPROGRAMS clean-cscope clean-generic \ clean-libLTLIBRARIES clean-libtool clean-local \ - clean-noinstPROGRAMS cscope cscopelist-am ctags ctags-am dist \ - dist-all dist-bzip2 dist-gzip dist-lzip dist-shar dist-tarZ \ - dist-xz dist-zip distcheck distclean distclean-compile \ - distclean-generic distclean-hdr distclean-libtool \ - distclean-local distclean-tags distcleancheck distdir \ - distuninstallcheck dvi dvi-am html html-am info info-am \ - install install-am install-binPROGRAMS install-binSCRIPTS \ - install-data install-data-am install-dist_docDATA \ - install-dist_htmlDATA install-dvi install-dvi-am install-exec \ - install-exec-am install-html install-html-am \ - install-includeHEADERS install-info install-info-am \ - install-libLTLIBRARIES install-man install-man1 install-man3 \ - install-nodist_includeHEADERS install-pdf install-pdf-am \ - install-pkgconfigDATA install-ps install-ps-am install-strip \ - installcheck installcheck-am installdirs maintainer-clean \ - maintainer-clean-generic mostlyclean mostlyclean-compile \ - mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ - recheck tags tags-am uninstall uninstall-am \ - uninstall-binPROGRAMS 
uninstall-binSCRIPTS \ - uninstall-dist_docDATA uninstall-dist_htmlDATA \ - uninstall-includeHEADERS uninstall-libLTLIBRARIES \ - uninstall-man uninstall-man1 uninstall-man3 \ - uninstall-nodist_includeHEADERS uninstall-pkgconfigDATA + clean-noinstLIBRARIES clean-noinstPROGRAMS cscope \ + cscopelist-am ctags ctags-am dist dist-all dist-bzip2 \ + dist-gzip dist-lzip dist-shar dist-tarZ dist-xz dist-zip \ + distcheck distclean distclean-compile distclean-generic \ + distclean-hdr distclean-libtool distclean-local distclean-tags \ + distcleancheck distdir distuninstallcheck dvi dvi-am html \ + html-am info info-am install install-am install-binPROGRAMS \ + install-binSCRIPTS install-data install-data-am \ + install-dist_docDATA install-dist_htmlDATA install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-includeHEADERS install-info \ + install-info-am install-libLTLIBRARIES install-man \ + install-man1 install-man3 install-nodist_includeHEADERS \ + install-pdf install-pdf-am install-pkgconfigDATA install-ps \ + install-ps-am install-strip installcheck installcheck-am \ + installdirs maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am recheck tags tags-am \ + uninstall uninstall-am uninstall-binPROGRAMS \ + uninstall-binSCRIPTS uninstall-dist_docDATA \ + uninstall-dist_htmlDATA uninstall-includeHEADERS \ + uninstall-libLTLIBRARIES uninstall-man uninstall-man1 \ + uninstall-man3 uninstall-nodist_includeHEADERS \ + uninstall-pkgconfigDATA .PRECIOUS: Makefile diff --git a/pcre2/NEWS b/pcre2/NEWS index 84f051355..08798bb00 100644 --- a/pcre2/NEWS +++ b/pcre2/NEWS @@ -1,6 +1,105 @@ News about PCRE2 releases ------------------------- +Version 10.23 14-February-2017 +------------------------------ + +1. ChangeLog has the details of a lot of bug fixes and tidies. + +2. There has been a major re-factoring of the pcre2_compile.c file. 
Most syntax +checking is now done in the pre-pass that identifies capturing groups. This has +reduced the amount of duplication and made the code tidier. While doing this, +some minor bugs and Perl incompatibilities were fixed (see ChangeLog for +details.) + +3. Back references are now permitted in lookbehind assertions when there are +no duplicated group numbers (that is, (?| has not been used), and, if the +reference is by name, there is only one group of that name. The referenced +group must, of course, be of fixed length. + +4. \g{+<number>} (e.g. \g{+2} ) is now supported. It is a "forward back +reference" and can be useful in repetitions (compare \g{-<number>} ). Perl does +not recognize this syntax. + +5. pcre2grep now automatically expands its buffer up to a maximum set by +--max-buffer-size. + +6. The -t option (grand total) has been added to pcre2grep. + +7. A new function called pcre2_code_copy_with_tables() exists to copy a +compiled pattern along with a private copy of the character tables that it +uses. + +8. A user supplied a number of patches to upgrade pcre2grep under Windows and +tidy the code. + +9. Several updates have been made to pcre2test and test scripts (see +ChangeLog). + + +Version 10.22 29-July-2016 +-------------------------- + +1. ChangeLog has the details of a number of bug fixes. + +2. The POSIX wrapper function regcomp() did not used to support back references +and subroutine calls if called with the REG_NOSUB option. It now does. + +3. A new function, pcre2_code_copy(), is added, to make a copy of a compiled +pattern. + +4. Support for string callouts is added to pcre2grep. + +5. Added the PCRE2_NO_JIT option to pcre2_match(). + +6. The pcre2_get_error_message() function now returns with a negative error +code if the error number it is given is unknown. + +7. Several updates have been made to pcre2test and test scripts (see +ChangeLog). + + +Version 10.21 12-January-2016 +----------------------------- + +1. Many bugs have been fixed.
A large number of them were provoked only by very +strange pattern input, and were discovered by fuzzers. Some others were +discovered by code auditing. See ChangeLog for details. + +2. The Unicode tables have been updated to Unicode version 8.0.0. + +3. For Perl compatibility in EBCDIC environments, ranges such as a-z in a +class, where both values are literal letters in the same case, omit the +non-letter EBCDIC code points within the range. + +4. There have been a number of enhancements to the pcre2_substitute() function, +giving more flexibility to replacement facilities. It is now also possible to +cause the function to return the needed buffer size if the one given is too +small. + +5. The PCRE2_ALT_VERBNAMES option causes the "name" parts of special verbs such +as (*THEN:name) to be processed for backslashes and to take note of +PCRE2_EXTENDED. + +6. PCRE2_INFO_HASBACKSLASHC makes it possible for a client to find out if a +pattern uses \C, and --never-backslash-C makes it possible to compile a version +of PCRE2 in which the use of \C is always forbidden. + +7. A limit to the length of pattern that can be handled can now be set by +calling pcre2_set_max_pattern_length(). + +8. When matching an unanchored pattern, a match can be required to begin within +a given number of code units after the start of the subject by calling +pcre2_set_offset_limit(). + +9. The pcre2test program has been extended to test new facilities, and it can +now run the tests when LF on its own is not a valid newline sequence. + +10. The RunTest script has also been updated to enable more tests to be run. + +11. There have been some minor performance enhancements. + + Version 10.20 30-June-2015 -------------------------- diff --git a/pcre2/NON-AUTOTOOLS-BUILD b/pcre2/NON-AUTOTOOLS-BUILD index d8d9d2b49..e3cf8132b 100644 --- a/pcre2/NON-AUTOTOOLS-BUILD +++ b/pcre2/NON-AUTOTOOLS-BUILD @@ -97,6 +97,7 @@ can skip ahead to the CMake section.
pcre2_context.c pcre2_dfa_match.c pcre2_error.c + pcre2_find_bracket.c pcre2_jit_compile.c pcre2_maketables.c pcre2_match.c @@ -173,7 +174,11 @@ can skip ahead to the CMake section. (11) If you want to use the pcre2grep command, compile and link src/pcre2grep.c; it uses only the basic 8-bit PCRE2 library (it does not - need the pcre2posix library). + need the pcre2posix library). If you have built the PCRE2 library with JIT + support by defining SUPPORT_JIT in src/config.h, you can also define + SUPPORT_PCRE2GREP_JIT, which causes pcre2grep to make use of JIT (unless + it is run with --no-jit). If you define SUPPORT_PCRE2GREP_JIT without + defining SUPPORT_JIT, pcre2grep does not try to make use of JIT. STACK SIZE IN WINDOWS ENVIRONMENTS @@ -388,4 +393,4 @@ and executable, is in EBCDIC and native z/OS file formats and this is the recommended download site. ============================= -Last Updated: 15 June 2015 +Last Updated: 13 October 2016 diff --git a/pcre2/PrepareRelease b/pcre2/PrepareRelease index f6b138f87..114fce01d 100755 --- a/pcre2/PrepareRelease +++ b/pcre2/PrepareRelease @@ -65,13 +65,9 @@ End echo "Making pcre2.txt" for file in pcre2 pcre2api pcre2build pcre2callout pcre2compat pcre2jit \ - pcre2limits pcre2matching pcre2partial pcre2unicode ; do - -#for file in \ -# pcre2syntax \ -# pcre2precompile pcre2perform pcre2posix pcre2sample \ -# pcre2stack ; do - + pcre2limits pcre2matching pcre2partial pcre2pattern pcre2perform \ + pcre2posix pcre2sample pcre2serialize pcre2stack pcre2syntax \ + pcre2unicode ; do echo " Processing $file.3" nroff -c -man $file.3 >$file.rawtxt perl ../CleanTxt <$file.rawtxt >>pcre2.txt @@ -153,7 +149,6 @@ for file in *.3 ; do [ "$base" = "pcre2stack" ] || \ [ "$base" = "pcre2compat" ] || \ [ "$base" = "pcre2limits" ] || \ - [ "$base" = "pcre2perform" ] || \ [ "$base" = "pcre2unicode" ] ; then toc="" fi @@ -204,6 +199,7 @@ files="\ src/pcre2_context.c \ src/pcre2_dfa_match.c \ src/pcre2_error.c \ + src/pcre2_find_bracket.c 
\ src/pcre2_internal.h \ src/pcre2_intmodedep.h \ src/pcre2_jit_compile.c \ diff --git a/pcre2/README b/pcre2/README index 7367924c8..ff2264738 100644 --- a/pcre2/README +++ b/pcre2/README @@ -44,7 +44,7 @@ wrappers. The distribution does contain a set of C wrapper functions for the 8-bit library that are based on the POSIX regular expression API (see the pcre2posix -man page). These can be found in a library called libpcre2posix. Note that this +man page). These can be found in a library called libpcre2-posix. Note that this just provides a POSIX calling interface to PCRE2; the regular expressions themselves still follow Perl syntax and semantics. The POSIX API is restricted, and does not give full access to all of PCRE2's facilities. @@ -58,8 +58,8 @@ renamed or pointed at by a link. If you are using the POSIX interface to PCRE2 and there is already a POSIX regex library installed on your system, as well as worrying about the regex.h header file (as mentioned above), you must also take care when linking programs -to ensure that they link with PCRE2's libpcre2posix library. Otherwise they may -pick up the POSIX functions of the same name from the other library. +to ensure that they link with PCRE2's libpcre2-posix library. Otherwise they +may pick up the POSIX functions of the same name from the other library. One way of avoiding this confusion is to compile PCRE2 with the addition of -Dregcomp=PCRE2regcomp (and similarly for the other POSIX functions) to the @@ -168,15 +168,12 @@ library. They are also documented in the pcre2build man page. built. If you want only the 16-bit or 32-bit library, use --disable-pcre2-8 to disable building the 8-bit library. -. If you want to include support for just-in-time compiling, which can give - large performance improvements on certain platforms, add --enable-jit to the - "configure" command. This support is available only for certain hardware +. 
If you want to include support for just-in-time (JIT) compiling, which can + give large performance improvements on certain platforms, add --enable-jit to + the "configure" command. This support is available only for certain hardware architectures. If you try to enable it on an unsupported architecture, there will be a compile time error. -. When JIT support is enabled, pcre2grep automatically makes use of it, unless - you add --disable-pcre2grep-jit to the "configure" command. - . If you do not want to make use of the support for UTF-8 Unicode character strings in the 8-bit library, UTF-16 Unicode character strings in the 16-bit library, or UTF-32 Unicode character strings in the 32-bit library, you can @@ -207,19 +204,19 @@ library. They are also documented in the pcre2build man page. --enable-newline-is-crlf, --enable-newline-is-anycrlf, or --enable-newline-is-any to the "configure" command, respectively. - If you specify --enable-newline-is-cr or --enable-newline-is-crlf, some of - the standard tests will fail, because the lines in the test files end with - LF. Even if the files are edited to change the line endings, there are likely - to be some failures. With --enable-newline-is-anycrlf or - --enable-newline-is-any, many tests should succeed, but there may be some - failures. - . By default, the sequence \R in a pattern matches any Unicode line ending sequence. This is independent of the option specifying what PCRE2 considers to be the end of a line (see above). However, the caller of PCRE2 can restrict \R to match only CR, LF, or CRLF. You can make this the default by adding --enable-bsr-anycrlf to the "configure" command (bsr = "backslash R"). +. In a pattern, the escape sequence \C matches a single code unit, even in a + UTF mode. This can be dangerous because it breaks up multi-code-unit + characters. You can build PCRE2 with the use of \C permanently locked out by + adding --enable-never-backslash-C (note the upper case C) to the "configure" + command. 
When \C is allowed by the library, individual applications can lock + it out by calling pcre2_compile() with the PCRE2_NEVER_BACKSLASH_C option. + . PCRE2 has a counter that limits the depth of nesting of parentheses in a pattern. This limits the amount of system stack that a pattern uses when it is compiled. The default is 250, but you can change it by setting, for @@ -249,13 +246,13 @@ library. They are also documented in the pcre2build man page. sizes in the pcre2stack man page. . In the 8-bit library, the default maximum compiled pattern size is around - 64K. You can increase this by adding --with-link-size=3 to the "configure" - command. PCRE2 then uses three bytes instead of two for offsets to different - parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is - the same as --with-link-size=4, which (in both libraries) uses four-byte - offsets. Increasing the internal link size reduces performance in the 8-bit - and 16-bit libraries. In the 32-bit library, the link size setting is - ignored, as 4-byte offsets are always used. + 64K bytes. You can increase this by adding --with-link-size=3 to the + "configure" command. PCRE2 then uses three bytes instead of two for offsets + to different parts of the compiled pattern. In the 16-bit library, + --with-link-size=3 is the same as --with-link-size=4, which (in both + libraries) uses four-byte offsets. Increasing the internal link size reduces + performance in the 8-bit and 16-bit libraries. In the 32-bit library, the + link size setting is ignored, as 4-byte offsets are always used. . You can build PCRE2 so that its internal match() function that is called from pcre2_match() does not call itself recursively. Instead, it uses memory @@ -317,6 +314,14 @@ library. They are also documented in the pcre2build man page. running "make" to build PCRE2. There is more information about coverage reporting in the "pcre2build" documentation. +. 
When JIT support is enabled, pcre2grep automatically makes use of it, unless + you add --disable-pcre2grep-jit to the "configure" command. + +. On non-Windows systems there is support for calling external scripts during + matching in the pcre2grep command via PCRE2's callout facility with string + arguments. This support can be disabled by adding --disable-pcre2grep-callout + to the "configure" command. + . The pcre2grep program currently supports only 8-bit data files, and so requires the 8-bit PCRE2 library. It is possible to compile pcre2grep to use libz and/or libbz2, in order to read .gz and .bz2 files (respectively), by @@ -327,12 +332,23 @@ library. They are also documented in the pcre2build man page. Of course, the relevant libraries must be installed on your system. -. The default size (in bytes) of the internal buffer used by pcre2grep can be - set by, for example: +. The default starting size (in bytes) of the internal buffer used by pcre2grep + can be set by, for example: --with-pcre2grep-bufsize=51200 - The value must be a plain integer. The default is 20480. + The value must be a plain integer. The default is 20480. The amount of memory + used by pcre2grep is actually three times this number, to allow for "before" + and "after" lines. If very long lines are encountered, the buffer is + automatically enlarged, up to a fixed maximum size. + +. The default maximum size of pcre2grep's internal buffer can be set by, for + example: + + --with-pcre2grep-max-bufsize=2097152 + + The default is either 1048576 or the value of --with-pcre2grep-bufsize, + whichever is the larger. . It is possible to compile pcre2test so that it links with the libreadline or libedit libraries, by specifying, respectively, @@ -357,6 +373,22 @@ library. They are also documented in the pcre2build man page. tgetflag, or tgoto, this is the problem, and linking with the ncurses library should fix it. +. 
There is a special option called --enable-fuzz-support for use by people who + want to run fuzzing tests on PCRE2. At present this applies only to the 8-bit + library. If set, it causes an extra library called libpcre2-fuzzsupport.a to + be built, but not installed. This contains a single function called + LLVMFuzzerTestOneInput() whose arguments are a pointer to a string and the + length of the string. When called, this function tries to compile the string + as a pattern, and if that succeeds, to match it. This is done both with no + options and with some random options bits that are generated from the string. + Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to + be created. This is normally run under valgrind or used when PCRE2 is + compiled with address sanitizing enabled. It calls the fuzzing function and + outputs information about what it is doing. The input strings are specified by + arguments: if an argument starts with "=" the rest of it is a literal input + string. Otherwise, it is assumed to be a file name, and the contents of the + file are the test string. + The "configure" script builds the following files for the basic C library: . Makefile the makefile that builds the library @@ -531,7 +563,7 @@ script creates the .txt and HTML forms of the documentation from the man pages. Testing PCRE2 ------------- +------------- To test the basic PCRE2 library on a Unix-like system, run the RunTest script. There is another script called RunGrepTest that tests the pcre2grep command. @@ -724,6 +756,7 @@ The distribution should contain the files listed below. src/pcre2_context.c ) src/pcre2_dfa_match.c ) src/pcre2_error.c ) + src/pcre2_find_bracket.c ) src/pcre2_jit_compile.c ) src/pcre2_jit_match.c ) sources for the functions in the library, src/pcre2_jit_misc.c ) and some internal functions that they use @@ -744,6 +777,7 @@ The distribution should contain the files listed below. 
src/pcre2_xclass.c ) src/pcre2_printint.c debugging function that is used by pcre2test, + src/pcre2_fuzzsupport.c function for (optional) fuzzing support src/config.h.in template for config.h, when built by "configure" src/pcre2.h.in template for pcre2.h when built by "configure" @@ -801,7 +835,7 @@ The distribution should contain the files listed below. libpcre2-8.pc.in template for libpcre2-8.pc for pkg-config libpcre2-16.pc.in template for libpcre2-16.pc for pkg-config libpcre2-32.pc.in template for libpcre2-32.pc for pkg-config - libpcre2posix.pc.in template for libpcre2posix.pc for pkg-config + libpcre2-posix.pc.in template for libpcre2-posix.pc for pkg-config ltmain.sh file used to build a libtool script missing ) common stub for a few missing GNU programs while ) installing, generated by automake @@ -832,4 +866,4 @@ The distribution should contain the files listed below. Philip Hazel Email local part: ph10 Email domain: cam.ac.uk -Last updated: 24 April 2015 +Last updated: 01 November 2016 diff --git a/pcre2/RunGrepTest b/pcre2/RunGrepTest index f7db29e6c..f279bc2a1 100755 --- a/pcre2/RunGrepTest +++ b/pcre2/RunGrepTest @@ -11,7 +11,8 @@ export LC_ALL # Remove any non-default colouring and aliases that the caller may have set. -unset PCRE2GREP_COLOUR PCRE2GREP_COLOR +unset PCRE2GREP_COLOUR PCRE2GREP_COLOR PCREGREP_COLOUR PCREGREP_COLOR +unset GREP_COLOR GREP_COLORS unset cp ls mv rm # Remember the current (build) directory, set the program to be tested, and @@ -19,27 +20,37 @@ unset cp ls mv rm builddir=`pwd` pcre2grep=$builddir/pcre2grep +pcre2test=$builddir/pcre2test if [ ! -x $pcre2grep ] ; then - echo "** $pcre2grep does not exist or is not execuatble." + echo "** $pcre2grep does not exist or is not executable." + exit 1 +fi + +if [ ! -x $pcre2test ] ; then + echo "** $pcre2test does not exist or is not executable." 
exit 1 fi valgrind= while [ $# -gt 0 ] ; do case $1 in - valgrind) valgrind="valgrind -q --leak-check=no --smc-check=all";; + valgrind) valgrind="valgrind -q --leak-check=no --smc-check=all-non-file";; *) echo "RunGrepTest: Unknown argument $1"; exit 1;; esac shift done -echo " " +vjs= pcre2grep_version=`$pcre2grep -V` if [ "$valgrind" = "" ] ; then echo "Testing $pcre2grep_version" else echo "Testing $pcre2grep_version using valgrind" + $pcre2test -C jit >/dev/null + if [ $? -ne 0 ]; then + vjs="--suppressions=./testdata/valgrind-jit.supp" + fi fi # Set up a suitable "diff" command for comparison. Some systems have a diff @@ -69,14 +80,22 @@ fi # Check for the availability of UTF-8 support -./pcre2test -C unicode >/dev/null +$pcre2test -C unicode >/dev/null utf8=$? +# Check default newline convention. If it does not include LF, force LF. + +nl=`$pcre2test -C newline` +if [ "$nl" != "LF" -a "$nl" != "ANY" -a "$nl" != "ANYCRLF" ]; then + pcre2grep="$pcre2grep -N LF" + echo "Default newline setting forced to LF" +fi + # ------ Function to run and check a special pcre2grep arguments test ------- checkspecial() { - $valgrind ./pcre2grep $1 >>testtrygrep 2>&1 + $valgrind $pcre2grep $1 >>testtrygrep 2>&1 if [ $? -ne $2 ] ; then echo "** pcre2grep $1 failed - check testtrygrep" exit 1 @@ -88,253 +107,253 @@ checkspecial() echo "Testing pcre2grep main features" echo "---------------------------- Test 1 ------------------------------" >testtrygrep -(cd $srcdir; $valgrind $pcre2grep PATTERN ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep PATTERN ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 2 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep '^PATTERN' ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep '^PATTERN' ./testdata/grepinput) >>testtrygrep echo "RC=$?" 
>>testtrygrep echo "---------------------------- Test 3 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -in PATTERN ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -in PATTERN ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 4 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -ic PATTERN ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -ic PATTERN ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 5 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -in PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -in PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 6 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -inh PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -inh PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 7 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -il PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -il PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 8 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -l PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -l PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep echo "RC=$?" 
>>testtrygrep echo "---------------------------- Test 9 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -q PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -q PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 10 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -q NEVER-PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -q NEVER-PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 11 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -vn pattern ./testdata/grepinputx) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -vn pattern ./testdata/grepinputx) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 12 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -ix pattern ./testdata/grepinputx) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -ix pattern ./testdata/grepinputx) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 13 -----------------------------" >>testtrygrep echo seventeen >testtemp1grep -(cd $srcdir; $valgrind $pcre2grep -f./testdata/greplist -f $builddir/testtemp1grep ./testdata/grepinputx) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -f./testdata/greplist -f $builddir/testtemp1grep ./testdata/grepinputx) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 14 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -w pat ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -w pat ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep echo "RC=$?" 
>>testtrygrep echo "---------------------------- Test 15 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep 'abc^*' ./testdata/grepinput) 2>>testtrygrep >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep 'abc^*' ./testdata/grepinput) 2>>testtrygrep >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 16 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep abc ./testdata/grepinput ./testdata/nonexistfile) 2>>testtrygrep >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep abc ./testdata/grepinput ./testdata/nonexistfile) 2>>testtrygrep >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 17 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -M 'the\noutput' ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -M 'the\noutput' ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 18 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -Mn '(the\noutput|dog\.\n--)' ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -Mn '(the\noutput|dog\.\n--)' ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 19 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -Mix 'Pattern' ./testdata/grepinputx) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -Mix 'Pattern' ./testdata/grepinputx) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 20 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -Mixn 'complete pair\nof lines' ./testdata/grepinputx) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -Mixn 'complete pair\nof lines' ./testdata/grepinputx) >>testtrygrep echo "RC=$?" 
>>testtrygrep echo "---------------------------- Test 21 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -nA3 'four' ./testdata/grepinputx) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -nA3 'four' ./testdata/grepinputx) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 22 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -nB3 'four' ./testdata/grepinputx) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -nB3 'four' ./testdata/grepinputx) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 23 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -C3 'four' ./testdata/grepinputx) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -C3 'four' ./testdata/grepinputx) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 24 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -A9 'four' ./testdata/grepinputx) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -A9 'four' ./testdata/grepinputx) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 25 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -nB9 'four' ./testdata/grepinputx) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -nB9 'four' ./testdata/grepinputx) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 26 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -A9 -B9 'four' ./testdata/grepinputx) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -A9 -B9 'four' ./testdata/grepinputx) >>testtrygrep echo "RC=$?" 
>>testtrygrep echo "---------------------------- Test 27 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -A10 'four' ./testdata/grepinputx) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -A10 'four' ./testdata/grepinputx) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 28 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -nB10 'four' ./testdata/grepinputx) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -nB10 'four' ./testdata/grepinputx) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 29 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -C12 -B10 'four' ./testdata/grepinputx) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -C12 -B10 'four' ./testdata/grepinputx) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 30 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -inB3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -inB3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 31 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -inA3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -inA3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 32 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -L 'fox' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -L 'fox' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep echo "RC=$?" 
>>testtrygrep echo "---------------------------- Test 33 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep 'fox' ./testdata/grepnonexist) >>testtrygrep 2>&1 +(cd $srcdir; $valgrind $vjs $pcre2grep 'fox' ./testdata/grepnonexist) >>testtrygrep 2>&1 echo "RC=$?" >>testtrygrep echo "---------------------------- Test 34 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -s 'fox' ./testdata/grepnonexist) >>testtrygrep 2>&1 +(cd $srcdir; $valgrind $vjs $pcre2grep -s 'fox' ./testdata/grepnonexist) >>testtrygrep 2>&1 echo "RC=$?" >>testtrygrep echo "---------------------------- Test 35 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -L -r --include=grepinputx --include grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinputx --include grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 36 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -L -r --include=grepinput --exclude 'grepinput$' --exclude=grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinput --exclude 'grepinput$' --exclude=grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 37 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep '^(a+)*\d' ./testdata/grepinput) >>testtrygrep 2>teststderrgrep +(cd $srcdir; $valgrind $vjs $pcre2grep '^(a+)*\d' ./testdata/grepinput) >>testtrygrep 2>teststderrgrep echo "RC=$?" 
>>testtrygrep echo "======== STDERR ========" >>testtrygrep cat teststderrgrep >>testtrygrep echo "---------------------------- Test 38 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep '>\x00<' ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep '>\x00<' ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 39 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -A1 'before the binary zero' ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -A1 'before the binary zero' ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 40 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -B1 'after the binary zero' ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -B1 'after the binary zero' ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 41 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -B1 -o '\w+ the binary zero' ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -B1 -o '\w+ the binary zero' ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 42 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -B1 -onH '\w+ the binary zero' ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -B1 -onH '\w+ the binary zero' ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 43 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -on 'before|zero|after' ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -on 'before|zero|after' ./testdata/grepinput) >>testtrygrep echo "RC=$?" 
>>testtrygrep echo "---------------------------- Test 44 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -on -e before -ezero -e after ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -on -e before -ezero -e after ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 45 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -on -f ./testdata/greplist -e binary ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -on -f ./testdata/greplist -e binary ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 46 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -eabc -e '(unclosed' ./testdata/grepinput) 2>>testtrygrep >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -eabc -e '(unclosed' ./testdata/grepinput) 2>>testtrygrep >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 47 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -Fx "AB.VE +(cd $srcdir; $valgrind $vjs $pcre2grep -Fx "AB.VE elephant" ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 48 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -F "AB.VE +(cd $srcdir; $valgrind $vjs $pcre2grep -F "AB.VE elephant" ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 49 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -F -e DATA -e "AB.VE +(cd $srcdir; $valgrind $vjs $pcre2grep -F -e DATA -e "AB.VE elephant" ./testdata/grepinput) >>testtrygrep echo "RC=$?" 
>>testtrygrep echo "---------------------------- Test 50 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep "^(abc|def|ghi|jkl)" ./testdata/grepinputx) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep "^(abc|def|ghi|jkl)" ./testdata/grepinputx) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 51 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -Mv "brown\sfox" ./testdata/grepinputv) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -Mv "brown\sfox" ./testdata/grepinputv) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 52 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep --colour=always jumps ./testdata/grepinputv) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --colour=always jumps ./testdata/grepinputv) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 53 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep --file-offsets 'before|zero|after' ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --file-offsets 'before|zero|after' ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 54 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep --line-offsets 'before|zero|after' ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --line-offsets 'before|zero|after' ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 55 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -f./testdata/greplist --color=always ./testdata/grepinputx) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -f./testdata/greplist --color=always ./testdata/grepinputx) >>testtrygrep echo "RC=$?" 
>>testtrygrep echo "---------------------------- Test 56 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -c lazy ./testdata/grepinput*) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -c lazy ./testdata/grepinput*) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 57 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -c -l lazy ./testdata/grepinput*) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -c -l lazy ./testdata/grepinput*) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 58 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep --regex=PATTERN ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --regex=PATTERN ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 59 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep --regexp=PATTERN ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --regexp=PATTERN ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 60 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep --regex PATTERN ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --regex PATTERN ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 61 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep --regexp PATTERN ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --regexp PATTERN ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 62 -----------------------------" >>testtrygrep @@ -346,188 +365,237 @@ echo "---------------------------- Test 63 -----------------------------" >>test echo "RC=$?" 
>>testtrygrep echo "---------------------------- Test 64 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -o1 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o1 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 65 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -o2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 66 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -o3 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o3 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 67 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -o12 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o12 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 68 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep --only-matching=2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --only-matching=2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 69 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -vn --colour=always pattern ./testdata/grepinputx) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -vn --colour=always pattern ./testdata/grepinputx) >>testtrygrep echo "RC=$?" 
>>testtrygrep echo "---------------------------- Test 70 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 71 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -o "^01|^02|^03" ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o "^01|^02|^03" ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 72 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep --color=always "^01|^02|^03" ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --color=always "^01|^02|^03" ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 73 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -o --colour=always "^01|^02|^03" ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o --colour=always "^01|^02|^03" ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 74 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -o "^01|02|^03" ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o "^01|02|^03" ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 75 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep --color=always "^01|02|^03" ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --color=always "^01|02|^03" ./testdata/grepinput) >>testtrygrep echo "RC=$?" 
>>testtrygrep echo "---------------------------- Test 76 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -o --colour=always "^01|02|^03" ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o --colour=always "^01|02|^03" ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 77 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -o "^01|^02|03" ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o "^01|^02|03" ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 78 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep --color=always "^01|^02|03" ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --color=always "^01|^02|03" ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 79 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -o --colour=always "^01|^02|03" ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o --colour=always "^01|^02|03" ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 80 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -o "\b01|\b02" ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o "\b01|\b02" ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 81 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep --color=always "\\b01|\\b02" ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --color=always "\\b01|\\b02" ./testdata/grepinput) >>testtrygrep echo "RC=$?" 
>>testtrygrep echo "---------------------------- Test 82 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -o --colour=always "\\b01|\\b02" ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o --colour=always "\\b01|\\b02" ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 83 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep --buffer-size=100 "^a" ./testdata/grepinput3) >>testtrygrep 2>&1 +(cd $srcdir; $valgrind $vjs $pcre2grep --buffer-size=10 --max-buffer-size=100 "^a" ./testdata/grepinput3) >>testtrygrep 2>&1 echo "RC=$?" >>testtrygrep echo "---------------------------- Test 84 -----------------------------" >>testtrygrep echo testdata/grepinput3 >testtemp1grep -(cd $srcdir; $valgrind $pcre2grep --file-list ./testdata/grepfilelist --file-list $builddir/testtemp1grep "fox|complete|t7") >>testtrygrep 2>&1 +(cd $srcdir; $valgrind $vjs $pcre2grep --file-list ./testdata/grepfilelist --file-list $builddir/testtemp1grep "fox|complete|t7") >>testtrygrep 2>&1 echo "RC=$?" >>testtrygrep echo "---------------------------- Test 85 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep --file-list=./testdata/grepfilelist "dolor" ./testdata/grepinput3) >>testtrygrep 2>&1 +(cd $srcdir; $valgrind $vjs $pcre2grep --file-list=./testdata/grepfilelist "dolor" ./testdata/grepinput3) >>testtrygrep 2>&1 echo "RC=$?" >>testtrygrep echo "---------------------------- Test 86 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep "dog" ./testdata/grepbinary) >>testtrygrep 2>&1 +(cd $srcdir; $valgrind $vjs $pcre2grep "dog" ./testdata/grepbinary) >>testtrygrep 2>&1 echo "RC=$?" 
>>testtrygrep echo "---------------------------- Test 87 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep "cat" ./testdata/grepbinary) >>testtrygrep 2>&1 +(cd $srcdir; $valgrind $vjs $pcre2grep "cat" ./testdata/grepbinary) >>testtrygrep 2>&1 echo "RC=$?" >>testtrygrep echo "---------------------------- Test 88 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -v "cat" ./testdata/grepbinary) >>testtrygrep 2>&1 +(cd $srcdir; $valgrind $vjs $pcre2grep -v "cat" ./testdata/grepbinary) >>testtrygrep 2>&1 echo "RC=$?" >>testtrygrep echo "---------------------------- Test 89 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -I "dog" ./testdata/grepbinary) >>testtrygrep 2>&1 +(cd $srcdir; $valgrind $vjs $pcre2grep -I "dog" ./testdata/grepbinary) >>testtrygrep 2>&1 echo "RC=$?" >>testtrygrep echo "---------------------------- Test 90 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep --binary-files=without-match "dog" ./testdata/grepbinary) >>testtrygrep 2>&1 +(cd $srcdir; $valgrind $vjs $pcre2grep --binary-files=without-match "dog" ./testdata/grepbinary) >>testtrygrep 2>&1 echo "RC=$?" >>testtrygrep echo "---------------------------- Test 91 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -a "dog" ./testdata/grepbinary) >>testtrygrep 2>&1 +(cd $srcdir; $valgrind $vjs $pcre2grep -a "dog" ./testdata/grepbinary) >>testtrygrep 2>&1 echo "RC=$?" >>testtrygrep echo "---------------------------- Test 92 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep --binary-files=text "dog" ./testdata/grepbinary) >>testtrygrep 2>&1 +(cd $srcdir; $valgrind $vjs $pcre2grep --binary-files=text "dog" ./testdata/grepbinary) >>testtrygrep 2>&1 echo "RC=$?" 
>>testtrygrep echo "---------------------------- Test 93 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep --text "dog" ./testdata/grepbinary) >>testtrygrep 2>&1 +(cd $srcdir; $valgrind $vjs $pcre2grep --text "dog" ./testdata/grepbinary) >>testtrygrep 2>&1 echo "RC=$?" >>testtrygrep echo "---------------------------- Test 94 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -L -r --include=grepinputx --include grepinput8 'fox' ./testdata/grepinput* | sort) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinputx --include grepinput8 'fox' ./testdata/grepinput* | sort) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 95 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep --file-list ./testdata/grepfilelist --exclude grepinputv "fox|complete") >>testtrygrep 2>&1 +(cd $srcdir; $valgrind $vjs $pcre2grep --file-list ./testdata/grepfilelist --exclude grepinputv "fox|complete") >>testtrygrep 2>&1 echo "RC=$?" >>testtrygrep echo "---------------------------- Test 96 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -L -r --include-dir=testdata --exclude '^(?!grepinput)' 'fox' ./test* | sort) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include-dir=testdata --exclude '^(?!grepinput)' 'fox' ./test* | sort) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 97 -----------------------------" >>testtrygrep echo "grepinput$" >testtemp1grep echo "grepinput8" >>testtemp1grep -(cd $srcdir; $valgrind $pcre2grep -L -r --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep echo "RC=$?" 
>>testtrygrep echo "---------------------------- Test 98 -----------------------------" >>testtrygrep echo "grepinput$" >testtemp1grep echo "grepinput8" >>testtemp1grep -(cd $srcdir; $valgrind $pcre2grep -L -r --exclude=grepinput3 --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --exclude=grepinput3 --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 99 -----------------------------" >>testtrygrep echo "grepinput$" >testtemp1grep echo "grepinput8" >testtemp2grep -(cd $srcdir; $valgrind $pcre2grep -L -r --include grepinput --exclude-from $builddir/testtemp1grep --exclude-from=$builddir/testtemp2grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include grepinput --exclude-from $builddir/testtemp1grep --exclude-from=$builddir/testtemp2grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 100 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -Ho2 --only-matching=1 -o3 '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -Ho2 --only-matching=1 -o3 '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 101 ------------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -o3 -Ho2 -o12 --only-matching=1 -o3 --colour=always --om-separator='|' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o3 -Ho2 -o12 --only-matching=1 -o3 --colour=always --om-separator='|' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep echo "RC=$?" 
>>testtrygrep echo "---------------------------- Test 102 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -n "^$" ./testdata/grepinput3) >>testtrygrep 2>&1 +(cd $srcdir; $valgrind $vjs $pcre2grep -n "^$" ./testdata/grepinput3) >>testtrygrep 2>&1 echo "RC=$?" >>testtrygrep echo "---------------------------- Test 103 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep --only-matching "^$" ./testdata/grepinput3) >>testtrygrep 2>&1 +(cd $srcdir; $valgrind $vjs $pcre2grep --only-matching "^$" ./testdata/grepinput3) >>testtrygrep 2>&1 echo "RC=$?" >>testtrygrep echo "---------------------------- Test 104 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep -n --only-matching "^$" ./testdata/grepinput3) >>testtrygrep 2>&1 +(cd $srcdir; $valgrind $vjs $pcre2grep -n --only-matching "^$" ./testdata/grepinput3) >>testtrygrep 2>&1 echo "RC=$?" >>testtrygrep echo "---------------------------- Test 105 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $pcre2grep --colour=always "ipsum|" ./testdata/grepinput3) >>testtrygrep 2>&1 +(cd $srcdir; $valgrind $vjs $pcre2grep --colour=always "ipsum|" ./testdata/grepinput3) >>testtrygrep 2>&1 echo "RC=$?" >>testtrygrep echo "---------------------------- Test 106 -----------------------------" >>testtrygrep -(cd $srcdir; echo "a" | $valgrind $pcre2grep -M "|a" ) >>testtrygrep 2>&1 +(cd $srcdir; echo "a" | $valgrind $vjs $pcre2grep -M "|a" ) >>testtrygrep 2>&1 echo "RC=$?" >>testtrygrep echo "---------------------------- Test 107 -----------------------------" >>testtrygrep echo "a" >testtemp1grep echo "aaaaa" >>testtemp1grep -(cd $srcdir; $valgrind $pcre2grep --line-offsets '(?<=\Ka)' $builddir/testtemp1grep) >>testtrygrep 2>&1 +(cd $srcdir; $valgrind $vjs $pcre2grep --line-offsets '(?<=\Ka)' $builddir/testtemp1grep) >>testtrygrep 2>&1 +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 108 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -lq PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 109 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -cq lazy ./testdata/grepinput*) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 110 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --om-separator / -Mo0 -o1 -o2 'match (\d+):\n (.)\n' testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 111 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --line-offsets -M 'match (\d+):\n (.)\n' testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 112 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --file-offsets -M 'match (\d+):\n (.)\n' testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 113 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --total-count 'the' testdata/grepinput*) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 114 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -tc 'the' testdata/grepinput*) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 115 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -tlc 'the' testdata/grepinput*) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 116 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -th 'the' testdata/grepinput*) >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 117 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -tch 'the' testdata/grepinput*) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 118 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -tL 'the' testdata/grepinput*) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 119 -----------------------------" >>testtrygrep +printf "123\n456\n789\n---abc\ndef\nxyz\n---\n" >testNinputgrep +$valgrind $vjs $pcre2grep -Mo '(\n|[^-])*---' testNinputgrep >>testtrygrep echo "RC=$?" >>testtrygrep # Now compare the results. @@ -542,15 +610,15 @@ if [ $utf8 -ne 0 ] ; then echo "Testing pcre2grep UTF-8 features" echo "---------------------------- Test U1 ------------------------------" >testtrygrep - (cd $srcdir; $valgrind $pcre2grep -n -u --newline=any "^X" ./testdata/grepinput8) >>testtrygrep + (cd $srcdir; $valgrind $vjs $pcre2grep -n -u --newline=any "^X" ./testdata/grepinput8) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test U2 ------------------------------" >>testtrygrep - (cd $srcdir; $valgrind $pcre2grep -n -u -C 3 --newline=any "Match" ./testdata/grepinput8) >>testtrygrep + (cd $srcdir; $valgrind $vjs $pcre2grep -n -u -C 3 --newline=any "Match" ./testdata/grepinput8) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test U3 ------------------------------" >>testtrygrep - (cd $srcdir; $valgrind $pcre2grep --line-offsets -u --newline=any '(?<=\K\x{17f})' ./testdata/grepinput8) >>testtrygrep + (cd $srcdir; $valgrind $vjs $pcre2grep --line-offsets -u --newline=any '(?<=\K\x{17f})' ./testdata/grepinput8) >>testtrygrep echo "RC=$?" 
>>testtrygrep $cf $srcdir/testdata/grepoutput8 testtrygrep @@ -572,27 +640,38 @@ echo "Testing pcre2grep newline settings" printf "abc\rdef\r\nghi\njkl" >testNinputgrep printf "%c--------------------------- Test N1 ------------------------------\r\n" - >testtrygrep -$valgrind $pcre2grep -n -N CR "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep +$valgrind $vjs $pcre2grep -n -N CR "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep printf "%c--------------------------- Test N2 ------------------------------\r\n" - >>testtrygrep -$valgrind $pcre2grep -n --newline=crlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep +$valgrind $vjs $pcre2grep -n --newline=crlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep printf "%c--------------------------- Test N3 ------------------------------\r\n" - >>testtrygrep pattern=`printf 'def\rjkl'` -$valgrind $pcre2grep -n --newline=cr -F "$pattern" testNinputgrep >>testtrygrep +$valgrind $vjs $pcre2grep -n --newline=cr -F "$pattern" testNinputgrep >>testtrygrep printf "%c--------------------------- Test N4 ------------------------------\r\n" - >>testtrygrep -$valgrind $pcre2grep -n --newline=crlf -F -f $srcdir/testdata/greppatN4 testNinputgrep >>testtrygrep +$valgrind $vjs $pcre2grep -n --newline=crlf -F -f $srcdir/testdata/greppatN4 testNinputgrep >>testtrygrep printf "%c--------------------------- Test N5 ------------------------------\r\n" - >>testtrygrep -$valgrind $pcre2grep -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep +$valgrind $vjs $pcre2grep -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep printf "%c--------------------------- Test N6 ------------------------------\r\n" - >>testtrygrep -$valgrind $pcre2grep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep +$valgrind $vjs $pcre2grep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep $cf $srcdir/testdata/grepoutputN testtrygrep if [ $? 
!= 0 ] ; then exit 1; fi +# If pcre2grep supports script callouts, run some tests on them. + +if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'Callout scripts in patterns are supported'; then + echo "Testing pcre2grep script callouts" + $valgrind $vjs $pcre2grep '(T)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4) ($14) ($0)")()' $srcdir/testdata/grepinputv >testtrygrep + $valgrind $vjs $pcre2grep '(T)(..(.))()()()()()()()(..)(?C"/bin/echo|Arg1: [$11] [${11}]")' $srcdir/testdata/grepinputv >>testtrygrep + $cf $srcdir/testdata/grepoutputC testtrygrep + if [ $? != 0 ] ; then exit 1; fi +else + echo "Script callouts are not supported" +fi # Finally, some tests to exercise code that is not tested above, just to be # sure that it runs OK. Doing this improves the coverage statistics. The output diff --git a/pcre2/RunGrepTest.bat b/pcre2/RunGrepTest.bat new file mode 100644 index 000000000..a995d9cba --- /dev/null +++ b/pcre2/RunGrepTest.bat @@ -0,0 +1,688 @@ +@echo off + +:: Run pcre2grep tests. The assumption is that the PCRE2 tests check the library +:: itself. What we are checking here is the file handling and options that are +:: supported by pcre2grep. This script must be run in the build directory. +:: (jmh: I've only tested in the main directory, using my own builds.) + +setlocal enabledelayedexpansion + +:: Remove any non-default colouring that the caller may have set. + +set PCRE2GREP_COLOUR= +set PCRE2GREP_COLOR= +set PCREGREP_COLOUR= +set PCREGREP_COLOR= +set GREP_COLORS= +set GREP_COLOR= + +:: Remember the current (build) directory and set the program to be tested. + +set builddir="%CD%" +set pcre2grep=%builddir%\pcre2grep.exe +set pcre2test=%builddir%\pcre2test.exe + +if NOT exist %pcre2grep% ( + echo ** %pcre2grep% does not exist. + exit /b 1 +) + +if NOT exist %pcre2test% ( + echo ** %pcre2test% does not exist. 
+ exit /b 1 +) + +for /f "delims=" %%a in ('"%pcre2grep%" -V') do set pcre2grep_version=%%a +echo Testing %pcre2grep_version% + +:: Set up a suitable "diff" command for comparison. Some systems have a diff +:: that lacks a -u option. Try to deal with this; better do the test for the -b +:: option as well. Use FC if there's no diff, taking care to ignore equality. + +set cf= +set cfout= +diff -b nul nul 2>nul && set cf=diff -b +diff -u nul nul 2>nul && set cf=diff -u +diff -ub nul nul 2>nul && set cf=diff -ub +if NOT defined cf ( + set cf=fc /n + set "cfout=>testcf || (type testcf & cmd /c exit /b 1)" +) + +:: Set srcdir to the current or parent directory, whichever one contains the +:: test data. Subsequently, we run most of the pcre2grep tests in the source +:: directory so that the file names in the output are always the same. + +if NOT defined srcdir set srcdir=. +if NOT exist %srcdir%\testdata\ ( + if exist testdata\ ( + set srcdir=. + ) else if exist ..\testdata\ ( + set srcdir=.. + ) else if exist ..\..\testdata\ ( + set srcdir=..\.. + ) else ( + echo Cannot find the testdata directory + exit /b 1 + ) +) + +:: Check for the availability of UTF-8 support + +%pcre2test% -C unicode >nul +set utf8=%ERRORLEVEL% + +:: Check default newline convention. If it does not include LF, force LF. + +for /f %%a in ('"%pcre2test%" -C newline') do set nl=%%a +if NOT "%nl%" == "LF" if NOT "%nl%" == "ANY" if NOT "%nl%" == "ANYCRLF" ( + set pcre2grep=%pcre2grep% -N LF + echo Default newline setting forced to LF +) + +:: Create a simple printf via cscript/JScript (an actual printf may translate +:: LF to CRLF, which this one does not). 
+ +echo WScript.StdOut.Write(WScript.Arguments(0).replace(/\\r/g, "\r").replace(/\\n/g, "\n")) >printf.js +set printf=cscript //nologo printf.js + +:: ------ Normal tests ------ + +echo Testing pcre2grep main features + +echo ---------------------------- Test 1 ------------------------------>testtrygrep +(pushd %srcdir% & %pcre2grep% PATTERN ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 2 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% "^PATTERN" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 3 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -in PATTERN ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 4 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -ic PATTERN ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 5 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -in PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 6 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -inh PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 7 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -il PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 8 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -l PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo 
---------------------------- Test 9 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -q PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 10 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -q NEVER-PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 11 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -vn pattern ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 12 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -ix pattern ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 13 ----------------------------->>testtrygrep +echo seventeen >testtemp1grep +(pushd %srcdir% & %pcre2grep% -f./testdata/greplist -f %builddir%\testtemp1grep ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 14 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -w pat ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 15 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% "abc^*" ./testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 16 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% abc ./testdata/grepinput ./testdata/nonexistfile & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 17 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -M "the\noutput" ./testdata/grepinput & popd) 
>>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 18 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -Mn "(the\noutput|dog\.\n--)" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 19 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -Mix "Pattern" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 20 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -Mixn "complete pair\nof lines" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 21 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -nA3 "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 22 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -nB3 "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 23 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -C3 "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 24 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -A9 "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 25 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -nB9 "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 26 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -A9 -B9 "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo 
---------------------------- Test 27 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -A10 "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 28 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -nB10 "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 29 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -C12 -B10 "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 30 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -inB3 "pattern" ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 31 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -inA3 "pattern" ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 32 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -L "fox" ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 33 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% "fox" ./testdata/grepnonexist & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 34 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -s "fox" ./testdata/grepnonexist & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 35 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -L -r --include=grepinputx --include grepinput8 --exclude-dir="^\." 
"fox" ./testdata | sort & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 36 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -L -r --include=grepinput --exclude "grepinput$" --exclude=grepinput8 --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 37 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% "^(a+)*\d" ./testdata/grepinput & popd) >>testtrygrep 2>teststderrgrep +echo RC=^%ERRORLEVEL%>>testtrygrep +echo ======== STDERR ========>>testtrygrep +type teststderrgrep >>testtrygrep + +echo ---------------------------- Test 38 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% ">\x00<" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 39 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -A1 "before the binary zero" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 40 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -B1 "after the binary zero" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 41 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -B1 -o "\w+ the binary zero" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 42 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -B1 -onH "\w+ the binary zero" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 43 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -on "before|zero|after" ./testdata/grepinput & popd) >>testtrygrep +echo 
RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 44 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -on -e before -ezero -e after ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 45 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -on -f ./testdata/greplist -e binary ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 46 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -eabc -e "(unclosed" ./testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 47 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -Fx AB.VE^ + +elephant ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 48 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -F AB.VE^ + +elephant ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 49 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -F -e DATA -e AB.VE^ + +elephant ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 50 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% "^(abc|def|ghi|jkl)" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 51 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -Mv "brown\sfox" ./testdata/grepinputv & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 52 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% --colour=always jumps ./testdata/grepinputv & 
popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 53 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% --file-offsets "before|zero|after" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 54 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% --line-offsets "before|zero|after" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 55 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -f./testdata/greplist --color=always ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 56 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -c lazy ./testdata/grepinput* & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 57 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -c -l lazy ./testdata/grepinput* & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 58 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --regex=PATTERN ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 59 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --regexp=PATTERN ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 60 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --regex PATTERN ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 61 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --regexp PATTERN ./testdata/grepinput & popd) >>testtrygrep 
+echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 62 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --match-limit=1000 --no-jit -M "This is a file(.|\R)*file." ./testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 63 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --recursion-limit=1000 --no-jit -M "This is a file(.|\R)*file." ./testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 64 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -o1 "(?<=PAT)TERN (ap(pear)s)" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 65 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -o2 "(?<=PAT)TERN (ap(pear)s)" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 66 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -o3 "(?<=PAT)TERN (ap(pear)s)" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 67 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -o12 "(?<=PAT)TERN (ap(pear)s)" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 68 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% --only-matching=2 "(?<=PAT)TERN (ap(pear)s)" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 69 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -vn --colour=always pattern ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 
70 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3 & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 71 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -o "^01|^02|^03" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 72 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --color=always "^01|^02|^03" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 73 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -o --colour=always "^01|^02|^03" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 74 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -o "^01|02|^03" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 75 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --color=always "^01|02|^03" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 76 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -o --colour=always "^01|02|^03" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 77 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -o "^01|^02|03" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 78 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --color=always "^01|^02|03" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo 
---------------------------- Test 79 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -o --colour=always "^01|^02|03" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 80 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -o "\b01|\b02" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 81 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --color=always "\b01|\b02" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 82 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -o --colour=always "\b01|\b02" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 83 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --buffer-size=10 --max-buffer-size=100 "^a" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 84 ----------------------------->>testtrygrep +echo testdata/grepinput3 >testtemp1grep +(pushd %srcdir% & %pcre2grep% --file-list ./testdata/grepfilelist --file-list %builddir%\testtemp1grep "fox|complete|t7" & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 85 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --file-list=./testdata/grepfilelist "dolor" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 86 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 87 ----------------------------->>testtrygrep 
+(pushd %srcdir% & %pcre2grep% "cat" ./testdata/grepbinary & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 88 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -v "cat" ./testdata/grepbinary & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 89 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -I "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 90 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --binary-files=without-match "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 91 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -a "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 92 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --binary-files=text "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 93 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --text "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 94 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -L -r --include=grepinputx --include grepinput8 "fox" ./testdata/grepinput* | sort & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 95 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --file-list ./testdata/grepfilelist --exclude grepinputv "fox|complete" & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- 
Test 96 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -L -r --include-dir=testdata --exclude "^^(?^!grepinput)" "fox" ./test* | sort & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 97 ----------------------------->>testtrygrep +echo grepinput$>testtemp1grep +echo grepinput8>>testtemp1grep +(pushd %srcdir% & %pcre2grep% -L -r --include=grepinput --exclude-from %builddir%\testtemp1grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 98 ----------------------------->>testtrygrep +echo grepinput$>testtemp1grep +echo grepinput8>>testtemp1grep +(pushd %srcdir% & %pcre2grep% -L -r --exclude=grepinput3 --include=grepinput --exclude-from %builddir%\testtemp1grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 99 ----------------------------->>testtrygrep +echo grepinput$>testtemp1grep +echo grepinput8>testtemp2grep +(pushd %srcdir% & %pcre2grep% -L -r --include grepinput --exclude-from %builddir%\testtemp1grep --exclude-from=%builddir%\testtemp2grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 100 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -Ho2 --only-matching=1 -o3 "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 101 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -o3 -Ho2 -o12 --only-matching=1 -o3 --colour=always --om-separator="|" "(\w+) binary (\w+)(\.)?" 
./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 102 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -n "^$" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 103 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --only-matching "^$" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 104 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -n --only-matching "^$" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 105 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --colour=always "ipsum|" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 106 ----------------------------->>testtrygrep +(pushd %srcdir% & echo a| %pcre2grep% -M "|a" & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 107 ----------------------------->>testtrygrep +echo a>testtemp1grep +echo aaaaa>>testtemp1grep +(pushd %srcdir% & %pcre2grep% --line-offsets "(?<=\Ka)" %builddir%\testtemp1grep & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 108 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -lq PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 109 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -cq lazy ./testdata/grepinput* & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 110 ----------------------------->>testtrygrep +(pushd 
%srcdir% & %pcre2grep% --om-separator / -Mo0 -o1 -o2 "match (\d+):\n (.)\n" testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 111 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --line-offsets -M "match (\d+):\n (.)\n" testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 112 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --file-offsets -M "match (\d+):\n (.)\n" testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 113 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --total-count "the" testdata/grepinput* & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 114 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -tc "the" testdata/grepinput* & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 115 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -tlc "the" testdata/grepinput* & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 116 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -th "the" testdata/grepinput* & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 117 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -tch "the" testdata/grepinput* & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 118 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -tL "the" testdata/grepinput* & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 119 ----------------------------->>testtrygrep +%printf% 
"123\n456\n789\n---abc\ndef\nxyz\n---\n" >testNinputgrep +%pcre2grep% -Mo "(\n|[^-])*---" testNinputgrep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +:: Now compare the results. + +%cf% %srcdir%\testdata\grepoutput testtrygrep %cfout% +if ERRORLEVEL 1 exit /b 1 + + +:: These tests require UTF-8 support + +if %utf8% neq 0 ( + echo Testing pcre2grep UTF-8 features + + echo ---------------------------- Test U1 ------------------------------>testtrygrep + (pushd %srcdir% & %pcre2grep% -n -u --newline=any "^X" ./testdata/grepinput8 & popd) >>testtrygrep + echo RC=^%ERRORLEVEL%>>testtrygrep + + echo ---------------------------- Test U2 ------------------------------>>testtrygrep + (pushd %srcdir% & %pcre2grep% -n -u -C 3 --newline=any "Match" ./testdata/grepinput8 & popd) >>testtrygrep + echo RC=^%ERRORLEVEL%>>testtrygrep + + echo ---------------------------- Test U3 ------------------------------>>testtrygrep + (pushd %srcdir% & %pcre2grep% --line-offsets -u --newline=any "(?<=\K\x{17f})" ./testdata/grepinput8 & popd) >>testtrygrep + echo RC=^%ERRORLEVEL%>>testtrygrep + + %cf% %srcdir%\testdata\grepoutput8 testtrygrep %cfout% + if ERRORLEVEL 1 exit /b 1 + +) else ( + echo Skipping pcre2grep UTF-8 tests: no UTF-8 support in PCRE2 library +) + + +:: We go to some contortions to try to ensure that the tests for the various +:: newline settings will work in environments where the normal newline sequence +:: is not \n. Do not use exported files, whose line endings might be changed. +:: Instead, create an input file so that its contents are exactly what we want. +:: These tests are run in the build directory. 
+ +echo Testing pcre2grep newline settings +%printf% "abc\rdef\r\nghi\njkl" >testNinputgrep + +echo ---------------------------- Test N1 ------------------------------>testtrygrep +%pcre2grep% -n -N CR "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep + +echo ---------------------------- Test N2 ------------------------------>>testtrygrep +%pcre2grep% -n --newline=crlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep + +echo ---------------------------- Test N3 ------------------------------>>testtrygrep +for /f %%a in ('%printf% "def\rjkl"') do set pattern=%%a +%pcre2grep% -n --newline=cr -F "!pattern!" testNinputgrep >>testtrygrep + +echo ---------------------------- Test N4 ------------------------------>>testtrygrep +%pcre2grep% -n --newline=crlf -F -f %srcdir%/testdata/greppatN4 testNinputgrep >>testtrygrep + +echo ---------------------------- Test N5 ------------------------------>>testtrygrep +%pcre2grep% -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep + +echo ---------------------------- Test N6 ------------------------------>>testtrygrep +%pcre2grep% -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep + +%cf% %srcdir%\testdata\grepoutputN testtrygrep %cfout% +if ERRORLEVEL 1 exit /b 1 + +:: If pcre2grep supports script callouts, run some tests on them. + +%pcre2grep% --help | %pcre2grep% -q "Callout scripts in patterns are supported" +if %ERRORLEVEL% equ 0 ( + echo Testing pcre2grep script callouts + %pcre2grep% "(T)(..(.))(?C'cmd|/c echo|Arg1: [$1] [$2] [$3]|Arg2: ^$|${1}^$| ($4) ($14) ($0)')()" %srcdir%/testdata/grepinputv >testtrygrep + %pcre2grep% "(T)(..(.))()()()()()()()(..)(?C'cmd|/c echo|Arg1: [$11] [${11}]')" %srcdir%/testdata/grepinputv >>testtrygrep + %cf% %srcdir%\testdata\grepoutputC testtrygrep %cfout% + if ERRORLEVEL 1 exit /b 1 +) else ( + echo Script callouts are not supported +) + +:: Finally, some tests to exercise code that is not tested above, just to be +:: sure that it runs OK. 
Doing this improves the coverage statistics. The output +:: is not checked. + +echo Testing miscellaneous pcre2grep arguments (unchecked) +%printf% "" >testtrygrep +call :checkspecial "-xxxxx" 2 || exit /b 1 +call :checkspecial "--help" 0 || exit /b 1 +call :checkspecial "--line-buffered --colour=auto abc nul" 1 || exit /b 1 + +:: Clean up local working files +del testcf printf.js testNinputgrep teststderrgrep testtrygrep testtemp1grep testtemp2grep + +exit /b 0 + +:: ------ Function to run and check a special pcre2grep arguments test ------- + +:checkspecial + %pcre2grep% %~1 >>testtrygrep 2>&1 + if %ERRORLEVEL% neq %2 ( + echo ** pcre2grep %~1 failed - check testtrygrep + exit /b 1 + ) + exit /b 0 + +:: End diff --git a/pcre2/RunTest b/pcre2/RunTest index c4d659c76..49e099bdc 100755 --- a/pcre2/RunTest +++ b/pcre2/RunTest @@ -33,6 +33,10 @@ # For backwards compatibility, -nojit, -valgrind, -valgrind-log, and -sim may # be given without the leading "-" character. # +# When PCRE2 is compiled by clang with -fsanitize arguments, some tests need +# very much more stack than normal. In environments where the stack can be +# set at runtime, -bigstack sets a gigantic stack. 
+# # There are two special cases where only one argument is allowed: # # If the first and only argument is "ebcdic", the script runs the special @@ -49,7 +53,7 @@ title0="Test 0: Unchecked pcre2test argument tests (to improve coverage)" title1="Test 1: Main non-UTF, non-UCP functionality (compatible with Perl >= 5.10)" -title2="Test 2: API, errors, internals, and non-Perl stuff" +title2="Test 2: API, errors, internals and non-Perl stuff" title3="Test 3: Locale-specific features" title4A="Test 4: UTF" title4B=" and Unicode property support (compatible with Perl >= 5.10)" @@ -64,13 +68,17 @@ title10="Test 10: Specials for the 8-bit library with UTF-8 and UCP support" title11="Test 11: Specials for the basic 16-bit and 32-bit libraries" title12="Test 12: Specials for the 16-bit and 32-bit libraries UTF and UCP support" title13="Test 13: DFA specials for the basic 16-bit and 32-bit libraries" -title14="Test 14: Non-JIT limits and other non-JIT tests" -title15="Test 15: JIT-specific features when JIT is not available" -title16="Test 16: JIT-specific features when JIT is available" -title17="Test 17: Tests of the POSIX interface, excluding UTF/UCP" -title18="Test 18: Tests of the POSIX interface with UTF/UCP" -title19="Test 19: Serialization tests" -maxtest=18 +title14="Test 14: DFA specials for UTF and UCP support" +title15="Test 15: Non-JIT limits and other non-JIT tests" +title16="Test 16: JIT-specific features when JIT is not available" +title17="Test 17: JIT-specific features when JIT is available" +title18="Test 18: Tests of the POSIX interface, excluding UTF/UCP" +title19="Test 19: Tests of the POSIX interface with UTF/UCP" +title20="Test 20: Serialization and code copy tests" +title21="Test 21: \C tests without UTF (supported for DFA matching)" +title22="Test 22: \C tests with UTF (not supported for DFA matching)" +title23="Test 23: \C disabled test" +maxtest=23 if [ $# -eq 1 -a "$1" = "list" ]; then echo $title0 @@ -93,6 +101,10 @@ if [ $# -eq 1 -a "$1" = "list" 
]; then echo $title17 echo $title18 echo $title19 + echo $title20 + echo $title21 + echo $title22 + echo $title23 exit 0 fi @@ -151,7 +163,7 @@ checkresult() checkspecial() { - $valgrind ./pcre2test $1 >>testtry + $valgrind $vjs ./pcre2test $1 >>testtry if [ $? -ne 0 ] ; then echo "** pcre2test $1 failed - check testtry" exit 1 @@ -184,9 +196,11 @@ arg8= arg16= arg32= nojit= +bigstack= sim= skip= valgrind= +vjs= # This is in case the caller has set aliases (as I do - PH) unset cp ls mv rm @@ -214,6 +228,10 @@ do16=no do17=no do18=no do19=no +do20=no +do21=no +do22=no +do23=no while [ $# -gt 0 ] ; do case $1 in @@ -237,13 +255,18 @@ while [ $# -gt 0 ] ; do 17) do17=yes;; 18) do18=yes;; 19) do19=yes;; + 20) do20=yes;; + 21) do21=yes;; + 22) do22=yes;; + 23) do23=yes;; -8) arg8=yes;; -16) arg16=yes;; -32) arg32=yes;; + bigstack|-bigstack) bigstack=yes;; nojit|-nojit) nojit=yes;; sim|-sim) shift; sim=$1;; - valgrind|-valgrind) valgrind="valgrind --tool=memcheck -q --smc-check=all";; - valgrind-log|-valgrind-log) valgrind="valgrind --tool=memcheck --num-callers=30 --leak-check=no --error-limit=no --smc-check=all --log-file=report.%p ";; + valgrind|-valgrind) valgrind="valgrind --tool=memcheck -q --smc-check=all-non-file";; + valgrind-log|-valgrind-log) valgrind="valgrind --tool=memcheck --num-callers=30 --leak-check=no --error-limit=no --smc-check=all-non-file --log-file=report.%p ";; ~*) if expr "$1" : '~[0-9][0-9]*$' >/dev/null; then skip="$skip `expr "$1" : '~\([0-9]*\)*$'`" @@ -287,13 +310,25 @@ fi # If it is possible to set the system stack size, arrange to set a value for # test 2, which needs more than the even the Linux default when PCRE2 has been -# compiled with -fsanitize=address. +# compiled by gcc with -fsanitize=address. If "bigstack" is on the command +# line, set even bigger numbers. When the compiler is clang, sanitize options +# require an even bigger stack for test 2, and an increased stack for some of +# the other tests. 
Test 2 now has code to automatically try again with a 64M +# stack if it crashes when test2stack is "-S 16" when matching with the +# interpreter. $sim ./pcre2test -S 1 /dev/null /dev/null if [ $? -eq 0 ] ; then - test2stack="-S 16" + if [ "$bigstack" = "" ] ; then + test2stack="-S 16" + defaultstack="" + else + test2stack="-S 1024" + defaultstack="-S 64" + fi else test2stack="" + defaultstack="" fi # All of 8-bit, 16-bit, and 32-bit character strings may be supported, but only @@ -306,6 +341,11 @@ support16=$? $sim ./pcre2test -C pcre2-32 >/dev/null support32=$? +# \C may be disabled + +$sim ./pcre2test -C backslash-C >/dev/null +supportBSC=$? + # Initialize all bitsizes skipped test8=skip @@ -358,11 +398,18 @@ fi $sim ./pcre2test -C unicode >/dev/null utf=$? +# When JIT is used with valgrind, we need to set up valgrind suppressions as +# otherwise there are a lot of false positive valgrind reports when the +# the hardware supports SSE2. + jitopt= $sim ./pcre2test -C jit >/dev/null jit=$? if [ $jit -ne 0 -a "$nojit" != "yes" ] ; then jitopt=-jit + if [ "$valgrind" != "" ] ; then + vjs="--suppressions=$testdata/valgrind-jit.supp" + fi fi # If no specific tests were requested, select all. 
Those that are not @@ -372,7 +419,8 @@ if [ $do0 = no -a $do1 = no -a $do2 = no -a $do3 = no -a \ $do4 = no -a $do5 = no -a $do6 = no -a $do7 = no -a \ $do8 = no -a $do9 = no -a $do10 = no -a $do11 = no -a \ $do12 = no -a $do13 = no -a $do14 = no -a $do15 = no -a \ - $do16 = no -a $do17 = no -a $do18 = no -a $do19 = no \ + $do16 = no -a $do17 = no -a $do18 = no -a $do19 = no -a \ + $do20 = no -a $do21 = no -a $do22 = no -a $do23 = no \ ]; then do0=yes do1=yes @@ -394,6 +442,10 @@ if [ $do0 = no -a $do1 = no -a $do2 = no -a $do3 = no -a \ do17=yes do18=yes do19=yes + do20=yes + do21=yes + do22=yes + do23=yes fi # Handle any explicit skips at this stage, so that an argument list may consist @@ -438,7 +490,7 @@ for bmode in "$test8" "$test16" "$test32"; do if [ $do1 = yes ] ; then echo $title1 for opt in "" $jitopt; do - $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput1 testtry + $sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput1 testtry checkresult $? 1 "$opt" done fi @@ -448,17 +500,34 @@ for bmode in "$test8" "$test16" "$test32"; do if [ $do2 = yes ] ; then echo $title2 "(excluding UTF-$bits)" for opt in "" $jitopt; do - $sim $valgrind ./pcre2test -q $test2stack $bmode $opt $testdata/testinput2 testtry + $sim $valgrind ${opt:+$vjs} ./pcre2test -q $test2stack $bmode $opt $testdata/testinput2 testtry if [ $? = 0 ] ; then + $sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -63,-62,-2,-1,0,100,188,189,190,191 >>testtry checkresult $? 2 "$opt" else echo " " - echo "** Test 2 requires a lot of stack. If it has crashed with a" - echo "** segmentation fault, it may be that you do not have enough" - echo "** stack available by default. Please see the 'pcre2stack' man" - echo "** page for a discussion of PCRE2's stack usage." + echo "** Test 2, when run under the interpreter, requires a lot of stack." 
+ echo "** If it has crashed with a segmentation fault, it may be that you" + echo "** do not have enough stack available by default. Please see the" + echo "** 'pcre2stack' man page for a discussion of PCRE2's stack usage." + if [ "$test2stack" != "-S 16" -o "$opt" != "" ]; then + echo " " + exit 1 + fi echo " " - exit 1 + echo "** Trying again with an increased stack size." + echo " " + echo $title2 "(excluding UTF-$bits) (64M stack)" + $sim $valgrind ${opt:+$vjs} ./pcre2test -q -S 64 $bmode $opt $testdata/testinput2 testtry + if [ $? = 0 ] ; then + $sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -63,-62,-2,-1,0,100,188,189,190,191 >>testtry + checkresult $? 2 "$opt" + else + echo " " + echo "** Failed with an increased stack size. Tests abandoned." + echo " " + exit 1 + fi fi done fi @@ -508,7 +577,7 @@ for bmode in "$test8" "$test16" "$test32"; do if [ "$locale" != "" ] ; then echo $title3 "(using '$locale' locale)" for opt in "" $jitopt; do - $sim $valgrind ./pcre2test -q $bmode $opt $infile testtry + $sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $infile testtry if [ $? = 0 ] ; then case "$opt" in -jit) with=" with JIT";; @@ -545,7 +614,7 @@ for bmode in "$test8" "$test16" "$test32"; do echo " Skipped because UTF-$bits support is not available" else for opt in "" $jitopt; do - $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput4 testtry + $sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput4 testtry checkresult $? 4 "$opt" done fi @@ -557,7 +626,7 @@ for bmode in "$test8" "$test16" "$test32"; do echo " Skipped because UTF-$bits support is not available" else for opt in "" $jitopt; do - $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput5 testtry + $sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput5 testtry checkresult $? 
5 "$opt" done fi @@ -567,7 +636,7 @@ for bmode in "$test8" "$test16" "$test32"; do if [ $do6 = yes ] ; then echo $title6 - $sim $valgrind ./pcre2test -q $bmode $testdata/testinput6 testtry + $sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput6 testtry checkresult $? 6 "" fi @@ -576,28 +645,26 @@ for bmode in "$test8" "$test16" "$test32"; do if [ $utf -eq 0 ] ; then echo " Skipped because UTF-$bits support is not available" else - $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput7 testtry + $sim $valgrind ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput7 testtry checkresult $? 7 "" fi fi # Test of internal offsets and code sizes. This test is run only when there - # is UTF/UCP support and the link size is 2. The actual tests are - # mostly the same as in some of the above, but in this test we inspect some - # offsets and sizes that require a known link size. This is a doublecheck for - # the maintainer, just in case something changes unexpectely. The output from - # this test is different in 8-bit, 16-bit, and 32-bit modes, so there are - # mode-specific output files. + # is UTF/UCP support. The actual tests are mostly the same as in some of the + # above, but in this test we inspect some offsets and sizes. This is a + # doublecheck for the maintainer, just in case something changes unexpectely. + # The output from this test is different in 8-bit, 16-bit, and 32-bit modes + # and for different link sizes, so there are different output files for each + # mode and link size. if [ $do8 = yes ] ; then echo $title8 - if [ $link_size -ne 2 ] ; then - echo " Skipped because link size is not 2" - elif [ $utf -eq 0 ] ; then + if [ $utf -eq 0 ] ; then echo " Skipped because UTF-$bits support is not available" else - $sim $valgrind ./pcre2test -q $bmode $testdata/testinput8 testtry - checkresult $? 8-$bits "" + $sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput8 testtry + checkresult $? 
8-$bits-$link_size "" fi fi @@ -609,7 +676,7 @@ for bmode in "$test8" "$test16" "$test32"; do echo " Skipped when running 16/32-bit tests" else for opt in "" $jitopt; do - $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput9 testtry + $sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput9 testtry checkresult $? 9 "$opt" done fi @@ -625,7 +692,7 @@ for bmode in "$test8" "$test16" "$test32"; do echo " Skipped because UTF-$bits support is not available" else for opt in "" $jitopt; do - $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput10 testtry + $sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput10 testtry checkresult $? 10 "$opt" done fi @@ -639,7 +706,7 @@ for bmode in "$test8" "$test16" "$test32"; do echo " Skipped when running 8-bit tests" else for opt in "" $jitopt; do - $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput11 testtry + $sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput11 testtry checkresult $? 11-$bits "$opt" done fi @@ -656,7 +723,7 @@ for bmode in "$test8" "$test16" "$test32"; do echo " Skipped because UTF-$bits support is not available" else for opt in "" $jitopt; do - $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput12 testtry + $sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput12 testtry checkresult $? 12-$bits "$opt" done fi @@ -669,75 +736,129 @@ for bmode in "$test8" "$test16" "$test32"; do if [ "$bits" = "8" ] ; then echo " Skipped when running 8-bit tests" else - $sim $valgrind ./pcre2test -q $bmode $testdata/testinput13 testtry + $sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput13 testtry checkresult $? 13 "" fi fi + # Tests for DFA UTF and UCP features. Output is different for the different widths. 
+ + if [ $do14 = yes ] ; then + echo $title14 + if [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + $sim $valgrind ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput14 testtry + checkresult $? 14-$bits "" + fi + fi + # Test non-JIT match and recursion limits - if [ $do14 = yes ] ; then - echo $title14 - $sim $valgrind ./pcre2test -q $bmode $testdata/testinput14 testtry - checkresult $? 14 "" + if [ $do15 = yes ] ; then + echo $title15 + $sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput15 testtry + checkresult $? 15 "" fi # Test JIT-specific features when JIT is not available - if [ $do15 = yes ] ; then - echo $title15 + if [ $do16 = yes ] ; then + echo $title16 if [ $jit -ne 0 ] ; then echo " Skipped because JIT is available" else - $sim $valgrind ./pcre2test -q $bmode $testdata/testinput15 testtry - checkresult $? 15 "" + $sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput16 testtry + checkresult $? 16 "" fi fi # Test JIT-specific features when JIT is available - if [ $do16 = yes ] ; then - echo $title16 + if [ $do17 = yes ] ; then + echo $title17 if [ $jit -eq 0 -o "$nojit" = "yes" ] ; then echo " Skipped because JIT is not available or nojit was specified" else - $sim $valgrind ./pcre2test -q $bmode $testdata/testinput16 testtry - checkresult $? 16 "" + $sim $valgrind $vjs ./pcre2test -q $defaultstack $bmode $testdata/testinput17 testtry + checkresult $? 17 "" fi fi # Tests for the POSIX interface without UTF/UCP (8-bit only) - if [ $do17 = yes ] ; then - echo $title17 + if [ $do18 = yes ] ; then + echo $title18 if [ "$bits" = "16" -o "$bits" = "32" ] ; then echo " Skipped when running 16/32-bit tests" else - $sim $valgrind ./pcre2test -q $bmode $testdata/testinput17 testtry - checkresult $? 17 "" + $sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput18 testtry + checkresult $? 
18 "" fi fi # Tests for the POSIX interface with UTF/UCP (8-bit only) - if [ $do18 = yes ] ; then - echo $title18 + if [ $do19 = yes ] ; then + echo $title19 if [ "$bits" = "16" -o "$bits" = "32" ] ; then echo " Skipped when running 16/32-bit tests" elif [ $utf -eq 0 ] ; then echo " Skipped because UTF-$bits support is not available" else - $sim $valgrind ./pcre2test -q $bmode $testdata/testinput18 testtry - checkresult $? 18 "" + $sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput19 testtry + checkresult $? 19 "" fi fi # Serialization tests - if [ $do19 = yes ] ; then - echo $title19 - $sim $valgrind ./pcre2test -q $bmode $testdata/testinput19 testtry - checkresult $? 19 "" + if [ $do20 = yes ] ; then + echo $title20 + $sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput20 testtry + checkresult $? 20 "" + fi + + # \C tests without UTF - DFA matching is supported + + if [ "$do21" = yes ] ; then + echo $title21 + if [ $supportBSC -eq 0 ] ; then + echo " Skipped because \C is disabled" + else + for opt in "" $jitopt -dfa; do + $sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput21 testtry + checkresult $? 21 "$opt" + done + fi + fi + + # \C tests with UTF - DFA matching is not supported for \C in UTF mode + + if [ "$do22" = yes ] ; then + echo $title22 + if [ $supportBSC -eq 0 ] ; then + echo " Skipped because \C is disabled" + elif [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + for opt in "" $jitopt; do + $sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput22 testtry + checkresult $? 22-$bits "$opt" + done + fi + fi + + # Test when \C is disabled + + if [ "$do23" = yes ] ; then + echo $title23 + if [ $supportBSC -ne 0 ] ; then + echo " Skipped because \C is not disabled" + else + $sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput23 testtry + checkresult $? 
23 "" + fi fi # End of loop for 8/16/32-bit tests diff --git a/pcre2/RunTest.bat b/pcre2/RunTest.bat index 45bdfcbcb..b13146349 100644 --- a/pcre2/RunTest.bat +++ b/pcre2/RunTest.bat @@ -13,17 +13,19 @@ @rem line. Added argument validation and added error reporting. @rem @rem Sheri Pierce added logic to skip feature dependent tests -@rem tests 4 5 9 15 and 18 require utf support -@rem tests 6 7 10 16 and 19 require ucp support -@rem 11 requires ucp and link size 2 -@rem 12 requires presence of jit support -@rem 13 requires absence of jit support +@rem tests 4 5 7 10 12 14 19 and 22 require Unicode support +@rem 8 requires Unicode and link size 2 +@rem 16 requires absence of jit support +@rem 17 requires presence of jit support @rem Sheri P also added override tests for study and jit testing @rem Zoltan Herczeg added libpcre16 support @rem Zoltan Herczeg added libpcre32 support @rem ------------------------------------------------------------------- @rem @rem The file was converted for PCRE2 by PH, February 2015. +@rem Updated for new test 14 (moving others up a number), August 2015. +@rem Tidied and updated for new tests 21, 22, 23 by PH, October 2015. +@rem PH added missing "set type" for test 22, April 2016. setlocal enabledelayedexpansion @@ -64,6 +66,8 @@ set support32=%ERRORLEVEL% set unicode=%ERRORLEVEL% %pcre2test% -C jit >NUL set jit=%ERRORLEVEL% +%pcre2test% -C backslash-C >NUL +set supportBSC=%ERRORLEVEL% if %support8% EQU 1 ( if not exist testout8 md testout8 @@ -99,18 +103,22 @@ set do16=no set do17=no set do18=no set do19=no +set do20=no +set do21=no +set do22=no +set do23=no set all=yes for %%a in (%*) do ( set valid=no - for %%v in (1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19) do if %%v == %%a set valid=yes + for %%v in (1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) do if %%v == %%a set valid=yes if "!valid!" == "yes" ( set do%%a=yes set all=no ) else ( echo Invalid test number - %%a! echo Usage %0 [ test_number ] ... 
- echo Where test_number is one or more optional test numbers 1 through 19, default is all tests. + echo Where test_number is one or more optional test numbers 1 through 23, default is all tests. exit /b 1 ) ) @@ -136,6 +144,10 @@ if "%all%" == "yes" ( set do17=yes set do18=yes set do19=yes + set do20=yes + set do21=yes + set do22=yes + set do23=yes ) @echo RunTest.bat's pcre2test output is written to newly created subfolders @@ -183,6 +195,10 @@ if "%do16%" == "yes" call :do16 if "%do17%" == "yes" call :do17 if "%do18%" == "yes" call :do18 if "%do19%" == "yes" call :do19 +if "%do20%" == "yes" call :do20 +if "%do21%" == "yes" call :do21 +if "%do22%" == "yes" call :do22 +if "%do23%" == "yes" call :do23 :modeSkip if "%mode%" == "" ( set mode=-16 @@ -227,11 +243,16 @@ if [%3] == [] ( exit /b 1 ) +if %1 == 8 ( + set outnum=8-%bits%-%link_size% +) else ( + set outnum=%1 +) set testinput=testinput%1 -set testoutput=testoutput%1 +set testoutput=testoutput%outnum% if exist %srcdir%\testdata\win%testinput% ( set testinput=wintestinput%1 - set testoutput=wintestoutput%1 + set testoutput=wintestoutput%outnum% ) echo Test %1: %3 @@ -241,18 +262,23 @@ if errorlevel 1 ( echo. 
%pcre2test% %mode% %4 %5 %6 %7 %8 %9 %srcdir%\testdata\%testinput% ^>%2%bits%\%testoutput% set failed="yes" goto :eof +) else if [%1]==[2] ( + %pcre2test% %mode% %4 %5 %6 %7 %8 %9 -error -63,-62,-2,-1,0,100,188,189,190,191 >>%2%bits%\%testoutput% ) set type= -if [%1]==[8] ( - set type=-%bits% -) if [%1]==[11] ( set type=-%bits% ) if [%1]==[12] ( set type=-%bits% ) +if [%1]==[14] ( + set type=-%bits% +) +if [%1]==[22] ( + set type=-%bits% +) fc /n %srcdir%\testdata\%testoutput%%type% %2%bits%\%testoutput% >NUL @@ -316,7 +342,7 @@ if %unicode% EQU 0 ( goto :eof :do6 - call :runsub 6 testout "DFA matching main non-UTF, non-UCP functionality" -q -dfa + call :runsub 6 testout "DFA matching main non-UTF, non-UCP functionality" -q goto :eof :do7 @@ -324,7 +350,7 @@ if %unicode% EQU 0 ( echo Test 7 Skipped due to absence of Unicode support. goto :eof ) - call :runsub 7 testout "DFA matching with UTF-%bits% and Unicode property support" -q -dfa + call :runsub 7 testout "DFA matching with UTF-%bits% and Unicode property support" -q goto :eof :do8 @@ -388,39 +414,35 @@ if %bits% EQU 8 ( echo Test 13 Skipped when running 8-bit tests. goto :eof ) - call :runsub 13 testout "DFA specials for the basic 16/32-bit library" -q -dfa + call :runsub 13 testout "DFA specials for the basic 16/32-bit library" -q goto :eof :do14 -call :runsub 14 testout "Non-JIT limits and other non_JIT tests" -q -goto :eof - -:do15 -if %jit% EQU 1 ( - echo Test 15 Skipped due to presence of JIT support. +if %unicode% EQU 0 ( + echo Test 14 Skipped due to absence of Unicode support. goto :eof ) - call :runsub 15 testout "JIT-specific features when JIT is not available" -q + call :runsub 14 testout "DFA specials for UTF and UCP support" -q + goto :eof + +:do15 +call :runsub 15 testout "Non-JIT limits and other non_JIT tests" -q goto :eof :do16 -if %jit% EQU 0 ( - echo Test 16 Skipped due to absence of JIT support. +if %jit% EQU 1 ( + echo Test 16 Skipped due to presence of JIT support. 
goto :eof ) - call :runsub 16 testout "JIT-specific features when JIT is available" -q + call :runsub 16 testout "JIT-specific features when JIT is not available" -q goto :eof :do17 -if %bits% EQU 16 ( - echo Test 17 Skipped when running 16-bit tests. +if %jit% EQU 0 ( + echo Test 17 Skipped due to absence of JIT support. goto :eof ) -if %bits% EQU 32 ( - echo Test 17 Skipped when running 32-bit tests. - goto :eof -) - call :runsub 17 testout "POSIX interface, excluding UTF-8 and UCP" -q + call :runsub 17 testout "JIT-specific features when JIT is available" -q goto :eof :do18 @@ -432,11 +454,58 @@ if %bits% EQU 32 ( echo Test 18 Skipped when running 32-bit tests. goto :eof ) - call :runsub 1 testout "POSIX interface with UTF-8 and UCP" -q + call :runsub 18 testout "POSIX interface, excluding UTF-8 and UCP" -q goto :eof :do19 -call :runsub 1 testout "Serialization tests" -q +if %bits% EQU 16 ( + echo Test 19 Skipped when running 16-bit tests. + goto :eof +) +if %bits% EQU 32 ( + echo Test 19 Skipped when running 32-bit tests. + goto :eof +) +if %unicode% EQU 0 ( + echo Test 19 Skipped due to absence of Unicode support. + goto :eof +) + call :runsub 19 testout "POSIX interface with UTF-8 and UCP" -q +goto :eof + +:do20 +call :runsub 20 testout "Serialization tests" -q +goto :eof + +:do21 +if %supportBSC% EQU 0 ( + echo Test 21 Skipped due to absence of backslash-C support. + goto :eof +) + call :runsub 21 testout "Backslash-C tests without UTF" -q + call :runsub 21 testout "Backslash-C tests without UTF (DFA)" -q -dfa + if %jit% EQU 1 call :runsub 21 testoutjit "Test with JIT Override" -q -jit +goto :eof + +:do22 +if %supportBSC% EQU 0 ( + echo Test 22 Skipped due to absence of backslash-C support. + goto :eof +) +if %unicode% EQU 0 ( + echo Test 22 Skipped due to absence of Unicode support. 
+ goto :eof +) + call :runsub 22 testout "Backslash-C tests with UTF" -q + if %jit% EQU 1 call :runsub 22 testoutjit "Test with JIT Override" -q -jit +goto :eof + +:do23 +if %supportBSC% EQU 1 ( + echo Test 23 Skipped due to presence of backslash-C support. + goto :eof +) + call :runsub 23 testout "Backslash-C disabled test" -q goto :eof :conferror diff --git a/pcre2/aclocal.m4 b/pcre2/aclocal.m4 index d1a41f717..d7204d97d 100644 --- a/pcre2/aclocal.m4 +++ b/pcre2/aclocal.m4 @@ -20,32 +20,63 @@ You have another version of autoconf. It may work, but is not guaranteed to. If you have problems, you may need to regenerate the build system entirely. To do so, use the procedure documented by the package, typically 'autoreconf'.])]) -# pkg.m4 - Macros to locate and utilise pkg-config. -*- Autoconf -*- -# serial 1 (pkg-config-0.24) -# -# Copyright © 2004 Scott James Remnant . -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -# -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. +dnl pkg.m4 - Macros to locate and utilise pkg-config. 
-*- Autoconf -*- +dnl serial 11 (pkg-config-0.29.1) +dnl +dnl Copyright © 2004 Scott James Remnant . +dnl Copyright © 2012-2015 Dan Nicholson +dnl +dnl This program is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU General Public License as published by +dnl the Free Software Foundation; either version 2 of the License, or +dnl (at your option) any later version. +dnl +dnl This program is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl General Public License for more details. +dnl +dnl You should have received a copy of the GNU General Public License +dnl along with this program; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +dnl 02111-1307, USA. +dnl +dnl As a special exception to the GNU General Public License, if you +dnl distribute this file as part of a program that contains a +dnl configuration script generated by Autoconf, you may include it under +dnl the same distribution terms that you use for the rest of that +dnl program. -# PKG_PROG_PKG_CONFIG([MIN-VERSION]) -# ---------------------------------- +dnl PKG_PREREQ(MIN-VERSION) +dnl ----------------------- +dnl Since: 0.29 +dnl +dnl Verify that the version of the pkg-config macros are at least +dnl MIN-VERSION. Unlike PKG_PROG_PKG_CONFIG, which checks the user's +dnl installed version of pkg-config, this checks the developer's version +dnl of pkg.m4 when generating configure. +dnl +dnl To ensure that this macro is defined, also add: +dnl m4_ifndef([PKG_PREREQ], +dnl [m4_fatal([must install pkg-config 0.29 or later before running autoconf/autogen])]) +dnl +dnl See the "Since" comment for each macro you use to see what version +dnl of the macros you require. 
+m4_defun([PKG_PREREQ], +[m4_define([PKG_MACROS_VERSION], [0.29.1]) +m4_if(m4_version_compare(PKG_MACROS_VERSION, [$1]), -1, + [m4_fatal([pkg.m4 version $1 or higher is required but ]PKG_MACROS_VERSION[ found])]) +])dnl PKG_PREREQ + +dnl PKG_PROG_PKG_CONFIG([MIN-VERSION]) +dnl ---------------------------------- +dnl Since: 0.16 +dnl +dnl Search for the pkg-config tool and set the PKG_CONFIG variable to +dnl first found in the path. Checks that the version of pkg-config found +dnl is at least MIN-VERSION. If MIN-VERSION is not specified, 0.9.0 is +dnl used since that's the first version where most current features of +dnl pkg-config existed. AC_DEFUN([PKG_PROG_PKG_CONFIG], [m4_pattern_forbid([^_?PKG_[A-Z_]+$]) m4_pattern_allow([^PKG_CONFIG(_(PATH|LIBDIR|SYSROOT_DIR|ALLOW_SYSTEM_(CFLAGS|LIBS)))?$]) @@ -67,18 +98,19 @@ if test -n "$PKG_CONFIG"; then PKG_CONFIG="" fi fi[]dnl -])# PKG_PROG_PKG_CONFIG +])dnl PKG_PROG_PKG_CONFIG -# PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) -# -# Check to see whether a particular set of modules exists. Similar -# to PKG_CHECK_MODULES(), but does not set variables or print errors. -# -# Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG]) -# only at the first occurence in configure.ac, so if the first place -# it's called might be skipped (such as if it is within an "if", you -# have to call PKG_CHECK_EXISTS manually -# -------------------------------------------------------------- +dnl PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) +dnl ------------------------------------------------------------------- +dnl Since: 0.18 +dnl +dnl Check to see whether a particular set of modules exists. Similar to +dnl PKG_CHECK_MODULES(), but does not set variables or print errors. 
+dnl +dnl Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG]) +dnl only at the first occurence in configure.ac, so if the first place +dnl it's called might be skipped (such as if it is within an "if", you +dnl have to call PKG_CHECK_EXISTS manually AC_DEFUN([PKG_CHECK_EXISTS], [AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl if test -n "$PKG_CONFIG" && \ @@ -88,8 +120,10 @@ m4_ifvaln([$3], [else $3])dnl fi]) -# _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES]) -# --------------------------------------------- +dnl _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES]) +dnl --------------------------------------------- +dnl Internal wrapper calling pkg-config via PKG_CONFIG and setting +dnl pkg_failed based on the result. m4_define([_PKG_CONFIG], [if test -n "$$1"; then pkg_cv_[]$1="$$1" @@ -101,10 +135,11 @@ m4_define([_PKG_CONFIG], else pkg_failed=untried fi[]dnl -])# _PKG_CONFIG +])dnl _PKG_CONFIG -# _PKG_SHORT_ERRORS_SUPPORTED -# ----------------------------- +dnl _PKG_SHORT_ERRORS_SUPPORTED +dnl --------------------------- +dnl Internal check to see if pkg-config supports short errors. 
AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED], [AC_REQUIRE([PKG_PROG_PKG_CONFIG]) if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then @@ -112,19 +147,17 @@ if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then else _pkg_short_errors_supported=no fi[]dnl -])# _PKG_SHORT_ERRORS_SUPPORTED +])dnl _PKG_SHORT_ERRORS_SUPPORTED -# PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND], -# [ACTION-IF-NOT-FOUND]) -# -# -# Note that if there is a possibility the first call to -# PKG_CHECK_MODULES might not happen, you should be sure to include an -# explicit call to PKG_PROG_PKG_CONFIG in your configure.ac -# -# -# -------------------------------------------------------------- +dnl PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND], +dnl [ACTION-IF-NOT-FOUND]) +dnl -------------------------------------------------------------- +dnl Since: 0.4.0 +dnl +dnl Note that if there is a possibility the first call to +dnl PKG_CHECK_MODULES might not happen, you should be sure to include an +dnl explicit call to PKG_PROG_PKG_CONFIG in your configure.ac AC_DEFUN([PKG_CHECK_MODULES], [AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl @@ -178,16 +211,40 @@ else AC_MSG_RESULT([yes]) $3 fi[]dnl -])# PKG_CHECK_MODULES +])dnl PKG_CHECK_MODULES -# PKG_INSTALLDIR(DIRECTORY) -# ------------------------- -# Substitutes the variable pkgconfigdir as the location where a module -# should install pkg-config .pc files. By default the directory is -# $libdir/pkgconfig, but the default can be changed by passing -# DIRECTORY. The user can override through the --with-pkgconfigdir -# parameter. +dnl PKG_CHECK_MODULES_STATIC(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND], +dnl [ACTION-IF-NOT-FOUND]) +dnl --------------------------------------------------------------------- +dnl Since: 0.29 +dnl +dnl Checks for existence of MODULES and gathers its build flags with +dnl static libraries enabled. 
Sets VARIABLE-PREFIX_CFLAGS from --cflags +dnl and VARIABLE-PREFIX_LIBS from --libs. +dnl +dnl Note that if there is a possibility the first call to +dnl PKG_CHECK_MODULES_STATIC might not happen, you should be sure to +dnl include an explicit call to PKG_PROG_PKG_CONFIG in your +dnl configure.ac. +AC_DEFUN([PKG_CHECK_MODULES_STATIC], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl +_save_PKG_CONFIG=$PKG_CONFIG +PKG_CONFIG="$PKG_CONFIG --static" +PKG_CHECK_MODULES($@) +PKG_CONFIG=$_save_PKG_CONFIG[]dnl +])dnl PKG_CHECK_MODULES_STATIC + + +dnl PKG_INSTALLDIR([DIRECTORY]) +dnl ------------------------- +dnl Since: 0.27 +dnl +dnl Substitutes the variable pkgconfigdir as the location where a module +dnl should install pkg-config .pc files. By default the directory is +dnl $libdir/pkgconfig, but the default can be changed by passing +dnl DIRECTORY. The user can override through the --with-pkgconfigdir +dnl parameter. AC_DEFUN([PKG_INSTALLDIR], [m4_pushdef([pkg_default], [m4_default([$1], ['${libdir}/pkgconfig'])]) m4_pushdef([pkg_description], @@ -198,16 +255,18 @@ AC_ARG_WITH([pkgconfigdir], AC_SUBST([pkgconfigdir], [$with_pkgconfigdir]) m4_popdef([pkg_default]) m4_popdef([pkg_description]) -]) dnl PKG_INSTALLDIR +])dnl PKG_INSTALLDIR -# PKG_NOARCH_INSTALLDIR(DIRECTORY) -# ------------------------- -# Substitutes the variable noarch_pkgconfigdir as the location where a -# module should install arch-independent pkg-config .pc files. By -# default the directory is $datadir/pkgconfig, but the default can be -# changed by passing DIRECTORY. The user can override through the -# --with-noarch-pkgconfigdir parameter. +dnl PKG_NOARCH_INSTALLDIR([DIRECTORY]) +dnl -------------------------------- +dnl Since: 0.27 +dnl +dnl Substitutes the variable noarch_pkgconfigdir as the location where a +dnl module should install arch-independent pkg-config .pc files. By +dnl default the directory is $datadir/pkgconfig, but the default can be +dnl changed by passing DIRECTORY. 
The user can override through the +dnl --with-noarch-pkgconfigdir parameter. AC_DEFUN([PKG_NOARCH_INSTALLDIR], [m4_pushdef([pkg_default], [m4_default([$1], ['${datadir}/pkgconfig'])]) m4_pushdef([pkg_description], @@ -218,13 +277,15 @@ AC_ARG_WITH([noarch-pkgconfigdir], AC_SUBST([noarch_pkgconfigdir], [$with_noarch_pkgconfigdir]) m4_popdef([pkg_default]) m4_popdef([pkg_description]) -]) dnl PKG_NOARCH_INSTALLDIR +])dnl PKG_NOARCH_INSTALLDIR -# PKG_CHECK_VAR(VARIABLE, MODULE, CONFIG-VARIABLE, -# [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) -# ------------------------------------------- -# Retrieves the value of the pkg-config variable for the given module. +dnl PKG_CHECK_VAR(VARIABLE, MODULE, CONFIG-VARIABLE, +dnl [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) +dnl ------------------------------------------- +dnl Since: 0.28 +dnl +dnl Retrieves the value of the pkg-config variable for the given module. AC_DEFUN([PKG_CHECK_VAR], [AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl AC_ARG_VAR([$1], [value of $3 for $2, overriding pkg-config])dnl @@ -233,7 +294,7 @@ _PKG_CONFIG([$1], [variable="][$3]["], [$2]) AS_VAR_COPY([$1], [pkg_cv_][$1]) AS_VAR_IF([$1], [""], [$5], [$4])dnl -])# PKG_CHECK_VAR +])dnl PKG_CHECK_VAR # Copyright (C) 2002-2014 Free Software Foundation, Inc. 
# diff --git a/pcre2/config-cmake.h.in b/pcre2/config-cmake.h.in index b74a7aaab..6ea4cc9b7 100644 --- a/pcre2/config-cmake.h.in +++ b/pcre2/config-cmake.h.in @@ -33,6 +33,7 @@ #cmakedefine EBCDIC 1 #cmakedefine EBCDIC_NL25 1 #cmakedefine HEAP_MATCH_RECURSE 1 +#cmakedefine NEVER_BACKSLASH_C 1 #define LINK_SIZE @PCRE2_LINK_SIZE@ #define MATCH_LIMIT @PCRE2_MATCH_LIMIT@ @@ -40,6 +41,7 @@ #define NEWLINE_DEFAULT @NEWLINE_DEFAULT@ #define PARENS_NEST_LIMIT @PCRE2_PARENS_NEST_LIMIT@ #define PCRE2GREP_BUFSIZE @PCRE2GREP_BUFSIZE@ +#define PCRE2GREP_MAX_BUFSIZE @PCRE2GREP_MAX_BUFSIZE@ #define MAX_NAME_SIZE 32 #define MAX_NAME_COUNT 10000 diff --git a/pcre2/configure b/pcre2/configure index 15777d450..2a3694ff0 100755 --- a/pcre2/configure +++ b/pcre2/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for PCRE2 10.20. +# Generated by GNU Autoconf 2.69 for PCRE2 10.23. # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. @@ -587,8 +587,8 @@ MAKEFLAGS= # Identity of this package. 
PACKAGE_NAME='PCRE2' PACKAGE_TARNAME='pcre2' -PACKAGE_VERSION='10.20' -PACKAGE_STRING='PCRE2 10.20' +PACKAGE_VERSION='10.23' +PACKAGE_STRING='PCRE2 10.23' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -659,6 +659,8 @@ PTHREAD_CC ax_pthread_config PCRE2_STATIC_CFLAG LIBREADLINE +WITH_FUZZ_SUPPORT_FALSE +WITH_FUZZ_SUPPORT_TRUE WITH_VALGRIND_FALSE WITH_VALGRIND_TRUE WITH_UNICODE_FALSE @@ -712,6 +714,8 @@ build_os build_vendor build_cpu build +ac_ct_AR +AR EGREP GREP CPP @@ -732,8 +736,6 @@ CPPFLAGS LDFLAGS CFLAGS CC -ac_ct_AR -AR AM_BACKSLASH AM_DEFAULT_VERBOSITY AM_DEFAULT_V @@ -821,6 +823,7 @@ enable_pcre2_32 enable_debug enable_jit enable_pcre2grep_jit +enable_pcre2grep_callout enable_rebuild_chartables enable_unicode enable_newline_is_cr @@ -829,12 +832,14 @@ enable_newline_is_crlf enable_newline_is_anycrlf enable_newline_is_any enable_bsr_anycrlf +enable_never_backslash_C enable_ebcdic enable_ebcdic_nl25 enable_stack_for_recursion enable_pcre2grep_libz enable_pcre2grep_libbz2 with_pcre2grep_bufsize +with_pcre2grep_max_bufsize enable_pcre2test_libedit enable_pcre2test_libreadline with_link_size @@ -843,6 +848,7 @@ with_match_limit with_match_limit_recursion enable_valgrind enable_coverage +enable_fuzz_support ' ac_precious_vars='build_alias host_alias @@ -1401,7 +1407,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures PCRE2 10.20 to adapt to many kinds of systems. +\`configure' configures PCRE2 10.23 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... 
@@ -1471,7 +1477,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of PCRE2 10.20:";; + short | recursive ) echo "Configuration of PCRE2 10.23:";; esac cat <<\_ACEOF @@ -1497,6 +1503,8 @@ Optional Features: --enable-debug enable debugging code --enable-jit enable Just-In-Time compiling support --disable-pcre2grep-jit disable JIT support in pcre2grep + --disable-pcre2grep-callout + disable callout script support in pcre2grep --enable-rebuild-chartables rebuild character tables in current locale --disable-unicode disable Unicode support @@ -1508,6 +1516,8 @@ Optional Features: use CR, LF, or CRLF as newline sequence --enable-newline-is-any use any valid Unicode newline sequence --enable-bsr-anycrlf \R matches only CR, LF, CRLF by default + --enable-never-backslash-C + use of \C causes an error --enable-ebcdic assume EBCDIC coding rather than ASCII; incompatible with --enable-utf; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables @@ -1522,8 +1532,9 @@ Optional Features: link pcre2test with libedit --enable-pcre2test-libreadline link pcre2test with libreadline - --enable-valgrind valgrind support + --enable-valgrind enable valgrind support --enable-coverage enable code coverage reports using gcov + --enable-fuzz-support enable fuzzer support Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] @@ -1537,7 +1548,11 @@ Optional Packages: --with-sysroot[=DIR] Search for dependent libraries within DIR (or the compiler's sysroot if not specified). 
--with-pcre2grep-bufsize=N - pcre2grep buffer size (default=20480, minimum=8192) + pcre2grep initial buffer size (default=20480, + minimum=8192) + --with-pcre2grep-max-bufsize=N + pcre2grep maximum buffer size (default=1048576, + minimum=8192) --with-link-size=N internal link size (2, 3, or 4 allowed; default=2) --with-parens-nest-limit=N nested parentheses limit (default=250) @@ -1635,7 +1650,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -PCRE2 configure 10.20 +PCRE2 configure 10.23 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -1724,6 +1739,93 @@ fi } # ac_fn_c_try_cpp +# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists, giving a warning if it cannot be compiled using +# the include files in INCLUDES and setting the cache variable VAR +# accordingly. +ac_fn_c_check_header_mongrel () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if eval \${$3+:} false; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +else + # Is the header compilable? +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5 +$as_echo_n "checking $2 usability... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_header_compiler=yes +else + ac_header_compiler=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5 +$as_echo "$ac_header_compiler" >&6; } + +# Is the header present? 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5 +$as_echo_n "checking $2 presence... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <$2> +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + ac_header_preproc=yes +else + ac_header_preproc=no +fi +rm -f conftest.err conftest.i conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5 +$as_echo "$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #(( + yes:no: ) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5 +$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} + ;; + no:yes:* ) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5 +$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5 +$as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5 +$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5 +$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} + ;; +esac + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... 
" >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + eval "$3=\$ac_header_compiler" +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_mongrel + # ac_fn_c_try_run LINENO # ---------------------- # Try to link conftest.$ac_ext, and return whether this succeeded. Assumes @@ -1986,93 +2088,6 @@ $as_echo "$ac_res" >&6; } } # ac_fn_c_check_func -# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES -# ------------------------------------------------------- -# Tests whether HEADER exists, giving a warning if it cannot be compiled using -# the include files in INCLUDES and setting the cache variable VAR -# accordingly. -ac_fn_c_check_header_mongrel () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - if eval \${$3+:} false; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 -$as_echo_n "checking for $2... " >&6; } -if eval \${$3+:} false; then : - $as_echo_n "(cached) " >&6 -fi -eval ac_res=\$$3 - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 -$as_echo "$ac_res" >&6; } -else - # Is the header compilable? -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5 -$as_echo_n "checking $2 usability... " >&6; } -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -$4 -#include <$2> -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - ac_header_compiler=yes -else - ac_header_compiler=no -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5 -$as_echo "$ac_header_compiler" >&6; } - -# Is the header present? -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5 -$as_echo_n "checking $2 presence... " >&6; } -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ -#include <$2> -_ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : - ac_header_preproc=yes -else - ac_header_preproc=no -fi -rm -f conftest.err conftest.i conftest.$ac_ext -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5 -$as_echo "$ac_header_preproc" >&6; } - -# So? What about this header? -case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #(( - yes:no: ) - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5 -$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;} - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 -$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} - ;; - no:yes:* ) - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5 -$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;} - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5 -$as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" >&2;} - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5 -$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;} - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5 -$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;} - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 -$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} - ;; -esac - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 -$as_echo_n "checking for $2... 
" >&6; } -if eval \${$3+:} false; then : - $as_echo_n "(cached) " >&6 -else - eval "$3=\$ac_header_compiler" -fi -eval ac_res=\$$3 - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 -$as_echo "$ac_res" >&6; } -fi - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - -} # ac_fn_c_check_header_mongrel - # ac_fn_c_check_type LINENO TYPE VAR INCLUDES # ------------------------------------------- # Tests whether TYPE exists after having included INCLUDES, setting cache @@ -2130,7 +2145,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by PCRE2 $as_me 10.20, which was +It was created by PCRE2 $as_me 10.23, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -2994,7 +3009,7 @@ fi # Define the identity of the package. PACKAGE='pcre2' - VERSION='10.20' + VERSION='10.23' cat >>confdefs.h <<_ACEOF @@ -3129,69 +3144,18 @@ AM_BACKSLASH='\' ac_config_headers="$ac_config_headers src/config.h" -# This is a new thing required to stop a warning from automake 1.12 -DEPDIR="${am__leading_dot}deps" - -ac_config_commands="$ac_config_commands depfiles" +# This was added at the suggestion of libtoolize (03-Jan-10) -am_make=${MAKE-make} -cat > confinc << 'END' -am__doit: - @echo this is the am__doit target -.PHONY: am__doit -END -# If we don't find an include directive, just comment out the code. -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for style of include used by $am_make" >&5 -$as_echo_n "checking for style of include used by $am_make... " >&6; } -am__include="#" -am__quote= -_am_result=none -# First try GNU make style include. -echo "include confinc" > confmf -# Ignore all kinds of additional output from 'make'. -case `$am_make -s -f confmf 2> /dev/null` in #( -*the\ am__doit\ target*) - am__include=include - am__quote= - _am_result=GNU - ;; -esac -# Now try BSD make style include. 
-if test "$am__include" = "#"; then - echo '.include "confinc"' > confmf - case `$am_make -s -f confmf 2> /dev/null` in #( - *the\ am__doit\ target*) - am__include=.include - am__quote="\"" - _am_result=BSD - ;; - esac -fi - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $_am_result" >&5 -$as_echo "$_am_result" >&6; } -rm -f confinc confmf - -# Check whether --enable-dependency-tracking was given. -if test "${enable_dependency_tracking+set}" = set; then : - enableval=$enable_dependency_tracking; -fi - -if test "x$enable_dependency_tracking" != xno; then - am_depcomp="$ac_aux_dir/depcomp" - AMDEPBACKSLASH='\' - am__nodep='_no' -fi - if test "x$enable_dependency_tracking" != xno; then - AMDEP_TRUE= - AMDEP_FALSE='#' -else - AMDEP_TRUE='#' - AMDEP_FALSE= -fi +# The default CFLAGS in Autoconf are "-g -O2" for gcc and just "-g" for any +# other compiler. There doesn't seem to be a standard way of getting rid of the +# -g (which I don't think is needed for a production library). This fudge seems +# to achieve the necessary. First, we remember the externally set values of +# CFLAGS. Then call the AC_PROG_CC macro to find the compiler - if CFLAGS is +# not set, it will be set to Autoconf's defaults. Afterwards, if the original +# values were not set, remove the -g from the Autoconf defaults. +remember_set_CFLAGS="$CFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' @@ -4040,918 +4004,69 @@ ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu +DEPDIR="${am__leading_dot}deps" -depcc="$CC" am_compiler_list= - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 -$as_echo_n "checking dependency style of $depcc... " >&6; } -if ${am_cv_CC_dependencies_compiler_type+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then - # We make a subdir and do the tests there. 
Otherwise we can end up - # making bogus files that we don't know about and never remove. For - # instance it was reported that on HP-UX the gcc test will end up - # making a dummy file named 'D' -- because '-MD' means "put the output - # in D". - rm -rf conftest.dir - mkdir conftest.dir - # Copy depcomp to subdir because otherwise we won't find it if we're - # using a relative directory. - cp "$am_depcomp" conftest.dir - cd conftest.dir - # We will build objects and dependencies in a subdirectory because - # it helps to detect inapplicable dependency modes. For instance - # both Tru64's cc and ICC support -MD to output dependencies as a - # side effect of compilation, but ICC will put the dependencies in - # the current directory while Tru64 will put them in the object - # directory. - mkdir sub - - am_cv_CC_dependencies_compiler_type=none - if test "$am_compiler_list" = ""; then - am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` - fi - am__universal=false - case " $depcc " in #( - *\ -arch\ *\ -arch\ *) am__universal=true ;; - esac - - for depmode in $am_compiler_list; do - # Setup a source with many dependencies, because some compilers - # like to wrap large dependency lists on column 80 (with \), and - # we should not choose a depcomp mode which is confused by this. - # - # We need to recreate these files for each test, as the compiler may - # overwrite some of them when testing with obscure command lines. - # This happens at least with the AIX C compiler. - : > sub/conftest.c - for i in 1 2 3 4 5 6; do - echo '#include "conftst'$i'.h"' >> sub/conftest.c - # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with - # Solaris 10 /bin/sh. - echo '/* dummy */' > sub/conftst$i.h - done - echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf - - # We check with '-c' and '-o' for the sake of the "dashmstdout" - # mode. It turns out that the SunPro C++ compiler does not properly - # handle '-M -o', and we need to detect this. 
Also, some Intel - # versions had trouble with output in subdirs. - am__obj=sub/conftest.${OBJEXT-o} - am__minus_obj="-o $am__obj" - case $depmode in - gcc) - # This depmode causes a compiler race in universal mode. - test "$am__universal" = false || continue - ;; - nosideeffect) - # After this tag, mechanisms are not by side-effect, so they'll - # only be used when explicitly requested. - if test "x$enable_dependency_tracking" = xyes; then - continue - else - break - fi - ;; - msvc7 | msvc7msys | msvisualcpp | msvcmsys) - # This compiler won't grok '-c -o', but also, the minuso test has - # not run yet. These depmodes are late enough in the game, and - # so weak that their functioning should not be impacted. - am__obj=conftest.${OBJEXT-o} - am__minus_obj= - ;; - none) break ;; - esac - if depmode=$depmode \ - source=sub/conftest.c object=$am__obj \ - depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ - $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ - >/dev/null 2>conftest.err && - grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && - grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && - grep $am__obj sub/conftest.Po > /dev/null 2>&1 && - ${MAKE-make} -s -f confmf > /dev/null 2>&1; then - # icc doesn't choke on unknown options, it will just issue warnings - # or remarks (even with -Werror). So we grep stderr for any message - # that says an option was ignored or not supported. - # When given -MP, icc 7.0 and 7.1 complain thusly: - # icc: Command line warning: ignoring option '-M'; no argument required - # The diagnosis changed in icc 8.0: - # icc: Command line remark: option '-MP' not supported - if (grep 'ignoring option' conftest.err || - grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else - am_cv_CC_dependencies_compiler_type=$depmode - break - fi - fi - done - - cd .. 
- rm -rf conftest.dir -else - am_cv_CC_dependencies_compiler_type=none -fi - -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CC_dependencies_compiler_type" >&5 -$as_echo "$am_cv_CC_dependencies_compiler_type" >&6; } -CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type - - if - test "x$enable_dependency_tracking" != xno \ - && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then - am__fastdepCC_TRUE= - am__fastdepCC_FALSE='#' -else - am__fastdepCC_TRUE='#' - am__fastdepCC_FALSE= -fi +ac_config_commands="$ac_config_commands depfiles" - -if test -n "$ac_tool_prefix"; then - for ac_prog in ar lib "link -lib" - do - # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. -set dummy $ac_tool_prefix$ac_prog; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_AR+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$AR"; then - ac_cv_prog_AR="$AR" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_AR="$ac_tool_prefix$ac_prog" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi -fi -AR=$ac_cv_prog_AR -if test -n "$AR"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AR" >&5 -$as_echo "$AR" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - - test -n "$AR" && break - done -fi -if test -z "$AR"; then - ac_ct_AR=$AR - for ac_prog in ar lib "link -lib" -do - # Extract the first word of "$ac_prog", so it can be a program name with args. 
-set dummy $ac_prog; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_ac_ct_AR+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$ac_ct_AR"; then - ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_ac_ct_AR="$ac_prog" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi -fi -ac_ct_AR=$ac_cv_prog_ac_ct_AR -if test -n "$ac_ct_AR"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AR" >&5 -$as_echo "$ac_ct_AR" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - - test -n "$ac_ct_AR" && break -done - - if test "x$ac_ct_AR" = x; then - AR="false" - else - case $cross_compiling:$ac_tool_warned in -yes:) -{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 -$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} -ac_tool_warned=yes ;; -esac - AR=$ac_ct_AR - fi -fi - -: ${AR=ar} - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking the archiver ($AR) interface" >&5 -$as_echo_n "checking the archiver ($AR) interface... " >&6; } -if ${am_cv_ar_interface+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - - am_cv_ar_interface=ar - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ -int some_variable = 0; -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - am_ar_try='$AR cru libconftest.a conftest.$ac_objext >&5' - { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$am_ar_try\""; } >&5 - (eval $am_ar_try) 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } - if test "$ac_status" -eq 0; then - am_cv_ar_interface=ar - else - am_ar_try='$AR -NOLOGO -OUT:conftest.lib conftest.$ac_objext >&5' - { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$am_ar_try\""; } >&5 - (eval $am_ar_try) 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } - if test "$ac_status" -eq 0; then - am_cv_ar_interface=lib - else - am_cv_ar_interface=unknown - fi - fi - rm -f conftest.lib libconftest.a - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_ar_interface" >&5 -$as_echo "$am_cv_ar_interface" >&6; } - -case $am_cv_ar_interface in -ar) - ;; -lib) - # Microsoft lib, so override with the ar-lib wrapper script. - # FIXME: It is wrong to rewrite AR. - # But if we don't then we get into trouble of one sort or another. - # A longer-term fix would be to have automake use am__AR in this case, - # and then we could set am__AR="$am_aux_dir/ar-lib \$(AR)" or something - # similar. - AR="$am_aux_dir/ar-lib $AR" - ;; -unknown) - as_fn_error $? "could not determine $AR interface" "$LINENO" 5 +am_make=${MAKE-make} +cat > confinc << 'END' +am__doit: + @echo this is the am__doit target +.PHONY: am__doit +END +# If we don't find an include directive, just comment out the code. 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for style of include used by $am_make" >&5 +$as_echo_n "checking for style of include used by $am_make... " >&6; } +am__include="#" +am__quote= +_am_result=none +# First try GNU make style include. +echo "include confinc" > confmf +# Ignore all kinds of additional output from 'make'. +case `$am_make -s -f confmf 2> /dev/null` in #( +*the\ am__doit\ target*) + am__include=include + am__quote= + _am_result=GNU ;; esac +# Now try BSD make style include. +if test "$am__include" = "#"; then + echo '.include "confinc"' > confmf + case `$am_make -s -f confmf 2> /dev/null` in #( + *the\ am__doit\ target*) + am__include=.include + am__quote="\"" + _am_result=BSD + ;; + esac +fi -# This was added at the suggestion of libtoolize (03-Jan-10) +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $_am_result" >&5 +$as_echo "$_am_result" >&6; } +rm -f confinc confmf +# Check whether --enable-dependency-tracking was given. +if test "${enable_dependency_tracking+set}" = set; then : + enableval=$enable_dependency_tracking; +fi -# The default CFLAGS in Autoconf are "-g -O2" for gcc and just "-g" for any -# other compiler. There doesn't seem to be a standard way of getting rid of the -# -g (which I don't think is needed for a production library). This fudge seems -# to achieve the necessary. First, we remember the externally set values of -# CFLAGS. Then call the AC_PROG_CC macro to find the compiler - if CFLAGS is -# not set, it will be set to Autoconf's defaults. Afterwards, if the original -# values were not set, remove the -g from the Autoconf defaults. - -remember_set_CFLAGS="$CFLAGS" - -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu -if test -n "$ac_tool_prefix"; then - # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. 
-set dummy ${ac_tool_prefix}gcc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CC+:} false; then : - $as_echo_n "(cached) " >&6 +if test "x$enable_dependency_tracking" != xno; then + am_depcomp="$ac_aux_dir/depcomp" + AMDEPBACKSLASH='\' + am__nodep='_no' +fi + if test "x$enable_dependency_tracking" != xno; then + AMDEP_TRUE= + AMDEP_FALSE='#' else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_CC="${ac_tool_prefix}gcc" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi -fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + AMDEP_TRUE='#' + AMDEP_FALSE= fi -fi -if test -z "$ac_cv_prog_CC"; then - ac_ct_CC=$CC - # Extract the first word of "gcc", so it can be a program name with args. -set dummy gcc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_ac_ct_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$ac_ct_CC"; then - ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_ac_ct_CC="gcc" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi -fi -ac_ct_CC=$ac_cv_prog_ac_ct_CC -if test -n "$ac_ct_CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 -$as_echo "$ac_ct_CC" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - if test "x$ac_ct_CC" = x; then - CC="" - else - case $cross_compiling:$ac_tool_warned in -yes:) -{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 -$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} -ac_tool_warned=yes ;; -esac - CC=$ac_ct_CC - fi -else - CC="$ac_cv_prog_CC" -fi - -if test -z "$CC"; then - if test -n "$ac_tool_prefix"; then - # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. -set dummy ${ac_tool_prefix}cc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_CC="${ac_tool_prefix}cc" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi -fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - - fi -fi -if test -z "$CC"; then - # Extract the first word of "cc", so it can be a program name with args. -set dummy cc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else - ac_prog_rejected=no -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then - ac_prog_rejected=yes - continue - fi - ac_cv_prog_CC="cc" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -if test $ac_prog_rejected = yes; then - # We found a bogon in the path, so make sure we never use it. - set dummy $ac_cv_prog_CC - shift - if test $# != 0; then - # We chose a different compiler from the bogus one. - # However, it has the same basename, so the bogon will be chosen - # first if we set CC to just the basename; use the full file name. 
- shift - ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" - fi -fi -fi -fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - -fi -if test -z "$CC"; then - if test -n "$ac_tool_prefix"; then - for ac_prog in cl.exe - do - # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. -set dummy $ac_tool_prefix$ac_prog; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_CC="$ac_tool_prefix$ac_prog" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi -fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - - test -n "$CC" && break - done -fi -if test -z "$CC"; then - ac_ct_CC=$CC - for ac_prog in cl.exe -do - # Extract the first word of "$ac_prog", so it can be a program name with args. -set dummy $ac_prog; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_ac_ct_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$ac_ct_CC"; then - ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
-else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_ac_ct_CC="$ac_prog" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi -fi -ac_ct_CC=$ac_cv_prog_ac_ct_CC -if test -n "$ac_ct_CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 -$as_echo "$ac_ct_CC" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - - test -n "$ac_ct_CC" && break -done - - if test "x$ac_ct_CC" = x; then - CC="" - else - case $cross_compiling:$ac_tool_warned in -yes:) -{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 -$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} -ac_tool_warned=yes ;; -esac - CC=$ac_ct_CC - fi -fi - -fi - - -test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error $? "no acceptable C compiler found in \$PATH -See \`config.log' for more details" "$LINENO" 5; } - -# Provide some information about the compiler. -$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 -set X $ac_compile -ac_compiler=$2 -for ac_option in --version -v -V -qversion; do - { { ac_try="$ac_compiler $ac_option >&5" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 - (eval "$ac_compiler $ac_option >&5") 2>conftest.err - ac_status=$? - if test -s conftest.err; then - sed '10a\ -... rest of stderr output deleted ... 
- 10q' conftest.err >conftest.er1 - cat conftest.er1 >&5 - fi - rm -f conftest.er1 conftest.err - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } -done - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 -$as_echo_n "checking whether we are using the GNU C compiler... " >&6; } -if ${ac_cv_c_compiler_gnu+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main () -{ -#ifndef __GNUC__ - choke me -#endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - ac_compiler_gnu=yes -else - ac_compiler_gnu=no -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -ac_cv_c_compiler_gnu=$ac_compiler_gnu - -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 -$as_echo "$ac_cv_c_compiler_gnu" >&6; } -if test $ac_compiler_gnu = yes; then - GCC=yes -else - GCC= -fi -ac_test_CFLAGS=${CFLAGS+set} -ac_save_CFLAGS=$CFLAGS -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 -$as_echo_n "checking whether $CC accepts -g... " >&6; } -if ${ac_cv_prog_cc_g+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_save_c_werror_flag=$ac_c_werror_flag - ac_c_werror_flag=yes - ac_cv_prog_cc_g=no - CFLAGS="-g" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - ac_cv_prog_cc_g=yes -else - CFLAGS="" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - -else - ac_c_werror_flag=$ac_save_c_werror_flag - CFLAGS="-g" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - -int -main () -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - ac_cv_prog_cc_g=yes -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - ac_c_werror_flag=$ac_save_c_werror_flag -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 -$as_echo "$ac_cv_prog_cc_g" >&6; } -if test "$ac_test_CFLAGS" = set; then - CFLAGS=$ac_save_CFLAGS -elif test $ac_cv_prog_cc_g = yes; then - if test "$GCC" = yes; then - CFLAGS="-g -O2" - else - CFLAGS="-g" - fi -else - if test "$GCC" = yes; then - CFLAGS="-O2" - else - CFLAGS= - fi -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 -$as_echo_n "checking for $CC option to accept ISO C89... " >&6; } -if ${ac_cv_prog_cc_c89+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_cv_prog_cc_c89=no -ac_save_CC=$CC -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -#include -struct stat; -/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ -struct buf { int x; }; -FILE * (*rcsopen) (struct buf *, struct stat *, int); -static char *e (p, i) - char **p; - int i; -{ - return p[i]; -} -static char *f (char * (*g) (char **, int), char **p, ...) -{ - char *s; - va_list v; - va_start (v,p); - s = g (p, va_arg (v,int)); - va_end (v); - return s; -} - -/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has - function prototypes and stuff, but not '\xHH' hex character constants. - These don't provoke an error unfortunately, instead are silently treated - as 'x'. The following induces an error, until -std is added to get - proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an - array size at least. It's necessary to write '\x00'==0 to get something - that's true only with -std. */ -int osf4_cc_array ['\x00' == 0 ? 
1 : -1]; - -/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters - inside strings and character constants. */ -#define FOO(x) 'x' -int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1]; - -int test (int i, double x); -struct s1 {int (*f) (int a);}; -struct s2 {int (*f) (double a);}; -int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); -int argc; -char **argv; -int -main () -{ -return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; - ; - return 0; -} -_ACEOF -for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ - -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" -do - CC="$ac_save_CC $ac_arg" - if ac_fn_c_try_compile "$LINENO"; then : - ac_cv_prog_cc_c89=$ac_arg -fi -rm -f core conftest.err conftest.$ac_objext - test "x$ac_cv_prog_cc_c89" != "xno" && break -done -rm -f conftest.$ac_ext -CC=$ac_save_CC - -fi -# AC_CACHE_VAL -case "x$ac_cv_prog_cc_c89" in - x) - { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 -$as_echo "none needed" >&6; } ;; - xno) - { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 -$as_echo "unsupported" >&6; } ;; - *) - CC="$CC $ac_cv_prog_cc_c89" - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 -$as_echo "$ac_cv_prog_cc_c89" >&6; } ;; -esac -if test "x$ac_cv_prog_cc_c89" != xno; then : - -fi - -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC understands -c and -o together" >&5 -$as_echo_n "checking whether $CC understands -c and -o together... 
" >&6; } -if ${am_cv_prog_cc_c_o+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF - # Make sure it works both with $CC and with simple cc. - # Following AC_PROG_CC_C_O, we do the test twice because some - # compilers refuse to overwrite an existing .o file with -o, - # though they will create one. - am_cv_prog_cc_c_o=yes - for am_i in 1 2; do - if { echo "$as_me:$LINENO: $CC -c conftest.$ac_ext -o conftest2.$ac_objext" >&5 - ($CC -c conftest.$ac_ext -o conftest2.$ac_objext) >&5 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } \ - && test -f conftest2.$ac_objext; then - : OK - else - am_cv_prog_cc_c_o=no - break - fi - done - rm -f core conftest* - unset am_i -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_cc_c_o" >&5 -$as_echo "$am_cv_prog_cc_c_o" >&6; } -if test "$am_cv_prog_cc_c_o" != yes; then - # Losing compiler, so override with the script. - # FIXME: It is wrong to rewrite CC. - # But if we don't then we get into trouble of one sort or another. 
- # A longer-term fix would be to have automake use am__CC in this case, - # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" - CC="$am_aux_dir/compile $CC" -fi -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - depcc="$CC" am_compiler_list= @@ -5083,18 +4198,6 @@ fi -if test "x$remember_set_CFLAGS" = "x" -then - if test "$CFLAGS" = "-g -O2" - then - CFLAGS="-O2" - elif test "$CFLAGS" = "-g" - then - CFLAGS="" - fi -fi - -# Check for a 64-bit integer type ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' @@ -5492,6 +4595,253 @@ fi done + + ac_fn_c_check_header_mongrel "$LINENO" "minix/config.h" "ac_cv_header_minix_config_h" "$ac_includes_default" +if test "x$ac_cv_header_minix_config_h" = xyes; then : + MINIX=yes +else + MINIX= +fi + + + if test "$MINIX" = yes; then + +$as_echo "#define _POSIX_SOURCE 1" >>confdefs.h + + +$as_echo "#define _POSIX_1_SOURCE 2" >>confdefs.h + + +$as_echo "#define _MINIX 1" >>confdefs.h + + fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether it is safe to define __EXTENSIONS__" >&5 +$as_echo_n "checking whether it is safe to define __EXTENSIONS__... " >&6; } +if ${ac_cv_safe_to_define___extensions__+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +# define __EXTENSIONS__ 1 + $ac_includes_default +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_safe_to_define___extensions__=yes +else + ac_cv_safe_to_define___extensions__=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_safe_to_define___extensions__" >&5 +$as_echo "$ac_cv_safe_to_define___extensions__" >&6; } + test $ac_cv_safe_to_define___extensions__ = yes && + $as_echo "#define __EXTENSIONS__ 1" >>confdefs.h + + $as_echo "#define _ALL_SOURCE 1" >>confdefs.h + + $as_echo "#define _GNU_SOURCE 1" >>confdefs.h + + $as_echo "#define _POSIX_PTHREAD_SEMANTICS 1" >>confdefs.h + + $as_echo "#define _TANDEM_SOURCE 1" >>confdefs.h + + + +if test "x$remember_set_CFLAGS" = "x" +then + if test "$CFLAGS" = "-g -O2" + then + CFLAGS="-O2" + elif test "$CFLAGS" = "-g" + then + CFLAGS="" + fi +fi + +# This is a new thing required to stop a warning from automake 1.12 +if test -n "$ac_tool_prefix"; then + for ac_prog in ar lib "link -lib" + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_AR+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$AR"; then + ac_cv_prog_AR="$AR" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_AR="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +AR=$ac_cv_prog_AR +if test -n "$AR"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AR" >&5 +$as_echo "$AR" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$AR" && break + done +fi +if test -z "$AR"; then + ac_ct_AR=$AR + for ac_prog in ar lib "link -lib" +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_AR+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_AR"; then + ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_AR="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_AR=$ac_cv_prog_ac_ct_AR +if test -n "$ac_ct_AR"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AR" >&5 +$as_echo "$ac_ct_AR" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_AR" && break +done + + if test "x$ac_ct_AR" = x; then + AR="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + AR=$ac_ct_AR + fi +fi + +: ${AR=ar} + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking the archiver ($AR) interface" >&5 +$as_echo_n "checking the archiver ($AR) interface... " >&6; } +if ${am_cv_ar_interface+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + am_cv_ar_interface=ar + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +int some_variable = 0; +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + am_ar_try='$AR cru libconftest.a conftest.$ac_objext >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$am_ar_try\""; } >&5 + (eval $am_ar_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } + if test "$ac_status" -eq 0; then + am_cv_ar_interface=ar + else + am_ar_try='$AR -NOLOGO -OUT:conftest.lib conftest.$ac_objext >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$am_ar_try\""; } >&5 + (eval $am_ar_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if test "$ac_status" -eq 0; then + am_cv_ar_interface=lib + else + am_cv_ar_interface=unknown + fi + fi + rm -f conftest.lib libconftest.a + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_ar_interface" >&5 +$as_echo "$am_cv_ar_interface" >&6; } + +case $am_cv_ar_interface in +ar) + ;; +lib) + # Microsoft lib, so override with the ar-lib wrapper script. + # FIXME: It is wrong to rewrite AR. + # But if we don't then we get into trouble of one sort or another. + # A longer-term fix would be to have automake use am__AR in this case, + # and then we could set am__AR="$am_aux_dir/ar-lib \$(AR)" or something + # similar. + AR="$am_aux_dir/ar-lib $AR" + ;; +unknown) + as_fn_error $? "could not determine $AR interface" "$LINENO" 5 + ;; +esac + + +# Check for a 64-bit integer type ac_fn_c_find_intX_t "$LINENO" "64" "ac_cv_c_int64_t" case $ac_cv_c_int64_t in #( no|yes) ;; #( @@ -13590,9 +12940,9 @@ _ACEOF # Versioning PCRE2_MAJOR="10" -PCRE2_MINOR="20" +PCRE2_MINOR="23" PCRE2_PRERELEASE="" -PCRE2_DATE="2015-06-30" +PCRE2_DATE="2017-02-14" if test "$PCRE2_MINOR" = "08" -o "$PCRE2_MINOR" = "09" then @@ -13700,6 +13050,15 @@ else fi +# Handle --disable-pcre2grep-callout (enabled by default) +# Check whether --enable-pcre2grep-callout was given. 
+if test "${enable_pcre2grep_callout+set}" = set; then : + enableval=$enable_pcre2grep_callout; +else + enable_pcre2grep_callout=yes +fi + + # Handle --enable-rebuild-chartables # Check whether --enable-rebuild-chartables was given. if test "${enable_rebuild_chartables+set}" = set; then : @@ -13756,6 +13115,15 @@ else fi +# Handle --enable-never-backslash-C +# Check whether --enable-never-backslash-C was given. +if test "${enable_never_backslash_C+set}" = set; then : + enableval=$enable_never_backslash_C; +else + enable_never_backslash_C=no +fi + + # Handle --enable-ebcdic # Check whether --enable-ebcdic was given. if test "${enable_ebcdic+set}" = set; then : @@ -13811,6 +13179,16 @@ else fi +# Handle --with-pcre2grep-max-bufsize=N + +# Check whether --with-pcre2grep-max-bufsize was given. +if test "${with_pcre2grep_max_bufsize+set}" = set; then : + withval=$with_pcre2grep_max_bufsize; +else + with_pcre2grep_max_bufsize=1048576 +fi + + # Handle --enable-pcre2test-libedit # Check whether --enable-pcre2test-libedit was given. if test "${enable_pcre2test_libedit+set}" = set; then : @@ -13894,6 +13272,15 @@ else fi +# Handle --enable-fuzz-support +# Check whether --enable-fuzz_support was given. +if test "${enable_fuzz_support+set}" = set; then : + enableval=$enable_fuzz_support; +else + enable_fuzz_support=no +fi + + # Set the default value for pcre2-8 if test "x$enable_pcre2_8" = "xunset" then @@ -13946,13 +13333,17 @@ fi # Make sure that if enable_ebcdic is set, rebuild_chartables is also enabled. # Also check that UTF support is not requested, because PCRE2 cannot handle # EBCDIC and UTF in the same build. To do so it would need to use different -# character constants depending on the mode. +# character constants depending on the mode. Also, EBCDIC cannot be used with +# 16-bit and 32-bit libraries. # if test "x$enable_ebcdic" = "xyes"; then enable_rebuild_chartables=yes if test "x$enable_unicode" = "xyes"; then as_fn_error $? 
"support for EBCDIC and Unicode cannot be enabled at the same time" "$LINENO" 5 fi + if test "x$enable_pcre2_16" = "xyes" -o "x$enable_pcre2_32" = "xyes"; then + as_fn_error $? "EBCDIC support is available only for the 8-bit library" "$LINENO" 5 + fi fi # Check argument to --with-link-size @@ -14103,6 +13494,18 @@ fi done +for ac_header in sys/wait.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "sys/wait.h" "ac_cv_header_sys_wait_h" "$ac_includes_default" +if test "x$ac_cv_header_sys_wait_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_SYS_WAIT_H 1 +_ACEOF + HAVE_SYS_WAIT_H=1 +fi + +done + # Conditional compilation if test "x$enable_pcre2_8" = "xyes"; then @@ -14169,6 +13572,19 @@ else WITH_VALGRIND_FALSE= fi + if test "x$enable_fuzz_support" = "xyes"; then + WITH_FUZZ_SUPPORT_TRUE= + WITH_FUZZ_SUPPORT_FALSE='#' +else + WITH_FUZZ_SUPPORT_TRUE='#' + WITH_FUZZ_SUPPORT_FALSE= +fi + + +if test "$enable_fuzz_support" = "yes" -a "$enable_pcre2_8" = "no"; then + echo "** ERROR: Fuzzer support requires the 8-bit library" + exit 1 +fi # Checks for typedefs, structures, and compiler characteristics. @@ -14267,7 +13683,7 @@ fi # Checks for library functions. -for ac_func in bcopy memmove strerror +for ac_func in bcopy memmove strerror mkostemp secure_getenv do : as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" @@ -14770,8 +14186,6 @@ fi fi -# This facilitates -ansi builds under Linux - PCRE2_STATIC_CFLAG="" if test "x$enable_shared" = "xno" ; then @@ -15224,6 +14638,17 @@ $as_echo "#define SUPPORT_PCRE2GREP_JIT /**/" >>confdefs.h fi +if test "$enable_pcre2grep_callout" = "yes"; then + if test "$HAVE_WINDOWS_H" != "1"; then + if test "$HAVE_SYS_WAIT_H" != "1"; then + as_fn_error $? "Callout script support needs sys/wait.h." 
"$LINENO" 5 + fi + fi + +$as_echo "#define SUPPORT_PCRE2GREP_CALLOUT /**/" >>confdefs.h + +fi + if test "$enable_unicode" = "yes"; then $as_echo "#define SUPPORT_UNICODE /**/" >>confdefs.h @@ -15254,7 +14679,15 @@ $as_echo "$as_me: WARNING: $with_pcre2grep_bufsize is too small for --with-pcre2 with_pcre2grep_bufsize="8192" else if test $? -gt 1 ; then - as_fn_error $? "Bad value for --with-pcre2grep-bufsize" "$LINENO" 5 + as_fn_error $? "Bad value for --with-pcre2grep-bufsize" "$LINENO" 5 + fi +fi + +if test $with_pcre2grep_max_bufsize -lt $with_pcre2grep_bufsize ; then + with_pcre2grep_max_bufsize="$with_pcre2grep_bufsize" +else + if test $? -gt 1 ; then + as_fn_error $? "Bad value for --with-pcre2grep-max-bufsize" "$LINENO" 5 fi fi @@ -15264,6 +14697,12 @@ cat >>confdefs.h <<_ACEOF _ACEOF + +cat >>confdefs.h <<_ACEOF +#define PCRE2GREP_MAX_BUFSIZE $with_pcre2grep_max_bufsize +_ACEOF + + if test "$enable_pcre2test_libedit" = "yes"; then $as_echo "#define SUPPORT_LIBEDIT /**/" >>confdefs.h @@ -15287,6 +14726,12 @@ $as_echo "#define BSR_ANYCRLF /**/" >>confdefs.h fi +if test "$enable_never_backslash_C" = "yes"; then + +$as_echo "#define NEVER_BACKSLASH_C /**/" >>confdefs.h + +fi + cat >>confdefs.h <<_ACEOF #define LINK_SIZE $with_link_size @@ -15361,16 +14806,16 @@ esac # are m4 variables, assigned above. EXTRA_LIBPCRE2_8_LDFLAGS="$EXTRA_LIBPCRE2_8_LDFLAGS \ - $NO_UNDEFINED -version-info 2:0:2" + $NO_UNDEFINED -version-info 5:0:5" EXTRA_LIBPCRE2_16_LDFLAGS="$EXTRA_LIBPCRE2_16_LDFLAGS \ - $NO_UNDEFINED -version-info 2:0:2" + $NO_UNDEFINED -version-info 5:0:5" EXTRA_LIBPCRE2_32_LDFLAGS="$EXTRA_LIBPCRE2_32_LDFLAGS \ - $NO_UNDEFINED -version-info 2:0:2" + $NO_UNDEFINED -version-info 5:0:5" EXTRA_LIBPCRE2_POSIX_LDFLAGS="$EXTRA_LIBPCRE2_POSIX_LDFLAGS \ - $NO_UNDEFINED -version-info 0:0:0" + $NO_UNDEFINED -version-info 1:1:0" @@ -15976,10 +15421,6 @@ if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then as_fn_error $? 
"conditional \"am__fastdepCC\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi -if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then - as_fn_error $? "conditional \"am__fastdepCC\" was never defined. -Usually this means the macro was only invoked conditionally." "$LINENO" 5 -fi if test -z "${WITH_PCRE2_8_TRUE}" && test -z "${WITH_PCRE2_8_FALSE}"; then as_fn_error $? "conditional \"WITH_PCRE2_8\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 @@ -16012,6 +15453,10 @@ if test -z "${WITH_VALGRIND_TRUE}" && test -z "${WITH_VALGRIND_FALSE}"; then as_fn_error $? "conditional \"WITH_VALGRIND\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${WITH_FUZZ_SUPPORT_TRUE}" && test -z "${WITH_FUZZ_SUPPORT_FALSE}"; then + as_fn_error $? "conditional \"WITH_FUZZ_SUPPORT\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi if test -z "${WITH_GCOV_TRUE}" && test -z "${WITH_GCOV_FALSE}"; then as_fn_error $? "conditional \"WITH_GCOV\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 @@ -16413,7 +15858,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by PCRE2 $as_me 10.20, which was +This file was extended by PCRE2 $as_me 10.23, which was generated by GNU Autoconf 2.69. 
Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -16479,7 +15924,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -PCRE2 config.status 10.20 +PCRE2 config.status 10.23 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" @@ -18194,41 +17639,45 @@ cat <pcre2_callout_enumerate   Enumerate callouts in a compiled pattern +pcre2_code_copy +   Copy a compiled pattern + +pcre2_code_copy_with_tables +   Copy a compiled pattern and its character tables + pcre2_code_free   Free a compiled pattern @@ -210,9 +216,15 @@ in the library. pcre2_set_match_limit   Set the match limit +pcre2_set_max_pattern_length +   Set the maximum length of pattern + pcre2_set_newline   Set the newline convention +pcre2_set_offset_limit +   Set the offset limit + pcre2_set_parens_nest_limit   Set the parentheses nesting limit diff --git a/pcre2/doc/html/pcre2.html b/pcre2/doc/html/pcre2.html index e94b355a3..07ab8e9e8 100644 --- a/pcre2/doc/html/pcre2.html +++ b/pcre2/doc/html/pcre2.html @@ -126,8 +126,10 @@ running redundant checks.

    The use of the \C escape sequence in a UTF-8 or UTF-16 pattern can lead to problems, because it may leave the current matching point in the middle of a -multi-code-unit character. The PCRE2_NEVER_BACKSLASH_C option can be used to -lock out the use of \C, causing a compile-time error if it is encountered. +multi-code-unit character. The PCRE2_NEVER_BACKSLASH_C option can be used by an +application to lock out the use of \C, causing a compile-time error if it is +encountered. It is also possible to build PCRE2 with the use of \C permanently +disabled.

    Another way that performance can be hit is by running a pattern that has a very @@ -187,7 +189,7 @@ use my two initials, followed by the two digits 10, at the domain cam.ac.uk.


    REVISION

    -Last updated: 13 April 2015 +Last updated: 16 October 2015
    Copyright © 1997-2015 University of Cambridge.
    diff --git a/pcre2/doc/html/pcre2_code_copy.html b/pcre2/doc/html/pcre2_code_copy.html new file mode 100644 index 000000000..667d7b7ff --- /dev/null +++ b/pcre2/doc/html/pcre2_code_copy.html @@ -0,0 +1,43 @@ + + +pcre2_code_copy specification + + +

    pcre2_code_copy man page

    +

    +Return to the PCRE2 index page. +

    +

    +This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
    +
    +SYNOPSIS +
    +

    +#include <pcre2.h> +

    +

    +pcre2_code *pcre2_code_copy(const pcre2_code *code); +

    +
    +DESCRIPTION +
    +

    +This function makes a copy of the memory used for a compiled pattern, excluding +any memory used by the JIT compiler. Without a subsequent call to +pcre2_jit_compile(), the copy can be used only for non-JIT matching. The +pointer to the character tables is copied, not the tables themselves (see +pcre2_code_copy_with_tables()). The yield of the function is NULL if +code is NULL or if sufficient memory cannot be obtained. +

    +

    +There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

    +Return to the PCRE2 index page. +

    diff --git a/pcre2/doc/html/pcre2_code_copy_with_tables.html b/pcre2/doc/html/pcre2_code_copy_with_tables.html new file mode 100644 index 000000000..67b2e1ffd --- /dev/null +++ b/pcre2/doc/html/pcre2_code_copy_with_tables.html @@ -0,0 +1,44 @@ + + +pcre2_code_copy_with_tables specification + + +

    pcre2_code_copy_with_tables man page

    +

    +Return to the PCRE2 index page. +

    +

    +This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
    +
    +SYNOPSIS +
    +

    +#include <pcre2.h> +

    +

    +pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *code); +

    +
    +DESCRIPTION +
    +

    +This function makes a copy of the memory used for a compiled pattern, excluding +any memory used by the JIT compiler. Without a subsequent call to +pcre2_jit_compile(), the copy can be used only for non-JIT matching. +Unlike pcre2_code_copy(), a separate copy of the character tables is also +made, with the new code pointing to it. This memory will be automatically freed +when pcre2_code_free() is called. The yield of the function is NULL if +code is NULL or if sufficient memory cannot be obtained. +

    +

    +There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

    +Return to the PCRE2 index page. +

    diff --git a/pcre2/doc/html/pcre2_code_free.html b/pcre2/doc/html/pcre2_code_free.html index 405d16466..0477abe4c 100644 --- a/pcre2/doc/html/pcre2_code_free.html +++ b/pcre2/doc/html/pcre2_code_free.html @@ -19,7 +19,7 @@ SYNOPSIS #include <pcre2.h>

    -pcre2_code_free(pcre2_code *code); +void pcre2_code_free(pcre2_code *code);


    DESCRIPTION diff --git a/pcre2/doc/html/pcre2_dfa_match.html b/pcre2/doc/html/pcre2_dfa_match.html index e137a14a8..7eca2ba9f 100644 --- a/pcre2/doc/html/pcre2_dfa_match.html +++ b/pcre2/doc/html/pcre2_dfa_match.html @@ -45,8 +45,8 @@ is pcre2_match().) The arguments for this function are: wscount Number of elements in the vector For pcre2_dfa_match(), a match context is needed only if you want to set -up a callout function. The length and startoffset values are code -units, not characters. The options are: +up a callout function or specify the recursion limit. The length and +startoffset values are code units, not characters. The options are:
       PCRE2_ANCHORED          Match only at the first position
       PCRE2_NOTBOL            Subject is not the beginning of a line
    diff --git a/pcre2/doc/html/pcre2_get_error_message.html b/pcre2/doc/html/pcre2_get_error_message.html
    index 5d422913e..26c80febe 100644
    --- a/pcre2/doc/html/pcre2_get_error_message.html
    +++ b/pcre2/doc/html/pcre2_get_error_message.html
    @@ -35,7 +35,10 @@ errors are negative numbers. The arguments are:
       bufflen     the length of the buffer (code units)
     
    The function returns the length of the message, excluding the trailing zero, or -a negative error code if the buffer is too small. +the negative error code PCRE2_ERROR_NOMEMORY if the buffer is too small. In +this case, the returned message is truncated (but still with a trailing zero). +If errorcode does not contain a recognized error code number, the +negative value PCRE2_ERROR_BADDATA is returned.

    There is a complete description of the PCRE2 native API in the diff --git a/pcre2/doc/html/pcre2_match_data_create.html b/pcre2/doc/html/pcre2_match_data_create.html index 03cbe244a..8d0321b55 100644 --- a/pcre2/doc/html/pcre2_match_data_create.html +++ b/pcre2/doc/html/pcre2_match_data_create.html @@ -19,7 +19,7 @@ SYNOPSIS #include <pcre2.h>

    -pcre2_match_data_create(uint32_t ovecsize, +pcre2_match_data *pcre2_match_data_create(uint32_t ovecsize, pcre2_general_context *gcontext);


    diff --git a/pcre2/doc/html/pcre2_match_data_create_from_pattern.html b/pcre2/doc/html/pcre2_match_data_create_from_pattern.html index 3f8d3f51a..f40cf1e12 100644 --- a/pcre2/doc/html/pcre2_match_data_create_from_pattern.html +++ b/pcre2/doc/html/pcre2_match_data_create_from_pattern.html @@ -19,8 +19,8 @@ SYNOPSIS #include <pcre2.h>

    -pcre2_match_data_create_from_pattern(const pcre2_code *code, - pcre2_general_context *gcontext); +pcre2_match_data *pcre2_match_data_create_from_pattern( + const pcre2_code *code, pcre2_general_context *gcontext);


    DESCRIPTION diff --git a/pcre2/doc/html/pcre2_pattern_info.html b/pcre2/doc/html/pcre2_pattern_info.html index 4e007ee4c..b4cd6f5b1 100644 --- a/pcre2/doc/html/pcre2_pattern_info.html +++ b/pcre2/doc/html/pcre2_pattern_info.html @@ -42,19 +42,20 @@ request are as follows: PCRE2_BSR_ANYCRLF: CR, LF, or CRLF only PCRE2_INFO_CAPTURECOUNT Number of capturing subpatterns PCRE2_INFO_FIRSTBITMAP Bitmap of first code units, or NULL - PCRE2_INFO_FIRSTCODEUNIT First code unit when type is 1 PCRE2_INFO_FIRSTCODETYPE Type of start-of-match information 0 nothing set 1 first code unit is set 2 start of string or after newline + PCRE2_INFO_FIRSTCODEUNIT First code unit when type is 1 + PCRE2_INFO_HASBACKSLASHC Return 1 if pattern contains \C PCRE2_INFO_HASCRORLF Return 1 if explicit CR or LF matches exist in the pattern PCRE2_INFO_JCHANGED Return 1 if (?J) or (?-J) was used PCRE2_INFO_JITSIZE Size of JIT compiled code, or 0 - PCRE2_INFO_LASTCODEUNIT Last code unit when type is 1 PCRE2_INFO_LASTCODETYPE Type of must-be-present information 0 nothing set 1 code unit is set + PCRE2_INFO_LASTCODEUNIT Last code unit when type is 1 PCRE2_INFO_MATCHEMPTY 1 if the pattern can match an empty string, 0 otherwise PCRE2_INFO_MATCHLIMIT Match limit if set, @@ -62,8 +63,8 @@ request are as follows: PCRE2_INFO_MAXLOOKBEHIND Length (in characters) of the longest lookbehind assertion PCRE2_INFO_MINLENGTH Lower bound length of matching strings - PCRE2_INFO_NAMEENTRYSIZE Size of name table entries PCRE2_INFO_NAMECOUNT Number of named subpatterns + PCRE2_INFO_NAMEENTRYSIZE Size of name table entries PCRE2_INFO_NAMETABLE Pointer to name table PCRE2_INFO_NEWLINE Code for the newline sequence: PCRE2_NEWLINE_CR diff --git a/pcre2/doc/html/pcre2_serialize_decode.html b/pcre2/doc/html/pcre2_serialize_decode.html index 26948bdea..688398ff2 100644 --- a/pcre2/doc/html/pcre2_serialize_decode.html +++ b/pcre2/doc/html/pcre2_serialize_decode.html @@ -20,7 +20,7 @@ SYNOPSIS

    int32_t pcre2_serialize_decode(pcre2_code **codes, - int32_t number_of_codes, const uint32_t *bytes, + int32_t number_of_codes, const uint8_t *bytes, pcre2_general_context *gcontext);


    diff --git a/pcre2/doc/html/pcre2_serialize_encode.html b/pcre2/doc/html/pcre2_serialize_encode.html index a6efcd7d0..08cc46d11 100644 --- a/pcre2/doc/html/pcre2_serialize_encode.html +++ b/pcre2/doc/html/pcre2_serialize_encode.html @@ -19,8 +19,8 @@ SYNOPSIS #include <pcre2.h>

    -int32_t pcre2_serialize_encode(pcre2_code **codes, - int32_t number_of_codes, uint32_t **serialized_bytes, +int32_t pcre2_serialize_encode(const pcre2_code **codes, + int32_t number_of_codes, uint8_t **serialized_bytes, PCRE2_SIZE *serialized_size, pcre2_general_context *gcontext);


    diff --git a/pcre2/doc/html/pcre2_set_max_pattern_length.html b/pcre2/doc/html/pcre2_set_max_pattern_length.html new file mode 100644 index 000000000..f6e422aa5 --- /dev/null +++ b/pcre2/doc/html/pcre2_set_max_pattern_length.html @@ -0,0 +1,43 @@ + + +pcre2_set_max_pattern_length specification + + +

    pcre2_set_max_pattern_length man page

    +

    +Return to the PCRE2 index page. +

    +

    +This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
    +
    +SYNOPSIS +
    +

    +#include <pcre2.h> +

    +

    +int pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, + PCRE2_SIZE value); +

    +
    +DESCRIPTION +
    +

    +This function sets, in a compile context, the maximum text length (in code +units) of the pattern that can be compiled. The result is always zero. If a +longer pattern is passed to pcre2_compile() there is an immediate error +return. The default is effectively unlimited, being the largest value a +PCRE2_SIZE variable can hold. +

    +

    +There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

    +Return to the PCRE2 index page. +

    diff --git a/pcre2/doc/html/pcre2_set_offset_limit.html b/pcre2/doc/html/pcre2_set_offset_limit.html new file mode 100644 index 000000000..6d9a85c64 --- /dev/null +++ b/pcre2/doc/html/pcre2_set_offset_limit.html @@ -0,0 +1,40 @@ + + +pcre2_set_offset_limit specification + + +

    pcre2_set_offset_limit man page

    +

    +Return to the PCRE2 index page. +

    +

    +This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
    +
    +SYNOPSIS +
    +

    +#include <pcre2.h> +

    +

    +int pcre2_set_offset_limit(pcre2_match_context *mcontext, + PCRE2_SIZE value); +

    +
    +DESCRIPTION +
    +

    +This function sets the offset limit field in a match context. The result is +always zero. +

    +

    +There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

    +Return to the PCRE2 index page. +

    diff --git a/pcre2/doc/html/pcre2_substitute.html b/pcre2/doc/html/pcre2_substitute.html index 0976947dd..2dfd09475 100644 --- a/pcre2/doc/html/pcre2_substitute.html +++ b/pcre2/doc/html/pcre2_substitute.html @@ -59,20 +59,25 @@ units, not characters, as is the contents of the variable pointed at by outlengthptr, which is updated to the actual length of the new string. The options are:
    -  PCRE2_ANCHORED          Match only at the first position
    -  PCRE2_NOTBOL            Subject string is not the beginning of a line
    -  PCRE2_NOTEOL            Subject string is not the end of a line
    -  PCRE2_NOTEMPTY          An empty string is not a valid match
    -  PCRE2_NOTEMPTY_ATSTART  An empty string at the start of the subject
    -                           is not a valid match
    -  PCRE2_NO_UTF_CHECK      Do not check the subject or replacement for
    -                           UTF validity (only relevant if PCRE2_UTF
    -                           was set at compile time)
    -  PCRE2_SUBSTITUTE_GLOBAL Replace all occurrences in the subject
    +  PCRE2_ANCHORED             Match only at the first position
    +  PCRE2_NOTBOL               Subject is not the beginning of a line
    +  PCRE2_NOTEOL               Subject is not the end of a line
    +  PCRE2_NOTEMPTY             An empty string is not a valid match
    +  PCRE2_NOTEMPTY_ATSTART     An empty string at the start of the
    +                              subject is not a valid match
    +  PCRE2_NO_UTF_CHECK         Do not check the subject or replacement
    +                              for UTF validity (only relevant if
    +                              PCRE2_UTF was set at compile time)
    +  PCRE2_SUBSTITUTE_EXTENDED  Do extended replacement processing
    +  PCRE2_SUBSTITUTE_GLOBAL    Replace all occurrences in the subject
    +  PCRE2_SUBSTITUTE_OVERFLOW_LENGTH  If overflow, compute needed length
    +  PCRE2_SUBSTITUTE_UNKNOWN_UNSET  Treat unknown group as unset
    +  PCRE2_SUBSTITUTE_UNSET_EMPTY  Simple unset insert = empty string
     
    The function returns the number of substitutions, which may be zero if there were no matches. The result can be greater than one only when -PCRE2_SUBSTITUTE_GLOBAL is set. +PCRE2_SUBSTITUTE_GLOBAL is set. In the event of an error, a negative error code +is returned.

    There is a complete description of the PCRE2 native API in the diff --git a/pcre2/doc/html/pcre2api.html b/pcre2/doc/html/pcre2api.html index 60d2bf569..6ffa69fb6 100644 --- a/pcre2/doc/html/pcre2api.html +++ b/pcre2/doc/html/pcre2api.html @@ -43,16 +43,17 @@ please consult the man page, in case the conversion went wrong.

  • HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS
  • OTHER INFORMATION ABOUT A MATCH
  • ERROR RETURNS FROM pcre2_match() -
  • EXTRACTING CAPTURED SUBSTRINGS BY NUMBER -
  • EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS -
  • EXTRACTING CAPTURED SUBSTRINGS BY NAME -
  • CREATING A NEW STRING WITH SUBSTITUTIONS -
  • DUPLICATE SUBPATTERN NAMES -
  • FINDING ALL POSSIBLE MATCHES AT ONE POSITION -
  • MATCHING A PATTERN: THE ALTERNATIVE FUNCTION -
  • SEE ALSO -
  • AUTHOR -
  • REVISION +
  • OBTAINING A TEXTUAL ERROR MESSAGE +
  • EXTRACTING CAPTURED SUBSTRINGS BY NUMBER +
  • EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS +
  • EXTRACTING CAPTURED SUBSTRINGS BY NAME +
  • CREATING A NEW STRING WITH SUBSTITUTIONS +
  • DUPLICATE SUBPATTERN NAMES +
  • FINDING ALL POSSIBLE MATCHES AT ONE POSITION +
  • MATCHING A PATTERN: THE ALTERNATIVE FUNCTION +
  • SEE ALSO +
  • AUTHOR +
  • REVISION

    #include <pcre2.h> @@ -70,15 +71,15 @@ document for an overview of all the PCRE2 documentation. pcre2_compile_context *ccontext);

    -pcre2_code_free(pcre2_code *code); +void pcre2_code_free(pcre2_code *code);

    -pcre2_match_data_create(uint32_t ovecsize, +pcre2_match_data *pcre2_match_data_create(uint32_t ovecsize, pcre2_general_context *gcontext);

    -pcre2_match_data_create_from_pattern(const pcre2_code *code, - pcre2_general_context *gcontext); +pcre2_match_data *pcre2_match_data_create_from_pattern( + const pcre2_code *code, pcre2_general_context *gcontext);

    int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, @@ -143,6 +144,10 @@ document for an overview of all the PCRE2 documentation. const unsigned char *tables);

    +int pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, + PCRE2_SIZE value); +
    +
    int pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t value);
    @@ -176,6 +181,10 @@ document for an overview of all the PCRE2 documentation. uint32_t value);

    +int pcre2_set_offset_limit(pcre2_match_context *mcontext, + PCRE2_SIZE value); +
    +
    int pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t value);
    @@ -266,12 +275,12 @@ document for an overview of all the PCRE2 documentation.
    PCRE2 NATIVE API SERIALIZATION FUNCTIONS

    int32_t pcre2_serialize_decode(pcre2_code **codes, - int32_t number_of_codes, const uint32_t *bytes, + int32_t number_of_codes, const uint8_t *bytes, pcre2_general_context *gcontext);

    -int32_t pcre2_serialize_encode(pcre2_code **codes, - int32_t number_of_codes, uint32_t **serialized_bytes, +int32_t pcre2_serialize_encode(const pcre2_code **codes, + int32_t number_of_codes, uint8_t **serialized_bytes, PCRE2_SIZE *serialized_size, pcre2_general_context *gcontext);

    @@ -282,6 +291,12 @@ document for an overview of all the PCRE2 documentation.


    PCRE2 NATIVE API AUXILIARY FUNCTIONS

    +pcre2_code *pcre2_code_copy(const pcre2_code *code); +
    +
    +pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *code); +
    +
    int pcre2_get_error_message(int errorcode, PCRE2_UCHAR *buffer, PCRE2_SIZE bufflen);
    @@ -406,9 +421,10 @@ More complicated programs might need to make use of the specialist functions pcre2_jit_stack_assign() in order to control the JIT code's memory usage.

    -JIT matching is automatically used by pcre2_match() if it is available. -There is also a direct interface for JIT matching, which gives improved -performance. The JIT-specific functions are discussed in the +JIT matching is automatically used by pcre2_match() if it is available, +unless the PCRE2_NO_JIT option is set. There is also a direct interface for JIT +matching, which gives improved performance. The JIT-specific functions are +discussed in the pcre2jit documentation.

    @@ -447,10 +463,19 @@ return a copy of the subject string with substitutions for parts that were matched.

    +Functions whose names begin with pcre2_serialize_ are used for saving +compiled patterns on disc or elsewhere, and reloading them later. +

    +

    Finally, there are functions for finding out information about a compiled pattern (pcre2_pattern_info()) and about the configuration with which PCRE2 was built (pcre2_config()).

    +

    +Functions with names ending with _free() are used for freeing memory +blocks of various sorts. In all cases, if one of these functions is called with +a NULL argument, it does nothing. +


    STRING LENGTHS AND OFFSETS

    The PCRE2 API uses string lengths and offsets into strings of code units in @@ -508,20 +533,52 @@ time ensuring that multithreaded applications can use it. There are several different blocks of data that are used to pass information between the application and the PCRE2 libraries.

    +
    +The compiled pattern +

    -(1) A pointer to the compiled form of a pattern is returned to the user when +A pointer to the compiled form of a pattern is returned to the user when pcre2_compile() is successful. The data in the compiled pattern is fixed, and does not change when the pattern is matched. Therefore, it is thread-safe, that is, the same compiled pattern can be used by more than one thread -simultaneously. An application can compile all its patterns at the start, -before forking off multiple threads that use them. However, if the just-in-time -optimization feature is being used, it needs separate memory stack areas for -each thread. See the +simultaneously. For example, an application can compile all its patterns at the +start, before forking off multiple threads that use them. However, if the +just-in-time optimization feature is being used, it needs separate memory stack +areas for each thread. See the pcre2jit documentation for more details.

    -(2) The next section below introduces the idea of "contexts" in which PCRE2 +In a more complicated situation, where patterns are compiled only when they are +first needed, but are still shared between threads, pointers to compiled +patterns must be protected from simultaneous writing by multiple threads, at +least until a pattern has been compiled. The logic can be something like this: +

    +  Get a read-only (shared) lock (mutex) for pointer
    +  if (pointer == NULL)
    +    {
    +    Get a write (unique) lock for pointer
    +    pointer = pcre2_compile(...
    +    }
    +  Release the lock
    +  Use pointer in pcre2_match()
    +
    +Of course, testing for compilation errors should also be included in the code. +

    +

    +If JIT is being used, but the JIT compilation is not being done immediately, +(perhaps waiting to see if the pattern is used often enough) similar logic is +required. JIT compilation updates a pointer within the compiled code block, so +a thread must gain unique write access to the pointer before calling +pcre2_jit_compile(). Alternatively, pcre2_code_copy() or +pcre2_code_copy_with_tables() can be used to obtain a private copy of the +compiled code. +

    +
    +Context blocks +
    +

    +The next main section below introduces the idea of "contexts" in which PCRE2 functions are called. A context is nothing more than a collection of parameters that control the way PCRE2 operates. Grouping a number of parameters together in a context is a convenient way of passing them to a PCRE2 function without @@ -535,11 +592,14 @@ are never changed, the same context can be used by all the threads. However, if any thread needs to change any value in a context, it must make its own thread-specific copy.

    +
    +Match blocks +

    -(3) The matching functions need a block of memory for working space and for -storing the results of a match. This includes details of what was matched, as -well as additional information such as the name of a (*MARK) setting. Each -thread must provide its own version of this memory. +The matching functions need a block of memory for working space and for storing +the results of a match. This includes details of what was matched, as well as +additional information such as the name of a (*MARK) setting. Each thread must +provide its own copy of this memory.


    PCRE2 CONTEXTS

    @@ -610,6 +670,7 @@ of the following compile-time parameters: PCRE2's character tables The newline character sequence The compile time nested parentheses limit + The maximum length of the pattern string An external function for stack checking A compile context is also required if you are using custom memory management. @@ -648,6 +709,15 @@ interpreted matching functions, pcre2_match() and The value must be the result of a call to pcre2_maketables(), whose only argument is a general context. This function builds a set of character tables in the current locale. +int pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, + PCRE2_SIZE value); +
    +
    +This sets a maximum length, in code units, for the pattern string that is to be +compiled. If the pattern is longer, an error is generated. This facility is +provided so that applications that accept patterns from external sources can +limit their size. The default is the largest number that a PCRE2_SIZE variable +can hold, which is effectively unlimited. int pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t value);
    @@ -670,7 +740,8 @@ functions, pcre2_match() and pcre2_dfa_match().
    This parameter adjusts the limit, set when PCRE2 is built (default 250), on the depth of parenthesis nesting in a pattern. This limit stops rogue patterns -using up too much system stack when being compiled. +using up too much system stack when being compiled. The limit applies to +parentheses of all kinds, not just capturing parentheses. int pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext, int (*guard_function)(uint32_t, void *), void *user_data);
    @@ -697,8 +768,9 @@ A match context is required if you want to change the default values of any of the following match-time parameters:

       A callout function
    -  The limit for calling match()
    -  The limit for calling match() recursively
    +  The offset limit for matching an unanchored pattern
    +  The limit for calling match() (see below)
    +  The limit for calling match() recursively
     
    A match context is also required if you are using custom memory management. If none of these apply, just pass NULL as the context argument of @@ -729,6 +801,32 @@ This sets up a "callout" function, which PCRE2 will call at specified points during a matching operation. Details are given in the pcre2callout documentation. +int pcre2_set_offset_limit(pcre2_match_context *mcontext, + PCRE2_SIZE value); +
    +
    +The offset_limit parameter limits how far an unanchored search can +advance in the subject string. The default value is PCRE2_UNSET. The +pcre2_match() and pcre2_dfa_match() functions return +PCRE2_ERROR_NOMATCH if a match with a starting point before or at the given +offset is not found. For example, if the pattern /abc/ is matched against +"123abc" with an offset limit less than 3, the result is PCRE2_ERROR_NOMATCH. +A match can never be found if the startoffset argument of +pcre2_match() or pcre2_dfa_match() is greater than the offset +limit. +

    +

    +When using this facility, you must set PCRE2_USE_OFFSET_LIMIT when calling +pcre2_compile() so that when JIT is in use, different code can be +compiled. If a match is started with a non-default offset limit when +PCRE2_USE_OFFSET_LIMIT is not set, an error is generated. +

    +

    +The offset limit facility can be used to track progress when searching large +subject strings. See also the PCRE2_FIRSTLINE option, which requires a match to +start within the first line of the subject. If this is set with an offset +limit, a match must occur in the first line and also within the offset limit. +In other words, whichever limit comes first is used. int pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t value);
    @@ -781,21 +879,23 @@ This limit is of use only if it is set smaller than match_limit. Limiting the recursion depth limits the amount of system stack that can be used, or, when PCRE2 has been compiled to use memory on the heap instead of the stack, the amount of heap memory that can be used. This limit is not relevant, -and is ignored, when matching is done using JIT compiled code or by the -pcre2_dfa_match() function. +and is ignored, when matching is done using JIT compiled code. However, it is +supported by pcre2_dfa_match(), which uses recursive function calls less +frequently than pcre2_match(), but which can be caused to use a lot of +stack by a recursive pattern such as /(.)(?1)/ matched to a very long string.

    The default value for recursion_limit can be set when PCRE2 is built; the default default is the same value as the default for match_limit. If the -limit is exceeded, pcre2_match() returns PCRE2_ERROR_RECURSIONLIMIT. A -value for the recursion limit may also be supplied by an item at the start of a -pattern of the form +limit is exceeded, pcre2_match() and pcre2_dfa_match() return +PCRE2_ERROR_RECURSIONLIMIT. A value for the recursion limit may also be +supplied by an item at the start of a pattern of the form

       (*LIMIT_RECURSION=ddd)
     
    where ddd is a decimal number. However, such a setting is ignored unless ddd is -less than the limit set by the caller of pcre2_match() or, if no such -limit is set, less than the default. +less than the limit set by the caller of pcre2_match() or +pcre2_dfa_match() or, if no such limit is set, less than the default. int pcre2_set_recursion_memory_management( pcre2_match_context *mcontext, void *(*private_malloc)(PCRE2_SIZE, void *), @@ -936,7 +1036,7 @@ The where argument should point to a buffer that is at least 24 code units long. (The exact length required can be found by calling pcre2_config() with where set to NULL.) If PCRE2 has been compiled without Unicode support, the buffer is filled with the text "Unicode not -supported". Otherwise, the Unicode version string (for example, "7.0.0") is +supported". Otherwise, the Unicode version string (for example, "8.0.0") is inserted. The number of code units used is returned. This is the length of the string plus one unit for the terminating zero.
    @@ -961,35 +1061,67 @@ zero.
       pcre2_compile_context *ccontext);
     

    -pcre2_code_free(pcre2_code *code); +void pcre2_code_free(pcre2_code *code); +
    +
    +pcre2_code *pcre2_code_copy(const pcre2_code *code); +
    +
    +pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *code);

    The pcre2_compile() function compiles a pattern into an internal form. -The pattern is defined by a pointer to a string of code units and a length, If +The pattern is defined by a pointer to a string of code units and a length. If the pattern is zero-terminated, the length can be specified as PCRE2_ZERO_TERMINATED. The function returns a pointer to a block of memory that -contains the compiled pattern and related data. The caller must free the memory -by calling pcre2_code_free() when it is no longer needed. -

    -

    -NOTE: When one of the matching functions is called, pointers to the compiled -pattern and the subject string are set in the match data block so that they can -be referenced by the extraction functions. After running a match, you must not -free a compiled pattern (or a subject string) until after all operations on the -match data block -have taken place. +contains the compiled pattern and related data, or NULL if an error occurred.

    If the compile context argument ccontext is NULL, memory for the compiled pattern is obtained by calling malloc(). Otherwise, it is obtained from -the same memory function that was used for the compile context. +the same memory function that was used for the compile context. The caller must +free the memory by calling pcre2_code_free() when it is no longer needed.

    -The options argument contains various bit settings that affect the -compilation. It should be zero if no options are required. The available -options are described below. Some of them (in particular, those that are -compatible with Perl, but some others as well) can also be set and unset from -within the pattern (see the detailed description in the +The function pcre2_code_copy() makes a copy of the compiled code in new +memory, using the same memory allocator as was used for the original. However, +if the code has been processed by the JIT compiler (see +below), +the JIT information cannot be copied (because it is position-dependent). +The new copy can initially be used only for non-JIT matching, though it can be +passed to pcre2_jit_compile() if required. +

    +

    +The pcre2_code_copy() function provides a way for individual threads in a +multithreaded application to acquire a private copy of shared compiled code. +However, it does not make a copy of the character tables used by the compiled +pattern; the new pattern code points to the same tables as the original code. +(See +"Locale Support" +below for details of these character tables.) In many applications the same +tables are used throughout, so this behaviour is appropriate. Nevertheless, +there are occasions when a copy of a compiled pattern and the relevant tables +are needed. The pcre2_code_copy_with_tables() function provides this facility. +Copies of both the code and the tables are made, with the new code pointing to +the new tables. The memory for the new tables is automatically freed when +pcre2_code_free() is called for the new copy of the compiled code. +

    +

    +NOTE: When one of the matching functions is called, pointers to the compiled +pattern and the subject string are set in the match data block so that they can +be referenced by the substring extraction functions. After running a match, you +must not free a compiled pattern (or a subject string) until after all +operations on the +match data block +have taken place. +

    +

    +The options argument for pcre2_compile() contains various bit +settings that affect the compilation. It should be zero if no options are +required. The available options are described below. Some of them (in +particular, those that are compatible with Perl, but some others as well) can +also be set and unset from within the pattern (see the detailed description in +the pcre2pattern documentation).

    @@ -1006,13 +1138,27 @@ newline setting) can be provided in a compile context (as described

    If errorcode or erroroffset is NULL, pcre2_compile() returns -NULL immediately. Otherwise, if compilation of a pattern fails, -pcre2_compile() returns NULL, having set these variables to an error code -and an offset (number of code units) within the pattern, respectively. The -pcre2_get_error_message() function provides a textual message for each -error code. Compilation errors are positive numbers, but UTF formatting errors -are negative numbers. For an invalid UTF-8 or UTF-16 string, the offset is that -of the first code unit of the failing character. +NULL immediately. Otherwise, the variables to which these point are set to an +error code and an offset (number of code units) within the pattern, +respectively, when pcre2_compile() returns NULL because a compilation +error has occurred. The values are not defined when compilation is successful +and pcre2_compile() returns a non-NULL value. +

    +

    +The value returned in erroroffset is an indication of where in the +pattern the error occurred. It is not necessarily the furthest point in the +pattern that was read. For example, after the error "lookbehind assertion is +not fixed length", the error offset points to the start of the failing +assertion. +

    +

    +The pcre2_get_error_message() function (see "Obtaining a textual error +message" +below) +provides a textual message for each error code. Compilation errors have +positive error codes; UTF formatting error codes are negative. For an invalid +UTF-8 or UTF-16 string, the offset is that of the first code unit of the +failing character.

    Some errors are not detected until the whole pattern has been scanned; in these @@ -1083,12 +1229,24 @@ after any internal newline. However, it does not match after a newline at the end of the subject, for compatibility with Perl. If you want a multiline circumflex also to match after a terminating newline, you must set PCRE2_ALT_CIRCUMFLEX. +

    +  PCRE2_ALT_VERBNAMES
    +
    +By default, for compatibility with Perl, the name in any verb sequence such as +(*MARK:NAME) is any sequence of characters that does not include a closing +parenthesis. The name is not processed in any way, and it is not possible to +include a closing parenthesis in the name. However, if the PCRE2_ALT_VERBNAMES +option is set, normal backslash processing is applied to verb names and only an +unescaped closing parenthesis terminates the name. A closing parenthesis can be +included in a name either as \) or between \Q and \E. If the PCRE2_EXTENDED +option is set, unescaped whitespace in verb names is skipped and #-comments are +recognized, exactly as in the rest of the pattern.
       PCRE2_AUTO_CALLOUT
     
    If this bit is set, pcre2_compile() automatically inserts callout items, -all with number 255, before each pattern item. For discussion of the callout -facility, see the +all with number 255, before each pattern item, except immediately before or +after a callout in the pattern. For discussion of the callout facility, see the pcre2callout documentation.
    @@ -1156,7 +1314,10 @@ built.
     
    If this option is set, an unanchored pattern is required to match before or at the first newline in the subject string, though the matched text may continue -over the newline. +over the newline. See also PCRE2_USE_OFFSET_LIMIT, which provides a more +general limiting facility. If PCRE2_FIRSTLINE is set with an offset limit, a +match must occur in the first line and also within the offset limit. In other +words, whichever limit comes first is used.
       PCRE2_MATCH_UNSET_BACKREF
     
    @@ -1195,7 +1356,8 @@ This option locks out the use of \C in the pattern that is being compiled. This escape can cause unpredictable behaviour in UTF-8 or UTF-16 modes, because it may leave the current matching point in the middle of a multi-code-unit character. This option may be useful in applications that process patterns from -external sources. +external sources. Note that there is also a build-time option that permanently +locks out the use of \C.
       PCRE2_NEVER_UCP
     
    @@ -1221,7 +1383,9 @@ If this option is set, it disables the use of numbered capturing parentheses in the pattern. Any opening parenthesis that is not followed by ? behaves as if it were followed by ?: but named parentheses can still be used for capturing (and they acquire numbers in the usual way). There is no equivalent of this option -in Perl. +in Perl. Note that, if this option is set, references to capturing groups (back +references or recursion/subroutine calls) may only refer to named groups, +though the reference can be by name or by number.
       PCRE2_NO_AUTO_POSSESS
     
    @@ -1338,6 +1502,17 @@ support. This option inverts the "greediness" of the quantifiers so that they are not greedy by default, but become greedy if followed by "?". It is not compatible with Perl. It can also be set by a (?U) option setting within the pattern. +
    +  PCRE2_USE_OFFSET_LIMIT
    +
    +This option must be set for pcre2_compile() if +pcre2_set_offset_limit() is going to be used to set a non-default offset +limit in a match context for matches that use this pattern. An error is +generated if an offset limit is set without this option. For more details, see +the description of pcre2_set_offset_limit() in the +section +that describes match contexts. See also the PCRE2_FIRSTLINE +option above.
       PCRE2_UTF
     
    @@ -1352,14 +1527,17 @@ page.


    COMPILATION ERROR CODES

    -There are over 80 positive error codes that pcre2_compile() may return if -it finds an error in the pattern. There are also some negative error codes that -are used for invalid UTF strings. These are the same as given by -pcre2_match() and pcre2_dfa_match(), and are described in the +There are over 80 positive error codes that pcre2_compile() may return +(via errorcode) if it finds an error in the pattern. There are also some +negative error codes that are used for invalid UTF strings. These are the same +as given by pcre2_match() and pcre2_dfa_match(), and are described +in the pcre2unicode -page. The pcre2_get_error_message() function can be called to obtain a -textual error message from any error code. -

    +page. The pcre2_get_error_message() function (see "Obtaining a textual +error message" +below) +can be called to obtain a textual error message from any error code. +


    JUST-IN-TIME (JIT) COMPILATION

    int pcre2_jit_compile(pcre2_code *code, uint32_t options); @@ -1495,11 +1673,15 @@ are as follows: Return a copy of the pattern's options. The third argument should point to a uint32_t variable. PCRE2_INFO_ARGOPTIONS returns exactly the options that were passed to pcre2_compile(), whereas PCRE2_INFO_ALLOPTIONS returns -the compile options as modified by any top-level option settings at the start -of the pattern itself. In other words, they are the options that will be in -force when matching starts. For example, if the pattern /(?im)abc(?-i)d/ is -compiled with the PCRE2_EXTENDED option, the result is PCRE2_CASELESS, -PCRE2_MULTILINE, and PCRE2_EXTENDED. +the compile options as modified by any top-level (*XXX) option settings such as +(*UTF) at the start of the pattern itself. +

    +

    +For example, if the pattern /(*UTF)abc/ is compiled with the PCRE2_EXTENDED +option, the result for PCRE2_INFO_ALLOPTIONS is PCRE2_EXTENDED and PCRE2_UTF. +Option settings such as (?i) that can change within a pattern do not affect the +result of PCRE2_INFO_ALLOPTIONS, even if they appear right at the start of the +pattern. (This was different in some earlier releases.)

    A pattern compiled without PCRE2_ANCHORED is automatically anchored by PCRE2 if @@ -1541,18 +1723,27 @@ matches only CR, LF, or CRLF.

       PCRE2_INFO_CAPTURECOUNT
     
    -Return the number of capturing subpatterns in the pattern. The third argument -should point to an uint32_t variable. +Return the highest capturing subpattern number in the pattern. In patterns +where (?| is not used, this is also the total number of capturing subpatterns. +The third argument should point to an uint32_t variable. +
    +  PCRE2_INFO_FIRSTBITMAP
    +
    +In the absence of a single first code unit for a non-anchored pattern, +pcre2_compile() may construct a 256-bit table that defines a fixed set of +values for the first code unit in any match. For example, a pattern that starts +with [abc] results in a table with three bits set. When code unit values +greater than 255 are supported, the flag bit for 255 means "any code unit of +value 255 or above". If such a table was constructed, a pointer to it is +returned. Otherwise NULL is returned. The third argument should point to a +const uint8_t * variable.
       PCRE2_INFO_FIRSTCODETYPE
     
    Return information about the first code unit of any matched string, for a non-anchored pattern. The third argument should point to an uint32_t -variable. -

    -

    -If there is a fixed first value, for example, the letter "c" from a pattern -such as (cat|cow|coyote), 1 is returned, and the character value can be +variable. If there is a fixed first value, for example, the letter "c" from a +pattern such as (cat|cow|coyote), 1 is returned, and the character value can be retrieved using PCRE2_INFO_FIRSTCODEUNIT. If there is no fixed first value, but it is known that a match can occur only at the start of the subject or following a newline in the subject, 2 is returned. Otherwise, and for anchored @@ -1567,16 +1758,10 @@ value is always less than 256. In the 16-bit library the value can be up to 0xffff. In the 32-bit library in UTF-32 mode the value can be up to 0x10ffff, and up to 0xffffffff when not using UTF-32 mode.

    -  PCRE2_INFO_FIRSTBITMAP
    +  PCRE2_INFO_HASBACKSLASHC
     
    -In the absence of a single first code unit for a non-anchored pattern, -pcre2_compile() may construct a 256-bit table that defines a fixed set of -values for the first code unit in any match. For example, a pattern that starts -with [abc] results in a table with three bits set. When code unit values -greater than 255 are supported, the flag bit for 255 means "any code unit of -value 255 or above". If such a table was constructed, a pointer to it is -returned. Otherwise NULL is returned. The third argument should point to an -const uint8_t * variable. +Return 1 if the pattern contains any instances of \C, otherwise 0. The third +argument should point to an uint32_t variable.
       PCRE2_INFO_HASCRORLF
     
    @@ -1602,13 +1787,10 @@ Returns 1 if there is a rightmost literal code unit that must exist in any matched string, other than at its start. The third argument should point to an uint32_t variable. If there is no such value, 0 is returned. When 1 is returned, the code unit value itself can be retrieved using -PCRE2_INFO_LASTCODEUNIT. -

    -

    -For anchored patterns, a last literal value is recorded only if it follows -something of variable length. For example, for the pattern /^a\d+z\d+/ the -returned value is 1 (with "z" returned from PCRE2_INFO_LASTCODEUNIT), but for -/^a\dz\d/ the returned value is 0. +PCRE2_INFO_LASTCODEUNIT. For anchored patterns, a last literal value is +recorded only if it follows something of variable length. For example, for the +pattern /^a\d+z\d+/ the returned value is 1 (with "z" returned from +PCRE2_INFO_LASTCODEUNIT), but for /^a\dz\d/ the returned value is 0.

       PCRE2_INFO_LASTCODEUNIT
     
    @@ -1619,8 +1801,11 @@ value, 0 is returned.
       PCRE2_INFO_MATCHEMPTY
     
    -Return 1 if the pattern can match an empty string, otherwise 0. The third -argument should point to an uint32_t variable. +Return 1 if the pattern might match an empty string, otherwise 0. The third +argument should point to an uint32_t variable. When a pattern contains +recursive subroutine calls it is not always possible to determine whether or +not it can match an empty string. PCRE2 takes a cautious approach and returns 1 +in such cases.
       PCRE2_INFO_MATCHLIMIT
     
    @@ -1778,12 +1963,12 @@ documentation.


    THE MATCH DATA BLOCK

    -pcre2_match_data_create(uint32_t ovecsize, +pcre2_match_data *pcre2_match_data_create(uint32_t ovecsize, pcre2_general_context *gcontext);

    -pcre2_match_data_create_from_pattern(const pcre2_code *code, - pcre2_general_context *gcontext); +pcre2_match_data *pcre2_match_data_create_from_pattern( + const pcre2_code *code, pcre2_general_context *gcontext);

    void pcre2_match_data_free(pcre2_match_data *match_data); @@ -1793,7 +1978,7 @@ Information about a successful or unsuccessful match is placed in a match data block, which is an opaque structure that is accessed by function calls. In particular, the match data block contains a vector of offsets into the subject string that define the matched part of the subject and any substrings that were -captured. This is know as the ovector. +captured. This is known as the ovector.

    Before calling pcre2_match(), pcre2_dfa_match(), or @@ -1951,14 +2136,15 @@ Option bits for pcre2_match()

    The unused bits of the options argument for pcre2_match() must be zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_NOTBOL, -PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, -PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Their action is described below. +PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_JIT, +PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Their action is +described below.

    Setting PCRE2_ANCHORED at match time is not supported by the just-in-time (JIT) compiler. If it is set, JIT matching is disabled and the normal interpretive -code in pcre2_match() is run. The remaining options are supported for JIT -matching. +code in pcre2_match() is run. Apart from PCRE2_NO_JIT (obviously), the +remaining options are supported for JIT matching.

       PCRE2_ANCHORED
     
    @@ -2005,17 +2191,32 @@ only at the first matching position, that is, at the start of the subject plus the starting offset. An empty string match later in the subject is permitted. If the pattern is anchored, such a match can occur only if the pattern contains \K. +
    +  PCRE2_NO_JIT
    +
    +By default, if a pattern has been successfully processed by +pcre2_jit_compile(), JIT is automatically used when pcre2_match() +is called with options that JIT supports. Setting PCRE2_NO_JIT disables the use +of JIT; it forces matching to be done by the interpreter.
       PCRE2_NO_UTF_CHECK
     
    When PCRE2_UTF is set at compile time, the validity of the subject as a UTF string is checked by default when pcre2_match() is subsequently called. -The entire string is checked before any other processing takes place, and a +If a non-zero starting offset is given, the check is applied only to that part +of the subject that could be inspected during matching, and there is a check +that the starting offset points to the first code unit of a character or to the +end of the subject. If there are no lookbehind assertions in the pattern, the +check starts at the starting offset. Otherwise, it starts at the length of the +longest lookbehind before the starting offset, or at the start of the subject +if there are not that many characters before the starting offset. Note that the +sequences \b and \B are one-character lookbehinds. +

    +

    +The check is carried out before any other processing takes place, and a negative error code is returned if the check fails. There are several UTF error codes for each code unit width, corresponding to different problems with the -code unit sequence. The value of startoffset is also checked, to ensure -that it points to the start of a character or to the end of the subject. There -are discussions about the validity of +code unit sequence. There are discussions about the validity of UTF-8 strings, UTF-16 strings, and @@ -2066,8 +2267,13 @@ documentation. When PCRE2 is built, a default newline convention is set; this is usually the standard convention for the operating system. The default can be overridden in a -compile context. -During matching, the newline choice affects the behaviour of the dot, +compile context +by calling pcre2_set_newline(). It can also be overridden by starting a +pattern string with, for example, (*CRLF), as described in the +section on newline conventions +in the +pcre2pattern +page. During matching, the newline choice affects the behaviour of the dot, circumflex, and dollar metacharacters. It may also alter the way the match starting position is advanced after a match failure for an unanchored pattern.

    @@ -2115,19 +2321,20 @@ function can be used to find out how many capturing subpatterns there are in a compiled pattern.

    -A successful match returns the overall matched string and any captured -substrings to the caller via a vector of PCRE2_SIZE values. This is called the -ovector, and is contained within the -match data block. -You can obtain direct access to the ovector by calling -pcre2_get_ovector_pointer() to find its address, and -pcre2_get_ovector_count() to find the number of pairs of values it -contains. Alternatively, you can use the auxiliary functions for accessing -captured substrings +You can use auxiliary functions for accessing captured substrings by number or -by name -(see below). +by name, +as described in sections below. +

    +

    +Alternatively, you can make direct use of the vector of PCRE2_SIZE values, +called the ovector, which contains the offsets of captured strings. It is +part of the +match data block. +The function pcre2_get_ovector_pointer() returns the address of the +ovector, and pcre2_get_ovector_count() returns the number of pairs of +values it contains.

    Within the ovector, the first in each pair of values is set to the offset of @@ -2216,7 +2423,13 @@ After a successful match, a partial match (PCRE2_ERROR_PARTIAL), or a failure to match (PCRE2_ERROR_NOMATCH), a (*MARK) name may be available, and pcre2_get_mark() can be called. It returns a pointer to the zero-terminated name, which is within the compiled pattern. Otherwise NULL is -returned. After a successful match, the (*MARK) name that is returned is the +returned. The length of the (*MARK) name (excluding the terminating zero) is +stored in the code unit that precedes the name. You should use this instead of +relying on the terminating zero if the (*MARK) name might contain a binary +zero. +

    +

    +After a successful match, the (*MARK) name that is returned is the last one encountered on the matching path through the pattern. After a "no match" or a partial match, the last encountered (*MARK) name is returned. For example, consider this pattern: @@ -2237,7 +2450,7 @@ escape sequence. After a partial match, however, this value is always the same as ovector[0] because \K does not affect the result of a partial match.

    -After a UTF check failure, \fBpcre2_get_startchar()\fB can be used to obtain +After a UTF check failure, pcre2_get_startchar() can be used to obtain the code unit offset of the invalid UTF character. Details are given in the pcre2unicode page. @@ -2245,11 +2458,13 @@ page.
    ERROR RETURNS FROM pcre2_match()

    If pcre2_match() fails, it returns a negative number. This can be -converted to a text string by calling pcre2_get_error_message(). Negative -error codes are also returned by other functions, and are documented with them. -The codes are given names in the header file. If UTF checking is in force and -an invalid UTF subject string is detected, one of a number of UTF-specific -negative error codes is returned. Details are given in the +converted to a text string by calling the pcre2_get_error_message() +function (see "Obtaining a textual error message" +below). +Negative error codes are also returned by other functions, and are documented +with them. The codes are given names in the header file. If UTF checking is in +force and an invalid UTF subject string is detected, one of a number of +UTF-specific negative error codes is returned. Details are given in the pcre2unicode page. The following are the other errors that may be returned by pcre2_match(): @@ -2350,8 +2565,29 @@ is attempted. PCRE2_ERROR_RECURSIONLIMIT

    The internal recursion limit was reached. +

    +
    OBTAINING A TEXTUAL ERROR MESSAGE
    +

    +int pcre2_get_error_message(int errorcode, PCRE2_UCHAR *buffer, + PCRE2_SIZE bufflen); +

    +

    +A text message for an error code from any PCRE2 function (compile, match, or +auxiliary) can be obtained by calling pcre2_get_error_message(). The code +is passed as the first argument, with the remaining two arguments specifying a +code unit buffer and its length, into which the text message is placed. Note +that the message is returned in code units of the appropriate width for the +library that is being used. +

    +

    +The returned message is terminated with a trailing zero, and the function +returns the number of code units used, excluding the trailing zero. If the +error number is unknown, the negative error code PCRE2_ERROR_BADDATA is +returned. If the buffer is too small, the message is truncated (but still with +a trailing zero), and the negative error code PCRE2_ERROR_NOMEMORY is returned. +None of the messages are very long; a buffer size of 120 code units is ample.

    -
    EXTRACTING CAPTURED SUBSTRINGS BY NUMBER
    +
    EXTRACTING CAPTURED SUBSTRINGS BY NUMBER

    int pcre2_substring_length_bynumber(pcre2_match_data *match_data, uint32_t number, PCRE2_SIZE *length); @@ -2448,7 +2684,7 @@ The substring did not participate in the match. For example, if the pattern is (abc)|(def) and the subject is "def", and the ovector contains at least two capturing slots, substring number 1 is unset.

    -
    EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS
    +
    EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS

    int pcre2_substring_list_get(pcre2_match_data *match_data, " PCRE2_UCHAR ***listptr, PCRE2_SIZE **lengthsptr); @@ -2487,7 +2723,7 @@ can be distinguished from a genuine zero-length substring by inspecting the appropriate offset in the ovector, which contain PCRE2_UNSET for unset substrings, or by calling pcre2_substring_length_bynumber().

    -
    EXTRACTING CAPTURED SUBSTRINGS BY NAME
    +
    EXTRACTING CAPTURED SUBSTRINGS BY NAME

    int pcre2_substring_number_from_name(const pcre2_code *code, PCRE2_SPTR name); @@ -2547,37 +2783,22 @@ names are not included in the compiled code. The matching process uses only numbers. For this reason, the use of different names for subpatterns of the same number causes an error at compile time.

    -
    CREATING A NEW STRING WITH SUBSTITUTIONS
    +
    CREATING A NEW STRING WITH SUBSTITUTIONS

    int pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, PCRE2_SIZE startoffset, uint32_t options, pcre2_match_data *match_data, - pcre2_match_context *mcontext, PCRE2_SPTR \fIreplacementzfP, + pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength, PCRE2_UCHAR *\fIoutputbuffer\zfP, PCRE2_SIZE *outlengthptr); +

    +

    This function calls pcre2_match() and then makes a copy of the subject string in outputbuffer, replacing the part that was matched with the replacement string, whose length is supplied in rlength. This can -be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. -

    -

    -In the replacement string, which is interpreted as a UTF string in UTF mode, -and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a -dollar character is an escape character that can specify the insertion of -characters from capturing groups in the pattern. The following forms are -recognized: -

    -  $$      insert a dollar character
    -  $<n>    insert the contents of group <n>
    -  ${<n>}  insert the contents of group <n>
    -
    -Either a group number or a group name can be given for <n>. Curly brackets are -required only if the following character would be interpreted as part of the -number or name. The number may be zero to include the entire matched string. -For example, if the pattern a(b)c is matched with "=abc=" and the replacement -string "+$1$0$1+", the result is "=+babcb+=". Group insertion is done by -calling pcre2_copy_byname() or pcre2_copy_bynumber() as -appropriate. +be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. Matches in +which a \K item in a lookahead in the pattern causes the match to end before +it starts are not supported, and give rise to an error return.

    The first seven arguments of pcre2_substitute() are the same as for @@ -2588,27 +2809,188 @@ functions from the match context, if provided, or else those that were used to allocate memory for the compiled code.

    -There is one additional option, PCRE2_SUBSTITUTE_GLOBAL, which causes the -function to iterate over the subject string, replacing every matching -substring. If this is not set, only the first matching substring is replaced. -

    -

    The outlengthptr argument must point to a variable that contains the -length, in code units, of the output buffer. It is updated to contain the -length of the new string, excluding the trailing zero that is automatically -added. +length, in code units, of the output buffer. If the function is successful, the +value is updated to contain the length of the new string, excluding the +trailing zero that is automatically added.

    -The function returns the number of replacements that were made. This may be -zero if no matches were found, and is never greater than 1 unless -PCRE2_SUBSTITUTE_GLOBAL is set. In the event of an error, a negative error code -is returned. Except for PCRE2_ERROR_NOMATCH (which is never returned), any -errors from pcre2_match() or the substring copying functions are passed -straight back. PCRE2_ERROR_BADREPLACEMENT is returned for an invalid -replacement string (unrecognized sequence following a dollar sign), and -PCRE2_ERROR_NOMEMORY is returned if the output buffer is not big enough. +If the function is not successful, the value set via outlengthptr depends +on the type of error. For syntax errors in the replacement string, the value is +the offset in the replacement string where the error was detected. For other +errors, the value is PCRE2_UNSET by default. This includes the case of the +output buffer being too small, unless PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set +(see below), in which case the value is the minimum length needed, including +space for the trailing zero. Note that in order to compute the required length, +pcre2_substitute() has to simulate all the matching and copying, instead +of giving an error return as soon as the buffer overflows. Note also that the +length is in code units, not bytes.

    -
    DUPLICATE SUBPATTERN NAMES
    +

    +In the replacement string, which is interpreted as a UTF string in UTF mode, +and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a +dollar character is an escape character that can specify the insertion of +characters from capturing groups or (*MARK) items in the pattern. The following +forms are always recognized: +

    +  $$                  insert a dollar character
    +  $<n> or ${<n>}      insert the contents of group <n>
    +  $*MARK or ${*MARK}  insert the name of the last (*MARK) encountered
    +
    +Either a group number or a group name can be given for <n>. Curly brackets are +required only if the following character would be interpreted as part of the +number or name. The number may be zero to include the entire matched string. +For example, if the pattern a(b)c is matched with "=abc=" and the replacement +string "+$1$0$1+", the result is "=+babcb+=". +

    +

    +The facility for inserting a (*MARK) name can be used to perform simple +simultaneous substitutions, as this pcre2test example shows: +

    +  /(*:pear)apple|(*:orange)lemon/g,replace=${*MARK}
    +      apple lemon
    +   2: pear orange
    +
    +As well as the usual options for pcre2_match(), a number of additional +options can be set in the options argument. +

    +

    +PCRE2_SUBSTITUTE_GLOBAL causes the function to iterate over the subject string, +replacing every matching substring. If this is not set, only the first matching +substring is replaced. If any matched substring has zero length, after the +substitution has happened, an attempt to find a non-empty match at the same +position is performed. If this is not successful, the current position is +advanced by one character except when CRLF is a valid newline sequence and the +next two characters are CR, LF. In this case, the current position is advanced +by two characters. +

    +

    +PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output buffer is +too small. The default action is to return PCRE2_ERROR_NOMEMORY immediately. If +this option is set, however, pcre2_substitute() continues to go through +the motions of matching and substituting (without, of course, writing anything) +in order to compute the size of buffer that is needed. This value is passed +back via the outlengthptr variable, with the result of the function still +being PCRE2_ERROR_NOMEMORY. +

    +

    +Passing a buffer size of zero is a permitted way of finding out how much memory +is needed for a given substitution. However, this does mean that the entire +operation is carried out twice. Depending on the application, it may be more +efficient to allocate a large buffer and free the excess afterwards, instead of +using PCRE2_SUBSTITUTE_OVERFLOW_LENGTH. +

    +

    +PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capturing groups that do +not appear in the pattern to be treated as unset groups. This option should be +used with care, because it means that a typo in a group name or number no +longer causes the PCRE2_ERROR_NOSUBSTRING error. +

    +

    +PCRE2_SUBSTITUTE_UNSET_EMPTY causes unset capturing groups (including unknown +groups when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) to be treated as empty +strings when inserted as described above. If this option is not set, an attempt +to insert an unset group causes the PCRE2_ERROR_UNSET error. This option does +not influence the extended substitution syntax described below. +

    +

    +PCRE2_SUBSTITUTE_EXTENDED causes extra processing to be applied to the +replacement string. Without this option, only the dollar character is special, +and only the group insertion forms listed above are valid. When +PCRE2_SUBSTITUTE_EXTENDED is set, two things change: +

    +

    +Firstly, backslash in a replacement string is interpreted as an escape +character. The usual forms such as \n or \x{ddd} can be used to specify +particular character codes, and backslash followed by any non-alphanumeric +character quotes that character. Extended quoting can be coded using \Q...\E, +exactly as in pattern strings. +

    +

    +There are also four escape sequences for forcing the case of inserted letters. +The insertion mechanism has three states: no case forcing, force upper case, +and force lower case. The escape sequences change the current state: \U and +\L change to upper or lower case forcing, respectively, and \E (when not +terminating a \Q quoted sequence) reverts to no case forcing. The sequences +\u and \l force the next character (if it is a letter) to upper or lower +case, respectively, and then the state automatically reverts to no case +forcing. Case forcing applies to all inserted characters, including those from +captured groups and letters within \Q...\E quoted sequences. +

    +

    +Note that case forcing sequences such as \U...\E do not nest. For example, +the result of processing "\Uaa\LBB\Ecc\E" is "AAbbcc"; the final \E has no +effect. +

    +

    +The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more +flexibility to group substitution. The syntax is similar to that used by Bash: +

    +  ${<n>:-<string>}
    +  ${<n>:+<string1>:<string2>}
    +
    +As before, <n> may be a group number or a name. The first form specifies a +default value. If group <n> is set, its value is inserted; if not, <string> is +expanded and the result inserted. The second form specifies strings that are +expanded and inserted when group <n> is set or unset, respectively. The first +form is just a convenient shorthand for +
    +  ${<n>:+${<n>}:<string>}
    +
    +Backslash can be used to escape colons and closing curly brackets in the +replacement strings. A change of the case forcing state within a replacement +string remains in force afterwards, as shown in this pcre2test example: +
    +  /(some)?(body)/substitute_extended,replace=${1:+\U:\L}HeLLo
    +      body
    +   1: hello
    +      somebody
    +   1: HELLO
    +
    +The PCRE2_SUBSTITUTE_UNSET_EMPTY option does not affect these extended +substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause unknown +groups in the extended syntax forms to be treated as unset. +

    +

    +If successful, pcre2_substitute() returns the number of replacements that +were made. This may be zero if no matches were found, and is never greater than +1 unless PCRE2_SUBSTITUTE_GLOBAL is set. +

    +

    +In the event of an error, a negative error code is returned. Except for +PCRE2_ERROR_NOMATCH (which is never returned), errors from pcre2_match() +are passed straight back. +

    +

    +PCRE2_ERROR_NOSUBSTRING is returned for a non-existent substring insertion, +unless PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set. +

    +

    +PCRE2_ERROR_UNSET is returned for an unset substring insertion (including an +unknown substring when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) when the simple +(non-extended) syntax is used and PCRE2_SUBSTITUTE_UNSET_EMPTY is not set. +

    +

    +PCRE2_ERROR_NOMEMORY is returned if the output buffer is not big enough. If the +PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set, the size of buffer that is +needed is returned via outlengthptr. Note that this does not happen by +default. +

    +

    +PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in the +replacement string, with more particular errors being PCRE2_ERROR_BADREPESCAPE +(invalid escape sequence), PCRE2_ERROR_REPMISSINGBRACE (closing curly bracket +not found), PCRE2_ERROR_BADSUBSTITUTION (syntax error in extended group +substitution), and PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before it +started, which can happen if \K is used in an assertion). +

    +

    +As for all PCRE2 errors, a text message that describes the error can be +obtained by calling the pcre2_get_error_message() function (see +"Obtaining a textual error message" +above). +

    +
    DUPLICATE SUBPATTERN NAMES

    int pcre2_substring_nametable_scan(const pcre2_code *code, PCRE2_SPTR name, PCRE2_SPTR *first, PCRE2_SPTR *last); @@ -2647,13 +3029,13 @@ function returns the length of each entry in code units. In both cases, PCRE2_ERROR_NOSUBSTRING is returned if there are no entries for the given name.

    -The format of the name table is described above in the section entitled -Information about a pattern -above. -Given all the relevant entries for the name, you can extract each of their -numbers, and hence the captured data. +The format of the name table is described +above +in the section entitled Information about a pattern. Given all the +relevant entries for the name, you can extract each of their numbers, and hence +the captured data.

    -
    FINDING ALL POSSIBLE MATCHES AT ONE POSITION
    +
    FINDING ALL POSSIBLE MATCHES AT ONE POSITION

    The traditional matching function uses a similar algorithm to Perl, which stops when it finds the first match at a given point in the subject. If you want to @@ -2671,7 +3053,7 @@ substring. Then return 1, which forces pcre2_match() to backtrack and try other alternatives. Ultimately, when it runs out of matches, pcre2_match() will yield PCRE2_ERROR_NOMATCH.

    -
    MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
    +
    MATCHING A PATTERN: THE ALTERNATIVE FUNCTION

    int pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, PCRE2_SIZE startoffset, @@ -2838,8 +3220,8 @@ There are in addition the following errors that are specific to PCRE2_ERROR_DFA_UITEM This return is given if pcre2_dfa_match() encounters an item in the -pattern that it does not support, for instance, the use of \C or a back -reference. +pattern that it does not support, for instance, the use of \C in a UTF mode or +a back reference.

       PCRE2_ERROR_DFA_UCOND
     
    @@ -2866,13 +3248,13 @@ some plausibility checks are made on the contents of the workspace, which should contain data about the previous partial match. If any of these checks fail, this error is given.

    -
    SEE ALSO
    +
    SEE ALSO

    pcre2build(3), pcre2callout(3), pcre2demo(3), pcre2matching(3), pcre2partial(3), pcre2posix(3), pcre2sample(3), pcre2stack(3), pcre2unicode(3).

    -
    AUTHOR
    +
    AUTHOR

    Philip Hazel
    @@ -2881,11 +3263,11 @@ University Computing Service Cambridge, England.

    -
    REVISION
    +
    REVISION

    -Last updated: 22 April 2015 +Last updated: 23 December 2016
    -Copyright © 1997-2015 University of Cambridge. +Copyright © 1997-2016 University of Cambridge.

    Return to the PCRE2 index page. diff --git a/pcre2/doc/html/pcre2build.html b/pcre2/doc/html/pcre2build.html index 8d9f9ce9e..2e75505f6 100644 --- a/pcre2/doc/html/pcre2build.html +++ b/pcre2/doc/html/pcre2build.html @@ -18,23 +18,26 @@ please consult the man page, in case the conversion went wrong.

  • BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES
  • BUILDING SHARED AND STATIC LIBRARIES
  • UNICODE AND UTF SUPPORT -
  • JUST-IN-TIME COMPILER SUPPORT -
  • NEWLINE RECOGNITION -
  • WHAT \R MATCHES -
  • HANDLING VERY LARGE PATTERNS -
  • AVOIDING EXCESSIVE STACK USAGE -
  • LIMITING PCRE2 RESOURCE USAGE -
  • CREATING CHARACTER TABLES AT BUILD TIME -
  • USING EBCDIC CODE -
  • PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT -
  • PCRE2GREP BUFFER SIZE -
  • PCRE2TEST OPTION FOR LIBREADLINE SUPPORT -
  • INCLUDING DEBUGGING CODE -
  • DEBUGGING WITH VALGRIND SUPPORT -
  • CODE COVERAGE REPORTING -
  • SEE ALSO -
  • AUTHOR -
  • REVISION +
  • DISABLING THE USE OF \C +
  • JUST-IN-TIME COMPILER SUPPORT +
  • NEWLINE RECOGNITION +
  • WHAT \R MATCHES +
  • HANDLING VERY LARGE PATTERNS +
  • AVOIDING EXCESSIVE STACK USAGE +
  • LIMITING PCRE2 RESOURCE USAGE +
  • CREATING CHARACTER TABLES AT BUILD TIME +
  • USING EBCDIC CODE +
  • PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS +
  • PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT +
  • PCRE2GREP BUFFER SIZE +
  • PCRE2TEST OPTION FOR LIBREADLINE SUPPORT +
  • INCLUDING DEBUGGING CODE +
  • DEBUGGING WITH VALGRIND SUPPORT +
  • CODE COVERAGE REPORTING +
  • SUPPORT FOR FUZZERS +
  • SEE ALSO +
  • AUTHOR +
  • REVISION
    BUILDING PCRE2

    @@ -148,13 +151,19 @@ properties. The application can request that they do by setting the PCRE2_UCP option. Unless the application has set PCRE2_NEVER_UCP, a pattern may also request this by starting with (*UCP).

    +
    DISABLING THE USE OF \C

    The \C escape sequence, which matches a single code unit, even in a UTF mode, can cause unpredictable behaviour because it may leave the current matching -point in the middle of a multi-code-unit character. It can be locked out by -setting the PCRE2_NEVER_BACKSLASH_C option. +point in the middle of a multi-code-unit character. The application can lock it +out by setting the PCRE2_NEVER_BACKSLASH_C option when calling +pcre2_compile(). There is also a build-time option +

    +  --enable-never-backslash-C
    +
    +(note the upper case C) which locks out the use of \C entirely.

    -
    JUST-IN-TIME COMPILER SUPPORT
    +
    JUST-IN-TIME COMPILER SUPPORT

    Just-in-time compiler support is included in the build by specifying

    @@ -171,7 +180,7 @@ pcre2grep automatically makes use of it, unless you add
     
    to the "configure" command.

    -
    NEWLINE RECOGNITION
    +
    NEWLINE RECOGNITION

    By default, PCRE2 interprets the linefeed (LF) character as indicating the end of a line. This is the normal newline character on Unix-like systems. You can @@ -208,7 +217,7 @@ Whatever default line ending convention is selected when PCRE2 is built can be overridden by applications that use the library. At build time it is conventional to use the standard for your operating system.

    -
    WHAT \R MATCHES
    +
    WHAT \R MATCHES

    By default, the sequence \R in a pattern matches any Unicode newline sequence, independently of what has been selected as the line ending sequence. If you @@ -220,7 +229,7 @@ the default is changed so that \R matches only CR, LF, or CRLF. Whatever is selected when PCRE2 is built can be overridden by applications that use the called.

    -
    HANDLING VERY LARGE PATTERNS
    +
    HANDLING VERY LARGE PATTERNS

    Within a compiled pattern, offset values are used to point from one part to another (for example, from an opening parenthesis to an alternation @@ -239,7 +248,7 @@ longer offsets slows down the operation of PCRE2 because it has to load additional data when handling them. For the 32-bit library the value is always 4 and cannot be overridden; the value of --with-link-size is ignored.

    -
    AVOIDING EXCESSIVE STACK USAGE
    +
    AVOIDING EXCESSIVE STACK USAGE

    When matching with the pcre2_match() function, PCRE2 implements backtracking by making recursive calls to an internal function called @@ -261,7 +270,7 @@ custom memory management functions can be called instead. PCRE2 runs noticeably more slowly when built in this way. This option affects only the pcre2_match() function; it is not relevant for pcre2_dfa_match().

    -
    LIMITING PCRE2 RESOURCE USAGE
    +
    LIMITING PCRE2 RESOURCE USAGE

    Internally, PCRE2 has a function called match(), which it calls repeatedly (sometimes recursively) when matching a pattern with the @@ -290,7 +299,7 @@ constraints. However, you can set a lower limit by adding, for example, to the configure command. This value can also be overridden at run time.

    -
    CREATING CHARACTER TABLES AT BUILD TIME
    +
    CREATING CHARACTER TABLES AT BUILD TIME

    PCRE2 uses fixed tables for processing characters whose code points are less than 256. By default, PCRE2 is built with a set of tables that are distributed @@ -307,7 +316,7 @@ compiling, because dftables is run on the local host. If you need to create alternative tables when cross compiling, you will have to do so "by hand".)

    -
    USING EBCDIC CODE
    +
    USING EBCDIC CODE

    PCRE2 assumes by default that it will run in an environment where the character code is ASCII or Unicode, which is a superset of ASCII. This is the case for @@ -342,7 +351,16 @@ The options that select newline behaviour, such as --enable-newline-is-cr, and equivalent run-time options, refer to these character values in an EBCDIC environment.

    -
    PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT
    +
    PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS
    +

    +By default, on non-Windows systems, pcre2grep supports the use of +callouts with string arguments within the patterns it is matching, in order to +run external scripts. For details, see the +pcre2grep +documentation. This support can be disabled by adding +--disable-pcre2grep-callout to the configure command. +

    +
    PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT

    By default, pcre2grep reads all files as plain text. You can build it so that it recognizes files whose names end in .gz or .bz2, and reads @@ -355,22 +373,25 @@ to the configure command. These options naturally require that the relevant libraries are installed on your system. Configuration will fail if they are not.

    -
    PCRE2GREP BUFFER SIZE
    +
    PCRE2GREP BUFFER SIZE

    pcre2grep uses an internal buffer to hold a "window" on the file it is scanning, in order to be able to output "before" and "after" lines when it -finds a match. The size of the buffer is controlled by a parameter whose -default value is 20K. The buffer itself is three times this size, but because -of the way it is used for holding "before" lines, the longest line that is -guaranteed to be processable is the parameter size. You can change the default -parameter value by adding, for example, +finds a match. The starting size of the buffer is controlled by a parameter +whose default value is 20K. The buffer itself is three times this size, but +because of the way it is used for holding "before" lines, the longest line that +is guaranteed to be processable is the parameter size. If a longer line is +encountered, pcre2grep automatically expands the buffer, up to a +specified maximum size, whose default is 1M or the starting size, whichever is +the larger. You can change the default parameter values by adding, for example,

    -  --with-pcre2grep-bufsize=50K
    +  --with-pcre2grep-bufsize=51200
    +  --with-pcre2grep-max-bufsize=2097152
     
    -to the configure command. The caller of \fPpcre2grep\fP can override this -value by using --buffer-size on the command line.. +to the configure command. The caller of pcre2grep can override +these values by using --buffer-size and --max-buffer-size on the command line.

    -
    PCRE2TEST OPTION FOR LIBREADLINE SUPPORT
    +
    PCRE2TEST OPTION FOR LIBREADLINE SUPPORT

    If you add one of

    @@ -404,7 +425,7 @@ automatically included, you may need to add something like
     
    immediately before the configure command.

    -
    INCLUDING DEBUGGING CODE
    +
    INCLUDING DEBUGGING CODE

    If you add

    @@ -413,7 +434,7 @@ If you add
     to the configure command, additional debugging code is included in the
     build. This feature is intended for use by the PCRE2 maintainers.
     

    -
    DEBUGGING WITH VALGRIND SUPPORT
    +
    DEBUGGING WITH VALGRIND SUPPORT

    If you add

    @@ -423,7 +444,7 @@ to the configure command, PCRE2 will use valgrind annotations to mark
     certain memory regions as unaddressable. This allows it to detect invalid
     memory accesses, and is mostly useful for debugging PCRE2 itself.
     

    -
    CODE COVERAGE REPORTING
    +
    CODE COVERAGE REPORTING

    If your C compiler is gcc, you can build a version of PCRE2 that can generate a code coverage report for its test suite. To enable this, you must install @@ -480,11 +501,32 @@ This cleans all coverage data including the generated coverage report. For more information about code coverage, see the gcov and lcov documentation.

    -
    SEE ALSO
    +
    SUPPORT FOR FUZZERS
    +

    +There is a special option for use by people who want to run fuzzing tests on +PCRE2: +

    +  --enable-fuzz-support
    +
    +At present this applies only to the 8-bit library. If set, it causes an extra +library called libpcre2-fuzzsupport.a to be built, but not installed. This +contains a single function called LLVMFuzzerTestOneInput() whose arguments are +a pointer to a string and the length of the string. When called, this function +tries to compile the string as a pattern, and if that succeeds, to match it. +This is done both with no options and with some random option bits that are +generated from the string. Setting --enable-fuzz-support also causes a binary +called pcre2fuzzcheck to be created. This is normally run under valgrind +or used when PCRE2 is compiled with address sanitizing enabled. It calls the +fuzzing function and outputs information about what it is doing. The input strings +are specified by arguments: if an argument starts with "=" the rest of it is a +literal input string. Otherwise, it is assumed to be a file name, and the +contents of the file are the test string. +

    +
    SEE ALSO

    pcre2api(3), pcre2-config(3).

    -
    AUTHOR
    +
    AUTHOR

    Philip Hazel
    @@ -493,11 +535,11 @@ University Computing Service Cambridge, England.

    -
    REVISION
    +
    REVISION

    -Last updated: 24 April 2015 +Last updated: 01 November 2016
    -Copyright © 1997-2015 University of Cambridge. +Copyright © 1997-2016 University of Cambridge.

    Return to the PCRE2 index page. diff --git a/pcre2/doc/html/pcre2callout.html b/pcre2/doc/html/pcre2callout.html index 7e85c9a39..4e307f778 100644 --- a/pcre2/doc/html/pcre2callout.html +++ b/pcre2/doc/html/pcre2callout.html @@ -57,11 +57,20 @@ two callout points:

    If the PCRE2_AUTO_CALLOUT option bit is set when a pattern is compiled, PCRE2 automatically inserts callouts, all with number 255, before each item in the -pattern. For example, if PCRE2_AUTO_CALLOUT is used with the pattern +pattern except for immediately before or after a callout item in the pattern. +For example, if PCRE2_AUTO_CALLOUT is used with the pattern +
    +  A(?C3)B
    +
    +it is processed as if it were +
    +  (?C255)A(?C3)B(?C255)
    +
    +Here is a more complicated example:
       A(\d{2}|--)
     
    -it is processed as if it were +With PCRE2_AUTO_CALLOUT, this pattern is processed as if it were

    (?C255)A(?C255)((?C255)\d{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255) @@ -107,10 +116,10 @@ with PCRE2_ANCHORED and PCRE2_AUTO_CALLOUT and then applied to the string No match
    This indicates that when matching [bc] fails, there is no backtracking into a+ -and therefore the callouts that would be taken for the backtracks do not occur. -You can disable the auto-possessify feature by passing PCRE2_NO_AUTO_POSSESS to -pcre2_compile(), or starting the pattern with (*NO_AUTO_POSSESS). In this -case, the output changes to this: +(because it is being treated as a++) and therefore the callouts that would be +taken for the backtracks do not occur. You can disable the auto-possessify +feature by passing PCRE2_NO_AUTO_POSSESS to pcre2_compile(), or starting +the pattern with (*NO_AUTO_POSSESS). In this case, the output changes to this:
       --->aaaa
        +0 ^        a+
    @@ -235,8 +244,8 @@ Fields for numerical callouts
     

    For a numerical callout, callout_string is NULL, and callout_number contains the number of the callout, in the range 0-255. This is the number -that follows (?C for manual callouts; it is 255 for automatically generated -callouts. +that follows (?C for callouts that are part of the pattern; it is 255 for +automatically generated callouts.


    Fields for string callouts @@ -310,10 +319,15 @@ the next item to be matched.

    The next_item_length field contains the length of the next item to be -matched in the pattern string. When the callout immediately precedes an -alternation bar, a closing parenthesis, or the end of the pattern, the length -is zero. When the callout precedes an opening parenthesis, the length is that -of the entire subpattern. +processed in the pattern string. When the callout is at the end of the pattern, +the length is zero. When the callout precedes an opening parenthesis, the +length includes meta characters that follow the parenthesis. For example, in a +callout before an assertion such as (?=ab) the length is 3. For an +alternation bar or a closing parenthesis, the length is one, unless a closing +parenthesis is followed by a quantifier, in which case its length is included. +(This changed in release 10.23. In earlier releases, before an opening +parenthesis the length was that of the entire subpattern, and before an +alternation bar or a closing parenthesis the length was zero.)

    The pattern_position and next_item_length fields are intended to @@ -399,9 +413,9 @@ Cambridge, England.


    REVISION

    -Last updated: 23 March 2015 +Last updated: 29 September 2016
    -Copyright © 1997-2015 University of Cambridge. +Copyright © 1997-2016 University of Cambridge.

    Return to the PCRE2 index page. diff --git a/pcre2/doc/html/pcre2compat.html b/pcre2/doc/html/pcre2compat.html index 3b29e6fa2..993dfd1d0 100644 --- a/pcre2/doc/html/pcre2compat.html +++ b/pcre2/doc/html/pcre2compat.html @@ -107,7 +107,7 @@ processed as anchored at the point where they are tested. one that is backtracked onto acts. For example, in the pattern A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the -same as PCRE2, but there are examples where it differs. +same as PCRE2, but there are cases where it differs.

    11. Most backtracking verbs in assertions have their normal actions. They are @@ -123,7 +123,7 @@ the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE2 it is set to 13. PCRE2's handling of duplicate subpattern numbers and duplicate subpattern names is not as general as Perl's. This is a consequence of the fact that PCRE2 works internally just with numbers, using an external table to translate -between numbers and names. In particular, a pattern such as (?|(?<a>A)|(?<b)B), +between numbers and names. In particular, a pattern such as (?|(?<a>A)|(?<b>B)), where the two capturing parentheses have the same number but different names, is not supported, and causes an error at compile time. If it were allowed, it would not be possible to distinguish which parentheses matched, because both @@ -131,10 +131,11 @@ names map to capturing subpattern number 1. To avoid this confusing situation, an error is given at compile time.

    -14. Perl recognizes comments in some places that PCRE2 does not, for example, -between the ( and ? at the start of a subpattern. If the /x modifier is set, -Perl allows white space between ( and ? (though current Perls warn that this is -deprecated) but PCRE2 never does, even if the PCRE2_EXTENDED option is set. +14. Perl used to recognize comments in some places that PCRE2 does not, for +example, between the ( and ? at the start of a subpattern. If the /x modifier +is set, Perl allowed white space between ( and ? though the latest Perls give +an error (for a while it was just deprecated). There may still be some cases +where Perl behaves differently.

    15. Perl, when in warning mode, gives warnings for character classes such as @@ -161,42 +162,47 @@ each alternative branch of a lookbehind assertion can match a different length of string. Perl requires them all to have the same length.

    -(b) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the $ +(b) From PCRE2 10.23, back references to groups of fixed length are supported +in lookbehinds, provided that there is no possibility of referencing a +non-unique number or name. Perl does not support backreferences in lookbehinds. +
    +
    +(c) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the $ meta-character matches only at the very end of the string.

    -(c) A backslash followed by a letter with no special meaning is faulted. (Perl +(d) A backslash followed by a letter with no special meaning is faulted. (Perl can be made to issue a warning.)

    -(d) If PCRE2_UNGREEDY is set, the greediness of the repetition quantifiers is +(e) If PCRE2_UNGREEDY is set, the greediness of the repetition quantifiers is inverted, that is, by default they are not greedy, but if followed by a question mark they are.

    -(e) PCRE2_ANCHORED can be used at matching time to force a pattern to be tried +(f) PCRE2_ANCHORED can be used at matching time to force a pattern to be tried only at the first matching position in the subject string.

    -(f) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, and +(g) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, and PCRE2_NO_AUTO_CAPTURE options have no Perl equivalents.

    -(g) The \R escape sequence can be restricted to match only CR, LF, or CRLF +(h) The \R escape sequence can be restricted to match only CR, LF, or CRLF by the PCRE2_BSR_ANYCRLF option.

    -(h) The callout facility is PCRE2-specific. +(i) The callout facility is PCRE2-specific.

    -(i) The partial matching facility is PCRE2-specific. +(j) The partial matching facility is PCRE2-specific.

    -(j) The alternative matching function (pcre2_dfa_match() matches in a +(k) The alternative matching function (pcre2_dfa_match()) matches in a different way and is not Perl-compatible.

    -(k) PCRE2 recognizes some special sequences such as (*CR) at the start of +(l) PCRE2 recognizes some special sequences such as (*CR) at the start of a pattern that set overall options that cannot be changed within the pattern.


    @@ -214,9 +220,9 @@ Cambridge, England. REVISION

    -Last updated: 15 March 2015 +Last updated: 18 October 2016
    -Copyright © 1997-2015 University of Cambridge. +Copyright © 1997-2016 University of Cambridge.

    Return to the PCRE2 index page. diff --git a/pcre2/doc/html/pcre2demo.html b/pcre2/doc/html/pcre2demo.html index 5919117a1..d64e16be1 100644 --- a/pcre2/doc/html/pcre2demo.html +++ b/pcre2/doc/html/pcre2demo.html @@ -20,28 +20,31 @@ please consult the man page, in case the conversion went wrong. *************************************************/ /* This is a demonstration program to illustrate a straightforward way of -calling the PCRE2 regular expression library from a C program. See the +using the PCRE2 regular expression library from a C program. See the pcre2sample documentation for a short discussion ("man pcre2sample" if you have the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is incompatible with the original PCRE API. There are actually three libraries, each supporting a different code unit -width. This demonstration program uses the 8-bit library. +width. This demonstration program uses the 8-bit library. The default is to +process each code unit as a separate character, but if the pattern begins with +"(*UTF)", both it and the subject are treated as UTF-8 strings, where +characters may occupy multiple code units. In Unix-like environments, if PCRE2 is installed in your standard system libraries, you should be able to compile this program using this command: -gcc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo +cc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo If PCRE2 is not installed in a standard place, it is likely to be installed with support for the pkg-config mechanism. 
If you have pkg-config, you can compile this program using this command: -gcc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo +cc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo -If you do not have pkg-config, you may have to use this: +If you do not have pkg-config, you may have to use something like this: -gcc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \ +cc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \ -R/usr/local/lib -lpcre2-8 -o pcre2demo Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and @@ -56,9 +59,14 @@ the following line. */ /* #define PCRE2_STATIC */ -/* This macro must be defined before including pcre2.h. For a program that uses -only one code unit width, it makes it possible to use generic function names -such as pcre2_compile(). */ +/* The PCRE2_CODE_UNIT_WIDTH macro must be defined before including pcre2.h. +For a program that uses only one code unit width, setting it to 8, 16, or 32 +makes it possible to use generic function names such as pcre2_compile(). Note +that just changing 8 to 16 (for example) is not sufficient to convert this +program to process 16-bit characters. Even in a fully 16-bit environment, where +string-handling functions such as strcmp() and printf() work with 16-bit +characters, the code for handling the table of named substrings will still need +to be modified. */ #define PCRE2_CODE_UNIT_WIDTH 8 @@ -79,19 +87,19 @@ int main(int argc, char **argv) { pcre2_code *re; PCRE2_SPTR pattern; /* PCRE2_SPTR is a pointer to unsigned code units of */ -PCRE2_SPTR subject; /* the appropriate width (8, 16, or 32 bits). */ +PCRE2_SPTR subject; /* the appropriate width (in this case, 8 bits). 
*/ PCRE2_SPTR name_table; int crlf_is_newline; int errornumber; int find_all; int i; -int namecount; -int name_entry_size; int rc; int utf8; uint32_t option_bits; +uint32_t namecount; +uint32_t name_entry_size; uint32_t newline; PCRE2_SIZE erroroffset; @@ -106,15 +114,19 @@ pcre2_match_data *match_data; * First, sort out the command line. There is only one possible option at * * the moment, "-g" to request repeated matching to find all occurrences, * * like Perl's /g option. We set the variable find_all to a non-zero value * -* if the -g option is present. Apart from that, there must be exactly two * -* arguments. * +* if the -g option is present. * **************************************************************************/ find_all = 0; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-g") == 0) find_all = 1; - else break; + else if (argv[i][0] == '-') + { + printf("Unrecognised option %s\n", argv[i]); + return 1; + } + else break; } /* After the options, we require exactly two arguments, which are the pattern, @@ -122,7 +134,7 @@ and the subject string. */ if (argc - i != 2) { - printf("Two arguments required: a regex and a subject string\n"); + printf("Exactly two arguments required: a regex and a subject string\n"); return 1; } @@ -201,7 +213,7 @@ if (rc < 0) stored. */ ovector = pcre2_get_ovector_pointer(match_data); -printf("\nMatch succeeded at offset %d\n", (int)ovector[0]); +printf("Match succeeded at offset %d\n", (int)ovector[0]); /************************************************************************* @@ -242,7 +254,7 @@ we have to extract the count of named parentheses from the pattern. 
*/ PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */ &namecount); /* where to put the answer */ -if (namecount <= 0) printf("No named substrings\n"); else +if (namecount == 0) printf("No named substrings\n"); else { PCRE2_SPTR tabptr; printf("Named substrings\n"); @@ -330,8 +342,8 @@ crlf_is_newline = newline == PCRE2_NEWLINE_ANY || for (;;) { - uint32_t options = 0; /* Normally no options */ - PCRE2_SIZE start_offset = ovector[1]; /* Start at end of previous match */ + uint32_t options = 0; /* Normally no options */ + PCRE2_SIZE start_offset = ovector[1]; /* Start at end of previous match */ /* If the previous match was for an empty string, we are finished if we are at the end of the subject. Otherwise, arrange to run another match at the @@ -371,7 +383,7 @@ for (;;) { if (options == 0) break; /* All matches found */ ovector[1] = start_offset + 1; /* Advance one code unit */ - if (crlf_is_newline && /* If CRLF is newline & */ + if (crlf_is_newline && /* If CRLF is a newline & */ start_offset < subject_length - 1 && /* we are at CRLF, */ subject[start_offset] == '\r' && subject[start_offset + 1] == '\n') @@ -417,7 +429,7 @@ for (;;) printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start); } - if (namecount <= 0) printf("No named substrings\n"); else + if (namecount == 0) printf("No named substrings\n"); else { PCRE2_SPTR tabptr = name_table; printf("Named substrings\n"); diff --git a/pcre2/doc/html/pcre2grep.html b/pcre2/doc/html/pcre2grep.html index dcfb96f34..c5d1a33d7 100644 --- a/pcre2/doc/html/pcre2grep.html +++ b/pcre2/doc/html/pcre2grep.html @@ -22,11 +22,12 @@ please consult the man page, in case the conversion went wrong.

  • NEWLINES
  • OPTIONS COMPATIBILITY
  • OPTIONS WITH DATA -
  • MATCHING ERRORS -
  • DIAGNOSTICS -
  • SEE ALSO -
  • AUTHOR -
  • REVISION +
  • CALLING EXTERNAL SCRIPTS +
  • MATCHING ERRORS +
  • DIAGNOSTICS +
  • SEE ALSO +
  • AUTHOR +
  • REVISION
    SYNOPSIS

    @@ -79,11 +80,19 @@ span line boundaries. What defines a line boundary is controlled by the

    The amount of memory used for buffering files that are being scanned is -controlled by a parameter that can be set by the --buffer-size option. -The default value for this parameter is specified when pcre2grep is -built, with the default default being 20K. A block of memory three times this -size is used (to allow for buffering "before" and "after" lines). An error -occurs if a line overflows the buffer. +controlled by parameters that can be set by the --buffer-size and +--max-buffer-size options. The first of these sets the size of buffer +that is obtained at the start of processing. If an input file contains very +long lines, a larger buffer may be needed; this is handled by automatically +extending the buffer, up to the limit specified by --max-buffer-size. The +default values for these parameters are specified when pcre2grep is +built, with the default defaults being 20K and 1M respectively. An error occurs +if a line is too long and the buffer can no longer be expanded. +

    +

    +The block of memory that is actually used is three times the "buffer size", to +allow for buffering "before" and "after" lines. If the buffer size is too +small, fewer than requested "before" and "after" lines may be output.

    Patterns can be no longer than 8K or BUFSIZ bytes, whichever is the greater. @@ -154,12 +163,13 @@ processing of patterns and file names that start with hyphens.

    -A number, --after-context=number -Output number lines of context after each matching line. If file names -and/or line numbers are being output, a hyphen separator is used instead of a -colon for the context lines. A line containing "--" is output between each -group of lines, unless they are in fact contiguous in the input file. The value -of number is expected to be relatively small. However, pcre2grep -guarantees to have up to 8K of following text available for context output. +Output up to number lines of context after each matching line. Fewer +lines are output if the next match or the end of the file is reached, or if the +processing buffer size has been set too small. If file names and/or line +numbers are being output, a hyphen separator is used instead of a colon for the +context lines. A line containing "--" is output between each group of lines, +unless they are in fact contiguous in the input file. The value of number +is expected to be relatively small. When -c is used, -A is ignored.

    -a, --text @@ -168,12 +178,14 @@ Treat binary files as text. This is equivalent to

    -B number, --before-context=number -Output number lines of context before each matching line. If file names -and/or line numbers are being output, a hyphen separator is used instead of a -colon for the context lines. A line containing "--" is output between each -group of lines, unless they are in fact contiguous in the input file. The value -of number is expected to be relatively small. However, pcre2grep -guarantees to have up to 8K of preceding text available for context output. +Output up to number lines of context before each matching line. Fewer +lines are output if the previous match or the start of the file is within +number lines, or if the processing buffer size has been set too small. If +file names and/or line numbers are being output, a hyphen separator is used +instead of a colon for the context lines. A line containing "--" is output +between each group of lines, unless they are in fact contiguous in the input +file. The value of number is expected to be relatively small. When +-c is used, -B is ignored.

    --binary-files=word @@ -190,8 +202,9 @@ return code.

    --buffer-size=number -Set the parameter that controls how much memory is used for buffering files -that are being scanned. +Set the parameter that controls how much memory is obtained at the start of +processing for buffering files that are being scanned. See also +--max-buffer-size below.

    -C number, --context=number @@ -201,14 +214,16 @@ This is equivalent to setting both -A and -B to the same value.

    -c, --count Do not output lines from the files that are being scanned; instead output the -number of matches (or non-matches if -v is used) that would otherwise -have caused lines to be shown. By default, this count is the same as the number -of suppressed lines, but if the -M (multiline) option is used (without --v), there may be more suppressed lines than the number of matches. +number of lines that would have been shown, either because they matched, or, if +-v is set, because they failed to match. By default, this count is +exactly the same as the number of lines that would have been output, but if the +-M (multiline) option is used (without -v), there may be more +suppressed lines than the count (that is, the number of matches).

    If no lines are selected, the number zero is output. If several files are -being scanned, a count is output for each of them. However, if the +being scanned, a count is output for each of them and the -t option can +be used to cause a total to be output at the end. However, if the --files-with-matches option is also used, only those files whose counts are greater than zero are listed. When -c is used, the -A, -B, and -C options are ignored. @@ -230,12 +245,23 @@ because pcre2grep has to search for all possible matches in a line, not just one, in order to colour them all.

    -The colour that is used can be specified by setting the environment variable -PCRE2GREP_COLOUR or PCRE2GREP_COLOR. The value of this variable should be a -string of two numbers, separated by a semicolon. They are copied directly into -the control string for setting colour on a terminal, so it is your -responsibility to ensure that they make sense. If neither of the environment -variables is set, the default is "1;31", which gives red. +The colour that is used can be specified by setting one of the environment +variables PCRE2GREP_COLOUR, PCRE2GREP_COLOR, PCREGREP_COLOUR, or +PCREGREP_COLOR, which are checked in that order. If none of these are set, +pcre2grep looks for GREP_COLORS or GREP_COLOR (in that order). The value +of the variable should be a string of two numbers, separated by a semicolon, +except in the case of GREP_COLORS, which must start with "ms=" or "mt=" +followed by two semicolon-separated colours, terminated by the end of the +string or by a colon. If GREP_COLORS does not start with "ms=" or "mt=" it is +ignored, and GREP_COLOR is checked. +
    +
    +If the string obtained from one of the above variables contains any characters +other than semicolon or digits, the setting is ignored and the default colour +is used. The string is copied directly into the control string for setting +colour on a terminal, so it is your responsibility to ensure that the values +make sense. If no relevant environment variable is set, the default is "1;31", +which gives red.

    -D action, --devices=action @@ -320,18 +346,18 @@ files; it does not apply to patterns specified by any of the --include or

    -f filename, --file=filename -Read patterns from the file, one per line, and match them against -each line of input. What constitutes a newline when reading the file is the -operating system's default. The --newline option has no effect on this -option. Trailing white space is removed from each line, and blank lines are -ignored. An empty file contains no patterns and therefore matches nothing. See -also the comments about multiple patterns versus a single pattern with -alternatives in the description of -e above. +Read patterns from the file, one per line, and match them against each line of +input. What constitutes a newline when reading the file is the operating +system's default. The --newline option has no effect on this option. +Trailing white space is removed from each line, and blank lines are ignored. An +empty file contains no patterns and therefore matches nothing. See also the +comments about multiple patterns versus a single pattern with alternatives in +the description of -e above.

    -If this option is given more than once, all the specified files are -read. A data line is output if any of the patterns match it. A file name can -be given as "-" to refer to the standard input. When -f is used, patterns +If this option is given more than once, all the specified files are read. A +data line is output if any of the patterns match it. A file name can be given +as "-" to refer to the standard input. When -f is used, patterns specified on the command line using -e may also be present; they are tested before the file's patterns. However, no other pattern is taken from the command line; all arguments are treated as the names of paths to be searched. @@ -501,19 +527,27 @@ There are no short forms for these options. The default settings are specified when the PCRE2 library is compiled, with the default default being 10 million.

    +--max-buffer-size=number +This limits the expansion of the processing buffer, whose initial size can be +set by --buffer-size. The maximum buffer size is silently forced to be no +smaller than the starting buffer size. +

    +

    -M, --multiline -Allow patterns to match more than one line. When this option is given, patterns -may usefully contain literal newline characters and internal occurrences of ^ -and $ characters. The output for a successful match may consist of more than -one line. The first is the line in which the match started, and the last is the -line in which the match ended. If the matched string ends with a newline -sequence the output ends at the end of that line. +Allow patterns to match more than one line. When this option is set, the PCRE2 +library is called in "multiline" mode. This allows a matched string to extend +past the end of a line and continue on one or more subsequent lines. Patterns +used with -M may usefully contain literal newline characters and internal +occurrences of ^ and $ characters. The output for a successful match may +consist of more than one line. The first line is the line in which the match +started, and the last line is the line in which the match ended. If the matched +string ends with a newline sequence, the output ends at the end of that line. +If -v is set, none of the lines in a multi-line match are output. Once a +match has been handled, scanning restarts at the beginning of the line after +the one in which the match ended.

    -When this option is set, the PCRE2 library is called in "multiline" mode. -However, pcre2grep still processes the input line by line. The difference -is that a matched string may extend past the end of a line and continue on -one or more subsequent lines. The newline sequence must be matched as part of +The newline sequence that separates multiple lines must be matched as part of the pattern. For example, to find the phrase "regular expression" in a file where "regular" might be at the end of a line and "expression" at the start of the next line, you could use this command: @@ -526,11 +560,8 @@ well as possibly handling a two-character newline sequence.

    There is a limit to the number of lines that can be matched, imposed by the way -that pcre2grep buffers the input file as it scans it. However, -pcre2grep ensures that at least 8K characters or the rest of the file -(whichever is the shorter) are available for forward matching, and similarly -the previous 8K characters (or all the previous characters, if fewer than 8K) -are guaranteed to be available for lookbehind assertions. The -M option +that pcre2grep buffers the input file as it scans it. With a sufficiently +large processing buffer, this should not be a problem, but the -M option does not work when input is read line by line (see \fP--line-buffered\fP.)

    @@ -578,12 +609,13 @@ It should never be needed in normal use. Show only the part of the line that matched a pattern instead of the whole line. In this mode, no context is shown. That is, the -A, -B, and -C options are ignored. If there is more than one match in a line, each -of them is shown separately. If -o is combined with -v (invert the -sense of the match to find non-matching lines), no output is generated, but the -return code is set appropriately. If the matched portion of the line is empty, -nothing is output unless the file name or line number are being printed, in -which case they are shown on an otherwise empty line. This option is mutually -exclusive with --file-offsets and --line-offsets. +of them is shown separately, on a separate line of output. If -o is +combined with -v (invert the sense of the match to find non-matching +lines), no output is generated, but the return code is set appropriately. If +the matched portion of the line is empty, nothing is output unless the file +name or line number are being printed, in which case they are shown on an +otherwise empty line. This option is mutually exclusive with +--file-offsets and --line-offsets.

    -onumber, --only-matching=number @@ -597,10 +629,11 @@ capturing parentheses do not exist in the pattern, or were not set in the match, nothing is output unless the file name or line number are being output.

    -If this option is given multiple times, multiple substrings are output, in the -order the options are given. For example, -o3 -o1 -o3 causes the substrings -matched by capturing parentheses 3 and 1 and then 3 again to be output. By -default, there is no separator (but see the next option). +If this option is given multiple times, multiple substrings are output for each +match, in the order the options are given, and all on one line. For example, +-o3 -o1 -o3 causes the substrings matched by capturing parentheses 3 and 1 and +then 3 again to be output. By default, there is no separator (but see the next +option).

    --om-separator=text @@ -631,6 +664,18 @@ quietly skipped. However, the return code is still 2, even if matches were found in other files.

    +-t, --total-count +This option is useful when scanning more than one file. If used on its own, +-t suppresses all output except for a grand total number of matching +lines (or non-matching lines if -v is used) in all the files. If -t +is used with -c, a grand total is output except when the previous output +is just one line. In other words, it is not output when just one file's count +is listed. If file names are being output, the grand total is preceded by +"TOTAL:". Otherwise, it appears as just another number. The -t option is +ignored when used with -L (list files without matches), because the grand +total would always be zero. +

    +

    -u, --utf-8 Operate in UTF-8 mode. This option is available only if PCRE2 has been compiled with UTF-8 support. All patterns (including those for any --exclude and @@ -658,11 +703,12 @@ specified by any of the --include or --exclude options.

    -x, --line-regex, --line-regexp Force the patterns to be anchored (each must start matching at the beginning of -a line) and in addition, require them to match entire lines. This is equivalent -to having ^ and $ characters at the start and end of each alternative top-level -branch in every pattern. This option applies only to the patterns that are -matched against the contents of files; it does not apply to patterns specified -by any of the --include or --exclude options. +a line) and in addition, require them to match entire lines. In multiline mode +the match may be more than one line. This is equivalent to having \A and \Z +characters at the start and end of each alternative top-level branch in every +pattern. This option applies only to the patterns that are matched against the +contents of files; it does not apply to patterns specified by any of the +--include or --exclude options.


    ENVIRONMENT VARIABLES

    @@ -735,7 +781,57 @@ The exceptions to the above are the --colour (or --color) and options does have data, it must be given in the first form, using an equals character. Otherwise pcre2grep will assume that it has no data.

    -
    MATCHING ERRORS
    +
    CALLING EXTERNAL SCRIPTS
    +

    +pcre2grep has, by default, support for calling external programs or +scripts during matching by making use of PCRE2's callout facility. However, +this support can be disabled when pcre2grep is built. You can find out +whether your binary has support for callouts by running it with the --help +option. If the support is not enabled, all callouts in patterns are ignored by +pcre2grep. +

    +

    +A callout in a PCRE2 pattern is of the form (?C<arg>) where the argument is +either a number or a quoted string (see the +pcre2callout +documentation for details). Numbered callouts are ignored by pcre2grep. +String arguments are parsed as a list of substrings separated by pipe (vertical +bar) characters. The first substring must be an executable name, with the +following substrings specifying arguments: +

    +  executable_name|arg1|arg2|...
    +
    +Any substring (including the executable name) may contain escape sequences +started by a dollar character: $<digits> or ${<digits>} is replaced by the +captured substring of the given decimal number, which must be greater than +zero. If the number is greater than the number of capturing substrings, or if +the capture is unset, the replacement is empty. +

    +

    +Any other character is substituted by itself. In particular, $$ is replaced by +a single dollar and $| is replaced by a pipe character. Here is an example: +

    +  echo -e "abcde\n12345" | pcre2grep \
    +    '(?x)(.)(..(.))
    +    (?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -
    +
    +  Output:
    +
    +    Arg1: [a] [bcd] [d] Arg2: |a| ()
    +    abcde
    +    Arg1: [1] [234] [4] Arg2: |1| ()
    +    12345
    +
    +The parameters for the execv() system call that is used to run the +program or script are zero-terminated strings. This means that binary zero +characters in the callout argument will cause premature termination of their +substrings, and therefore should not be present. Any syntax errors in the +string (for example, a dollar not followed by another character) cause the +callout to be ignored. If running the program fails for any reason (including +the non-existence of the executable), a local matching failure occurs and the +matcher backtracks in the normal way. +

    +
    MATCHING ERRORS

    It is possible to supply a regular expression that takes a very long time to fail to match certain lines. Such patterns normally involve nested indefinite @@ -751,7 +847,7 @@ overall resource limit; there is a second option called --recursion-limit that sets a limit on the amount of memory (usually stack) that is used (see the discussion of these options above).

    -
    DIAGNOSTICS
    +
    DIAGNOSTICS

    Exit status is 0 if any matches were found, 1 if no matches were found, and 2 for syntax errors, overlong lines, non-existent or inaccessible files (even if @@ -759,11 +855,11 @@ matches were found in other files) or too many matching errors. Using the -s option to suppress error messages about inaccessible files does not affect the return code.

    -
    SEE ALSO
    +
    SEE ALSO

    -pcre2pattern(3), pcre2syntax(3). +pcre2pattern(3), pcre2syntax(3), pcre2callout(3).

    -
    AUTHOR
    +
    AUTHOR

    Philip Hazel
    @@ -772,11 +868,11 @@ University Computing Service Cambridge, England.

    -
    REVISION
    +
    REVISION

    -Last updated: 03 January 2015 +Last updated: 31 December 2016
    -Copyright © 1997-2015 University of Cambridge. +Copyright © 1997-2016 University of Cambridge.

    Return to the PCRE2 index page. diff --git a/pcre2/doc/html/pcre2jit.html b/pcre2/doc/html/pcre2jit.html index 9e3207340..4a6d4ff37 100644 --- a/pcre2/doc/html/pcre2jit.html +++ b/pcre2/doc/html/pcre2jit.html @@ -86,6 +86,13 @@ results. The returned value from pcre2_jit_compile() is zero on success, or a negative error code.

    +There is a limit to the size of pattern that JIT supports, imposed by the size +of machine stack that it uses. The exact rules are not documented because they +may change at any time, in particular, when new optimizations are introduced. +If a pattern is too big, a call to pcre2_jit_compile() returns +PCRE2_ERROR_NOMEMORY. +

    +

    PCRE2_JIT_COMPLETE requests the JIT compiler to generate code for complete matches. If you want to run partial matches using the PCRE2_PARTIAL_HARD or PCRE2_PARTIAL_SOFT options of pcre2_match(), you should set one or both @@ -145,6 +152,10 @@ PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. The PCRE2_ANCHORED option is not supported at match time.

    +If the PCRE2_NO_JIT option is passed to pcre2_match() it disables the +use of JIT, forcing matching by the interpreter code. +

    +

    The only unsupported pattern items are \C (match a single data unit) when running in a UTF mode, and a callout immediately before an assertion condition in a conditional group. @@ -224,8 +235,14 @@ whether a match operation was executed by JIT or by the interpreter.

    You may safely use the same JIT stack for more than one pattern (either by -assigning directly or by callback), as long as the patterns are all matched -sequentially in the same thread. In a multithread application, if you do not +assigning directly or by callback), as long as the patterns are matched +sequentially in the same thread. Currently, the only way to set up +non-sequential matches in one thread is to use callouts: if a callout function +starts another match, that match must use a different JIT stack to the one used +for currently suspended match(es). +

    +

    +In a multithread application, if you do not specify a JIT stack, or if you assign or pass back NULL from a callback, that is thread-safe, because each thread has its own machine stack. However, if you assign or pass back a non-NULL JIT stack, this must be a different stack for @@ -390,7 +407,7 @@ The fast path function is called pcre2_jit_match(), and it takes exactly the same arguments as pcre2_match(). The return values are also the same, plus PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or complete) is requested that was not compiled. Unsupported option bits (for example, -PCRE2_ANCHORED) are ignored. +PCRE2_ANCHORED) are ignored, as is the PCRE2_NO_JIT option.

    When you call pcre2_match(), as well as testing for invalid options, a @@ -419,9 +436,9 @@ Cambridge, England.


    REVISION

    -Last updated: 27 November 2014 +Last updated: 05 June 2016
    -Copyright © 1997-2014 University of Cambridge. +Copyright © 1997-2016 University of Cambridge.

    Return to the PCRE2 index page. diff --git a/pcre2/doc/html/pcre2limits.html b/pcre2/doc/html/pcre2limits.html index b1c06f55f..d7e382bfc 100644 --- a/pcre2/doc/html/pcre2limits.html +++ b/pcre2/doc/html/pcre2limits.html @@ -32,6 +32,11 @@ However, the speed of execution is slower. In the 32-bit library, the internal linkage size is always 4.

    +The maximum length of a source pattern string is essentially unlimited; it is +the largest number a PCRE2_SIZE variable can hold. However, the program that +calls pcre2_compile() can specify a smaller limit. +

    +

    The maximum length (in code units) of a subject string is one less than the largest number a PCRE2_SIZE variable can hold. PCRE2_SIZE is an unsigned integer type, usually defined as size_t. Its maximum value (that is @@ -50,17 +55,16 @@ documentation. All values in repeating quantifiers must be less than 65536.

    +The maximum length of a lookbehind assertion is 65535 characters. +

    +

    There is no limit to the number of parenthesized subpatterns, but there can be no more than 65535 capturing subpatterns. There is, however, a limit to the depth of nesting of parenthesized subpatterns of all kinds. This is imposed in -order to limit the amount of system stack used at compile time. The limit can -be specified when PCRE2 is built; the default is 250. -

    -

    -There is a limit to the number of forward references to subsequent subpatterns -of around 200,000. Repeated forward references with fixed upper limits, for -example, (?2){0,100} when subpattern number 2 is to the right, are included in -the count. There is no limit to the number of backward references. +order to limit the amount of system stack used at compile time. The default +limit can be specified when PCRE2 is built; the default default is 250. An +application can change this limit by calling pcre2_set_parens_nest_limit() to +set the limit in a compile context.

    The maximum length of name for a named subpattern is 32 code units, and the @@ -68,7 +72,12 @@ maximum number of named subpatterns is 10000.

    The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb -is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit libraries. +is 255 code units for the 8-bit library and 65535 code units for the 16-bit and +32-bit libraries. +

    +

    +The maximum length of a string argument to a callout is the largest number a +32-bit unsigned integer can hold.


    AUTHOR @@ -85,9 +94,9 @@ Cambridge, England. REVISION

    -Last updated: 25 November 2014 +Last updated: 26 October 2016
    -Copyright © 1997-2014 University of Cambridge. +Copyright © 1997-2016 University of Cambridge.

    Return to the PCRE2 index page. diff --git a/pcre2/doc/html/pcre2pattern.html b/pcre2/doc/html/pcre2pattern.html index a9ca60e62..58eb0e6d5 100644 --- a/pcre2/doc/html/pcre2pattern.html +++ b/pcre2/doc/html/pcre2pattern.html @@ -190,6 +190,12 @@ be less than the value set (or defaulted) by the caller of pcre2_match() for it to have any effect. In other words, the pattern writer can lower the limits set by the programmer, but not raise them. If there is more than one setting of one of these limits, the lower value is used. +

    +

    +The match limit is used (but in a different way) when JIT is being used, but it +is not relevant, and is ignored, when matching with pcre2_dfa_match(). +However, the recursion limit is relevant for DFA matching, which does use some +function recursion, in particular, for recursions within the pattern.


    Newline conventions @@ -379,32 +385,31 @@ case letter, it is converted to upper case. Then bit 6 of the character (hex 40) is inverted. Thus \cA to \cZ become hex 01 to hex 1A (A is 41, Z is 5A), but \c{ becomes hex 3B ({ is 7B), and \c; becomes hex 7B (; is 3B). If the code unit following \c has a value less than 32 or greater than 126, a -compile-time error occurs. This locks out non-printable ASCII characters in all -modes. +compile-time error occurs.

    When PCRE2 is compiled in EBCDIC mode, \a, \e, \f, \n, \r, and \t generate the appropriate EBCDIC code values. The \c escape is processed as specified for Perl in the perlebcdic document. The only characters that are allowed after \c are A-Z, a-z, or one of @, [, \, ], ^, _, or ?. Any -other character provokes a compile-time error. The sequence \@ encodes -character code 0; the letters (in either case) encode characters 1-26 (hex 01 -to hex 1A); [, \, ], ^, and _ encode characters 27-31 (hex 1B to hex 1F), and -\? becomes either 255 (hex FF) or 95 (hex 5F). +other character provokes a compile-time error. The sequence \c@ encodes +character code 0; after \c the letters (in either case) encode characters 1-26 +(hex 01 to hex 1A); [, \, ], ^, and _ encode characters 27-31 (hex 1B to hex +1F), and \c? becomes either 255 (hex FF) or 95 (hex 5F).

    -Thus, apart from \?, these escapes generate the same character code values as +Thus, apart from \c?, these escapes generate the same character code values as they do in an ASCII environment, though the meanings of the values mostly -differ. For example, \G always generates code value 7, which is BEL in ASCII +differ. For example, \cG always generates code value 7, which is BEL in ASCII but DEL in EBCDIC.

    -The sequence \? generates DEL (127, hex 7F) in an ASCII environment, but +The sequence \c? generates DEL (127, hex 7F) in an ASCII environment, but because 127 is not a control character in EBCDIC, Perl makes it generate the APC character. Unfortunately, there are several variants of EBCDIC. In most of them the APC character has the value 255 (hex FF), but in the one Perl calls POSIX-BC its value is 95 (hex 5F). If certain other characters have POSIX-BC -values, PCRE2 makes \? generate 95; otherwise it generates 255. +values, PCRE2 makes \c? generate 95; otherwise it generates 255.

    After \0 up to two further octal digits are read. If there are fewer than two @@ -526,9 +531,9 @@ by code point, as described in the previous section. Absolute and relative back references

    -The sequence \g followed by an unsigned or a negative number, optionally -enclosed in braces, is an absolute or relative back reference. A named back -reference can be coded as \g{name}. Back references are discussed +The sequence \g followed by a signed or unsigned number, optionally enclosed +in braces, is an absolute or relative back reference. A named back reference +can be coded as \g{name}. Back references are discussed later, following the discussion of parenthesized subpatterns. @@ -669,8 +674,8 @@ This is an example of an "atomic group", details of which are given This particular group matches either the two-character sequence CR followed by LF, or one of the single characters LF (linefeed, U+000A), VT (vertical tab, U+000B), FF (form feed, U+000C), CR (carriage return, U+000D), or NEL (next -line, U+0085). The two-character sequence is treated as a single unit that -cannot be split. +line, U+0085). Because this is an atomic group, the two-character sequence is +treated as a single unit that cannot be split.

    In other modes, two additional characters whose codepoints are greater than 255 @@ -736,6 +741,8 @@ Those that are not part of an identified script are lumped together as "Common". The current list of scripts is:

    +Ahom, +Anatolian_Hieroglyphs, Arabic, Armenian, Avestan, @@ -776,6 +783,7 @@ Gurmukhi, Han, Hangul, Hanunoo, +Hatran, Hebrew, Hiragana, Imperial_Aramaic, @@ -812,12 +820,14 @@ Miao, Modi, Mongolian, Mro, +Multani, Myanmar, Nabataean, New_Tai_Lue, Nko, Ogham, Ol_Chiki, +Old_Hungarian, Old_Italic, Old_North_Arabian, Old_Permic, @@ -839,6 +849,7 @@ Saurashtra, Sharada, Shavian, Siddham, +SignWriting, Sinhala, Sora_Sompeng, Sundanese, @@ -1180,6 +1191,16 @@ when the startoffset argument of pcre2_match() is non-zero. The PCRE2_DOLLAR_ENDONLY option is ignored if PCRE2_MULTILINE is set.

    +When the newline convention (see +"Newline conventions" +below) recognizes the two-character sequence CRLF as a newline, this is +preferred, even if the single characters CR and LF are also recognized as +newlines. For example, if the newline convention is "any", a multiline mode +circumflex matches before "xyz" in the string "abc\r\nxyz" rather than after +CR, even though CR on its own is a valid newline. (It also matches at the very +start of the string, of course.) +

    +

    Note that the sequences \A, \Z, and \z can be used to match the start and end of the subject in both modes, and if all branches of a pattern start with \A it is always anchored, whether or not PCRE2_MULTILINE is set. @@ -1230,20 +1251,32 @@ with \C in UTF-8 or UTF-16 mode means that the rest of the string may start with a malformed UTF character. This has undefined results, because PCRE2 assumes that it is matching character by character in a valid UTF string (by default it checks the subject string's validity at the start of processing -unless the PCRE2_NO_UTF_CHECK option is used). An application can lock out the -use of \C by setting the PCRE2_NEVER_BACKSLASH_C option. +unless the PCRE2_NO_UTF_CHECK option is used). +

    +

    +An application can lock out the use of \C by setting the +PCRE2_NEVER_BACKSLASH_C option when compiling a pattern. It is also possible to +build PCRE2 with the use of \C permanently disabled.

    PCRE2 does not allow \C to appear in lookbehind assertions (described below) -in a UTF mode, because this would make it impossible to calculate the length of -the lookbehind. +in UTF-8 or UTF-16 modes, because this would make it impossible to calculate +the length of the lookbehind. Neither the alternative matching function +pcre2_dfa_match() nor the JIT optimizer support \C in these UTF modes. +The former gives a match-time error; the latter fails to optimize and so the +match is always run using the interpreter. +

    +

    +In the 32-bit library, however, \C is always supported (when not explicitly +locked out) because it always matches a single code unit, whether or not UTF-32 +is specified.

    In general, the \C escape sequence is best avoided. However, one way of using -it that avoids the problem of malformed UTF characters is to use a lookahead to -check the length of the next character, as in this pattern, which could be used -with a UTF-8 string (ignore white space and line breaks): +it that avoids the problem of malformed UTF-8 or UTF-16 characters is to use a +lookahead to check the length of the next character, as in this pattern, which +could be used with a UTF-8 string (ignore white space and line breaks):

       (?| (?=[\x00-\x7f])(\C) |
           (?=[\x80-\x{7ff}])(\C)(\C) |
    @@ -1298,42 +1331,6 @@ whatever setting of the PCRE2_DOTALL and PCRE2_MULTILINE options is used. A
     class such as [^a] always matches one of these characters.
     

    -The minus (hyphen) character can be used to specify a range of characters in a -character class. For example, [d-m] matches any letter between d and m, -inclusive. If a minus character is required in a class, it must be escaped with -a backslash or appear in a position where it cannot be interpreted as -indicating a range, typically as the first or last character in the class, or -immediately after a range. For example, [b-d-z] matches letters in the range b -to d, a hyphen character, or z. -

    -

    -It is not possible to have the literal character "]" as the end character of a -range. A pattern such as [W-]46] is interpreted as a class of two characters -("W" and "-") followed by a literal string "46]", so it would match "W46]" or -"-46]". However, if the "]" is escaped with a backslash it is interpreted as -the end of range, so [W-\]46] is interpreted as a class containing a range -followed by two other characters. The octal or hexadecimal representation of -"]" can also be used to end a range. -

    -

    -An error is generated if a POSIX character class (see below) or an escape -sequence other than one that defines a single character appears at a point -where a range ending character is expected. For example, [z-\xff] is valid, -but [A-\d] and [A-[:digit:]] are not. -

    -

    -Ranges operate in the collating sequence of character values. They can also be -used for characters specified numerically, for example [\000-\037]. Ranges -can include any characters that are valid for the current mode. -

    -

    -If a range that includes letters is used when caseless matching is set, it -matches the letters in either case. For example, [W-c] is equivalent to -[][\\^_`wxyzabc], matched caselessly, and in a non-UTF mode, if character -tables for a French locale are in use, [\xc8-\xcb] matches accented E -characters in both cases. -

    -

    The character escape sequences \d, \D, \h, \H, \p, \P, \s, \S, \v, \V, \w, and \W may appear in a character class, and add the characters that they match to the class. For example, [\dABCDEF] matches any hexadecimal @@ -1347,6 +1344,52 @@ are not special inside a character class. Like any other unrecognized escape sequences, they cause an error.

    +The minus (hyphen) character can be used to specify a range of characters in a +character class. For example, [d-m] matches any letter between d and m, +inclusive. If a minus character is required in a class, it must be escaped with +a backslash or appear in a position where it cannot be interpreted as +indicating a range, typically as the first or last character in the class, +or immediately after a range. For example, [b-d-z] matches letters in the range +b to d, a hyphen character, or z. +

    +

    +Perl treats a hyphen as a literal if it appears before or after a POSIX class +(see below) or a character type escape such as \d, but gives a warning in +its warning mode, as this is most likely a user error. As PCRE2 has no facility +for warning, an error is given in these cases. +

    +

    +It is not possible to have the literal character "]" as the end character of a +range. A pattern such as [W-]46] is interpreted as a class of two characters +("W" and "-") followed by a literal string "46]", so it would match "W46]" or +"-46]". However, if the "]" is escaped with a backslash it is interpreted as +the end of range, so [W-\]46] is interpreted as a class containing a range +followed by two other characters. The octal or hexadecimal representation of +"]" can also be used to end a range. +

    +

    +Ranges normally include all code points between the start and end characters, +inclusive. They can also be used for code points specified numerically, for +example [\000-\037]. Ranges can include any characters that are valid for the +current mode. +

    +

    +There is a special case in EBCDIC environments for ranges whose end points are +both specified as literal letters in the same case. For compatibility with +Perl, EBCDIC code points within the range that are not letters are omitted. For +example, [h-k] matches only four characters, even though the codes for h and k +are 0x88 and 0x92, a range of 11 code points. However, if the range is +specified numerically, for example, [\x88-\x92] or [h-\x92], all code points +are included. +

    +

    +If a range that includes letters is used when caseless matching is set, it +matches the letters in either case. For example, [W-c] is equivalent to +[][\\^_`wxyzabc], matched caselessly, and in a non-UTF mode, if character +tables for a French locale are in use, [\xc8-\xcb] matches accented E +characters in both cases. +

    +

    A circumflex can conveniently be used with the upper case character types to specify a more restricted set of characters than the matching lower case type. For example, the class [^\W_] matches any letter or digit, but not underscore, @@ -1514,13 +1557,8 @@ respectively.

    When one of these option changes occurs at top level (that is, not inside subpattern parentheses), the change applies to the remainder of the pattern -that follows. If the change is placed right at the start of a pattern, PCRE2 -extracts it into the global options (and it will therefore show up in data -extracted by the pcre2_pattern_info() function). -

    -

    -An option change within a subpattern (see below for a description of -subpatterns) affects only that part of the subpattern that follows it, so +that follows. An option change within a subpattern (see below for a description +of subpatterns) affects only that part of the subpattern that follows it, so

       (a(?i)b)c
     
    @@ -1649,6 +1687,10 @@ first one in the pattern with the given number. The following pattern matches
       /(?|(abc)|(def))(?1)/
     
    +A relative reference such as (?-1) is no different: it is just a convenient way +of computing an absolute group number. +

    +

    If a condition test for a subpattern's having matched refers to a non-unique number, the test is @@ -2051,9 +2093,9 @@ subpattern is possible using named parentheses (see below).

    Another way of avoiding the ambiguity inherent in the use of digits following a -backslash is to use the \g escape sequence. This escape must be followed by an -unsigned number or a negative number, optionally enclosed in braces. These -examples are all identical: +backslash is to use the \g escape sequence. This escape must be followed by a +signed or unsigned number, optionally enclosed in braces. These examples are +all identical:

       (ring), \1
       (ring), \g1
    @@ -2061,8 +2103,7 @@ examples are all identical:
     
    An unsigned number specifies an absolute reference without the ambiguity that is present in the older syntax. It is also useful when literal digits follow -the reference. A negative number is a relative reference. Consider this -example: +the reference. A signed number is a relative reference. Consider this example:
       (abc(def)ghi)\g{-1}
     
    @@ -2073,6 +2114,11 @@ can be helpful in long patterns, and also in patterns that are created by joining together fragments that contain references within themselves.

    +The sequence \g{+1} is a reference to the next capturing subpattern. This kind +of forward reference can be useful in patterns that repeat. Perl does not +support the use of + in this way. +

    +

    A back reference matches whatever actually matched the capturing subpattern in the current subject string, rather than anything matching the subpattern itself (see @@ -2172,6 +2218,14 @@ capturing is carried out only for positive assertions. (Perl sometimes, but not always, does do capturing in negative assertions.)

    +WARNING: If a positive assertion containing one or more capturing subpatterns +succeeds, but failure to match later in the pattern causes backtracking over +this assertion, the captures within the assertion are reset only if no higher +numbered captures are already set. This is, unfortunately, a fundamental +limitation of the current implementation; it may get removed in a future +reworking. +

    +

    For compatibility with Perl, most assertion subpatterns may be repeated; though it makes no sense to assert the same thing several times, the side effect of capturing parentheses may occasionally be useful. However, an assertion that @@ -2268,18 +2322,31 @@ match. If there are insufficient characters before the current position, the assertion fails.

    -In a UTF mode, PCRE2 does not allow the \C escape (which matches a single code -unit even in a UTF mode) to appear in lookbehind assertions, because it makes -it impossible to calculate the length of the lookbehind. The \X and \R -escapes, which can match different numbers of code units, are also not -permitted. +In UTF-8 and UTF-16 modes, PCRE2 does not allow the \C escape (which matches a +single code unit even in a UTF mode) to appear in lookbehind assertions, +because it makes it impossible to calculate the length of the lookbehind. The +\X and \R escapes, which can match different numbers of code units, are never +permitted in lookbehinds.

    "Subroutine" calls (see below) such as (?2) or (?&X) are permitted in lookbehinds, as long -as the subpattern matches a fixed-length string. -Recursion, -however, is not supported. +as the subpattern matches a fixed-length string. However, +recursion, +that is, a "subroutine" call into a group that is already active, +is not supported. +

    +

    +Perl does not support back references in lookbehinds. PCRE2 does support them, +but only if certain conditions are met. The PCRE2_MATCH_UNSET_BACKREF option +must not be set, there must be no use of (?| in the pattern (it creates +duplicate subpattern numbers), and if the back reference is by name, the name +must be unique. Of course, the referenced subpattern must itself be of fixed +length. The following pattern matches words containing at least two characters +that begin and end with the same character: +

    +   \b(\w)\w++(?<=\1)
    +

    Possessive quantifiers can be used in conjunction with lookbehind assertions to @@ -2417,7 +2484,9 @@ Checking for a used subpattern by name

    Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a used subpattern by name. For compatibility with earlier versions of PCRE1, which had -this facility before Perl, the syntax (?(name)...) is also recognized. +this facility before Perl, the syntax (?(name)...) is also recognized. Note, +however, that undelimited names consisting of the letter R followed by digits +are ambiguous (see the following section).

    Rewriting the above example to use a named subpattern gives this: @@ -2432,30 +2501,52 @@ matched. Checking for pattern recursion

    -If the condition is the string (R), and there is no subpattern with the name R, -the condition is true if a recursive call to the whole pattern or any -subpattern has been made. If digits or a name preceded by ampersand follow the -letter R, for example: +"Recursion" in this sense refers to any subroutine-like call from one part of +the pattern to another, whether or not it is actually recursive. See the +sections entitled +"Recursive patterns" +and +"Subpatterns as subroutines" +below for details of recursion and subpattern calls. +

    +

    +If a condition is the string (R), and there is no subpattern with the name R, +the condition is true if matching is currently in a recursion or subroutine +call to the whole pattern or any subpattern. If digits follow the letter R, and +there is no subpattern with that name, the condition is true if the most recent +call is into a subpattern with the given number, which must exist somewhere in +the overall pattern. This is a contrived example that is equivalent to a+b:

    -  (?(R3)...) or (?(R&name)...)
    +  ((?(R1)a+|(?1)b))
     
    -the condition is true if the most recent recursion is into a subpattern whose -number or name is given. This condition does not check the entire recursion -stack. If the name used in a condition of this kind is a duplicate, the test is -applied to all subpatterns of the same name, and is true if any one of them is -the most recent recursion. +However, in both cases, if there is a subpattern with a matching name, the +condition tests for its being set, as described in the section above, instead +of testing for recursion. For example, creating a group with the name R1 by +adding (?<R1>) to the above pattern completely changes its meaning. +

    +

    +If a name preceded by ampersand follows the letter R, for example: +

    +  (?(R&name)...)
    +
    +the condition is true if the most recent recursion is into a subpattern of that +name (which must exist within the pattern). +

    +

    +This condition does not check the entire recursion stack. It tests only the +current level. If the name used in a condition of this kind is a duplicate, the +test is applied to all subpatterns of the same name, and is true if any one of +them is the most recent recursion.

    At "top level", all these recursion test conditions are false. -The syntax for recursive patterns -is described below.


    Defining subpatterns for use by reference only

    -If the condition is the string (DEFINE), and there is no subpattern with the -name DEFINE, the condition is always false. In this case, there may be only one +If the condition is the string (DEFINE), the condition is always false, even if +there is a group with the name DEFINE. In this case, there may be only one alternative in the subpattern. It is always skipped if control reaches this point in the pattern; the idea of DEFINE is that it can be used to define subroutines that can be referenced from elsewhere. (The use of @@ -2489,7 +2580,8 @@ For example: (?(VERSION>=10.4)yes|no)

    This pattern matches "yes" if the PCRE2 version is greater than or equal to 10.4, or -"no" otherwise. +"no" otherwise. The fractional part of the version number may not contain more +than two digits.


    Assertion conditions @@ -2602,6 +2694,21 @@ parentheses preceding the recursion. In other words, a negative number counts capturing parentheses leftwards from the point at which it is encountered.

    +Be aware, however, that if +duplicate subpattern numbers +are in use, relative references refer to the earliest subpattern with the +appropriate number. Consider, for example: +

    +  (?|(a)|(b)) (c) (?-2)
    +
    +The first two capturing groups (a) and (b) are both numbered 1, and group (c) +is number 2. When the reference (?-2) is encountered, the second most recently +opened pair of parentheses has the number 1, but it is the first such group (the (a) +group) to which the recursion refers. This would be the same if an absolute +reference (?1) was used. In other words, relative references are just a +shorthand for computing a group number. +

    +

    It is also possible to refer to subsequently opened parentheses, by writing references such as (?+2). However, these cannot be recursive because the reference is not inside the parentheses that are referenced. They are always @@ -2899,14 +3006,36 @@ remarks apply to the PCRE2 features described in this section.

    The new verbs make use of what was previously invalid syntax: an opening -parenthesis followed by an asterisk. They are generally of the form -(*VERB) or (*VERB:NAME). Some may take either form, possibly behaving -differently depending on whether or not a name is present. A name is any -sequence of characters that does not include a closing parenthesis. The maximum -length of name is 255 in the 8-bit library and 65535 in the 16-bit and 32-bit -libraries. If the name is empty, that is, if the closing parenthesis -immediately follows the colon, the effect is as if the colon were not there. -Any number of these verbs may occur in a pattern. +parenthesis followed by an asterisk. They are generally of the form (*VERB) or +(*VERB:NAME). Some verbs take either form, possibly behaving differently +depending on whether or not a name is present. +

    +

    +By default, for compatibility with Perl, a name is any sequence of characters +that does not include a closing parenthesis. The name is not processed in +any way, and it is not possible to include a closing parenthesis in the name. +This can be changed by setting the PCRE2_ALT_VERBNAMES option, but the result +is no longer Perl-compatible. +

    +

    +When PCRE2_ALT_VERBNAMES is set, backslash processing is applied to verb names +and only an unescaped closing parenthesis terminates the name. However, the +only backslash items that are permitted are \Q, \E, and sequences such as +\x{100} that define character code points. Character type escapes such as \d +are faulted. +

    +

    +A closing parenthesis can be included in a name either as \) or between \Q +and \E. In addition to backslash processing, if the PCRE2_EXTENDED option is +also set, unescaped whitespace in verb names is skipped, and #-comments are +recognized, exactly as in the rest of the pattern. PCRE2_EXTENDED does not +affect verb names unless PCRE2_ALT_VERBNAMES is also set. +

    +

    +The maximum length of a name is 255 in the 8-bit library and 65535 in the +16-bit and 32-bit libraries. If the name is empty, that is, if the closing +parenthesis immediately follows the colon, the effect is as if the colon were +not there. Any number of these verbs may occur in a pattern.

    Since these verbs are specifically related to backtracking, most of them can be @@ -3323,9 +3452,9 @@ Cambridge, England.


    REVISION

    -Last updated: 13 June 2015 +Last updated: 27 December 2016
    -Copyright © 1997-2015 University of Cambridge. +Copyright © 1997-2016 University of Cambridge.

    Return to the PCRE2 index page. diff --git a/pcre2/doc/html/pcre2perform.html b/pcre2/doc/html/pcre2perform.html index 3b6a4a6c8..ac9d23cd8 100644 --- a/pcre2/doc/html/pcre2perform.html +++ b/pcre2/doc/html/pcre2perform.html @@ -12,17 +12,21 @@ This page is part of the PCRE2 HTML documentation. It was generated automatically from the original man page. If there is any nonsense in it, please consult the man page, in case the conversion went wrong.
    -
    -PCRE2 PERFORMANCE -
    +

    +
    PCRE2 PERFORMANCE

    Two aspects of performance are discussed below: memory usage and processing time. The way you express your pattern as a regular expression can affect both of them.

    -
    -COMPILED PATTERN MEMORY USAGE -
    +
    COMPILED PATTERN MEMORY USAGE

    Patterns are compiled by PCRE2 into a reasonably efficient interpretive code, so that most simple patterns do not use much memory. However, there is one case @@ -75,9 +79,7 @@ pattern. Nevertheless, if the atomic grouping is not a problem and the loss of speed is acceptable, this kind of rewriting will allow you to process patterns that PCRE2 cannot otherwise handle.

    -
    -STACK USAGE AT RUN TIME -
    +
    STACK USAGE AT RUN TIME

    When pcre2_match() is used for matching, certain kinds of pattern can cause it to use large amounts of the process stack. In some environments the @@ -86,9 +88,7 @@ SIGSEGV. Rewriting your pattern can often help. The pcre2stack documentation discusses this issue in detail.

    -
    -PROCESSING TIME -
    +
    PROCESSING TIME

    Certain items in regular expression patterns are processed more efficiently than others. It is more efficient to use a character class like [aeiou] than a @@ -177,9 +177,7 @@ appreciable time with strings longer than about 20 characters. In many cases, the solution to this kind of performance issue is to use an atomic group or a possessive quantifier.

    -
    -AUTHOR -
    +
    AUTHOR

    Philip Hazel
    @@ -188,9 +186,7 @@ University Computing Service Cambridge, England.

    -
    -REVISION -
    +
    REVISION

    Last updated: 02 January 2015
    diff --git a/pcre2/doc/html/pcre2posix.html b/pcre2/doc/html/pcre2posix.html index 5e4b5a3b4..1d5fe6356 100644 --- a/pcre2/doc/html/pcre2posix.html +++ b/pcre2/doc/html/pcre2posix.html @@ -48,7 +48,7 @@ This set of functions provides a POSIX-style API for the PCRE2 regular expression 8-bit library. See the pcre2api documentation for a description of PCRE2's native API, which contains much -additional functionality. There is no POSIX-style wrapper for PCRE2's 16-bit +additional functionality. There are no POSIX-style wrappers for PCRE2's 16-bit and 32-bit libraries.

    @@ -67,9 +67,9 @@ POSIX interface often use it, this makes it easier to slot in PCRE2 as a replacement library. Other POSIX options are not even defined.

    -There are also some other options that are not defined by POSIX. These have -been added at the request of users who want to make use of certain -PCRE2-specific features via the POSIX calling interface. +There are also some options that are not defined by POSIX. These have been +added at the request of users who want to make use of certain PCRE2-specific +features via the POSIX calling interface.

    When PCRE2 is called via these functions, it is only the API that is POSIX-like @@ -119,11 +119,11 @@ defined POSIX behaviour for REG_NEWLINE (see the following section).

       REG_NOSUB
     
    -The PCRE2_NO_AUTO_CAPTURE option is set when the regular expression is passed -for compilation to the native function. In addition, when a pattern that is -compiled with this flag is passed to regexec() for matching, the -nmatch and pmatch arguments are ignored, and no captured strings -are returned. +When a pattern that is compiled with this flag is passed to regexec() for +matching, the nmatch and pmatch arguments are ignored, and no +captured strings are returned. Versions of the PCRE2 library prior to 10.22 used +to set the PCRE2_NO_AUTO_CAPTURE compile option, but this no longer happens +because it disables the use of back references.
       REG_UCP
     
    @@ -170,7 +170,7 @@ use the contents of the preg structure. If, for example, you pass it to This area is not simple, because POSIX and Perl take different views of things. It is not possible to get PCRE2 to obey POSIX semantics, but then PCRE2 was never intended to be a POSIX engine. The following table lists the different -possibilities for matching newline characters in PCRE2: +possibilities for matching newline characters in Perl and PCRE2:
                               Default   Change with
     
    @@ -180,7 +180,7 @@ possibilities for matching newline characters in PCRE2:
       $ matches \n in middle     no     PCRE2_MULTILINE
       ^ matches \n in middle     no     PCRE2_MULTILINE
     
    -This is the equivalent table for POSIX: +This is the equivalent table for a POSIX-compatible pattern matcher:
                               Default   Change with
     
    @@ -190,14 +190,18 @@ This is the equivalent table for POSIX:
       $ matches \n in middle     no     REG_NEWLINE
       ^ matches \n in middle     no     REG_NEWLINE
     
    -PCRE2's behaviour is the same as Perl's, except that there is no equivalent for -PCRE2_DOLLAR_ENDONLY in Perl. In both PCRE2 and Perl, there is no way to stop -newline from matching [^a]. +This behaviour is not what happens when PCRE2 is called via its POSIX +API. By default, PCRE2's behaviour is the same as Perl's, except that there is +no equivalent for PCRE2_DOLLAR_ENDONLY in Perl. In both PCRE2 and Perl, there +is no way to stop newline from matching [^a].

    -The default POSIX newline handling can be obtained by setting PCRE2_DOTALL and -PCRE2_DOLLAR_ENDONLY, but there is no way to make PCRE2 behave exactly as for -the REG_NEWLINE action. +Default POSIX newline handling can be obtained by setting PCRE2_DOTALL and +PCRE2_DOLLAR_ENDONLY when calling pcre2_compile() directly, but there is +no way to make PCRE2 behave exactly as for the REG_NEWLINE action. When using +the POSIX API, passing REG_NEWLINE to PCRE2's regcomp() function +causes PCRE2_MULTILINE to be passed to pcre2_compile(), and REG_DOTALL +passes PCRE2_DOTALL. There is no way to pass PCRE2_DOLLAR_ENDONLY.


    MATCHING A PATTERN

    @@ -231,19 +235,21 @@ to have a terminating NUL located at string + pmatch[0].rm_eo IEEE Standard 1003.2 (POSIX.2), and should be used with caution in software intended to be portable to other systems. Note that a non-zero rm_so does not imply REG_NOTBOL; REG_STARTEND affects only the location of the string, not -how it is matched. +how it is matched. Setting REG_STARTEND and passing pmatch as NULL are +mutually exclusive; the error REG_INVARG is returned.

    If the pattern was compiled with the REG_NOSUB flag, no data about any matched strings is returned. The nmatch and pmatch arguments of -regexec() are ignored. +regexec() are ignored (except possibly as input for REG_STARTEND).

    -If the value of nmatch is zero, or if the value pmatch is NULL, -no data about any matched strings is returned. +The value of nmatch may be zero, and the value of pmatch may be NULL +(unless REG_STARTEND is set); in both these cases no data about any matched +strings is returned.

    -Otherwise,the portion of the string that was matched, and also any captured +Otherwise, the portion of the string that was matched, and also any captured substrings, are returned via the pmatch argument, which points to an array of nmatch structures of type regmatch_t, containing the members rm_so and rm_eo. These contain the byte offset to the first @@ -262,9 +268,11 @@ header file, of which REG_NOMATCH is the "expected" failure code. The regerror() function maps a non-zero errorcode from either regcomp() or regexec() to a printable message. If preg is not NULL, the error should have arisen from the use of that structure. A message -terminated by a binary zero is placed in errbuf. The length of the -message, including the zero, is limited to errbuf_size. The yield of the -function is the size of buffer needed to hold the whole message. +terminated by a binary zero is placed in errbuf. If the buffer is too +short, only the first errbuf_size - 1 characters of the error message are +used. The yield of the function is the size of buffer needed to hold the whole +message, including the terminating zero. This value is greater than +errbuf_size if the message was truncated.


    MEMORY USAGE

    @@ -283,9 +291,9 @@ Cambridge, England.


    REVISION

    -Last updated: 20 October 2014 +Last updated: 31 January 2016
    -Copyright © 1997-2014 University of Cambridge. +Copyright © 1997-2016 University of Cambridge.

    Return to the PCRE2 index page. diff --git a/pcre2/doc/html/pcre2sample.html b/pcre2/doc/html/pcre2sample.html index 60a928bcc..2b36f1fc6 100644 --- a/pcre2/doc/html/pcre2sample.html +++ b/pcre2/doc/html/pcre2sample.html @@ -24,12 +24,11 @@ documentation. If you do not have a copy of the PCRE2 distribution, you can save this listing to re-create the contents of pcre2demo.c.

    -The demonstration program, which uses the PCRE2 8-bit library, compiles the -regular expression that is its first argument, and matches it against the -subject string in its second argument. No PCRE2 options are set, and default -character tables are used. If matching succeeds, the program outputs the -portion of the subject that matched, together with the contents of any captured -substrings. +The demonstration program compiles the regular expression that is its +first argument, and matches it against the subject string in its second +argument. No PCRE2 options are set, and default character tables are used. If +matching succeeds, the program outputs the portion of the subject that matched, +together with the contents of any captured substrings.

    If the -g option is given on the command line, the program then goes on to @@ -38,34 +37,39 @@ string. The logic is a little bit tricky because of the possibility of matching an empty string. Comments in the code explain what is going on.

    +The code in pcre2demo.c is an 8-bit program that uses the PCRE2 8-bit +library. It handles strings and characters that are stored in 8-bit code units. +By default, one character corresponds to one code unit, but if the pattern +starts with "(*UTF)", both it and the subject are treated as UTF-8 strings, +where characters may occupy multiple code units. +

    +

    If PCRE2 is installed in the standard include and library directories for your operating system, you should be able to compile the demonstration program using -this command: +a command like this:

    -  gcc -o pcre2demo pcre2demo.c -lpcre2-8
    +  cc -o pcre2demo pcre2demo.c -lpcre2-8
     
    If PCRE2 is installed elsewhere, you may need to add additional options to the command line. For example, on a Unix-like system that has PCRE2 installed in /usr/local, you can compile the demonstration program using a command like this:
    -  gcc -o pcre2demo -I/usr/local/include pcre2demo.c -L/usr/local/lib -lpcre2-8
    -
    -
    -

    -

    -Once you have compiled and linked the demonstration program, you can run simple -tests like this: + cc -o pcre2demo -I/usr/local/include pcre2demo.c -L/usr/local/lib -lpcre2-8 +

  • +Once you have built the demonstration program, you can run simple tests like +this:
       ./pcre2demo 'cat|dog' 'the cat sat on the mat'
       ./pcre2demo -g 'cat|dog' 'the dog sat on the cat'
     
    Note that there is a much more comprehensive test program, called pcre2test, -which supports many more facilities for testing regular expressions using the -PCRE2 libraries. The +which supports many more facilities for testing regular expressions using all +three PCRE2 libraries (8-bit, 16-bit, and 32-bit, though not all three need be +installed). The pcre2demo -program is provided as a simple coding example. +program is provided as a relatively simple coding example.

    If you try to run @@ -73,7 +77,7 @@ If you try to run when PCRE2 is not installed in the standard library directory, you may get an error like this on some operating systems (e.g. Solaris):

    -  ld.so.1: a.out: fatal: libpcre2.so.0: open failed: No such file or directory
    +  ld.so.1: pcre2demo: fatal: libpcre2-8.so.0: open failed: No such file or directory
     
    This is caused by the way shared library support works on those systems. You need to add @@ -97,9 +101,9 @@ Cambridge, England. REVISION

    -Last updated: 20 October 2014 +Last updated: 02 February 2016
    -Copyright © 1997-2014 University of Cambridge. +Copyright © 1997-2016 University of Cambridge.

    Return to the PCRE2 index page. diff --git a/pcre2/doc/html/pcre2serialize.html b/pcre2/doc/html/pcre2serialize.html index c32ebe034..edf415afd 100644 --- a/pcre2/doc/html/pcre2serialize.html +++ b/pcre2/doc/html/pcre2serialize.html @@ -14,10 +14,11 @@ please consult the man page, in case the conversion went wrong.


    SAVING AND RE-USING PRECOMPILED PCRE2 PATTERNS

    @@ -41,14 +42,22 @@ If you are running an application that uses a large number of regular expression patterns, it may be useful to store them in a precompiled form instead of having to compile them every time the application is run. However, if you are using the just-in-time optimization feature, it is not possible to -save and reload the JIT data, because it is position-dependent. In addition, -the host on which the patterns are reloaded must be running the same version of -PCRE2, with the same code unit width, and must also have the same endianness, -pointer width and PCRE2_SIZE type. For example, patterns compiled on a 32-bit -system using PCRE2's 16-bit library cannot be reloaded on a 64-bit system, nor -can they be reloaded using the 8-bit library. +save and reload the JIT data, because it is position-dependent. The host on +which the patterns are reloaded must be running the same version of PCRE2, with +the same code unit width, and must also have the same endianness, pointer width +and PCRE2_SIZE type. For example, patterns compiled on a 32-bit system using +PCRE2's 16-bit library cannot be reloaded on a 64-bit system, nor can they be +reloaded using the 8-bit library.

    -
    SAVING COMPILED PATTERNS
    +
    SECURITY CONCERNS
    +

    +The facility for saving and restoring compiled patterns is intended for use +within individual applications. As such, the data supplied to +pcre2_serialize_decode() is expected to be trusted data, not data from +arbitrary external sources. There is only some simple consistency checking, not +complete validation of what is being re-loaded. +

    +
    SAVING COMPILED PATTERNS

    Before compiled patterns can be saved they must be serialized, that is, converted to a stream of bytes. A single byte stream may contain any number of @@ -110,7 +119,7 @@ still be used for matching. Their memory must eventually be freed in the usual way by calling pcre2_code_free(). When you have finished with the byte stream, it too must be freed by calling pcre2_serialize_free().

    -
    RE-USING PRECOMPILED PATTERNS
    +
    RE-USING PRECOMPILED PATTERNS

    In order to re-use a set of saved patterns you must first make the serialized byte stream available in main memory (for example, by reading from a file). The @@ -142,21 +151,27 @@ is filled with those that fit, and the remainder are ignored. The yield of the function is the number of decoded patterns, or one of the following negative error codes:

    -  PCRE2_ERROR_BADDATA   second argument is zero or less
    -  PCRE2_ERROR_BADMAGIC  mismatch of id bytes in the data
    -  PCRE2_ERROR_BADMODE   mismatch of variable unit size or PCRE2 version
    -  PCRE2_ERROR_MEMORY    memory allocation failed
    -  PCRE2_ERROR_NULL      first or third argument is NULL
    +  PCRE2_ERROR_BADDATA    second argument is zero or less
    +  PCRE2_ERROR_BADMAGIC   mismatch of id bytes in the data
    +  PCRE2_ERROR_BADMODE    mismatch of code unit size or PCRE2 version
    +  PCRE2_ERROR_BADSERIALIZEDDATA  other sanity check failure
    +  PCRE2_ERROR_MEMORY     memory allocation failed
    +  PCRE2_ERROR_NULL       first or third argument is NULL
     
    PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was compiled on a system with different endianness.

    Decoded patterns can be used for matching in the usual way, and must be freed -by calling pcre2_code_free() as normal. A single copy of the character -tables is used by all the decoded patterns. A reference count is used to +by calling pcre2_code_free(). However, be aware that there is a potential +race issue if you are using multiple patterns that were decoded from a single +byte stream in a multithreaded application. A single copy of the character +tables is used by all the decoded patterns and a reference count is used to arrange for its memory to be automatically freed when the last pattern is -freed. +freed, but there is no locking on this reference count. Therefore, if you want +to call pcre2_code_free() for these patterns in different threads, you +must arrange your own locking, and ensure that pcre2_code_free() cannot +be called by two threads at the same time.

    If a pattern was processed by pcre2_jit_compile() before being @@ -164,7 +179,7 @@ serialized, the JIT data is discarded and so is no longer available after a save/restore cycle. You can, however, process a restored pattern with pcre2_jit_compile() if you wish.

    -
    AUTHOR
    +
    AUTHOR

    Philip Hazel
    @@ -173,11 +188,11 @@ University Computing Service Cambridge, England.

    -
    REVISION
    +
    REVISION

    -Last updated: 20 January 2015 +Last updated: 24 May 2016
    -Copyright © 1997-2015 University of Cambridge. +Copyright © 1997-2016 University of Cambridge.

    Return to the PCRE2 index page. diff --git a/pcre2/doc/html/pcre2stack.html b/pcre2/doc/html/pcre2stack.html index 2942c7a7a..8b5c783de 100644 --- a/pcre2/doc/html/pcre2stack.html +++ b/pcre2/doc/html/pcre2stack.html @@ -57,12 +57,13 @@ assertion and "once-only" subpatterns, which are handled like subroutine calls. Normally, these are never very deep, and the limit on the complexity of pcre2_dfa_match() is controlled by the amount of workspace it is given. However, it is possible to write patterns with runaway infinite recursions; -such patterns will cause pcre2_dfa_match() to run out of stack. At -present, there is no protection against this. +such patterns will cause pcre2_dfa_match() to run out of stack unless a +limit is applied (see below).

    -The comments that follow do NOT apply to pcre2_dfa_match(); they are -relevant only for pcre2_match() without the JIT optimization. +The comments in the next three sections do not apply to +pcre2_dfa_match(); they are relevant only for pcre2_match() without +the JIT optimization.


    Reducing pcre2_match()'s stack usage @@ -115,7 +116,7 @@ entitled in the pcre2api documentation. Since the block sizes are always the same, it may be possible to -implement customized a memory handler that is more efficient than the standard +implement a customized memory handler that is more efficient than the standard function. The memory blocks obtained for this purpose are retained and re-used if possible while pcre2_match() is running. They are all freed just before it exits. @@ -151,6 +152,15 @@ pattern to match. This is done by calling pcre2_match() repeatedly with different limits.


    +Limiting pcre2_dfa_match()'s stack usage +
    +

    +The recursion limit, as described above for pcre2_match(), also applies +to pcre2_dfa_match(), whose use of recursive function calls for +recursions in the pattern can lead to runaway stack usage. The non-recursive +match limit is not relevant for DFA matching, and is ignored. +

    +
    Changing stack size in Unix-like systems

    @@ -198,9 +208,9 @@ Cambridge, England. REVISION

    -Last updated: 21 November 2014 +Last updated: 23 December 2016
    -Copyright © 1997-2014 University of Cambridge. +Copyright © 1997-2016 University of Cambridge.

    Return to the PCRE2 index page. diff --git a/pcre2/doc/html/pcre2syntax.html b/pcre2/doc/html/pcre2syntax.html index 28ba02362..4cbbba7b0 100644 --- a/pcre2/doc/html/pcre2syntax.html +++ b/pcre2/doc/html/pcre2syntax.html @@ -111,9 +111,10 @@ it matches a literal "u". \W a "non-word" character \X a Unicode extended grapheme cluster -The application can lock out the use of \C by setting the -PCRE2_NEVER_BACKSLASH_C option. It is dangerous because it may leave the -current matching point in the middle of a UTF-8 or UTF-16 character. +\C is dangerous because it may leave the current matching point in the middle +of a UTF-8 or UTF-16 character. The application can lock out the use of \C by +setting the PCRE2_NEVER_BACKSLASH_C option. It is also possible to build PCRE2 +with the use of \C permanently disabled.

    By default, \d, \s, and \w match only ASCII characters, even in UTF-8 mode @@ -187,6 +188,8 @@ at release 5.18.


    SCRIPT NAMES FOR \p AND \P

    +Ahom, +Anatolian_Hieroglyphs, Arabic, Armenian, Avestan, @@ -227,6 +230,7 @@ Gurmukhi, Han, Hangul, Hanunoo, +Hatran, Hebrew, Hiragana, Imperial_Aramaic, @@ -263,12 +267,14 @@ Miao, Modi, Mongolian, Mro, +Multani, Myanmar, Nabataean, New_Tai_Lue, Nko, Ogham, Ol_Chiki, +Old_Hungarian, Old_Italic, Old_North_Arabian, Old_Permic, @@ -290,6 +296,7 @@ Saurashtra, Sharada, Shavian, Siddham, +SignWriting, Sinhala, Sora_Sompeng, Sundanese, @@ -444,9 +451,10 @@ appear. (*UCP) set PCRE2_UCP (use Unicode properties for \d etc) Note that LIMIT_MATCH and LIMIT_RECURSION can only reduce the value of the -limits set by the caller of pcre2_match(), not increase them. The application -can lock out the use of (*UTF) and (*UCP) by setting the PCRE2_NEVER_UTF or -PCRE2_NEVER_UCP options, respectively, at compile time. +limits set by the caller of pcre2_match() or pcre2_dfa_match(), not +increase them. The application can lock out the use of (*UTF) and (*UCP) by +setting the PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options, respectively, at +compile time.


    NEWLINE CONVENTION

    @@ -485,6 +493,9 @@ Each top-level branch of a look behind must be of a fixed length. \n reference by number (can be ambiguous) \gn reference by number \g{n} reference by number + \g+n relative reference by number (PCRE2 extension) + \g-n relative reference by number + \g{+n} relative reference by number (PCRE2 extension) \g{-n} relative reference by number \k<name> reference by name (Perl) \k'name' reference by name (Perl) @@ -523,14 +534,17 @@ Each top-level branch of a look behind must be of a fixed length. (?(-n) relative reference condition (?(<name>) named reference condition (Perl) (?('name') named reference condition (Perl) - (?(name) named reference condition (PCRE2) + (?(name) named reference condition (PCRE2, deprecated) (?(R) overall recursion condition - (?(Rn) specific group recursion condition - (?(R&name) specific recursion condition + (?(Rn) specific numbered group recursion condition + (?(R&name) specific named group recursion condition (?(DEFINE) define subpattern for reference (?(VERSION[>]=n.m) test PCRE2 version (?(assert) assertion condition - + +Note the ambiguity of (?(R) and (?(Rn) which might be named reference +conditions or recursion tests. Such a condition is interpreted as a reference +condition if the relevant named group exists.


    BACKTRACKING CONTROL

    @@ -582,9 +596,9 @@ Cambridge, England.


    REVISION

    -Last updated: 13 June 2015 +Last updated: 23 December 2016
    -Copyright © 1997-2015 University of Cambridge. +Copyright © 1997-2016 University of Cambridge.

    Return to the PCRE2 index page. diff --git a/pcre2/doc/html/pcre2test.html b/pcre2/doc/html/pcre2test.html index 5165c1e54..ee41e4303 100644 --- a/pcre2/doc/html/pcre2test.html +++ b/pcre2/doc/html/pcre2test.html @@ -61,7 +61,7 @@ subject is processed, and what output is produced.

    As the original fairly simple PCRE library evolved, it acquired many different features, and as a result, the original pcretest program ended up with a -lot of options in a messy, arcane syntax, for testing all the features. The +lot of options in a messy, arcane syntax for testing all the features. The move to the new PCRE2 API provided an opportunity to re-implement the test program as pcre2test, with a cleaner modifier syntax. Nevertheless, there are still many obscure modifiers, some of which are specifically designed for @@ -77,31 +77,61 @@ strings that are encoded in 8-bit, 16-bit, or 32-bit code units. One, two, or all three of these libraries may be simultaneously installed. The pcre2test program can be used to test all the libraries. However, its own input and output are always in 8-bit format. When testing the 16-bit or 32-bit -libraries, patterns and subject strings are converted to 16- or 32-bit format -before being passed to the library functions. Results are converted back to -8-bit code units for output. +libraries, patterns and subject strings are converted to 16-bit or 32-bit +format before being passed to the library functions. Results are converted back +to 8-bit code units for output.

    In the rest of this document, the names of library functions and structures are given in generic form, for example, pcre2_compile(). The actual names used in the libraries have a suffix _8, _16, or _32, as appropriate. -

    +


    INPUT ENCODING

    Input to pcre2test is processed line by line, either by calling the C -library's fgets() function, or via the libreadline library (see -below). The input is processed using using C's string functions, so must not -contain binary zeroes, even though in Unix-like environments, fgets() -treats any bytes other than newline as data characters. In some Windows -environments character 26 (hex 1A) causes an immediate end of file, and no -further data is read. +library's fgets() function, or via the libreadline library. In some +Windows environments character 26 (hex 1A) causes an immediate end of file, and +no further data is read, so this character should be avoided unless you really +want that action.

    -For maximum portability, therefore, it is safest to avoid non-printing -characters in pcre2test input files. There is a facility for specifying a -pattern's characters as hexadecimal pairs, thus making it possible to include -binary zeroes in a pattern for testing purposes. Subject lines are processed -for backslash escapes, which makes it possible to include any data value. +The input is processed using C's string functions, so must not +contain binary zeroes, even though in Unix-like environments, fgets() +treats any bytes other than newline as data characters. An error is generated +if a binary zero is encountered. Subject lines are processed for backslash +escapes, which makes it possible to include any data value in strings that are +passed to the library for matching. For patterns, there is a facility for +specifying some or all of the 8-bit input characters as hexadecimal pairs, +which makes it possible to include binary zeros. +

    +
    +Input for the 16-bit and 32-bit libraries +
    +

    +When testing the 16-bit or 32-bit libraries, there is a need to be able to +generate character code points greater than 255 in the strings that are passed +to the library. For subject lines, backslash escapes can be used. In addition, +when the utf modifier (see +"Setting compilation options" +below) is set, the pattern and any following subject lines are interpreted as +UTF-8 strings and translated to UTF-16 or UTF-32 as appropriate. +

    +

    +For non-UTF testing of wide characters, the utf8_input modifier can be +used. This is mutually exclusive with utf, and is allowed only in 16-bit +or 32-bit mode. It causes the pattern and following subject lines to be treated +as UTF-8 according to the original definition (RFC 2279), which allows for +character values up to 0x7fffffff. Each character is placed in one 16-bit or +32-bit code unit (in the 16-bit case, values greater than 0xffff cause an error +to occur). +

    +

    +UTF-8 is not capable of encoding values greater than 0x7fffffff, but such +values can be handled by the 32-bit library. When testing this library in +non-UTF mode with utf8_input set, if any character is preceded by the +byte 0xff (which is an illegal byte in UTF-8) 0x80000000 is added to the +character's value. This is the only way of passing such code points in a +pattern string. For subject strings, using an escape sequence is preferable.


    COMMAND LINE OPTIONS

    @@ -123,8 +153,13 @@ the 32-bit library has been built, this is the default. If the 32-bit library has not been built, this option causes an error.

    +-ac +Behave as if each pattern has the auto_callout modifier, that is, insert +automatic callouts into every pattern that is compiled. +

    +

    -b -Behave as if each pattern has the /fullbincode modifier; the full +Behave as if each pattern has the fullbincode modifier; the full internal binary form of the pattern is output after compilation.

    @@ -155,12 +190,13 @@ following options output the value and set the exit code as indicated: The following options output 1 for true or 0 for false, and set the exit code to the same value:

    -  ebcdic     compiled for an EBCDIC environment
    -  jit        just-in-time support is available
    -  pcre2-16   the 16-bit library was built
    -  pcre2-32   the 32-bit library was built
    -  pcre2-8    the 8-bit library was built
    -  unicode    Unicode support is available
    +  backslash-C  \C is supported (not locked out)
    +  ebcdic       compiled for an EBCDIC environment
    +  jit          just-in-time support is available
    +  pcre2-16     the 16-bit library was built
    +  pcre2-32     the 32-bit library was built
    +  pcre2-8      the 8-bit library was built
    +  unicode      Unicode support is available
     
    If an unknown option is given, an error message is output; the exit code is 0.

    @@ -177,12 +213,19 @@ using the pcre2_dfa_match() function instead of the default pcre2_match().

    +-error number[,number,...] +Call pcre2_get_error_message() for each of the error numbers in the +comma-separated list, display the resulting messages on the standard output, +then exit with zero exit code. The numbers may be positive or negative. This is +a convenience facility for PCRE2 maintainers. +

    +

    -help Output a brief summary of these options and then exit.

    -i -Behave as if each pattern has the /info modifier; information about the +Behave as if each pattern has the info modifier; information about the compiled pattern is given after compilation.

    @@ -265,9 +308,9 @@ Each subject line is matched separately and independently. If you want to do multi-line matches, you have to use the \n escape sequence (or \r or \r\n, etc., depending on the newline setting) in a single line of input to encode the newline sequences. There is no limit on the length of subject lines; the input -buffer is automatically extended if it is too small. There is a replication -feature that makes it possible to generate long subject lines without having to -supply them explicitly. +buffer is automatically extended if it is too small. There are replication +features that make it possible to generate long repetitive pattern or subject +lines without having to supply them explicitly.

    An empty line or the end of the file signals the end of the subject lines for a @@ -304,6 +347,36 @@ output. This command is used to load a set of precompiled patterns from a file, as described in the section entitled "Saving and restoring compiled patterns" below. +

    +  #newline_default [<newline-list>]
    +
    +When PCRE2 is built, a default newline convention can be specified. This +determines which characters and/or character pairs are recognized as indicating +a newline in a pattern or subject string. The default can be overridden when a +pattern is compiled. The standard test files contain tests of various newline +conventions, but the majority of the tests expect a single linefeed to be +recognized as a newline by default. Without special action the tests would fail +when PCRE2 is compiled with either CR or CRLF as the default newline. +

    +

    +The #newline_default command specifies a list of newline types that are +acceptable as the default. The types must be one of CR, LF, CRLF, ANYCRLF, or +ANY (in upper or lower case), for example: +

    +  #newline_default LF Any anyCRLF
    +
    +If the default newline is in the list, this command has no effect. Otherwise, +except when testing the POSIX API, a newline modifier that specifies the +first newline convention in the list (LF in the above example) is added to any +pattern that does not already have a newline modifier. If the newline +list is empty, the feature is turned off. This command is present in a number +of the standard test input files. +

    +

    +When the POSIX API is being tested there is no way to override the default +newline convention, though it is possible to set the newline convention from +within the pattern. A warning is given if the posix modifier is used when +#newline_default would set a default for the non-POSIX API.

       #pattern <modifier-list>
     
    @@ -321,9 +394,10 @@ test files that are also processed by perltest.sh. The #perltest command helps detect tests that are accidentally put in the wrong file.
       #pop [<modifiers>]
    +  #popcopy [<modifiers>]
     
    -This command is used to manipulate the stack of compiled patterns, as described -in the section entitled "Saving and restoring compiled patterns" +These commands are used to manipulate the stack of compiled patterns, as +described in the section entitled "Saving and restoring compiled patterns" below.
       #save <filename>
    @@ -340,12 +414,13 @@ subject lines. Modifiers on a subject line can change these settings.
     
    MODIFIER SYNTAX

    Modifier lists are used with both pattern and subject lines. Items in a list -are separated by commas and optional white space. Some modifiers may be given -for both patterns and subject lines, whereas others are valid for one or the -other only. Each modifier has a long name, for example "anchored", and some of -them must be followed by an equals sign and a value, for example, "offset=12". -Modifiers that do not take values may be preceded by a minus sign to turn off a -previous setting. +are separated by commas followed by optional white space. Trailing whitespace +in a modifier list is ignored. Some modifiers may be given for both patterns +and subject lines, whereas others are valid only for one or the other. Each +modifier has a long name, for example "anchored", and some of them must be +followed by an equals sign and a value, for example, "offset=12". Values cannot +contain comma characters, but may contain spaces. Modifiers that do not take +values may be preceded by a minus sign to turn off a previous setting.

    A few of the more common modifiers can also be specified as single letters, for @@ -454,6 +529,12 @@ the start of a modifier list. For example:

       abc\=notbol,notempty
     
    +If the subject string is empty and \= is followed by whitespace, the line is +treated as a comment line, and is not used for matching. For example: +
    +  \= This is a comment.
    +  abc\= This is an invalid modifier list.
    +
    A backslash followed by any other non-alphanumeric character just escapes that character. A backslash followed by anything else causes an error. However, if the very last character in the line is a backslash (and there is no modifier @@ -462,10 +543,10 @@ a real empty line terminates the data input.


    PATTERN MODIFIERS

    -There are three types of modifier that can appear in pattern lines, two of -which may also be used in a #pattern command. A pattern's modifier list -can add to or override default modifiers that were set by a previous -#pattern command. +There are several types of modifier that can appear in pattern lines. Except +where noted below, they may also be used in #pattern commands. A +pattern's modifier list can add to or override default modifiers that were set +by a previous #pattern command.


    Setting compilation options @@ -473,12 +554,13 @@ Setting compilation options

    The following modifiers set options for pcre2_compile(). The most common ones have single-letter abbreviations. See -pcreapi +pcre2api for a description of their effects.

           allow_empty_class         set PCRE2_ALLOW_EMPTY_CLASS
           alt_bsux                  set PCRE2_ALT_BSUX
           alt_circumflex            set PCRE2_ALT_CIRCUMFLEX
    +      alt_verbnames             set PCRE2_ALT_VERBNAMES
           anchored                  set PCRE2_ANCHORED
           auto_callout              set PCRE2_AUTO_CALLOUT
       /i  caseless                  set PCRE2_CASELESS
    @@ -499,12 +581,15 @@ for a description of their effects.
           no_utf_check              set PCRE2_NO_UTF_CHECK
           ucp                       set PCRE2_UCP
           ungreedy                  set PCRE2_UNGREEDY
    +      use_offset_limit          set PCRE2_USE_OFFSET_LIMIT
           utf                       set PCRE2_UTF
     
    As well as turning on the PCRE2_UTF option, the utf modifier causes all non-printing characters in output strings to be printed using the \x{hh...} notation. Otherwise, those less than 0x100 are output in hex without the curly -brackets. +brackets. Setting utf in 16-bit or 32-bit mode also causes pattern and +subject strings to be translated to UTF-16 or UTF-32, respectively, before +being passed to library functions.


    Setting compilation controls @@ -519,18 +604,24 @@ about the pattern: debug same as info,fullbincode fullbincode show binary code with lengths /I info show info about compiled pattern - hex pattern is coded in hexadecimal + hex unquoted characters are hexadecimal jit[=<number>] use JIT jitfast use JIT fast path jitverify verify JIT use locale=<name> use this locale + max_pattern_length=<n> set the maximum pattern length memory show memory used newline=<type> set newline type + null_context compile with a NULL context parens_nest_limit=<n> set maximum parentheses depth posix use the POSIX API + posix_nosub use the POSIX API with REG_NOSUB push push compiled pattern onto the stack + pushcopy push a copy onto the stack stackguard=<number> test the stackguard feature tables=[0|1|2] select internal tables + use_length do not zero-terminate the pattern + utf8_input treat input as UTF-8
    The effects of these modifiers are described in the following sections.

    @@ -604,40 +695,145 @@ is requested. For each callout, either its number or string is given, followed by the item that follows it in the pattern.


    -Specifying a pattern in hex +Passing a NULL context

    -The hex modifier specifies that the characters of the pattern are to be -interpreted as pairs of hexadecimal digits. White space is permitted between -pairs. For example: +Normally, pcre2test passes a context block to pcre2_compile(). If +the null_context modifier is set, however, NULL is passed. This is for +testing that pcre2_compile() behaves correctly in this case (it uses +default values). +

    +
    +Specifying the pattern's length +
    +

    +By default, patterns are passed to the compiling functions as zero-terminated +strings. When using the POSIX wrapper API, there is no other option. However, +when using PCRE2's native API, patterns can be passed by length instead of +being zero-terminated. The use_length modifier causes this to happen. +Using a length happens automatically (whether or not use_length is set) +when hex is set, because patterns specified in hexadecimal may contain +binary zeros. +

    +
    +Specifying pattern characters in hexadecimal +
    +

    +The hex modifier specifies that the characters of the pattern, except for +substrings enclosed in single or double quotes, are to be interpreted as pairs +of hexadecimal digits. This feature is provided as a way of creating patterns +that contain binary zeros and other non-printing characters. White space is +permitted between pairs of digits. For example, this pattern contains three +characters:

       /ab 32 59/hex
     
    -This feature is provided as a way of creating patterns that contain binary zero -and other non-printing characters. By default, pcre2test passes patterns -as zero-terminated strings to pcre2_compile(), giving the length as -PCRE2_ZERO_TERMINATED. However, for patterns specified in hexadecimal, the -actual length of the pattern is passed. +Parts of such a pattern are taken literally if quoted. This pattern contains +nine characters, only two of which are specified in hexadecimal: +
    +  /ab "literal" 32/hex
    +
    +Either single or double quotes may be used. There is no way of including +the delimiter within a substring. The hex and expand modifiers are +mutually exclusive. +

    +

    +The POSIX API cannot be used with patterns specified in hexadecimal because +they may contain binary zeros, which conflicts with regcomp()'s +requirement for a zero-terminated string. Such patterns are always passed to +pcre2_compile() as a string with a length, not as zero-terminated. +

    +
    +Specifying wide characters in 16-bit and 32-bit modes +
    +

    +In 16-bit and 32-bit modes, all input is automatically treated as UTF-8 and +translated to UTF-16 or UTF-32 when the utf modifier is set. For testing +the 16-bit and 32-bit libraries in non-UTF mode, the utf8_input modifier +can be used. It is mutually exclusive with utf. Input lines are +interpreted as UTF-8 as a means of specifying wide characters. More details are +given in +"Input encoding" +above. +

    +
    +Generating long repetitive patterns +
    +

    +Some tests use long patterns that are very repetitive. Instead of creating a +very long input line for such a pattern, you can use a special repetition +feature, similar to the one described for subject lines above. If the +expand modifier is present on a pattern, parts of the pattern that have +the form +

    +  \[<characters>]{<count>}
    +
    +are expanded before the pattern is passed to pcre2_compile(). For +example, \[AB]{6000} is expanded to "ABAB..." 6000 times. This construction +cannot be nested. An initial "\[" sequence is recognized only if "]{" followed +by decimal digits and "}" is found later in the pattern. If not, the characters +remain in the pattern unaltered. The expand and hex modifiers are +mutually exclusive. +

    +

    +If part of an expanded pattern looks like an expansion, but is really part of +the actual pattern, unwanted expansion can be avoided by giving two values in +the quantifier. For example, \[AB]{6000,6000} is not recognized as an +expansion item. +

    +

    +If the info modifier is set on an expanded pattern, the result of the +expansion is included in the information that is output.


    JIT compilation

    -The /jit modifier may optionally be followed by an equals sign and a -number in the range 0 to 7: +Just-in-time (JIT) compiling is a heavyweight optimization that can greatly +speed up pattern matching. See the +pcre2jit +documentation for details. JIT compiling happens, optionally, after a pattern +has been successfully compiled into an internal form. The JIT compiler converts +this to optimized machine code. It needs to know whether the match-time options +PCRE2_PARTIAL_HARD and PCRE2_PARTIAL_SOFT are going to be used, because +different code is generated for the different cases. See the partial +modifier in "Subject Modifiers" +below +for details of how these options are specified for each match attempt. +

    +

    +JIT compilation is requested by the /jit pattern modifier, which may +optionally be followed by an equals sign and a number in the range 0 to 7. +The three bits that make up the number specify which of the three JIT operating +modes are to be compiled: +

    +  1  compile JIT code for non-partial matching
    +  2  compile JIT code for soft partial matching
    +  4  compile JIT code for hard partial matching
    +
    +The possible values for the jit modifier are therefore:
       0  disable JIT
    -  1  use JIT for normal match only
    -  2  use JIT for soft partial match only
    -  3  use JIT for normal match and soft partial match
    -  4  use JIT for hard partial match only
    -  6  use JIT for soft and hard partial match
    +  1  normal matching only
    +  2  soft partial matching only
    +  3  normal and soft partial matching
    +  4  hard partial matching only
    +  6  soft and hard partial matching only
       7  all three modes
     
    -If no number is given, 7 is assumed. If JIT compilation is successful, the -compiled JIT code will automatically be used when pcre2_match() is run -for the appropriate type of match, except when incompatible run-time options -are specified. For more details, see the +If no number is given, 7 is assumed. The phrase "partial matching" means a call +to pcre2_match() with either the PCRE2_PARTIAL_SOFT or the +PCRE2_PARTIAL_HARD option set. Note that such a call may return a complete +match; the options enable the possibility of a partial match, but do not +require it. Note also that if you request JIT compilation only for partial +matching (for example, /jit=2) but do not set the partial modifier on a +subject line, that match will not use JIT code because none was compiled for +non-partial matching. +

    +

    +If JIT compilation is successful, the compiled JIT code will automatically be +used when an appropriate type of match is run, except when incompatible +run-time options are specified. For more details, see the pcre2jit documentation. See also the jitstack modifier below for a way of setting the size of the JIT stack. @@ -661,14 +857,14 @@ code was actually used in the match. Setting a locale

    -The /locale modifier must specify the name of a locale, for example: +The locale modifier must specify the name of a locale, for example:

       /pattern/locale=fr_FR
     
    The given locale is set, pcre2_maketables() is called to build a set of character tables for the locale, and this is then passed to pcre2_compile() when compiling the regular expression. The same tables -are used when matching the following subject lines. The /locale modifier +are used when matching the following subject lines. The locale modifier applies only to the pattern on which it appears, but can be given in a #pattern command if a default is needed. Setting a locale and alternate character tables are mutually exclusive. @@ -677,7 +873,7 @@ character tables are mutually exclusive. Showing pattern memory

    -The /memory modifier causes the size in bytes of the memory used to hold +The memory modifier causes the size in bytes of the memory used to hold the compiled pattern to be output. This does not include the size of the pcre2_code block; it is just the actual compiled data. If the pattern is subsequently passed to the JIT compiler, the size of the JIT compiled code is @@ -700,30 +896,53 @@ sets its own default of 220, which is required for running the standard test suite.


    +Limiting the pattern length +
    +

    +The max_pattern_length modifier sets a limit, in code units, to the +length of pattern that pcre2_compile() will accept. Breaching the limit +causes a compilation error. The default is the largest number a PCRE2_SIZE +variable can hold (essentially unlimited). +

    +
    Using the POSIX wrapper API

    -The /posix modifier causes pcre2test to call PCRE2 via the POSIX -wrapper API rather than its native API. This supports only the 8-bit library. -When the POSIX API is being used, the following pattern modifiers set options -for the regcomp() function: +The /posix and posix_nosub modifiers cause pcre2test to call +PCRE2 via the POSIX wrapper API rather than its native API. When +posix_nosub is used, the POSIX option REG_NOSUB is passed to +regcomp(). The POSIX wrapper supports only the 8-bit library. Note that +it does not imply POSIX matching semantics; for more detail see the +pcre2posix +documentation. The following pattern modifiers set options for the +regcomp() function:

       caseless           REG_ICASE
       multiline          REG_NEWLINE
    -  no_auto_capture    REG_NOSUB
       dotall             REG_DOTALL     )
       ungreedy           REG_UNGREEDY   ) These options are not part of
       ucp                REG_UCP        )   the POSIX standard
       utf                REG_UTF8       )
     
    +The regerror_buffsize modifier specifies a size for the error buffer that +is passed to regerror() in the event of a compilation error. For example: +
    +  /abc/posix,regerror_buffsize=20
    +
    +This provides a means of testing the behaviour of regerror() when the +buffer is too small for the error message. If this modifier has not been set, a +large buffer is used. +

    +

    The aftertext and allaftertext subject modifiers work as described -below. All other modifiers cause an error. +below. All other modifiers are either ignored, with a warning message, or cause +an error.


    Testing the stack guard feature

    -The /stackguard modifier is used to test the use of +The stackguard modifier is used to test the use of pcre2_set_compile_recursion_guard(), a function that is provided to enable stack availability to be checked during compilation (see the pcre2api @@ -738,7 +957,7 @@ be aborted. Using alternative character tables

    -The value specified for the /tables modifier must be one of the digits 0, +The value specified for the tables modifier must be one of the digits 0, 1, or 2. It causes a specific set of built-in character tables to be passed to pcre2_compile(). This is used in the PCRE2 tests to check behaviour with different character tables. The digit specifies the tables as follows: @@ -758,17 +977,22 @@ Setting certain match controls

    The following modifiers are really subject modifiers, and are described below. However, they may be included in a pattern's modifier list, in which case they -are applied to every subject line that is processed with that pattern. They do -not affect the compilation process. +are applied to every subject line that is processed with that pattern. They may +not appear in #pattern commands. These modifiers do not affect the +compilation process.

    -      aftertext           show text after match
    -      allaftertext        show text after captures
    -      allcaptures         show all captures
    -      allusedtext         show all consulted text
    -  /g  global              global matching
    -      mark                show mark values
    -      replace=<string>    specify a replacement string
    -      startchar           show starting character when relevant
    +      aftertext                  show text after match
    +      allaftertext               show text after captures
    +      allcaptures                show all captures
    +      allusedtext                show all consulted text
    +  /g  global                     global matching
    +      mark                       show mark values
    +      replace=<string>           specify a replacement string
    +      startchar                  show starting character when relevant
    +      substitute_extended        use PCRE2_SUBSTITUTE_EXTENDED
    +      substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
    +      substitute_unknown_unset   use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
    +      substitute_unset_empty     use PCRE2_SUBSTITUTE_UNSET_EMPTY
     
    These modifiers may not appear in a #pattern command. If you want them as defaults, set them in a #subject command. @@ -782,13 +1006,17 @@ pushed onto a stack of compiled patterns, and pcre2test expects the next line to contain a new pattern (or a command) instead of a subject line. This facility is used when saving compiled patterns to a file, as described in the section entitled "Saving and restoring compiled patterns" -below. -The push modifier is incompatible with compilation modifiers such as -global that act at match time. Any that are specified are ignored, with a -warning message, except for replace, which causes an error. Note that, -jitverify, which is allowed, does not carry through to any subsequent -matching that uses this pattern. -

    +below. If pushcopy is used instead of push, a copy of the compiled +pattern is stacked, leaving the original as current, ready to match the +following input lines. This provides a way of testing the +pcre2_code_copy() function. +The push and pushcopy modifiers are incompatible with compilation +modifiers such as global that act at match time. Any that are specified +are ignored (for the stacked copy), with a warning message, except for +replace, which causes an error. Note that jitverify, which is +allowed, does not carry through to any subsequent matching that uses a stacked +pattern. +


    SUBJECT MODIFIERS

    The modifiers that can appear in subject lines and the #subject @@ -806,6 +1034,7 @@ for a description of their effects. anchored set PCRE2_ANCHORED dfa_restart set PCRE2_DFA_RESTART dfa_shortest set PCRE2_DFA_SHORTEST + no_jit set PCRE2_NO_JIT no_utf_check set PCRE2_NO_UTF_CHECK notbol set PCRE2_NOTBOL notempty set PCRE2_NOTEMPTY @@ -818,11 +1047,11 @@ The partial matching modifiers are provided with abbreviations because they appear frequently in tests.

    -If the /posix modifier was present on the pattern, causing the POSIX +If the posix modifier was present on the pattern, causing the POSIX wrapper API to be used, the only option-setting modifiers that have any effect are notbol, notempty, and noteol, causing REG_NOTBOL, REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to regexec(). -Any other modifiers cause an error. +The other modifiers are ignored, with a warning message.


    Setting match controls @@ -833,33 +1062,44 @@ information. Some of them may also be specified on a pattern line (see above), in which case they apply to every subject line that is matched against that pattern.
    -      aftertext                 show text after match
    -      allaftertext              show text after captures
    -      allcaptures               show all captures
    -      allusedtext               show all consulted text (non-JIT only)
    -      altglobal                 alternative global matching
    -      callout_capture           show captures at callout time
    -      callout_data=<n>          set a value to pass via callouts
    -      callout_fail=<n>[:<m>]    control callout failure
    -      callout_none              do not supply a callout function
    -      copy=<number or name>     copy captured substring
    -      dfa                       use pcre2_dfa_match()
    -      find_limits               find match and recursion limits
    -      get=<number or name>      extract captured substring
    -      getall                    extract all captured substrings
    -  /g  global                    global matching
    -      jitstack=<n>              set size of JIT stack
    -      mark                      show mark values
    -      match_limit=>n>           set a match limit
    -      memory                    show memory usage
    -      offset=<n>                set starting offset
    -      ovector=<n>               set size of output vector
    -      recursion_limit=<n>       set a recursion limit
    -      replace=<string>          specify a replacement string
    -      startchar                 show startchar when relevant
    -      zero_terminate            pass the subject as zero-terminated
    +      aftertext                  show text after match
    +      allaftertext               show text after captures
    +      allcaptures                show all captures
    +      allusedtext                show all consulted text (non-JIT only)
    +      altglobal                  alternative global matching
    +      callout_capture            show captures at callout time
    +      callout_data=<n>           set a value to pass via callouts
    +      callout_error=<n>[:<m>]    control callout error
    +      callout_fail=<n>[:<m>]     control callout failure
    +      callout_none               do not supply a callout function
    +      copy=<number or name>      copy captured substring
    +      dfa                        use pcre2_dfa_match()
    +      find_limits                find match and recursion limits
    +      get=<number or name>       extract captured substring
    +      getall                     extract all captured substrings
    +  /g  global                     global matching
    +      jitstack=<n>               set size of JIT stack
    +      mark                       show mark values
    +      match_limit=<n>            set a match limit
    +      memory                     show memory usage
    +      null_context               match with a NULL context
    +      offset=<n>                 set starting offset
    +      offset_limit=<n>           set offset limit
    +      ovector=<n>                set size of output vector
    +      recursion_limit=<n>        set a recursion limit
    +      replace=<string>           specify a replacement string
    +      startchar                  show startchar when relevant
    +      startoffset=<n>            same as offset=<n>
    +      substitute_extended        use PCRE2_SUBSTITUTE_EXTENDED
    +      substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
    +      substitute_unknown_unset   use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
    +      substitute_unset_empty     use PCRE2_SUBSTITUTE_UNSET_EMPTY
    +      zero_terminate             pass the subject as zero-terminated
     
    -The effects of these modifiers are described in the following sections. +The effects of these modifiers are described in the following sections. When +matching via the POSIX wrapper API, the aftertext, allaftertext, +and ovector subject modifiers work as described below. All other +modifiers are either ignored, with a warning message, or cause an error.


    Showing more text @@ -916,7 +1156,8 @@ The allcaptures modifier requests that the values of all potential captured parentheses be output after a match. By default, only those up to the highest one actually used in the match are output (corresponding to the return code from pcre2_match()). Groups that did not take part in the match -are output as "<unset>". +are output as "<unset>". This modifier is not relevant for DFA matching (which +does no capturing); it is ignored, with a warning message, if present.


    Testing callouts @@ -924,15 +1165,22 @@ Testing callouts

    A callout function is supplied when pcre2test calls the library matching functions, unless callout_none is specified. If callout_capture is -set, the current captured groups are output when a callout occurs. +set, the current captured groups are output when a callout occurs. The default +return from the callout function is zero, which allows matching to continue.

    The callout_fail modifier can be given one or two numbers. If there is -only one number, 1 is returned instead of 0 when a callout of that number is -reached. If two numbers are given, 1 is returned when callout <n> is reached -for the <m>th time. Note that callouts with string arguments are always given -the number zero. See "Callouts" below for a description of the output when a -callout it taken. +only one number, 1 is returned instead of 0 (causing matching to backtrack) +when a callout of that number is reached. If two numbers (<n>:<m>) are given, 1 +is returned when callout <n> is reached and there have been at least <m> +callouts. The callout_error modifier is similar, except that +PCRE2_ERROR_CALLOUT is returned, causing the entire matching process to be +aborted. If both these modifiers are set for the same callout number, +callout_error takes precedence. +

    +

    +Note that callouts with string arguments are always given the number zero. See +"Callouts" below for a description of the output when a callout is taken.

    The callout_data modifier can be given an unsigned or a negative number. @@ -945,7 +1193,7 @@ Finding all matches in a string

    Searching for all possible matches within a subject can be requested by the -global or /altglobal modifier. After finding a match, the matching +global or altglobal modifier. After finding a match, the matching function is called again to search the remainder of the subject. The difference between global and altglobal is that the former uses the start_offset argument to pcre2_match() or pcre2_dfa_match() @@ -996,19 +1244,34 @@ Testing the substitution function

    If the replace modifier is set, the pcre2_substitute() function is -called instead of one of the matching functions. Unlike subject strings, -pcre2test does not process replacement strings for escape sequences. In -UTF mode, a replacement string is checked to see if it is a valid UTF-8 string. -If so, it is correctly converted to a UTF string of the appropriate code unit -width. If it is not a valid UTF-8 string, the individual code units are copied -directly. This provides a means of passing an invalid UTF-8 string for testing -purposes. +called instead of one of the matching functions. Note that replacement strings +cannot contain commas, because a comma signifies the end of a modifier. This is +not thought to be an issue in a test program.

    -If the global modifier is set, PCRE2_SUBSTITUTE_GLOBAL is passed to -pcre2_substitute(). After a successful substitution, the modified string -is output, preceded by the number of replacements. This may be zero if there -were no matches. Here is a simple example of a substitution test: +Unlike subject strings, pcre2test does not process replacement strings +for escape sequences. In UTF mode, a replacement string is checked to see if it +is a valid UTF-8 string. If so, it is correctly converted to a UTF string of +the appropriate code unit width. If it is not a valid UTF-8 string, the +individual code units are copied directly. This provides a means of passing an +invalid UTF-8 string for testing purposes. +

    +

    +The following modifiers set options (in addition to the normal match options) +for pcre2_substitute(): +

    +  global                      PCRE2_SUBSTITUTE_GLOBAL
    +  substitute_extended         PCRE2_SUBSTITUTE_EXTENDED
    +  substitute_overflow_length  PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
    +  substitute_unknown_unset    PCRE2_SUBSTITUTE_UNKNOWN_UNSET
    +  substitute_unset_empty      PCRE2_SUBSTITUTE_UNSET_EMPTY
    +
    +
    +

    +

    +After a successful substitution, the modified string is output, preceded by the +number of replacements. This may be zero if there were no matches. Here is a +simple example of a substitution test:

       /abc/replace=xxx
           =abc=abc=
    @@ -1016,12 +1279,12 @@ were no matches. Here is a simple example of a substitution test:
           =abc=abc=\=global
        2: =xxx=xxx=
     
    -Subject and replacement strings should be kept relatively short for -substitution tests, as fixed-size buffers are used. To make it easy to test for -buffer overflow, if the replacement string starts with a number in square -brackets, that number is passed to pcre2_substitute() as the size of the -output buffer, with the replacement string starting at the next character. Here -is an example that tests the edge case: +Subject and replacement strings should be kept relatively short (fewer than 256 +characters) for substitution tests, as fixed-size buffers are used. To make it +easy to test for buffer overflow, if the replacement string starts with a +number in square brackets, that number is passed to pcre2_substitute() as +the size of the output buffer, with the replacement string starting at the next +character. Here is an example that tests the edge case:
       /abc/
           123abc123\=replace=[10]XYZ
    @@ -1029,6 +1292,19 @@ is an example that tests the edge case:
           123abc123\=replace=[9]XYZ
       Failed: error -47: no more memory
     
    +The default action of pcre2_substitute() is to return +PCRE2_ERROR_NOMEMORY when the output buffer is too small. However, if the +PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set (by using the +substitute_overflow_length modifier), pcre2_substitute() continues +to go through the motions of matching and substituting, in order to compute the +size of buffer that is required. When this happens, pcre2test shows the +required buffer length (which includes space for the trailing zero) as part of +the error message. For example: +
    +  /abc/substitute_overflow_length
    +      123abc123\=replace=[9]XYZ
    +  Failed: error -47: no more memory: 10 code units are needed
    +
    A replacement string is ignored with POSIX and DFA matching. Specifying partial matching provokes an error return ("bad option value") from pcre2_substitute(). @@ -1100,6 +1376,16 @@ The offset modifier sets an offset in the subject string at which matching starts. Its value is a number of code units, not characters.


    +Setting an offset limit +
    +

    +The offset_limit modifier sets a limit for unanchored matches. If a match +cannot be found starting at or before this offset in the subject, a "no match" +return is given. The data value is a number of code units, not characters. When +this modifier is used, the use_offset_limit modifier must have been set +for the pattern; if not, an error is generated. +

    +
    Setting the size of the output vector

    @@ -1131,6 +1417,17 @@ this modifier has no effect, as there is no facility for passing a length.) When testing pcre2_substitute(), this modifier also has the effect of passing the replacement string as zero-terminated.

    +
    +Passing a NULL context +
    +

    +Normally, pcre2test passes a context block to pcre2_match(), +pcre2_dfa_match() or pcre2_jit_match(). If the null_context +modifier is set, however, NULL is passed. This is for testing that the matching +functions behave correctly in this case (they use default values). This +modifier cannot be used with the find_limits modifier or when testing the +substitution function. +


    THE ALTERNATIVE MATCHING FUNCTION

    By default, pcre2test uses the standard PCRE2 matching function, @@ -1196,7 +1493,7 @@ unset substring is shown as "<unset>", as for the second data line. If the strings contain any non-printing characters, they are output as \xhh escapes if the value is less than 256 and UTF mode is not set. Otherwise they are output as \x{hh...} escapes. See below for the definition of non-printing -characters. If the /aftertext modifier is set, the output for substring +characters. If the aftertext modifier is set, the output for substring 0 is followed by the rest of the subject string, identified by "0+" like this:

    @@ -1321,7 +1618,9 @@ item to be tested. For example:
     This output indicates that callout number 0 occurred for a match attempt
     starting at the fourth character of the subject string, when the pointer was at
     the seventh character, and when the next pattern item was \d. Just
    -one circumflex is output if the start and current positions are the same.
    +one circumflex is output if the start and current positions are the same, or if
    +the current position precedes the start position, which can happen if the
    +callout is in a lookbehind assertion.
     

    Callouts numbered 255 are assumed to be automatic callouts, inserted as a @@ -1387,7 +1686,7 @@ therefore shown as hex escapes.

    When pcre2test is outputting text that is a matched part of a subject string, it behaves in the same way, unless a different locale has been set for -the pattern (using the /locale modifier). In this case, the +the pattern (using the locale modifier). In this case, the isprint() function is used to distinguish printing and non-printing characters.

    @@ -1413,11 +1712,16 @@ can be used to test these functions.

    When a pattern with push modifier is successfully compiled, it is pushed onto a stack of compiled patterns, and pcre2test expects the next line to -contain a new pattern (or command) instead of a subject line. By this means, a -number of patterns can be compiled and retained. The push modifier is -incompatible with posix, and control modifiers that act at match time are -ignored (with a message). The jitverify modifier applies only at compile -time. The command +contain a new pattern (or command) instead of a subject line. By contrast, +the pushcopy modifier causes a copy of the compiled pattern to be +stacked, leaving the original available for immediate matching. By using +push and/or pushcopy, a number of patterns can be compiled and +retained. These modifiers are incompatible with posix, and control +modifiers that act at match time are ignored (with a message) for the stacked +patterns. The jitverify modifier applies only at compile time. +

    +

    +The command

       #save <filename>
     
    @@ -1434,7 +1738,8 @@ usual by an empty line or end of file. This command may be followed by a modifier list containing only control modifiers that act after a pattern has been compiled. In particular, hex, -posix, and push are not allowed, nor are any +posix, posix_nosub, push, and pushcopy are not allowed, +nor are any option-setting modifiers. The JIT modifiers are, however permitted. Here is an example that saves and reloads two patterns. @@ -1452,6 +1757,11 @@ reloads two patterns. If jitverify is used with #pop, it does not automatically imply jit, which is different behaviour from when it is used on a pattern.

    +

    +The #popcopy command is analogous to the pushcopy modifier in that it +makes current a copy of the topmost stack pattern, leaving the original still +on the stack. +


    SEE ALSO

    pcre2(3), pcre2api(3), pcre2callout(3), @@ -1469,9 +1779,9 @@ Cambridge, England.


    REVISION

    -Last updated: 20 May 2015 +Last updated: 28 December 2016
    -Copyright © 1997-2015 University of Cambridge. +Copyright © 1997-2016 University of Cambridge.

    Return to the PCRE2 index page. diff --git a/pcre2/doc/html/pcre2unicode.html b/pcre2/doc/html/pcre2unicode.html index 22c1792d9..6ca367f4e 100644 --- a/pcre2/doc/html/pcre2unicode.html +++ b/pcre2/doc/html/pcre2unicode.html @@ -67,15 +67,20 @@ In UTF modes, the dot metacharacter matches one UTF character instead of a single code unit.

    -The escape sequence \C can be used to match a single code unit, in a UTF mode, +The escape sequence \C can be used to match a single code unit in a UTF mode, but its use can lead to some strange effects because it breaks up multi-unit characters (see the description of \C in the pcre2pattern -documentation). The use of \C is not supported in the alternative matching -function pcre2_dfa_match(), nor is it supported in UTF mode by the JIT -optimization. If JIT optimization is requested for a UTF pattern that contains -\C, it will not succeed, and so the matching will be carried out by the normal -interpretive function. +documentation). +

    +

    +The use of \C is not supported by the alternative matching function +pcre2_dfa_match() when in UTF-8 or UTF-16 mode, that is, when a character +may consist of more than one code unit. The use of \C in these modes provokes +a match-time error. Also, the JIT optimization does not support \C in these +modes. If JIT optimization is requested for a UTF-8 or UTF-16 pattern that +contains \C, it will not succeed, and so when pcre2_match() is called, +the matching will be carried out by the normal interpretive function.

    The character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly test @@ -126,11 +131,22 @@ as a byte-order mark (BOM). The PCRE2 functions do not handle this, expecting strings to be in host byte order.

    -The entire string is checked before any other processing takes place. In -addition to checking the format of the string, there is a check to ensure that -all code points lie in the range U+0 to U+10FFFF, excluding the surrogate area. -The so-called "non-character" code points are not excluded because Unicode -corrigendum #9 makes it clear that they should not be. +A UTF string is checked before any other processing takes place. In the case of +pcre2_match() and pcre2_dfa_match() calls with a non-zero starting +offset, the check is applied only to that part of the subject that could be +inspected during matching, and there is a check that the starting offset points +to the first code unit of a character or to the end of the subject. If there +are no lookbehind assertions in the pattern, the check starts at the starting +offset. Otherwise, it starts at the length of the longest lookbehind before the +starting offset, or at the start of the subject if there are not that many +characters before the starting offset. Note that the sequences \b and \B are +one-character lookbehinds. +

    +

    +In addition to checking the format of the string, there is a check to ensure +that all code points lie in the range U+0 to U+10FFFF, excluding the surrogate +area. The so-called "non-character" code points are not excluded because +Unicode corrigendum #9 makes it clear that they should not be.

    Characters in the "Surrogate Area" of Unicode are reserved for use by UTF-16, @@ -232,9 +248,9 @@ Errors in UTF-16 strings

    The following negative error codes are given for invalid UTF-16 strings:

    -  PCRE_UTF16_ERR1  Missing low surrogate at end of string
    -  PCRE_UTF16_ERR2  Invalid low surrogate follows high surrogate
    -  PCRE_UTF16_ERR3  Isolated low surrogate
    +  PCRE2_ERROR_UTF16_ERR1  Missing low surrogate at end of string
    +  PCRE2_ERROR_UTF16_ERR2  Invalid low surrogate follows high surrogate
    +  PCRE2_ERROR_UTF16_ERR3  Isolated low surrogate
     
     

    @@ -244,8 +260,8 @@ Errors in UTF-32 strings

    The following negative error codes are given for invalid UTF-32 strings:

    -  PCRE_UTF32_ERR1  Surrogate character (range from 0xd800 to 0xdfff)
    -  PCRE_UTF32_ERR2  Code point is greater than 0x10ffff
    +  PCRE2_ERROR_UTF32_ERR1  Surrogate character (0xd800 to 0xdfff)
    +  PCRE2_ERROR_UTF32_ERR2  Code point is greater than 0x10ffff
     
     

    @@ -264,9 +280,9 @@ Cambridge, England. REVISION

    -Last updated: 23 November 2014 +Last updated: 03 July 2016
    -Copyright © 1997-2014 University of Cambridge. +Copyright © 1997-2016 University of Cambridge.

    Return to the PCRE2 index page. diff --git a/pcre2/doc/index.html.src b/pcre2/doc/index.html.src index ae938fd3c..eebb80b77 100644 --- a/pcre2/doc/index.html.src +++ b/pcre2/doc/index.html.src @@ -91,6 +91,12 @@ in the library. pcre2_callout_enumerate   Enumerate callouts in a compiled pattern +pcre2_code_copy +   Copy a compiled pattern + +pcre2_code_copy_with_tables +   Copy a compiled pattern and its character tables + pcre2_code_free   Free a compiled pattern @@ -210,9 +216,15 @@ in the library. pcre2_set_match_limit   Set the match limit +pcre2_set_max_pattern_length +   Set the maximum length of pattern + pcre2_set_newline   Set the newline convention +pcre2_set_offset_limit +   Set the offset limit + pcre2_set_parens_nest_limit   Set the parentheses nesting limit diff --git a/pcre2/doc/pcre2.3 b/pcre2/doc/pcre2.3 index e315bbb60..9a84ce31e 100644 --- a/pcre2/doc/pcre2.3 +++ b/pcre2/doc/pcre2.3 @@ -1,4 +1,4 @@ -.TH PCRE2 3 "13 April 2015" "PCRE2 10.20" +.TH PCRE2 3 "16 October 2015" "PCRE2 10.21" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH INTRODUCTION @@ -118,8 +118,10 @@ running redundant checks. .P The use of the \eC escape sequence in a UTF-8 or UTF-16 pattern can lead to problems, because it may leave the current matching point in the middle of a -multi-code-unit character. The PCRE2_NEVER_BACKSLASH_C option can be used to -lock out the use of \eC, causing a compile-time error if it is encountered. +multi-code-unit character. The PCRE2_NEVER_BACKSLASH_C option can be used by an +application to lock out the use of \eC, causing a compile-time error if it is +encountered. It is also possible to build PCRE2 with the use of \eC permanently +disabled. .P Another way that performance can be hit is by running a pattern that has a very large search tree against a string that will never match. Nested unlimited @@ -187,6 +189,6 @@ use my two initials, followed by the two digits 10, at the domain cam.ac.uk. 
.rs .sp .nf -Last updated: 13 April 2015 +Last updated: 16 October 2015 Copyright (c) 1997-2015 University of Cambridge. .fi diff --git a/pcre2/doc/pcre2.txt b/pcre2/doc/pcre2.txt index 3193d3054..650660957 100644 --- a/pcre2/doc/pcre2.txt +++ b/pcre2/doc/pcre2.txt @@ -104,26 +104,27 @@ SECURITY CONSIDERATIONS The use of the \C escape sequence in a UTF-8 or UTF-16 pattern can lead to problems, because it may leave the current matching point in the middle of a multi-code-unit character. The PCRE2_NEVER_BACKSLASH_C - option can be used to lock out the use of \C, causing a compile-time - error if it is encountered. + option can be used by an application to lock out the use of \C, causing + a compile-time error if it is encountered. It is also possible to build + PCRE2 with the use of \C permanently disabled. - Another way that performance can be hit is by running a pattern that - has a very large search tree against a string that will never match. - Nested unlimited repeats in a pattern are a common example. PCRE2 pro- - vides some protection against this: see the pcre2_set_match_limit() + Another way that performance can be hit is by running a pattern that + has a very large search tree against a string that will never match. + Nested unlimited repeats in a pattern are a common example. PCRE2 pro- + vides some protection against this: see the pcre2_set_match_limit() function in the pcre2api page. USER DOCUMENTATION - The user documentation for PCRE2 comprises a number of different sec- - tions. In the "man" format, each of these is a separate "man page". In - the HTML format, each is a separate page, linked from the index page. - In the plain text format, the descriptions of the pcre2grep and + The user documentation for PCRE2 comprises a number of different sec- + tions. In the "man" format, each of these is a separate "man page". In + the HTML format, each is a separate page, linked from the index page. 
+ In the plain text format, the descriptions of the pcre2grep and pcre2test programs are in files called pcre2grep.txt and pcre2test.txt, - respectively. The remaining sections, except for the pcre2demo section - (which is a program listing), and the short pages for individual func- - tions, are concatenated in pcre2.txt, for ease of searching. The sec- + respectively. The remaining sections, except for the pcre2demo section + (which is a program listing), and the short pages for individual func- + tions, are concatenated in pcre2.txt, for ease of searching. The sec- tions are as follows: pcre2 this document @@ -148,7 +149,7 @@ USER DOCUMENTATION pcre2test description of the pcre2test command pcre2unicode discussion of Unicode and UTF support - In the "man" and HTML formats, there is also a short page for each C + In the "man" and HTML formats, there is also a short page for each C library function, listing its arguments and results. @@ -158,14 +159,14 @@ AUTHOR University Computing Service Cambridge, England. - Putting an actual email address here is a spam magnet. If you want to - email me, use my two initials, followed by the two digits 10, at the + Putting an actual email address here is a spam magnet. If you want to + email me, use my two initials, followed by the two digits 10, at the domain cam.ac.uk. REVISION - Last updated: 13 April 2015 + Last updated: 16 October 2015 Copyright (c) 1997-2015 University of Cambridge. 
------------------------------------------------------------------------------ @@ -190,13 +191,13 @@ PCRE2 NATIVE API BASIC FUNCTIONS uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset, pcre2_compile_context *ccontext); - pcre2_code_free(pcre2_code *code); + void pcre2_code_free(pcre2_code *code); - pcre2_match_data_create(uint32_t ovecsize, + pcre2_match_data *pcre2_match_data_create(uint32_t ovecsize, pcre2_general_context *gcontext); - pcre2_match_data_create_from_pattern(const pcre2_code *code, - pcre2_general_context *gcontext); + pcre2_match_data *pcre2_match_data_create_from_pattern( + const pcre2_code *code, pcre2_general_context *gcontext); int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, PCRE2_SIZE startoffset, @@ -251,6 +252,9 @@ PCRE2 NATIVE API COMPILE CONTEXT FUNCTIONS int pcre2_set_character_tables(pcre2_compile_context *ccontext, const unsigned char *tables); + int pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, + PCRE2_SIZE value); + int pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t value); @@ -278,6 +282,9 @@ PCRE2 NATIVE API MATCH CONTEXT FUNCTIONS int pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t value); + int pcre2_set_offset_limit(pcre2_match_context *mcontext, + PCRE2_SIZE value); + int pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t value); @@ -356,11 +363,11 @@ PCRE2 NATIVE API JIT FUNCTIONS PCRE2 NATIVE API SERIALIZATION FUNCTIONS int32_t pcre2_serialize_decode(pcre2_code **codes, - int32_t number_of_codes, const uint32_t *bytes, + int32_t number_of_codes, const uint8_t *bytes, pcre2_general_context *gcontext); - int32_t pcre2_serialize_encode(pcre2_code **codes, - int32_t number_of_codes, uint32_t **serialized_bytes, + int32_t pcre2_serialize_encode(const pcre2_code **codes, + int32_t number_of_codes, uint8_t **serialized_bytes, PCRE2_SIZE *serialized_size, pcre2_general_context *gcontext); void pcre2_serialize_free(uint8_t *bytes); @@ 
-370,6 +377,10 @@ PCRE2 NATIVE API SERIALIZATION FUNCTIONS PCRE2 NATIVE API AUXILIARY FUNCTIONS + pcre2_code *pcre2_code_copy(const pcre2_code *code); + + pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *code); + int pcre2_get_error_message(int errorcode, PCRE2_UCHAR *buffer, PCRE2_SIZE bufflen); @@ -480,10 +491,10 @@ PCRE2 API OVERVIEW pcre2_jit_stack_assign() in order to control the JIT code's memory usage. - JIT matching is automatically used by pcre2_match() if it is available. - There is also a direct interface for JIT matching, which gives improved - performance. The JIT-specific functions are discussed in the pcre2jit - documentation. + JIT matching is automatically used by pcre2_match() if it is available, + unless the PCRE2_NO_JIT option is set. There is also a direct interface + for JIT matching, which gives improved performance. The JIT-specific + functions are discussed in the pcre2jit documentation. A second matching function, pcre2_dfa_match(), which is not Perl-com- patible, is also provided. This uses a different algorithm for the @@ -516,76 +527,113 @@ PCRE2 API OVERVIEW return a copy of the subject string with substitutions for parts that were matched. - Finally, there are functions for finding out information about a com- - piled pattern (pcre2_pattern_info()) and about the configuration with + Functions whose names begin with pcre2_serialize_ are used for saving + compiled patterns on disc or elsewhere, and reloading them later. + + Finally, there are functions for finding out information about a com- + piled pattern (pcre2_pattern_info()) and about the configuration with which PCRE2 was built (pcre2_config()). + Functions with names ending with _free() are used for freeing memory + blocks of various sorts. In all cases, if one of these functions is + called with a NULL argument, it does nothing. + STRING LENGTHS AND OFFSETS - The PCRE2 API uses string lengths and offsets into strings of code - units in several places. 
These values are always of type PCRE2_SIZE, - which is an unsigned integer type, currently always defined as size_t. - The largest value that can be stored in such a type (that is - ~(PCRE2_SIZE)0) is reserved as a special indicator for zero-terminated - strings and unset offsets. Therefore, the longest string that can be + The PCRE2 API uses string lengths and offsets into strings of code + units in several places. These values are always of type PCRE2_SIZE, + which is an unsigned integer type, currently always defined as size_t. + The largest value that can be stored in such a type (that is + ~(PCRE2_SIZE)0) is reserved as a special indicator for zero-terminated + strings and unset offsets. Therefore, the longest string that can be handled is one less than this maximum. NEWLINES PCRE2 supports five different conventions for indicating line breaks in - strings: a single CR (carriage return) character, a single LF (line- + strings: a single CR (carriage return) character, a single LF (line- feed) character, the two-character sequence CRLF, any of the three pre- - ceding, or any Unicode newline sequence. The Unicode newline sequences - are the three just mentioned, plus the single characters VT (vertical + ceding, or any Unicode newline sequence. The Unicode newline sequences + are the three just mentioned, plus the single characters VT (vertical tab, U+000B), FF (form feed, U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS (paragraph separator, U+2029). - Each of the first three conventions is used by at least one operating + Each of the first three conventions is used by at least one operating system as its standard newline sequence. When PCRE2 is built, a default - can be specified. The default default is LF, which is the Unix stan- - dard. However, the newline convention can be changed by an application + can be specified. The default default is LF, which is the Unix stan- + dard. 
However, the newline convention can be changed by an application when calling pcre2_compile(), or it can be specified by special text at the start of the pattern itself; this overrides any other settings. See the pcre2pattern page for details of the special character sequences. - In the PCRE2 documentation the word "newline" is used to mean "the + In the PCRE2 documentation the word "newline" is used to mean "the character or pair of characters that indicate a line break". The choice - of newline convention affects the handling of the dot, circumflex, and + of newline convention affects the handling of the dot, circumflex, and dollar metacharacters, the handling of #-comments in /x mode, and, when - CRLF is a recognized line ending sequence, the match position advance- + CRLF is a recognized line ending sequence, the match position advance- ment for a non-anchored pattern. There is more detail about this in the section on pcre2_match() options below. - The choice of newline convention does not affect the interpretation of + The choice of newline convention does not affect the interpretation of the \n or \r escape sequences, nor does it affect what \R matches; this has its own separate convention. MULTITHREADING - In a multithreaded application it is important to keep thread-specific - data separate from data that can be shared between threads. The PCRE2 - library code itself is thread-safe: it contains no static or global - variables. The API is designed to be fairly simple for non-threaded - applications while at the same time ensuring that multithreaded appli- + In a multithreaded application it is important to keep thread-specific + data separate from data that can be shared between threads. The PCRE2 + library code itself is thread-safe: it contains no static or global + variables. The API is designed to be fairly simple for non-threaded + applications while at the same time ensuring that multithreaded appli- cations can use it. 
There are several different blocks of data that are used to pass infor- mation between the application and the PCRE2 libraries. - (1) A pointer to the compiled form of a pattern is returned to the user - when pcre2_compile() is successful. The data in the compiled pattern is - fixed, and does not change when the pattern is matched. Therefore, it - is thread-safe, that is, the same compiled pattern can be used by more - than one thread simultaneously. An application can compile all its pat- - terns at the start, before forking off multiple threads that use them. - However, if the just-in-time optimization feature is being used, it - needs separate memory stack areas for each thread. See the pcre2jit - documentation for more details. + The compiled pattern - (2) The next section below introduces the idea of "contexts" in which + A pointer to the compiled form of a pattern is returned to the user + when pcre2_compile() is successful. The data in the compiled pattern is + fixed, and does not change when the pattern is matched. Therefore, it + is thread-safe, that is, the same compiled pattern can be used by more + than one thread simultaneously. For example, an application can compile + all its patterns at the start, before forking off multiple threads that + use them. However, if the just-in-time optimization feature is being + used, it needs separate memory stack areas for each thread. See the + pcre2jit documentation for more details. + + In a more complicated situation, where patterns are compiled only when + they are first needed, but are still shared between threads, pointers + to compiled patterns must be protected from simultaneous writing by + multiple threads, at least until a pattern has been compiled. The logic + can be something like this: + + Get a read-only (shared) lock (mutex) for pointer + if (pointer == NULL) + { + Get a write (unique) lock for pointer + pointer = pcre2_compile(... 
+ } + Release the lock + Use pointer in pcre2_match() + + Of course, testing for compilation errors should also be included in + the code. + + If JIT is being used, but the JIT compilation is not being done immedi- + ately, (perhaps waiting to see if the pattern is used often enough) + similar logic is required. JIT compilation updates a pointer within the + compiled code block, so a thread must gain unique write access to the + pointer before calling pcre2_jit_compile(). Alternatively, + pcre2_code_copy() or pcre2_code_copy_with_tables() can be used to + obtain a private copy of the compiled code. + + Context blocks + + The next main section below introduces the idea of "contexts" in which PCRE2 functions are called. A context is nothing more than a collection of parameters that control the way PCRE2 operates. Grouping a number of parameters together in a context is a convenient way of passing them to @@ -598,44 +646,45 @@ MULTITHREADING threads. However, if any thread needs to change any value in a context, it must make its own thread-specific copy. - (3) The matching functions need a block of memory for working space and - for storing the results of a match. This includes details of what was + Match blocks + + The matching functions need a block of memory for working space and for + storing the results of a match. This includes details of what was matched, as well as additional information such as the name of a - (*MARK) setting. Each thread must provide its own version of this mem- - ory. + (*MARK) setting. Each thread must provide its own copy of this memory. PCRE2 CONTEXTS - Some PCRE2 functions have a lot of parameters, many of which are used - only by specialist applications, for example, those that use custom - memory management or non-standard character tables. To keep function - argument lists at a reasonable size, and at the same time to keep the - API extensible, "uncommon" parameters are passed to certain functions - in a context instead of directly. 
A context is just a block of memory - that holds the parameter values. Applications that do not need to - adjust any of the context parameters can pass NULL when a context + Some PCRE2 functions have a lot of parameters, many of which are used + only by specialist applications, for example, those that use custom + memory management or non-standard character tables. To keep function + argument lists at a reasonable size, and at the same time to keep the + API extensible, "uncommon" parameters are passed to certain functions + in a context instead of directly. A context is just a block of memory + that holds the parameter values. Applications that do not need to + adjust any of the context parameters can pass NULL when a context pointer is required. - There are three different types of context: a general context that is - relevant for several PCRE2 operations, a compile-time context, and a + There are three different types of context: a general context that is + relevant for several PCRE2 operations, a compile-time context, and a match-time context. The general context - At present, this context just contains pointers to (and data for) - external memory management functions that are called from several + At present, this context just contains pointers to (and data for) + external memory management functions that are called from several places in the PCRE2 library. The context is named `general' rather than - specifically `memory' because in future other fields may be added. If - you do not want to supply your own custom memory management functions, - you do not need to bother with a general context. A general context is + specifically `memory' because in future other fields may be added. If + you do not want to supply your own custom memory management functions, + you do not need to bother with a general context. 
A general context is created by: pcre2_general_context *pcre2_general_context_create( void *(*private_malloc)(PCRE2_SIZE, void *), void (*private_free)(void *, void *), void *memory_data); - The two function pointers specify custom memory management functions, + The two function pointers specify custom memory management functions, whose prototypes are: void *private_malloc(PCRE2_SIZE, void *); @@ -643,16 +692,16 @@ PCRE2 CONTEXTS Whenever code in PCRE2 calls these functions, the final argument is the value of memory_data. Either of the first two arguments of the creation - function may be NULL, in which case the system memory management func- - tions malloc() and free() are used. (This is not currently useful, as - there are no other fields in a general context, but in future there - might be.) The private_malloc() function is used (if supplied) to - obtain memory for storing the context, and all three values are saved + function may be NULL, in which case the system memory management func- + tions malloc() and free() are used. (This is not currently useful, as + there are no other fields in a general context, but in future there + might be.) The private_malloc() function is used (if supplied) to + obtain memory for storing the context, and all three values are saved as part of the context. - Whenever PCRE2 creates a data block of any kind, the block contains a - pointer to the free() function that matches the malloc() function that - was used. When the time comes to free the block, this function is + Whenever PCRE2 creates a data block of any kind, the block contains a + pointer to the free() function that matches the malloc() function that + was used. When the time comes to free the block, this function is called. 
A general context can be copied by calling: @@ -667,20 +716,21 @@ PCRE2 CONTEXTS The compile context - A compile context is required if you want to change the default values + A compile context is required if you want to change the default values of any of the following compile-time parameters: What \R matches (Unicode newlines or CR, LF, CRLF only) PCRE2's character tables The newline character sequence The compile time nested parentheses limit + The maximum length of the pattern string An external function for stack checking - A compile context is also required if you are using custom memory man- - agement. If none of these apply, just pass NULL as the context argu- + A compile context is also required if you are using custom memory man- + agement. If none of these apply, just pass NULL as the context argu- ment of pcre2_compile(). - A compile context is created, copied, and freed by the following func- + A compile context is created, copied, and freed by the following func- tions: pcre2_compile_context *pcre2_compile_context_create( @@ -691,26 +741,36 @@ PCRE2 CONTEXTS void pcre2_compile_context_free(pcre2_compile_context *ccontext); - A compile context is created with default values for its parameters. + A compile context is created with default values for its parameters. These can be changed by calling the following functions, which return 0 on success, or PCRE2_ERROR_BADDATA if invalid data is detected. int pcre2_set_bsr(pcre2_compile_context *ccontext, uint32_t value); - The value must be PCRE2_BSR_ANYCRLF, to specify that \R matches only - CR, LF, or CRLF, or PCRE2_BSR_UNICODE, to specify that \R matches any + The value must be PCRE2_BSR_ANYCRLF, to specify that \R matches only + CR, LF, or CRLF, or PCRE2_BSR_UNICODE, to specify that \R matches any Unicode line ending sequence. 
The value is used by the JIT compiler and - by the two interpreted matching functions, pcre2_match() and + by the two interpreted matching functions, pcre2_match() and pcre2_dfa_match(). int pcre2_set_character_tables(pcre2_compile_context *ccontext, const unsigned char *tables); - The value must be the result of a call to pcre2_maketables(), whose + The value must be the result of a call to pcre2_maketables(), whose only argument is a general context. This function builds a set of char- acter tables in the current locale. + int pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, + PCRE2_SIZE value); + + This sets a maximum length, in code units, for the pattern string that + is to be compiled. If the pattern is longer, an error is generated. + This facility is provided so that applications that accept patterns + from external sources can limit their size. The default is the largest + number that a PCRE2_SIZE variable can hold, which is effectively unlim- + ited. + int pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t value); @@ -731,7 +791,9 @@ PCRE2 CONTEXTS This parameter ajusts the limit, set when PCRE2 is built (default 250), on the depth of parenthesis nesting in a pattern. This limit stops - rogue patterns using up too much system stack when being compiled. + rogue patterns using up too much system stack when being compiled. The + limit applies to parentheses of all kinds, not just capturing parenthe- + ses. 
int pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext, int (*guard_function)(uint32_t, void *), void *user_data); @@ -755,7 +817,8 @@ PCRE2 CONTEXTS any of the following match-time parameters: A callout function - The limit for calling match() + The offset limit for matching an unanchored pattern + The limit for calling match() (see below) The limit for calling match() recursively A match context is also required if you are using custom memory manage- @@ -785,6 +848,31 @@ PCRE2 CONTEXTS points during a matching operation. Details are given in the pcre2call- out documentation. + int pcre2_set_offset_limit(pcre2_match_context *mcontext, + PCRE2_SIZE value); + + The offset_limit parameter limits how far an unanchored search can + advance in the subject string. The default value is PCRE2_UNSET. The + pcre2_match() and pcre2_dfa_match() functions return + PCRE2_ERROR_NOMATCH if a match with a starting point before or at the + given offset is not found. For example, if the pattern /abc/ is matched + against "123abc" with an offset limit less than 3, the result is + PCRE2_ERROR_NOMATCH. A match can never be found if the startoffset + argument of pcre2_match() or pcre2_dfa_match() is greater than the off- + set limit. + + When using this facility, you must set PCRE2_USE_OFFSET_LIMIT when + calling pcre2_compile() so that when JIT is in use, different code can + be compiled. If a match is started with a non-default offset limit when + PCRE2_USE_OFFSET_LIMIT is not set, an error is generated. + + The offset limit facility can be used to track progress when searching + large subject strings. See also the PCRE2_FIRSTLINE option, which + requires a match to start within the first line of the subject. If this + is set with an offset limit, a match must occur in the first line and + also within the offset limit. In other words, whichever limit comes + first is used.
+ int pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t value); @@ -834,19 +922,22 @@ PCRE2 CONTEXTS be used, or, when PCRE2 has been compiled to use memory on the heap instead of the stack, the amount of heap memory that can be used. This limit is not relevant, and is ignored, when matching is done using JIT - compiled code or by the pcre2_dfa_match() function. + compiled code. However, it is supported by pcre2_dfa_match(), which + uses recursive function calls less frequently than pcre2_match(), but + which can be caused to use a lot of stack by a recursive pattern such + as /(.)(?1)/ matched to a very long string. - The default value for recursion_limit can be set when PCRE2 is built; - the default default is the same value as the default for match_limit. - If the limit is exceeded, pcre2_match() returns PCRE2_ERROR_RECURSION- - LIMIT. A value for the recursion limit may also be supplied by an item - at the start of a pattern of the form + The default value for recursion_limit can be set when PCRE2 is built; + the default default is the same value as the default for match_limit. + If the limit is exceeded, pcre2_match() and pcre2_dfa_match() return + PCRE2_ERROR_RECURSIONLIMIT. A value for the recursion limit may also be + supplied by an item at the start of a pattern of the form (*LIMIT_RECURSION=ddd) - where ddd is a decimal number. However, such a setting is ignored - unless ddd is less than the limit set by the caller of pcre2_match() - or, if no such limit is set, less than the default. + where ddd is a decimal number. However, such a setting is ignored + unless ddd is less than the limit set by the caller of pcre2_match() or + pcre2_dfa_match() or, if no such limit is set, less than the default. 
int pcre2_set_recursion_memory_management( pcre2_match_context *mcontext, @@ -854,21 +945,21 @@ PCRE2 CONTEXTS void (*private_free)(void *, void *), void *memory_data); This function sets up two additional custom memory management functions - for use by pcre2_match() when PCRE2 is compiled to use the heap for + for use by pcre2_match() when PCRE2 is compiled to use the heap for remembering backtracking data, instead of recursive function calls that - use the system stack. There is a discussion about PCRE2's stack usage - in the pcre2stack documentation. See the pcre2build documentation for + use the system stack. There is a discussion about PCRE2's stack usage + in the pcre2stack documentation. See the pcre2build documentation for details of how to build PCRE2. - Using the heap for recursion is a non-standard way of building PCRE2, - for use in environments that have limited stacks. Because of the + Using the heap for recursion is a non-standard way of building PCRE2, + for use in environments that have limited stacks. Because of the greater use of memory management, pcre2_match() runs more slowly. Func- - tions that are different to the general custom memory functions are - provided so that special-purpose external code can be used for this - case, because the memory blocks are all the same size. The blocks are + tions that are different to the general custom memory functions are + provided so that special-purpose external code can be used for this + case, because the memory blocks are all the same size. The blocks are retained by pcre2_match() until it is about to exit so that they can be - re-used when possible during the match. In the absence of these func- - tions, the normal custom memory management functions are used, if sup- + re-used when possible during the match. In the absence of these func- + tions, the normal custom memory management functions are used, if sup- plied, otherwise the system functions. 
@@ -876,75 +967,75 @@ CHECKING BUILD-TIME OPTIONS int pcre2_config(uint32_t what, void *where); - The function pcre2_config() makes it possible for a PCRE2 client to - discover which optional features have been compiled into the PCRE2 - library. The pcre2build documentation has more details about these + The function pcre2_config() makes it possible for a PCRE2 client to + discover which optional features have been compiled into the PCRE2 + library. The pcre2build documentation has more details about these optional features. - The first argument for pcre2_config() specifies which information is - required. The second argument is a pointer to memory into which the - information is placed. If NULL is passed, the function returns the - amount of memory that is needed for the requested information. For - calls that return numerical values, the value is in bytes; when - requesting these values, where should point to appropriately aligned - memory. For calls that return strings, the required length is given in + The first argument for pcre2_config() specifies which information is + required. The second argument is a pointer to memory into which the + information is placed. If NULL is passed, the function returns the + amount of memory that is needed for the requested information. For + calls that return numerical values, the value is in bytes; when + requesting these values, where should point to appropriately aligned + memory. For calls that return strings, the required length is given in code units, not counting the terminating zero. - When requesting information, the returned value from pcre2_config() is - non-negative on success, or the negative error code PCRE2_ERROR_BADOP- - TION if the value in the first argument is not recognized. The follow- + When requesting information, the returned value from pcre2_config() is + non-negative on success, or the negative error code PCRE2_ERROR_BADOP- + TION if the value in the first argument is not recognized. 
The follow- ing information is available: PCRE2_CONFIG_BSR - The output is a uint32_t integer whose value indicates what character - sequences the \R escape sequence matches by default. A value of + The output is a uint32_t integer whose value indicates what character + sequences the \R escape sequence matches by default. A value of PCRE2_BSR_UNICODE means that \R matches any Unicode line ending - sequence; a value of PCRE2_BSR_ANYCRLF means that \R matches only CR, + sequence; a value of PCRE2_BSR_ANYCRLF means that \R matches only CR, LF, or CRLF. The default can be overridden when a pattern is compiled. PCRE2_CONFIG_JIT - The output is a uint32_t integer that is set to one if support for + The output is a uint32_t integer that is set to one if support for just-in-time compiling is available; otherwise it is set to zero. PCRE2_CONFIG_JITTARGET - The where argument should point to a buffer that is at least 48 code - units long. (The exact length required can be found by calling - pcre2_config() with where set to NULL.) The buffer is filled with a - string that contains the name of the architecture for which the JIT - compiler is configured, for example "x86 32bit (little endian + - unaligned)". If JIT support is not available, PCRE2_ERROR_BADOPTION is - returned, otherwise the number of code units used is returned. This is + The where argument should point to a buffer that is at least 48 code + units long. (The exact length required can be found by calling + pcre2_config() with where set to NULL.) The buffer is filled with a + string that contains the name of the architecture for which the JIT + compiler is configured, for example "x86 32bit (little endian + + unaligned)". If JIT support is not available, PCRE2_ERROR_BADOPTION is + returned, otherwise the number of code units used is returned. This is the length of the string, plus one unit for the terminating zero. 
PCRE2_CONFIG_LINKSIZE The output is a uint32_t integer that contains the number of bytes used - for internal linkage in compiled regular expressions. When PCRE2 is - configured, the value can be set to 2, 3, or 4, with the default being - 2. This is the value that is returned by pcre2_config(). However, when - the 16-bit library is compiled, a value of 3 is rounded up to 4, and - when the 32-bit library is compiled, internal linkages always use 4 + for internal linkage in compiled regular expressions. When PCRE2 is + configured, the value can be set to 2, 3, or 4, with the default being + 2. This is the value that is returned by pcre2_config(). However, when + the 16-bit library is compiled, a value of 3 is rounded up to 4, and + when the 32-bit library is compiled, internal linkages always use 4 bytes, so the configured value is not relevant. The default value of 2 for the 8-bit and 16-bit libraries is sufficient - for all but the most massive patterns, since it allows the size of the + for all but the most massive patterns, since it allows the size of the compiled pattern to be up to 64K code units. Larger values allow larger - regular expressions to be compiled by those two libraries, but at the + regular expressions to be compiled by those two libraries, but at the expense of slower matching. PCRE2_CONFIG_MATCHLIMIT - The output is a uint32_t integer that gives the default limit for the - number of internal matching function calls in a pcre2_match() execu- + The output is a uint32_t integer that gives the default limit for the + number of internal matching function calls in a pcre2_match() execu- tion. Further details are given with pcre2_match() below. PCRE2_CONFIG_NEWLINE - The output is a uint32_t integer whose value specifies the default - character sequence that is recognized as meaning "newline". The values + The output is a uint32_t integer whose value specifies the default + character sequence that is recognized as meaning "newline". 
The values are: PCRE2_NEWLINE_CR Carriage return (CR) @@ -953,56 +1044,56 @@ CHECKING BUILD-TIME OPTIONS PCRE2_NEWLINE_ANY Any Unicode line ending PCRE2_NEWLINE_ANYCRLF Any of CR, LF, or CRLF - The default should normally correspond to the standard sequence for + The default should normally correspond to the standard sequence for your operating system. PCRE2_CONFIG_PARENSLIMIT - The output is a uint32_t integer that gives the maximum depth of nest- + The output is a uint32_t integer that gives the maximum depth of nest- ing of parentheses (of any kind) in a pattern. This limit is imposed to - cap the amount of system stack used when a pattern is compiled. It is - specified when PCRE2 is built; the default is 250. This limit does not - take into account the stack that may already be used by the calling - application. For finer control over compilation stack usage, see + cap the amount of system stack used when a pattern is compiled. It is + specified when PCRE2 is built; the default is 250. This limit does not + take into account the stack that may already be used by the calling + application. For finer control over compilation stack usage, see pcre2_set_compile_recursion_guard(). PCRE2_CONFIG_RECURSIONLIMIT - The output is a uint32_t integer that gives the default limit for the - depth of recursion when calling the internal matching function in a - pcre2_match() execution. Further details are given with pcre2_match() + The output is a uint32_t integer that gives the default limit for the + depth of recursion when calling the internal matching function in a + pcre2_match() execution. Further details are given with pcre2_match() below. PCRE2_CONFIG_STACKRECURSE - The output is a uint32_t integer that is set to one if internal recur- - sion when running pcre2_match() is implemented by recursive function - calls that use the system stack to remember their state. This is the - usual way that PCRE2 is compiled. 
The output is zero if PCRE2 was com- - piled to use blocks of data on the heap instead of recursive function + The output is a uint32_t integer that is set to one if internal recur- + sion when running pcre2_match() is implemented by recursive function + calls that use the system stack to remember their state. This is the + usual way that PCRE2 is compiled. The output is zero if PCRE2 was com- + piled to use blocks of data on the heap instead of recursive function calls. PCRE2_CONFIG_UNICODE_VERSION - The where argument should point to a buffer that is at least 24 code - units long. (The exact length required can be found by calling - pcre2_config() with where set to NULL.) If PCRE2 has been compiled - without Unicode support, the buffer is filled with the text "Unicode - not supported". Otherwise, the Unicode version string (for example, - "7.0.0") is inserted. The number of code units used is returned. This + The where argument should point to a buffer that is at least 24 code + units long. (The exact length required can be found by calling + pcre2_config() with where set to NULL.) If PCRE2 has been compiled + without Unicode support, the buffer is filled with the text "Unicode + not supported". Otherwise, the Unicode version string (for example, + "8.0.0") is inserted. The number of code units used is returned. This is the length of the string plus one unit for the terminating zero. PCRE2_CONFIG_UNICODE - The output is a uint32_t integer that is set to one if Unicode support - is available; otherwise it is set to zero. Unicode support implies UTF + The output is a uint32_t integer that is set to one if Unicode support + is available; otherwise it is set to zero. Unicode support implies UTF support. PCRE2_CONFIG_VERSION - The where argument should point to a buffer that is at least 12 code - units long. (The exact length required can be found by calling - pcre2_config() with where set to NULL.) 
The buffer is filled with the + The where argument should point to a buffer that is at least 12 code + units long. (The exact length required can be found by calling + pcre2_config() with where set to NULL.) The buffer is filled with the PCRE2 version string, zero-terminated. The number of code units used is returned. This is the length of the string plus one unit for the termi- nating zero. @@ -1014,34 +1105,59 @@ COMPILING A PATTERN uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset, pcre2_compile_context *ccontext); - pcre2_code_free(pcre2_code *code); + void pcre2_code_free(pcre2_code *code); - The pcre2_compile() function compiles a pattern into an internal form. - The pattern is defined by a pointer to a string of code units and a - length, If the pattern is zero-terminated, the length can be specified - as PCRE2_ZERO_TERMINATED. The function returns a pointer to a block of - memory that contains the compiled pattern and related data. The caller - must free the memory by calling pcre2_code_free() when it is no longer - needed. + pcre2_code *pcre2_code_copy(const pcre2_code *code); - NOTE: When one of the matching functions is called, pointers to the + pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *code); + + The pcre2_compile() function compiles a pattern into an internal form. + The pattern is defined by a pointer to a string of code units and a + length. If the pattern is zero-terminated, the length can be specified + as PCRE2_ZERO_TERMINATED. The function returns a pointer to a block of + memory that contains the compiled pattern and related data, or NULL if + an error occurred. + + If the compile context argument ccontext is NULL, memory for the com- + piled pattern is obtained by calling malloc(). Otherwise, it is + obtained from the same memory function that was used for the compile + context. The caller must free the memory by calling pcre2_code_free() + when it is no longer needed. 
+ + The function pcre2_code_copy() makes a copy of the compiled code in new + memory, using the same memory allocator as was used for the original. + However, if the code has been processed by the JIT compiler (see + below), the JIT information cannot be copied (because it is position- + dependent). The new copy can initially be used only for non-JIT match- + ing, though it can be passed to pcre2_jit_compile() if required. + + The pcre2_code_copy() function provides a way for individual threads in + a multithreaded application to acquire a private copy of shared com- + piled code. However, it does not make a copy of the character tables + used by the compiled pattern; the new pattern code points to the same + tables as the original code. (See "Locale Support" below for details + of these character tables.) In many applications the same tables are + used throughout, so this behaviour is appropriate. Nevertheless, there + are occasions when a copy of a compiled pattern and the relevant tables + are needed. The pcre2_code_copy_with_tables() function provides this facility. + Copies of both the code and the tables are made, with the new code + pointing to the new tables. The memory for the new tables is automati- + cally freed when pcre2_code_free() is called for the new copy of the + compiled code. + + NOTE: When one of the matching functions is called, pointers to the compiled pattern and the subject string are set in the match data block - so that they can be referenced by the extraction functions. After run- - ning a match, you must not free a compiled pattern (or a subject - string) until after all operations on the match data block have taken - place. + so that they can be referenced by the substring extraction functions. + After running a match, you must not free a compiled pattern (or a sub- + ject string) until after all operations on the match data block have + taken place.
- If the compile context argument ccontext is NULL, memory for the com- - piled pattern is obtained by calling malloc(). Otherwise, it is - obtained from the same memory function that was used for the compile - context. - - The options argument contains various bit settings that affect the com- - pilation. It should be zero if no options are required. The available - options are described below. Some of them (in particular, those that - are compatible with Perl, but some others as well) can also be set and - unset from within the pattern (see the detailed description in the - pcre2pattern documentation). + The options argument for pcre2_compile() contains various bit settings + that affect the compilation. It should be zero if no options are + required. The available options are described below. Some of them (in + particular, those that are compatible with Perl, but some others as + well) can also be set and unset from within the pattern (see the + detailed description in the pcre2pattern documentation). For those options that can be different in different parts of the pat- tern, the contents of the options argument specifies their settings at @@ -1053,13 +1169,23 @@ COMPILING A PATTERN above). If errorcode or erroroffset is NULL, pcre2_compile() returns NULL imme- - diately. Otherwise, if compilation of a pattern fails, pcre2_compile() - returns NULL, having set these variables to an error code and an offset - (number of code units) within the pattern, respectively. The - pcre2_get_error_message() function provides a textual message for each - error code. Compilation errors are positive numbers, but UTF formatting - errors are negative numbers. For an invalid UTF-8 or UTF-16 string, the - offset is that of the first code unit of the failing character. + diately. 
Otherwise, the variables to which these point are set to an + error code and an offset (number of code units) within the pattern, + respectively, when pcre2_compile() returns NULL because a compilation + error has occurred. The values are not defined when compilation is suc- + cessful and pcre2_compile() returns a non-NULL value. + + The value returned in erroroffset is an indication of where in the pat- + tern the error occurred. It is not necessarily the furthest point in + the pattern that was read. For example, after the error "lookbehind + assertion is not fixed length", the error offset points to the start of + the failing assertion. + + The pcre2_get_error_message() function (see "Obtaining a textual error + message" below) provides a textual message for each error code. Compi- + lation errors have positive error codes; UTF formatting error codes are + negative. For an invalid UTF-8 or UTF-16 string, the offset is that of + the first code unit of the failing character. Some errors are not detected until the whole pattern has been scanned; in these cases, the offset passed back is the length of the pattern. @@ -1128,11 +1254,25 @@ COMPILING A PATTERN Perl. If you want a multiline circumflex also to match after a termi- nating newline, you must set PCRE2_ALT_CIRCUMFLEX. + PCRE2_ALT_VERBNAMES + + By default, for compatibility with Perl, the name in any verb sequence + such as (*MARK:NAME) is any sequence of characters that does not + include a closing parenthesis. The name is not processed in any way, + and it is not possible to include a closing parenthesis in the name. + However, if the PCRE2_ALT_VERBNAMES option is set, normal backslash + processing is applied to verb names and only an unescaped closing + parenthesis terminates the name. A closing parenthesis can be included + in a name either as \) or between \Q and \E. 
If the PCRE2_EXTENDED + option is set, unescaped whitespace in verb names is skipped and #-com- + ments are recognized, exactly as in the rest of the pattern. + PCRE2_AUTO_CALLOUT - If this bit is set, pcre2_compile() automatically inserts callout - items, all with number 255, before each pattern item. For discussion of - the callout facility, see the pcre2callout documentation. + If this bit is set, pcre2_compile() automatically inserts callout + items, all with number 255, before each pattern item, except immedi- + ately before or after a callout in the pattern. For discussion of the + callout facility, see the pcre2callout documentation. PCRE2_CASELESS @@ -1196,7 +1336,11 @@ COMPILING A PATTERN If this option is set, an unanchored pattern is required to match before or at the first newline in the subject string, though the - matched text may continue over the newline. + matched text may continue over the newline. See also PCRE2_USE_OFF- + SET_LIMIT, which provides a more general limiting facility. If + PCRE2_FIRSTLINE is set with an offset limit, a match must occur in the + first line and also within the offset limit. In other words, whichever + limit comes first is used. PCRE2_MATCH_UNSET_BACKREF @@ -1236,7 +1380,9 @@ COMPILING A PATTERN piled. This escape can cause unpredictable behaviour in UTF-8 or UTF-16 modes, because it may leave the current matching point in the middle of a multi-code-unit character. This option may be useful in - applications that process patterns from external sources. + applications that process patterns from external sources. Note that + there is also a build-time option that permanently locks out the use of + \C. PCRE2_NEVER_UCP @@ -1263,118 +1409,130 @@ COMPILING A PATTERN theses in the pattern. Any opening parenthesis that is not followed by ? behaves as if it were followed by ?: but named parentheses can still be used for capturing (and they acquire numbers in the usual way). - There is no equivalent of this option in Perl. 
+ There is no equivalent of this option in Perl. Note that, if this + option is set, references to capturing groups (back references or + recursion/subroutine calls) may only refer to named groups, though the + reference can be by name or by number. PCRE2_NO_AUTO_POSSESS If this option is set, it disables "auto-possessification", which is an - optimization that, for example, turns a+b into a++b in order to avoid - backtracks into a+ that can never be successful. However, if callouts - are in use, auto-possessification means that some callouts are never + optimization that, for example, turns a+b into a++b in order to avoid + backtracks into a+ that can never be successful. However, if callouts + are in use, auto-possessification means that some callouts are never taken. You can set this option if you want the matching functions to do - a full unoptimized search and run all the callouts, but it is mainly + a full unoptimized search and run all the callouts, but it is mainly provided for testing purposes. PCRE2_NO_DOTSTAR_ANCHOR If this option is set, it disables an optimization that is applied when - .* is the first significant item in a top-level branch of a pattern, - and all the other branches also start with .* or with \A or \G or ^. - The optimization is automatically disabled for .* if it is inside an - atomic group or a capturing group that is the subject of a back refer- - ence, or if the pattern contains (*PRUNE) or (*SKIP). When the opti- - mization is not disabled, such a pattern is automatically anchored if + .* is the first significant item in a top-level branch of a pattern, + and all the other branches also start with .* or with \A or \G or ^. + The optimization is automatically disabled for .* if it is inside an + atomic group or a capturing group that is the subject of a back refer- + ence, or if the pattern contains (*PRUNE) or (*SKIP). 
When the opti- + mization is not disabled, such a pattern is automatically anchored if PCRE2_DOTALL is set for all the .* items and PCRE2_MULTILINE is not set - for any ^ items. Otherwise, the fact that any match must start either - at the start of the subject or following a newline is remembered. Like + for any ^ items. Otherwise, the fact that any match must start either + at the start of the subject or following a newline is remembered. Like other optimizations, this can cause callouts to be skipped. PCRE2_NO_START_OPTIMIZE - This is an option whose main effect is at matching time. It does not + This is an option whose main effect is at matching time. It does not change what pcre2_compile() generates, but it does affect the output of the JIT compiler. - There are a number of optimizations that may occur at the start of a - match, in order to speed up the process. For example, if it is known - that an unanchored match must start with a specific character, the - matching code searches the subject for that character, and fails imme- - diately if it cannot find it, without actually running the main match- - ing function. This means that a special item such as (*COMMIT) at the - start of a pattern is not considered until after a suitable starting - point for the match has been found. Also, when callouts or (*MARK) - items are in use, these "start-up" optimizations can cause them to be - skipped if the pattern is never actually used. The start-up optimiza- - tions are in effect a pre-scan of the subject that takes place before + There are a number of optimizations that may occur at the start of a + match, in order to speed up the process. For example, if it is known + that an unanchored match must start with a specific character, the + matching code searches the subject for that character, and fails imme- + diately if it cannot find it, without actually running the main match- + ing function. 
This means that a special item such as (*COMMIT) at the + start of a pattern is not considered until after a suitable starting + point for the match has been found. Also, when callouts or (*MARK) + items are in use, these "start-up" optimizations can cause them to be + skipped if the pattern is never actually used. The start-up optimiza- + tions are in effect a pre-scan of the subject that takes place before the pattern is run. The PCRE2_NO_START_OPTIMIZE option disables the start-up optimizations, - possibly causing performance to suffer, but ensuring that in cases - where the result is "no match", the callouts do occur, and that items + possibly causing performance to suffer, but ensuring that in cases + where the result is "no match", the callouts do occur, and that items such as (*COMMIT) and (*MARK) are considered at every possible starting position in the subject string. - Setting PCRE2_NO_START_OPTIMIZE may change the outcome of a matching + Setting PCRE2_NO_START_OPTIMIZE may change the outcome of a matching operation. Consider the pattern (*COMMIT)ABC - When this is compiled, PCRE2 records the fact that a match must start - with the character "A". Suppose the subject string is "DEFABC". The - start-up optimization scans along the subject, finds "A" and runs the - first match attempt from there. The (*COMMIT) item means that the pat- - tern must match the current starting position, which in this case, it - does. However, if the same match is run with PCRE2_NO_START_OPTIMIZE - set, the initial scan along the subject string does not happen. The - first match attempt is run starting from "D" and when this fails, - (*COMMIT) prevents any further matches being tried, so the overall + When this is compiled, PCRE2 records the fact that a match must start + with the character "A". Suppose the subject string is "DEFABC". The + start-up optimization scans along the subject, finds "A" and runs the + first match attempt from there. 
The (*COMMIT) item means that the pat- + tern must match the current starting position, which in this case, it + does. However, if the same match is run with PCRE2_NO_START_OPTIMIZE + set, the initial scan along the subject string does not happen. The + first match attempt is run starting from "D" and when this fails, + (*COMMIT) prevents any further matches being tried, so the overall result is "no match". There are also other start-up optimizations. For example, a minimum length for the subject may be recorded. Consider the pattern (*MARK:A)(X|Y) - The minimum length for a match is one character. If the subject is + The minimum length for a match is one character. If the subject is "ABC", there will be attempts to match "ABC", "BC", and "C". An attempt to match an empty string at the end of the subject does not take place, - because PCRE2 knows that the subject is now too short, and so the - (*MARK) is never encountered. In this case, the optimization does not + because PCRE2 knows that the subject is now too short, and so the + (*MARK) is never encountered. In this case, the optimization does not affect the overall match result, which is still "no match", but it does affect the auxiliary information that is returned. PCRE2_NO_UTF_CHECK - When PCRE2_UTF is set, the validity of the pattern as a UTF string is - automatically checked. There are discussions about the validity of - UTF-8 strings, UTF-16 strings, and UTF-32 strings in the pcre2unicode + When PCRE2_UTF is set, the validity of the pattern as a UTF string is + automatically checked. There are discussions about the validity of + UTF-8 strings, UTF-16 strings, and UTF-32 strings in the pcre2unicode document. If an invalid UTF sequence is found, pcre2_compile() returns a negative error code. If you know that your pattern is valid, and you want to skip this check - for performance reasons, you can set the PCRE2_NO_UTF_CHECK option. 
- When it is set, the effect of passing an invalid UTF string as a pat- - tern is undefined. It may cause your program to crash or loop. Note - that this option can also be passed to pcre2_match() and + for performance reasons, you can set the PCRE2_NO_UTF_CHECK option. + When it is set, the effect of passing an invalid UTF string as a pat- + tern is undefined. It may cause your program to crash or loop. Note + that this option can also be passed to pcre2_match() and pcre_dfa_match(), to suppress validity checking of the subject string. PCRE2_UCP This option changes the way PCRE2 processes \B, \b, \D, \d, \S, \s, \W, - \w, and some of the POSIX character classes. By default, only ASCII - characters are recognized, but if PCRE2_UCP is set, Unicode properties - are used instead to classify characters. More details are given in the + \w, and some of the POSIX character classes. By default, only ASCII + characters are recognized, but if PCRE2_UCP is set, Unicode properties + are used instead to classify characters. More details are given in the section on generic character types in the pcre2pattern page. If you set - PCRE2_UCP, matching one of the items it affects takes much longer. The - option is available only if PCRE2 has been compiled with Unicode sup- + PCRE2_UCP, matching one of the items it affects takes much longer. The + option is available only if PCRE2 has been compiled with Unicode sup- port. PCRE2_UNGREEDY - This option inverts the "greediness" of the quantifiers so that they - are not greedy by default, but become greedy if followed by "?". It is - not compatible with Perl. It can also be set by a (?U) option setting + This option inverts the "greediness" of the quantifiers so that they + are not greedy by default, but become greedy if followed by "?". It is + not compatible with Perl. It can also be set by a (?U) option setting within the pattern. 
+ PCRE2_USE_OFFSET_LIMIT + + This option must be set for pcre2_compile() if pcre2_set_offset_limit() + is going to be used to set a non-default offset limit in a match con- + text for matches that use this pattern. An error is generated if an + offset limit is set without this option. For more details, see the + description of pcre2_set_offset_limit() in the section that describes + match contexts. See also the PCRE2_FIRSTLINE option above. + PCRE2_UTF This option causes PCRE2 to regard both the pattern and the subject @@ -1389,11 +1547,12 @@ COMPILING A PATTERN COMPILATION ERROR CODES There are over 80 positive error codes that pcre2_compile() may return - if it finds an error in the pattern. There are also some negative error - codes that are used for invalid UTF strings. These are the same as - given by pcre2_match() and pcre2_dfa_match(), and are described in the - pcre2unicode page. The pcre2_get_error_message() function can be called - to obtain a textual error message from any error code. + (via errorcode) if it finds an error in the pattern. There are also + some negative error codes that are used for invalid UTF strings. These + are the same as given by pcre2_match() and pcre2_dfa_match(), and are + described in the pcre2unicode page. The pcre2_get_error_message() func- + tion (see "Obtaining a textual error message" below) can be called to + obtain a textual error message from any error code. JUST-IN-TIME (JIT) COMPILATION @@ -1415,53 +1574,53 @@ JUST-IN-TIME (JIT) COMPILATION void pcre2_jit_stack_free(pcre2_jit_stack *jit_stack); - These functions provide support for JIT compilation, which, if the - just-in-time compiler is available, further processes a compiled pat- + These functions provide support for JIT compilation, which, if the + just-in-time compiler is available, further processes a compiled pat- tern into machine code that executes much faster than the pcre2_match() - interpretive matching function. 
Full details are given in the pcre2jit + interpretive matching function. Full details are given in the pcre2jit documentation. - JIT compilation is a heavyweight optimization. It can take some time - for patterns to be analyzed, and for one-off matches and simple pat- - terns the benefit of faster execution might be offset by a much slower - compilation time. Most, but not all patterns can be optimized by the + JIT compilation is a heavyweight optimization. It can take some time + for patterns to be analyzed, and for one-off matches and simple pat- + terns the benefit of faster execution might be offset by a much slower + compilation time. Most, but not all patterns can be optimized by the JIT compiler. LOCALE SUPPORT - PCRE2 handles caseless matching, and determines whether characters are - letters, digits, or whatever, by reference to a set of tables, indexed - by character code point. This applies only to characters whose code - points are less than 256. By default, higher-valued code points never - match escapes such as \w or \d. However, if PCRE2 is built with UTF - support, all characters can be tested with \p and \P, or, alterna- - tively, the PCRE2_UCP option can be set when a pattern is compiled; - this causes \w and friends to use Unicode property support instead of + PCRE2 handles caseless matching, and determines whether characters are + letters, digits, or whatever, by reference to a set of tables, indexed + by character code point. This applies only to characters whose code + points are less than 256. By default, higher-valued code points never + match escapes such as \w or \d. However, if PCRE2 is built with UTF + support, all characters can be tested with \p and \P, or, alterna- + tively, the PCRE2_UCP option can be set when a pattern is compiled; + this causes \w and friends to use Unicode property support instead of the built-in tables. - The use of locales with Unicode is discouraged. 
If you are handling - characters with code points greater than 128, you should either use + The use of locales with Unicode is discouraged. If you are handling + characters with code points greater than 128, you should either use Unicode support, or use locales, but not try to mix the two. - PCRE2 contains an internal set of character tables that are used by - default. These are sufficient for many applications. Normally, the + PCRE2 contains an internal set of character tables that are used by + default. These are sufficient for many applications. Normally, the internal tables recognize only ASCII characters. However, when PCRE2 is built, it is possible to cause the internal tables to be rebuilt in the default "C" locale of the local system, which may cause them to be dif- ferent. - The internal tables can be overridden by tables supplied by the appli- - cation that calls PCRE2. These may be created in a different locale - from the default. As more and more applications change to using Uni- + The internal tables can be overridden by tables supplied by the appli- + cation that calls PCRE2. These may be created in a different locale + from the default. As more and more applications change to using Uni- code, the need for this locale support is expected to die away. - External tables are built by calling the pcre2_maketables() function, - in the relevant locale. The result can be passed to pcre2_compile() as - often as necessary, by creating a compile context and calling - pcre2_set_character_tables() to set the tables pointer therein. For - example, to build and use tables that are appropriate for the French - locale (where accented characters with values greater than 128 are + External tables are built by calling the pcre2_maketables() function, + in the relevant locale. The result can be passed to pcre2_compile() as + often as necessary, by creating a compile context and calling + pcre2_set_character_tables() to set the tables pointer therein. 
For + example, to build and use tables that are appropriate for the French + locale (where accented characters with values greater than 128 are treated as letters), the following code could be used: setlocale(LC_CTYPE, "fr_FR"); @@ -1470,15 +1629,15 @@ LOCALE SUPPORT pcre2_set_character_tables(ccontext, tables); re = pcre2_compile(..., ccontext); - The locale name "fr_FR" is used on Linux and other Unix-like systems; - if you are using Windows, the name for the French locale is "french". - It is the caller's responsibility to ensure that the memory containing + The locale name "fr_FR" is used on Linux and other Unix-like systems; + if you are using Windows, the name for the French locale is "french". + It is the caller's responsibility to ensure that the memory containing the tables remains available for as long as it is needed. The pointer that is passed (via the compile context) to pcre2_compile() - is saved with the compiled pattern, and the same tables are used by - pcre2_match() and pcre_dfa_match(). Thus, for any single pattern, com- - pilation, and matching all happen in the same locale, but different + is saved with the compiled pattern, and the same tables are used by + pcre2_match() and pcre2_dfa_match(). Thus, for any single pattern, com- + pilation, and matching all happen in the same locale, but different patterns can be processed in different locales. @@ -1486,13 +1645,13 @@ INFORMATION ABOUT A COMPILED PATTERN int pcre2_pattern_info(const pcre2 *code, uint32_t what, void *where); - The pcre2_pattern_info() function returns general information about a + The pcre2_pattern_info() function returns general information about a compiled pattern. For information about callouts, see the next section. - The first argument for pcre2_pattern_info() is a pointer to the com- + The first argument for pcre2_pattern_info() is a pointer to the com- piled pattern.
The second argument specifies which piece of information - is required, and the third argument is a pointer to a variable to - receive the data. If the third argument is NULL, the first argument is - ignored, and the function returns the size in bytes of the variable + is required, and the third argument is a pointer to a variable to + receive the data. If the third argument is NULL, the first argument is + ignored, and the function returns the size in bytes of the variable that is required for the information requested. Otherwise, The yield of the function is zero for success, or one of the following negative num- bers: @@ -1502,9 +1661,9 @@ INFORMATION ABOUT A COMPILED PATTERN PCRE2_ERROR_BADOPTION the value of what was invalid PCRE2_ERROR_UNSET the requested field is not set - The "magic number" is placed at the start of each compiled pattern as - an simple check against passing an arbitrary memory pointer. Here is a - typical call of pcre2_pattern_info(), to obtain the length of the com- + The "magic number" is placed at the start of each compiled pattern as + a simple check against passing an arbitrary memory pointer. Here is a + typical call of pcre2_pattern_info(), to obtain the length of the com- piled pattern: int rc; @@ -1521,14 +1680,17 @@ INFORMATION ABOUT A COMPILED PATTERN PCRE2_INFO_ARGOPTIONS Return a copy of the pattern's options. The third argument should point - to a uint32_t variable. PCRE2_INFO_ARGOPTIONS returns exactly the - options that were passed to pcre2_compile(), whereas PCRE2_INFO_ALLOP- - TIONS returns the compile options as modified by any top-level option - settings at the start of the pattern itself. In other words, they are - the options that will be in force when matching starts. For example, if - the pattern /(?im)abc(?-i)d/ is compiled with the PCRE2_EXTENDED - option, the result is PCRE2_CASELESS, PCRE2_MULTILINE, and - PCRE2_EXTENDED. + to a uint32_t variable.
PCRE2_INFO_ARGOPTIONS returns exactly the + options that were passed to pcre2_compile(), whereas PCRE2_INFO_ALLOP- + TIONS returns the compile options as modified by any top-level (*XXX) + option settings such as (*UTF) at the start of the pattern itself. + + For example, if the pattern /(*UTF)abc/ is compiled with the + PCRE2_EXTENDED option, the result for PCRE2_INFO_ALLOPTIONS is + PCRE2_EXTENDED and PCRE2_UTF. Option settings such as (?i) that can + change within a pattern do not affect the result of PCRE2_INFO_ALLOP- + TIONS, even if they appear right at the start of the pattern. (This was + different in some earlier releases.) A pattern compiled without PCRE2_ANCHORED is automatically anchored by PCRE2 if the first significant item in every top-level branch is one of @@ -1572,20 +1734,30 @@ INFORMATION ABOUT A COMPILED PATTERN PCRE2_INFO_CAPTURECOUNT - Return the number of capturing subpatterns in the pattern. The third - argument should point to an uint32_t variable. + Return the highest capturing subpattern number in the pattern. In pat- + terns where (?| is not used, this is also the total number of capturing + subpatterns. The third argument should point to a uint32_t variable. + + PCRE2_INFO_FIRSTBITMAP + + In the absence of a single first code unit for a non-anchored pattern, + pcre2_compile() may construct a 256-bit table that defines a fixed set + of values for the first code unit in any match. For example, a pattern + that starts with [abc] results in a table with three bits set. When + code unit values greater than 255 are supported, the flag bit for 255 + means "any code unit of value 255 or above". If such a table was con- + structed, a pointer to it is returned. Otherwise NULL is returned. The + third argument should point to a const uint8_t * variable. PCRE2_INFO_FIRSTCODETYPE Return information about the first code unit of any matched string, for a non-anchored pattern. The third argument should point to an uint32_t - variable.
- - If there is a fixed first value, for example, the letter "c" from a - pattern such as (cat|cow|coyote), 1 is returned, and the character - value can be retrieved using PCRE2_INFO_FIRSTCODEUNIT. If there is no - fixed first value, but it is known that a match can occur only at the - start of the subject or following a newline in the subject, 2 is + variable. If there is a fixed first value, for example, the letter "c" + from a pattern such as (cat|cow|coyote), 1 is returned, and the charac- + ter value can be retrieved using PCRE2_INFO_FIRSTCODEUNIT. If there is + no fixed first value, but it is known that a match can occur only at + the start of the subject or following a newline in the subject, 2 is returned. Otherwise, and for anchored patterns, 0 is returned. PCRE2_INFO_FIRSTCODEUNIT @@ -1598,16 +1770,10 @@ INFORMATION ABOUT A COMPILED PATTERN value can be up to 0x10ffff, and up to 0xffffffff when not using UTF-32 mode. - PCRE2_INFO_FIRSTBITMAP + PCRE2_INFO_HASBACKSLASHC - In the absence of a single first code unit for a non-anchored pattern, - pcre2_compile() may construct a 256-bit table that defines a fixed set - of values for the first code unit in any match. For example, a pattern - that starts with [abc] results in a table with three bits set. When - code unit values greater than 255 are supported, the flag bit for 255 - means "any code unit of value 255 or above". If such a table was con- - structed, a pointer to it is returned. Otherwise NULL is returned. The - third argument should point to an const uint8_t * variable. + Return 1 if the pattern contains any instances of \C, otherwise 0. The + third argument should point to an uint32_t variable. PCRE2_INFO_HASCRORLF @@ -1635,24 +1801,26 @@ INFORMATION ABOUT A COMPILED PATTERN any matched string, other than at its start. The third argument should point to an uint32_t variable. If there is no such value, 0 is returned. 
When 1 is returned, the code unit value itself can be - retrieved using PCRE2_INFO_LASTCODEUNIT. - - For anchored patterns, a last literal value is recorded only if it fol- - lows something of variable length. For example, for the pattern - /^a\d+z\d+/ the returned value is 1 (with "z" returned from - PCRE2_INFO_LASTCODEUNIT), but for /^a\dz\d/ the returned value is 0. + retrieved using PCRE2_INFO_LASTCODEUNIT. For anchored patterns, a last + literal value is recorded only if it follows something of variable + length. For example, for the pattern /^a\d+z\d+/ the returned value is + 1 (with "z" returned from PCRE2_INFO_LASTCODEUNIT), but for /^a\dz\d/ + the returned value is 0. PCRE2_INFO_LASTCODEUNIT - Return the value of the rightmost literal data unit that must exist in - any matched string, other than at its start, if such a value has been - recorded. The third argument should point to an uint32_t variable. If + Return the value of the rightmost literal data unit that must exist in + any matched string, other than at its start, if such a value has been + recorded. The third argument should point to an uint32_t variable. If there is no such value, 0 is returned. PCRE2_INFO_MATCHEMPTY - Return 1 if the pattern can match an empty string, otherwise 0. The - third argument should point to an uint32_t variable. + Return 1 if the pattern might match an empty string, otherwise 0. The + third argument should point to an uint32_t variable. When a pattern + contains recursive subroutine calls it is not always possible to deter- + mine whether or not it can match an empty string. PCRE2 takes a cau- + tious approach and returns 1 in such cases. 
PCRE2_INFO_MATCHLIMIT @@ -1809,11 +1977,11 @@ SERIALIZATION AND PRECOMPILING THE MATCH DATA BLOCK - pcre2_match_data_create(uint32_t ovecsize, + pcre2_match_data *pcre2_match_data_create(uint32_t ovecsize, pcre2_general_context *gcontext); - pcre2_match_data_create_from_pattern(const pcre2_code *code, - pcre2_general_context *gcontext); + pcre2_match_data *pcre2_match_data_create_from_pattern( + const pcre2_code *code, pcre2_general_context *gcontext); void pcre2_match_data_free(pcre2_match_data *match_data); @@ -1821,7 +1989,7 @@ THE MATCH DATA BLOCK match data block, which is an opaque structure that is accessed by function calls. In particular, the match data block contains a vector of offsets into the subject string that define the matched part of the - subject and any substrings that were captured. This is know as the + subject and any substrings that were captured. This is known as the ovector. Before calling pcre2_match(), pcre2_dfa_match(), or pcre2_jit_match() @@ -1962,72 +2130,88 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION The unused bits of the options argument for pcre2_match() must be zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_NOTBOL, - PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, + PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Their action is described below. Setting PCRE2_ANCHORED at match time is not supported by the just-in- time (JIT) compiler. If it is set, JIT matching is disabled and the - normal interpretive code in pcre2_match() is run. The remaining options - are supported for JIT matching. + normal interpretive code in pcre2_match() is run. Apart from + PCRE2_NO_JIT (obviously), the remaining options are supported for JIT + matching. PCRE2_ANCHORED The PCRE2_ANCHORED option limits pcre2_match() to matching at the first - matching position. 
If a pattern was compiled with PCRE2_ANCHORED, or - turned out to be anchored by virtue of its contents, it cannot be made - unachored at matching time. Note that setting the option at match time + matching position. If a pattern was compiled with PCRE2_ANCHORED, or + turned out to be anchored by virtue of its contents, it cannot be made + unanchored at matching time. Note that setting the option at match time disables JIT matching. PCRE2_NOTBOL This option specifies that first character of the subject string is not - the beginning of a line, so the circumflex metacharacter should not - match before it. Setting this without having set PCRE2_MULTILINE at + the beginning of a line, so the circumflex metacharacter should not + match before it. Setting this without having set PCRE2_MULTILINE at compile time causes circumflex never to match. This option affects only the behaviour of the circumflex metacharacter. It does not affect \A. PCRE2_NOTEOL This option specifies that the end of the subject string is not the end - of a line, so the dollar metacharacter should not match it nor (except - in multiline mode) a newline immediately before it. Setting this with- - out having set PCRE2_MULTILINE at compile time causes dollar never to + of a line, so the dollar metacharacter should not match it nor (except + in multiline mode) a newline immediately before it. Setting this with- + out having set PCRE2_MULTILINE at compile time causes dollar never to match. This option affects only the behaviour of the dollar metacharac- ter. It does not affect \Z or \z. PCRE2_NOTEMPTY An empty string is not considered to be a valid match if this option is - set. If there are alternatives in the pattern, they are tried. If all - the alternatives match the empty string, the entire match fails. For + set. If there are alternatives in the pattern, they are tried. If all + the alternatives match the empty string, the entire match fails. For example, if the pattern a?b?
- is applied to a string not beginning with "a" or "b", it matches an + is applied to a string not beginning with "a" or "b", it matches an empty string at the start of the subject. With PCRE2_NOTEMPTY set, this - match is not valid, so pcre2_match() searches further into the string + match is not valid, so pcre2_match() searches further into the string for occurrences of "a" or "b". PCRE2_NOTEMPTY_ATSTART - This is like PCRE2_NOTEMPTY, except that it locks out an empty string + This is like PCRE2_NOTEMPTY, except that it locks out an empty string match only at the first matching position, that is, at the start of the - subject plus the starting offset. An empty string match later in the - subject is permitted. If the pattern is anchored, such a match can + subject plus the starting offset. An empty string match later in the + subject is permitted. If the pattern is anchored, such a match can occur only if the pattern contains \K. + PCRE2_NO_JIT + + By default, if a pattern has been successfully processed by + pcre2_jit_compile(), JIT is automatically used when pcre2_match() is + called with options that JIT supports. Setting PCRE2_NO_JIT disables + the use of JIT; it forces matching to be done by the interpreter. + PCRE2_NO_UTF_CHECK When PCRE2_UTF is set at compile time, the validity of the subject as a UTF string is checked by default when pcre2_match() is subsequently - called. The entire string is checked before any other processing takes - place, and a negative error code is returned if the check fails. There - are several UTF error codes for each code unit width, corresponding to - different problems with the code unit sequence. The value of startoff- - set is also checked, to ensure that it points to the start of a charac- - ter or to the end of the subject. There are discussions about the + called. 
If a non-zero starting offset is given, the check is applied + only to that part of the subject that could be inspected during match- + ing, and there is a check that the starting offset points to the first + code unit of a character or to the end of the subject. If there are no + lookbehind assertions in the pattern, the check starts at the starting + offset. Otherwise, it starts at the length of the longest lookbehind + before the starting offset, or at the start of the subject if there are + not that many characters before the starting offset. Note that the + sequences \b and \B are one-character lookbehinds. + + The check is carried out before any other processing takes place, and a + negative error code is returned if the check fails. There are several + UTF error codes for each code unit width, corresponding to different + problems with the code unit sequence. There are discussions about the validity of UTF-8 strings, UTF-16 strings, and UTF-32 strings in the pcre2unicode page. @@ -2068,32 +2252,35 @@ NEWLINE HANDLING WHEN MATCHING When PCRE2 is built, a default newline convention is set; this is usu- ally the standard convention for the operating system. The default can - be overridden in a compile context. During matching, the newline - choice affects the behaviour of the dot, circumflex, and dollar - metacharacters. It may also alter the way the match starting position - is advanced after a match failure for an unanchored pattern. + be overridden in a compile context by calling pcre2_set_newline(). It + can also be overridden by starting a pattern string with, for example, + (*CRLF), as described in the section on newline conventions in the + pcre2pattern page. During matching, the newline choice affects the be- + haviour of the dot, circumflex, and dollar metacharacters. It may also + alter the way the match starting position is advanced after a match + failure for an unanchored pattern. 
When PCRE2_NEWLINE_CRLF, PCRE2_NEWLINE_ANYCRLF, or PCRE2_NEWLINE_ANY is - set as the newline convention, and a match attempt for an unanchored + set as the newline convention, and a match attempt for an unanchored pattern fails when the current starting position is at a CRLF sequence, - and the pattern contains no explicit matches for CR or LF characters, - the match position is advanced by two characters instead of one, in + and the pattern contains no explicit matches for CR or LF characters, + the match position is advanced by two characters instead of one, in other words, to after the CRLF. The above rule is a compromise that makes the most common cases work as - expected. For example, if the pattern is .+A (and the PCRE2_DOTALL + expected. For example, if the pattern is .+A (and the PCRE2_DOTALL option is not set), it does not match the string "\r\nA" because, after - failing at the start, it skips both the CR and the LF before retrying. - However, the pattern [\r\n]A does match that string, because it con- + failing at the start, it skips both the CR and the LF before retrying. + However, the pattern [\r\n]A does match that string, because it con- tains an explicit CR or LF reference, and so advances only by one char- acter after the first failure. An explicit match for CR of LF is either a literal appearance of one of - those characters in the pattern, or one of the \r or \n escape - sequences. Implicit matches such as [^X] do not count, nor does \s, + those characters in the pattern, or one of the \r or \n escape + sequences. Implicit matches such as [^X] do not count, nor does \s, even though it includes CR and LF in the characters that it matches. - Notwithstanding the above, anomalous effects may still occur when CRLF + Notwithstanding the above, anomalous effects may still occur when CRLF is a valid newline sequence and explicit \r or \n escapes appear in the pattern. 
@@ -2104,24 +2291,25 @@ HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *match_data); - In general, a pattern matches a certain portion of the subject, and in - addition, further substrings from the subject may be picked out by - parenthesized parts of the pattern. Following the usage in Jeffrey - Friedl's book, this is called "capturing" in what follows, and the - phrase "capturing subpattern" or "capturing group" is used for a frag- - ment of a pattern that picks out a substring. PCRE2 supports several + In general, a pattern matches a certain portion of the subject, and in + addition, further substrings from the subject may be picked out by + parenthesized parts of the pattern. Following the usage in Jeffrey + Friedl's book, this is called "capturing" in what follows, and the + phrase "capturing subpattern" or "capturing group" is used for a frag- + ment of a pattern that picks out a substring. PCRE2 supports several other kinds of parenthesized subpattern that do not cause substrings to - be captured. The pcre2_pattern_info() function can be used to find out + be captured. The pcre2_pattern_info() function can be used to find out how many capturing subpatterns there are in a compiled pattern. - A successful match returns the overall matched string and any captured - substrings to the caller via a vector of PCRE2_SIZE values. This is - called the ovector, and is contained within the match data block. You - can obtain direct access to the ovector by calling pcre2_get_ovec- - tor_pointer() to find its address, and pcre2_get_ovector_count() to - find the number of pairs of values it contains. Alternatively, you can - use the auxiliary functions for accessing captured substrings by number - or by name (see below). + You can use auxiliary functions for accessing captured substrings by + number or by name, as described in sections below. 
+ + Alternatively, you can make direct use of the vector of PCRE2_SIZE val- + ues, called the ovector, which contains the offsets of captured + strings. It is part of the match data block. The function + pcre2_get_ovector_pointer() returns the address of the ovector, and + pcre2_get_ovector_count() returns the number of pairs of values it con- + tains. Within the ovector, the first in each pair of values is set to the off- set of the first code unit of a substring, and the second is set to the @@ -2200,42 +2388,48 @@ OTHER INFORMATION ABOUT A MATCH failure to match (PCRE2_ERROR_NOMATCH), a (*MARK) name may be avail- able, and pcre2_get_mark() can be called. It returns a pointer to the zero-terminated name, which is within the compiled pattern. Otherwise - NULL is returned. After a successful match, the (*MARK) name that is - returned is the last one encountered on the matching path through the - pattern. After a "no match" or a partial match, the last encountered - (*MARK) name is returned. For example, consider this pattern: + NULL is returned. The length of the (*MARK) name (excluding the termi- + nating zero) is stored in the code unit that precedes the name. You + should use this instead of relying on the terminating zero if the + (*MARK) name might contain a binary zero. + + After a successful match, the (*MARK) name that is returned is the last + one encountered on the matching path through the pattern. After a "no + match" or a partial match, the last encountered (*MARK) name is + returned. For example, consider this pattern: ^(*MARK:A)((*MARK:B)a|b)c - When it matches "bc", the returned mark is A. The B mark is "seen" in - the first branch of the group, but it is not on the matching path. On - the other hand, when this pattern fails to match "bx", the returned + When it matches "bc", the returned mark is A. The B mark is "seen" in + the first branch of the group, but it is not on the matching path.
On + the other hand, when this pattern fails to match "bx", the returned mark is B. - After a successful match, a partial match, or one of the invalid UTF - errors (for example, PCRE2_ERROR_UTF8_ERR5), pcre2_get_startchar() can + After a successful match, a partial match, or one of the invalid UTF + errors (for example, PCRE2_ERROR_UTF8_ERR5), pcre2_get_startchar() can be called. After a successful or partial match it returns the code unit - offset of the character at which the match started. For a non-partial - match, this can be different to the value of ovector[0] if the pattern - contains the \K escape sequence. After a partial match, however, this - value is always the same as ovector[0] because \K does not affect the + offset of the character at which the match started. For a non-partial + match, this can be different to the value of ovector[0] if the pattern + contains the \K escape sequence. After a partial match, however, this + value is always the same as ovector[0] because \K does not affect the result of a partial match. - After a UTF check failure, pcre2_get_startchar() can be used to obtain + After a UTF check failure, pcre2_get_startchar() can be used to obtain the code unit offset of the invalid UTF character. Details are given in the pcre2unicode page. ERROR RETURNS FROM pcre2_match() - If pcre2_match() fails, it returns a negative number. This can be con- - verted to a text string by calling pcre2_get_error_message(). Negative - error codes are also returned by other functions, and are documented - with them. The codes are given names in the header file. If UTF check- - ing is in force and an invalid UTF subject string is detected, one of a - number of UTF-specific negative error codes is returned. Details are - given in the pcre2unicode page. The following are the other errors that - may be returned by pcre2_match(): + If pcre2_match() fails, it returns a negative number. 
This can be con- + verted to a text string by calling the pcre2_get_error_message() func- + tion (see "Obtaining a textual error message" below). Negative error + codes are also returned by other functions, and are documented with + them. The codes are given names in the header file. If UTF checking is + in force and an invalid UTF subject string is detected, one of a number + of UTF-specific negative error codes is returned. Details are given in + the pcre2unicode page. The following are the other errors that may be + returned by pcre2_match(): PCRE2_ERROR_NOMATCH @@ -2331,6 +2525,27 @@ ERROR RETURNS FROM pcre2_match() The internal recursion limit was reached. +OBTAINING A TEXTUAL ERROR MESSAGE + + int pcre2_get_error_message(int errorcode, PCRE2_UCHAR *buffer, + PCRE2_SIZE bufflen); + + A text message for an error code from any PCRE2 function (compile, + match, or auxiliary) can be obtained by calling pcre2_get_error_mes- + sage(). The code is passed as the first argument, with the remaining + two arguments specifying a code unit buffer and its length, into which + the text message is placed. Note that the message is returned in code + units of the appropriate width for the library that is being used. + + The returned message is terminated with a trailing zero, and the func- + tion returns the number of code units used, excluding the trailing + zero. If the error number is unknown, the negative error code + PCRE2_ERROR_BADDATA is returned. If the buffer is too small, the mes- + sage is truncated (but still with a trailing zero), and the negative + error code PCRE2_ERROR_NOMEMORY is returned. None of the messages are + very long; a buffer size of 120 code units is ample. 
+ + EXTRACTING CAPTURED SUBSTRINGS BY NUMBER int pcre2_substring_length_bynumber(pcre2_match_data *match_data, @@ -2346,39 +2561,39 @@ EXTRACTING CAPTURED SUBSTRINGS BY NUMBER void pcre2_substring_free(PCRE2_UCHAR *buffer); - Captured substrings can be accessed directly by using the ovector as + Captured substrings can be accessed directly by using the ovector as described above. For convenience, auxiliary functions are provided for - extracting captured substrings as new, separate, zero-terminated + extracting captured substrings as new, separate, zero-terminated strings. A substring that contains a binary zero is correctly extracted - and has a further zero added on the end, but the result is not, of + and has a further zero added on the end, but the result is not, of course, a C string. The functions in this section identify substrings by number. The number zero refers to the entire matched substring, with higher numbers refer- - ring to substrings captured by parenthesized groups. After a partial - match, only substring zero is available. An attempt to extract any - other substring gives the error PCRE2_ERROR_PARTIAL. The next section + ring to substrings captured by parenthesized groups. After a partial + match, only substring zero is available. An attempt to extract any + other substring gives the error PCRE2_ERROR_PARTIAL. The next section describes similar functions for extracting captured substrings by name. - If a pattern uses the \K escape sequence within a positive assertion, + If a pattern uses the \K escape sequence within a positive assertion, the reported start of a successful match can be greater than the end of - the match. For example, if the pattern (?=ab\K) is matched against - "ab", the start and end offset values for the match are 2 and 0. In - this situation, calling these functions with a zero substring number + the match. 
For example, if the pattern (?=ab\K) is matched against + "ab", the start and end offset values for the match are 2 and 0. In + this situation, calling these functions with a zero substring number extracts a zero-length empty string. - You can find the length in code units of a captured substring without - extracting it by calling pcre2_substring_length_bynumber(). The first - argument is a pointer to the match data block, the second is the group - number, and the third is a pointer to a variable into which the length - is placed. If you just want to know whether or not the substring has + You can find the length in code units of a captured substring without + extracting it by calling pcre2_substring_length_bynumber(). The first + argument is a pointer to the match data block, the second is the group + number, and the third is a pointer to a variable into which the length + is placed. If you just want to know whether or not the substring has been captured, you can pass the third argument as NULL. - The pcre2_substring_copy_bynumber() function copies a captured sub- - string into a supplied buffer, whereas pcre2_substring_get_bynumber() - copies it into new memory, obtained using the same memory allocation - function that was used for the match data block. The first two argu- - ments of these functions are a pointer to the match data block and a + The pcre2_substring_copy_bynumber() function copies a captured sub- + string into a supplied buffer, whereas pcre2_substring_get_bynumber() + copies it into new memory, obtained using the same memory allocation + function that was used for the match data block. The first two argu- + ments of these functions are a pointer to the match data block and a capturing group number. The final arguments of pcre2_substring_copy_bynumber() are a pointer to @@ -2387,25 +2602,25 @@ EXTRACTING CAPTURED SUBSTRINGS BY NUMBER for the extracted substring, excluding the terminating zero. 
For pcre2_substring_get_bynumber() the third and fourth arguments point - to variables that are updated with a pointer to the new memory and the - number of code units that comprise the substring, again excluding the - terminating zero. When the substring is no longer needed, the memory + to variables that are updated with a pointer to the new memory and the + number of code units that comprise the substring, again excluding the + terminating zero. When the substring is no longer needed, the memory should be freed by calling pcre2_substring_free(). - The return value from all these functions is zero for success, or a - negative error code. If the pattern match failed, the match failure - code is returned. If a substring number greater than zero is used - after a partial match, PCRE2_ERROR_PARTIAL is returned. Other possible + The return value from all these functions is zero for success, or a + negative error code. If the pattern match failed, the match failure + code is returned. If a substring number greater than zero is used + after a partial match, PCRE2_ERROR_PARTIAL is returned. Other possible error codes are: PCRE2_ERROR_NOMEMORY - The buffer was too small for pcre2_substring_copy_bynumber(), or the + The buffer was too small for pcre2_substring_copy_bynumber(), or the attempt to get memory failed for pcre2_substring_get_bynumber(). PCRE2_ERROR_NOSUBSTRING - There is no substring with that number in the pattern, that is, the + There is no substring with that number in the pattern, that is, the number is greater than the number of capturing parentheses. PCRE2_ERROR_UNAVAILABLE @@ -2416,8 +2631,8 @@ EXTRACTING CAPTURED SUBSTRINGS BY NUMBER PCRE2_ERROR_UNSET - The substring did not participate in the match. For example, if the - pattern is (abc)|(def) and the subject is "def", and the ovector con- + The substring did not participate in the match. 
For example, if the + pattern is (abc)|(def) and the subject is "def", and the ovector con- tains at least two capturing slots, substring number 1 is unset. @@ -2428,32 +2643,32 @@ EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS void pcre2_substring_list_free(PCRE2_SPTR *list); - The pcre2_substring_list_get() function extracts all available sub- - strings and builds a list of pointers to them. It also (optionally) - builds a second list that contains their lengths (in code units), + The pcre2_substring_list_get() function extracts all available sub- + strings and builds a list of pointers to them. It also (optionally) + builds a second list that contains their lengths (in code units), excluding a terminating zero that is added to each of them. All this is done in a single block of memory that is obtained using the same memory allocation function that was used to get the match data block. - This function must be called only after a successful match. If called + This function must be called only after a successful match. If called after a partial match, the error code PCRE2_ERROR_PARTIAL is returned. - The address of the memory block is returned via listptr, which is also + The address of the memory block is returned via listptr, which is also the start of the list of string pointers. The end of the list is marked - by a NULL pointer. The address of the list of lengths is returned via - lengthsptr. If your strings do not contain binary zeros and you do not + by a NULL pointer. The address of the list of lengths is returned via + lengthsptr. If your strings do not contain binary zeros and you do not therefore need the lengths, you may supply NULL as the lengthsptr argu- - ment to disable the creation of a list of lengths. The yield of the - function is zero if all went well, or PCRE2_ERROR_NOMEMORY if the mem- - ory block could not be obtained. When the list is no longer needed, it + ment to disable the creation of a list of lengths. 
The yield of the + function is zero if all went well, or PCRE2_ERROR_NOMEMORY if the mem- + ory block could not be obtained. When the list is no longer needed, it should be freed by calling pcre2_substring_list_free(). If this function encounters a substring that is unset, which can happen - when capturing subpattern number n+1 matches some part of the subject, - but subpattern n has not been used at all, it returns an empty string. - This can be distinguished from a genuine zero-length substring by + when capturing subpattern number n+1 matches some part of the subject, + but subpattern n has not been used at all, it returns an empty string. + This can be distinguished from a genuine zero-length substring by inspecting the appropriate offset in the ovector, which contain - PCRE2_UNSET for unset substrings, or by calling pcre2_sub- + PCRE2_UNSET for unset substrings, or by calling pcre2_sub- string_length_bynumber(). @@ -2473,39 +2688,39 @@ EXTRACTING CAPTURED SUBSTRINGS BY NAME void pcre2_substring_free(PCRE2_UCHAR *buffer); - To extract a substring by name, you first have to find associated num- + To extract a substring by name, you first have to find associated num- ber. For example, for this pattern: (a+)b(?<xxx>\d+)... the number of the subpattern called "xxx" is 2. If the name is known to - be unique (PCRE2_DUPNAMES was not set), you can find the number from + be unique (PCRE2_DUPNAMES was not set), you can find the number from the name by calling pcre2_substring_number_from_name(). The first argu- - ment is the compiled pattern, and the second is the name. The yield of + ment is the compiled pattern, and the second is the name. The yield of the function is the subpattern number, PCRE2_ERROR_NOSUBSTRING if there - is no subpattern of that name, or PCRE2_ERROR_NOUNIQUESUBSTRING if - there is more than one subpattern of that name.
Given the number, you - can extract the substring directly, or use one of the functions + is no subpattern of that name, or PCRE2_ERROR_NOUNIQUESUBSTRING if + there is more than one subpattern of that name. Given the number, you + can extract the substring directly, or use one of the functions described above. - For convenience, there are also "byname" functions that correspond to - the "bynumber" functions, the only difference being that the second - argument is a name instead of a number. If PCRE2_DUPNAMES is set and + For convenience, there are also "byname" functions that correspond to + the "bynumber" functions, the only difference being that the second + argument is a name instead of a number. If PCRE2_DUPNAMES is set and there are duplicate names, these functions scan all the groups with the given name, and return the first named string that is set. - If there are no groups with the given name, PCRE2_ERROR_NOSUBSTRING is - returned. If all groups with the name have numbers that are greater - than the number of slots in the ovector, PCRE2_ERROR_UNAVAILABLE is - returned. If there is at least one group with a slot in the ovector, + If there are no groups with the given name, PCRE2_ERROR_NOSUBSTRING is + returned. If all groups with the name have numbers that are greater + than the number of slots in the ovector, PCRE2_ERROR_UNAVAILABLE is + returned. If there is at least one group with a slot in the ovector, but no group is found to be set, PCRE2_ERROR_UNSET is returned. Warning: If the pattern uses the (?| feature to set up multiple subpat- - terns with the same number, as described in the section on duplicate - subpattern numbers in the pcre2pattern page, you cannot use names to - distinguish the different subpatterns, because names are not included - in the compiled code. The matching process uses only numbers. 
For this - reason, the use of different names for subpatterns of the same number + terns with the same number, as described in the section on duplicate + subpattern numbers in the pcre2pattern page, you cannot use names to + distinguish the different subpatterns, because names are not included + in the compiled code. The matching process uses only numbers. For this + reason, the use of different names for subpatterns of the same number causes an error at compile time. @@ -2514,58 +2729,195 @@ CREATING A NEW STRING WITH SUBSTITUTIONS int pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, PCRE2_SIZE startoffset, uint32_t options, pcre2_match_data *match_data, - pcre2_match_context *mcontext, PCRE2_SPTR replacementzfP, + pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength, PCRE2_UCHAR *outputbufferP, PCRE2_SIZE *outlengthptr); - This function calls pcre2_match() and then makes a copy of the subject - string in outputbuffer, replacing the part that was matched with the - replacement string, whose length is supplied in rlength. This can be - given as PCRE2_ZERO_TERMINATED for a zero-terminated string. - In the replacement string, which is interpreted as a UTF string in UTF - mode, and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK - option is set, a dollar character is an escape character that can spec- - ify the insertion of characters from capturing groups in the pattern. - The following forms are recognized: + This function calls pcre2_match() and then makes a copy of the subject + string in outputbuffer, replacing the part that was matched with the + replacement string, whose length is supplied in rlength. This can be + given as PCRE2_ZERO_TERMINATED for a zero-terminated string. Matches in + which a \K item in a lookahead in the pattern causes the match to end + before it starts are not supported, and give rise to an error return. 
- $$ insert a dollar character - $ insert the contents of group - ${} insert the contents of group - - Either a group number or a group name can be given for . Curly - brackets are required only if the following character would be inter- - preted as part of the number or name. The number may be zero to include - the entire matched string. For example, if the pattern a(b)c is - matched with "=abc=" and the replacement string "+$1$0$1+", the result - is "=+babcb+=". Group insertion is done by calling pcre2_copy_byname() - or pcre2_copy_bynumber() as appropriate. - - The first seven arguments of pcre2_substitute() are the same as for + The first seven arguments of pcre2_substitute() are the same as for pcre2_match(), except that the partial matching options are not permit- - ted, and match_data may be passed as NULL, in which case a match data - block is obtained and freed within this function, using memory manage- - ment functions from the match context, if provided, or else those that + ted, and match_data may be passed as NULL, in which case a match data + block is obtained and freed within this function, using memory manage- + ment functions from the match context, if provided, or else those that were used to allocate memory for the compiled code. - There is one additional option, PCRE2_SUBSTITUTE_GLOBAL, which causes - the function to iterate over the subject string, replacing every match- - ing substring. If this is not set, only the first matching substring is - replaced. - The outlengthptr argument must point to a variable that contains the - length, in code units, of the output buffer. It is updated to contain - the length of the new string, excluding the trailing zero that is auto- - matically added. + length, in code units, of the output buffer. If the function is suc- + cessful, the value is updated to contain the length of the new string, + excluding the trailing zero that is automatically added. 
- The function returns the number of replacements that were made. This - may be zero if no matches were found, and is never greater than 1 - unless PCRE2_SUBSTITUTE_GLOBAL is set. In the event of an error, a neg- - ative error code is returned. Except for PCRE2_ERROR_NOMATCH (which is - never returned), any errors from pcre2_match() or the substring copying - functions are passed straight back. PCRE2_ERROR_BADREPLACEMENT is - returned for an invalid replacement string (unrecognized sequence fol- - lowing a dollar sign), and PCRE2_ERROR_NOMEMORY is returned if the out- - put buffer is not big enough. + If the function is not successful, the value set via outlengthptr + depends on the type of error. For syntax errors in the replacement + string, the value is the offset in the replacement string where the + error was detected. For other errors, the value is PCRE2_UNSET by + default. This includes the case of the output buffer being too small, + unless PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set (see below), in which + case the value is the minimum length needed, including space for the + trailing zero. Note that in order to compute the required length, + pcre2_substitute() has to simulate all the matching and copying, + instead of giving an error return as soon as the buffer overflows. Note + also that the length is in code units, not bytes. + + In the replacement string, which is interpreted as a UTF string in UTF + mode, and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK + option is set, a dollar character is an escape character that can spec- + ify the insertion of characters from capturing groups or (*MARK) items + in the pattern. The following forms are always recognized: + + $$ insert a dollar character + $<n> or ${<n>} insert the contents of group <n> + $*MARK or ${*MARK} insert the name of the last (*MARK) encountered + + Either a group number or a group name can be given for <n>.
Curly + brackets are required only if the following character would be inter- + preted as part of the number or name. The number may be zero to include + the entire matched string. For example, if the pattern a(b)c is + matched with "=abc=" and the replacement string "+$1$0$1+", the result + is "=+babcb+=". + + The facility for inserting a (*MARK) name can be used to perform simple + simultaneous substitutions, as this pcre2test example shows: + + /(*:pear)apple|(*:orange)lemon/g,replace=${*MARK} + apple lemon + 2: pear orange + + As well as the usual options for pcre2_match(), a number of additional + options can be set in the options argument. + + PCRE2_SUBSTITUTE_GLOBAL causes the function to iterate over the subject + string, replacing every matching substring. If this is not set, only + the first matching substring is replaced. If any matched substring has + zero length, after the substitution has happened, an attempt to find a + non-empty match at the same position is performed. If this is not suc- + cessful, the current position is advanced by one character except when + CRLF is a valid newline sequence and the next two characters are CR, + LF. In this case, the current position is advanced by two characters. + + PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output + buffer is too small. The default action is to return PCRE2_ERROR_NOMEM- + ORY immediately. If this option is set, however, pcre2_substitute() + continues to go through the motions of matching and substituting (with- + out, of course, writing anything) in order to compute the size of buf- + fer that is needed. This value is passed back via the outlengthptr + variable, with the result of the function still being + PCRE2_ERROR_NOMEMORY. + + Passing a buffer size of zero is a permitted way of finding out how + much memory is needed for given substitution. However, this does mean + that the entire operation is carried out twice. 
Depending on the appli- + cation, it may be more efficient to allocate a large buffer and free + the excess afterwards, instead of using PCRE2_SUBSTITUTE_OVER- + FLOW_LENGTH. + + PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capturing groups + that do not appear in the pattern to be treated as unset groups. This + option should be used with care, because it means that a typo in a + group name or number no longer causes the PCRE2_ERROR_NOSUBSTRING + error. + + PCRE2_SUBSTITUTE_UNSET_EMPTY causes unset capturing groups (including + unknown groups when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) to be + treated as empty strings when inserted as described above. If this + option is not set, an attempt to insert an unset group causes the + PCRE2_ERROR_UNSET error. This option does not influence the extended + substitution syntax described below. + + PCRE2_SUBSTITUTE_EXTENDED causes extra processing to be applied to the + replacement string. Without this option, only the dollar character is + special, and only the group insertion forms listed above are valid. + When PCRE2_SUBSTITUTE_EXTENDED is set, two things change: + + Firstly, backslash in a replacement string is interpreted as an escape + character. The usual forms such as \n or \x{ddd} can be used to specify + particular character codes, and backslash followed by any non-alphanu- + meric character quotes that character. Extended quoting can be coded + using \Q...\E, exactly as in pattern strings. + + There are also four escape sequences for forcing the case of inserted + letters. The insertion mechanism has three states: no case forcing, + force upper case, and force lower case. The escape sequences change the + current state: \U and \L change to upper or lower case forcing, respec- + tively, and \E (when not terminating a \Q quoted sequence) reverts to + no case forcing. 
The sequences \u and \l force the next character (if + it is a letter) to upper or lower case, respectively, and then the + state automatically reverts to no case forcing. Case forcing applies to + all inserted characters, including those from captured groups and let- + ters within \Q...\E quoted sequences. + + Note that case forcing sequences such as \U...\E do not nest. For exam- + ple, the result of processing "\Uaa\LBB\Ecc\E" is "AAbbcc"; the final + \E has no effect. + + The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more + flexibility to group substitution. The syntax is similar to that used + by Bash: + + ${<n>:-<string>} + ${<n>:+<string1>:<string2>} + + As before, <n> may be a group number or a name. The first form speci- + fies a default value. If group <n> is set, its value is inserted; if + not, <string> is expanded and the result inserted. The second form + specifies strings that are expanded and inserted when group <n> is set + or unset, respectively. The first form is just a convenient shorthand + for + + ${<n>:+${<n>}:<string>} + + Backslash can be used to escape colons and closing curly brackets in + the replacement strings. A change of the case forcing state within a + replacement string remains in force afterwards, as shown in this + pcre2test example: + + /(some)?(body)/substitute_extended,replace=${1:+\U:\L}HeLLo + body + 1: hello + somebody + 1: HELLO + + The PCRE2_SUBSTITUTE_UNSET_EMPTY option does not affect these extended + substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause + unknown groups in the extended syntax forms to be treated as unset. + + If successful, pcre2_substitute() returns the number of replacements + that were made. This may be zero if no matches were found, and is never + greater than 1 unless PCRE2_SUBSTITUTE_GLOBAL is set. + + In the event of an error, a negative error code is returned. Except for + PCRE2_ERROR_NOMATCH (which is never returned), errors from + pcre2_match() are passed straight back.
+ + PCRE2_ERROR_NOSUBSTRING is returned for a non-existent substring inser- + tion, unless PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set. + + PCRE2_ERROR_UNSET is returned for an unset substring insertion (includ- + ing an unknown substring when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) + when the simple (non-extended) syntax is used and PCRE2_SUBSTI- + TUTE_UNSET_EMPTY is not set. + + PCRE2_ERROR_NOMEMORY is returned if the output buffer is not big + enough. If the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set, the size + of buffer that is needed is returned via outlengthptr. Note that this + does not happen by default. + + PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in + the replacement string, with more particular errors being + PCRE2_ERROR_BADREPESCAPE (invalid escape sequence), PCRE2_ERROR_REP- + MISSING_BRACE (closing curly bracket not found), PCRE2_BADSUBSTITUTION + (syntax error in extended group substitution), and PCRE2_BADSUBPATTERN + (the pattern match ended before it started, which can happen if \K is + used in an assertion). + + As for all PCRE2 errors, a text message that describes the error can be + obtained by calling the pcre2_get_error_message() function (see + "Obtaining a textual error message" above). DUPLICATE SUBPATTERN NAMES @@ -2604,8 +2956,8 @@ DUPLICATE SUBPATTERN NAMES no entries for the given name. The format of the name table is described above in the section entitled - Information about a pattern above. Given all the relevant entries for - the name, you can extract each of their numbers, and hence the captured + Information about a pattern. Given all the relevant entries for the + name, you can extract each of their numbers, and hence the captured data. @@ -2781,8 +3133,8 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION PCRE2_ERROR_DFA_UITEM This return is given if pcre2_dfa_match() encounters an item in the - pattern that it does not support, for instance, the use of \C or a back - reference. 
+ pattern that it does not support, for instance, the use of \C in a UTF + mode or a back reference. PCRE2_ERROR_DFA_UCOND @@ -2826,8 +3178,8 @@ AUTHOR REVISION - Last updated: 22 April 2015 - Copyright (c) 1997-2015 University of Cambridge. + Last updated: 23 December 2016 + Copyright (c) 1997-2016 University of Cambridge. ------------------------------------------------------------------------------ @@ -2946,10 +3298,18 @@ UNICODE AND UTF SUPPORT PCRE2_UCP option. Unless the application has set PCRE2_NEVER_UCP, a pattern may also request this by starting with (*UCP). + +DISABLING THE USE OF \C + The \C escape sequence, which matches a single code unit, even in a UTF mode, can cause unpredictable behaviour because it may leave the cur- - rent matching point in the middle of a multi-code-unit character. It - can be locked out by setting the PCRE2_NEVER_BACKSLASH_C option. + rent matching point in the middle of a multi-code-unit character. The + application can lock it out by setting the PCRE2_NEVER_BACKSLASH_C + option when calling pcre2_compile(). There is also a build-time option + + --enable-never-backslash-C + + (note the upper case C) which locks out the use of \C entirely. JUST-IN-TIME COMPILER SUPPORT @@ -2958,10 +3318,10 @@ JUST-IN-TIME COMPILER SUPPORT --enable-jit - This support is available only for certain hardware architectures. If - this option is set for an unsupported architecture, a building error - occurs. See the pcre2jit documentation for a discussion of JIT usage. - When JIT support is enabled, pcre2grep automatically makes use of it, + This support is available only for certain hardware architectures. If + this option is set for an unsupported architecture, a building error + occurs. See the pcre2jit documentation for a discussion of JIT usage. 
+ When JIT support is enabled, pcre2grep automatically makes use of it, unless you add --disable-pcre2grep-jit @@ -2971,14 +3331,14 @@ JUST-IN-TIME COMPILER SUPPORT NEWLINE RECOGNITION - By default, PCRE2 interprets the linefeed (LF) character as indicating - the end of a line. This is the normal newline character on Unix-like - systems. You can compile PCRE2 to use carriage return (CR) instead, by + By default, PCRE2 interprets the linefeed (LF) character as indicating + the end of a line. This is the normal newline character on Unix-like + systems. You can compile PCRE2 to use carriage return (CR) instead, by adding --enable-newline-is-cr - to the configure command. There is also an --enable-newline-is-lf + to the configure command. There is also an --enable-newline-is-lf option, which explicitly specifies linefeed as the newline character. Alternatively, you can specify that line endings are to be indicated by @@ -2991,76 +3351,76 @@ NEWLINE RECOGNITION --enable-newline-is-anycrlf - which causes PCRE2 to recognize any of the three sequences CR, LF, or + which causes PCRE2 to recognize any of the three sequences CR, LF, or CRLF as indicating a line ending. Finally, a fifth option, specified by --enable-newline-is-any - causes PCRE2 to recognize any Unicode newline sequence. The Unicode + causes PCRE2 to recognize any Unicode newline sequence. The Unicode newline sequences are the three just mentioned, plus the single charac- ters VT (vertical tab, U+000B), FF (form feed, U+000C), NEL (next line, - U+0085), LS (line separator, U+2028), and PS (paragraph separator, + U+0085), LS (line separator, U+2028), and PS (paragraph separator, U+2029). Whatever default line ending convention is selected when PCRE2 is built - can be overridden by applications that use the library. At build time + can be overridden by applications that use the library. At build time it is conventional to use the standard for your operating system. 
WHAT \R MATCHES - By default, the sequence \R in a pattern matches any Unicode newline - sequence, independently of what has been selected as the line ending + By default, the sequence \R in a pattern matches any Unicode newline + sequence, independently of what has been selected as the line ending sequence. If you specify --enable-bsr-anycrlf - the default is changed so that \R matches only CR, LF, or CRLF. What- - ever is selected when PCRE2 is built can be overridden by applications + the default is changed so that \R matches only CR, LF, or CRLF. What- + ever is selected when PCRE2 is built can be overridden by applications that use the called. HANDLING VERY LARGE PATTERNS - Within a compiled pattern, offset values are used to point from one - part to another (for example, from an opening parenthesis to an alter- - nation metacharacter). By default, in the 8-bit and 16-bit libraries, - two-byte values are used for these offsets, leading to a maximum size - for a compiled pattern of around 64K code units. This is sufficient to + Within a compiled pattern, offset values are used to point from one + part to another (for example, from an opening parenthesis to an alter- + nation metacharacter). By default, in the 8-bit and 16-bit libraries, + two-byte values are used for these offsets, leading to a maximum size + for a compiled pattern of around 64K code units. This is sufficient to handle all but the most gigantic patterns. Nevertheless, some people do - want to process truly enormous patterns, so it is possible to compile - PCRE2 to use three-byte or four-byte offsets by adding a setting such + want to process truly enormous patterns, so it is possible to compile + PCRE2 to use three-byte or four-byte offsets by adding a setting such as --with-link-size=3 - to the configure command. The value given must be 2, 3, or 4. For the - 16-bit library, a value of 3 is rounded up to 4. 
In these libraries, - using longer offsets slows down the operation of PCRE2 because it has - to load additional data when handling them. For the 32-bit library the - value is always 4 and cannot be overridden; the value of --with-link- + to the configure command. The value given must be 2, 3, or 4. For the + 16-bit library, a value of 3 is rounded up to 4. In these libraries, + using longer offsets slows down the operation of PCRE2 because it has + to load additional data when handling them. For the 32-bit library the + value is always 4 and cannot be overridden; the value of --with-link- size is ignored. AVOIDING EXCESSIVE STACK USAGE - When matching with the pcre2_match() function, PCRE2 implements back- - tracking by making recursive calls to an internal function called - match(). In environments where the size of the stack is limited, this - can severely limit PCRE2's operation. (The Unix environment does not - usually suffer from this problem, but it may sometimes be necessary to + When matching with the pcre2_match() function, PCRE2 implements back- + tracking by making recursive calls to an internal function called + match(). In environments where the size of the stack is limited, this + can severely limit PCRE2's operation. (The Unix environment does not + usually suffer from this problem, but it may sometimes be necessary to increase the maximum stack size. There is a discussion in the - pcre2stack documentation.) An alternative approach to recursion that - uses memory from the heap to remember data, instead of using recursive - function calls, has been implemented to work round the problem of lim- - ited stack size. If you want to build a version of PCRE2 that works + pcre2stack documentation.) An alternative approach to recursion that + uses memory from the heap to remember data, instead of using recursive + function calls, has been implemented to work round the problem of lim- + ited stack size. 
If you want to build a version of PCRE2 that works this way, add --disable-stack-for-recursion to the configure command. By default, the system functions malloc() and - free() are called to manage the heap memory that is required, but cus- - tom memory management functions can be called instead. PCRE2 runs + free() are called to manage the heap memory that is required, but cus- + tom memory management functions can be called instead. PCRE2 runs noticeably more slowly when built in this way. This option affects only the pcre2_match() function; it is not relevant for pcre2_dfa_match(). @@ -3068,30 +3428,30 @@ AVOIDING EXCESSIVE STACK USAGE LIMITING PCRE2 RESOURCE USAGE Internally, PCRE2 has a function called match(), which it calls repeat- - edly (sometimes recursively) when matching a pattern with the + edly (sometimes recursively) when matching a pattern with the pcre2_match() function. By controlling the maximum number of times this - function may be called during a single matching operation, a limit can - be placed on the resources used by a single call to pcre2_match(). The + function may be called during a single matching operation, a limit can + be placed on the resources used by a single call to pcre2_match(). The limit can be changed at run time, as described in the pcre2api documen- - tation. The default is 10 million, but this can be changed by adding a + tation. The default is 10 million, but this can be changed by adding a setting such as --with-match-limit=500000 - to the configure command. This setting has no effect on the + to the configure command. This setting has no effect on the pcre2_dfa_match() matching function. 
- In some environments it is desirable to limit the depth of recursive + In some environments it is desirable to limit the depth of recursive calls of match() more strictly than the total number of calls, in order - to restrict the maximum amount of stack (or heap, if --disable-stack- + to restrict the maximum amount of stack (or heap, if --disable-stack- for-recursion is specified) that is used. A second limit controls this; - it defaults to the value that is set for --with-match-limit, which - imposes no additional constraints. However, you can set a lower limit + it defaults to the value that is set for --with-match-limit, which + imposes no additional constraints. However, you can set a lower limit by adding, for example, --with-match-limit-recursion=10000 - to the configure command. This value can also be overridden at run + to the configure command. This value can also be overridden at run time. @@ -3099,45 +3459,45 @@ CREATING CHARACTER TABLES AT BUILD TIME PCRE2 uses fixed tables for processing characters whose code points are less than 256. By default, PCRE2 is built with a set of tables that are - distributed in the file src/pcre2_chartables.c.dist. These tables are + distributed in the file src/pcre2_chartables.c.dist. These tables are for ASCII codes only. If you add --enable-rebuild-chartables - to the configure command, the distributed tables are no longer used. - Instead, a program called dftables is compiled and run. This outputs + to the configure command, the distributed tables are no longer used. + Instead, a program called dftables is compiled and run. This outputs the source for new set of tables, created in the default locale of your - C run-time system. (This method of replacing the tables does not work - if you are cross compiling, because dftables is run on the local host. + C run-time system. (This method of replacing the tables does not work + if you are cross compiling, because dftables is run on the local host. 
If you need to create alternative tables when cross compiling, you will have to do so "by hand".) USING EBCDIC CODE - PCRE2 assumes by default that it will run in an environment where the - character code is ASCII or Unicode, which is a superset of ASCII. This + PCRE2 assumes by default that it will run in an environment where the + character code is ASCII or Unicode, which is a superset of ASCII. This is the case for most computer operating systems. PCRE2 can, however, be compiled to run in an 8-bit EBCDIC environment by adding --enable-ebcdic --disable-unicode to the configure command. This setting implies --enable-rebuild-charta- - bles. You should only use it if you know that you are in an EBCDIC + bles. You should only use it if you know that you are in an EBCDIC environment (for example, an IBM mainframe operating system). - It is not possible to support both EBCDIC and UTF-8 codes in the same - version of the library. Consequently, --enable-unicode and --enable- + It is not possible to support both EBCDIC and UTF-8 codes in the same + version of the library. Consequently, --enable-unicode and --enable- ebcdic are mutually exclusive. The EBCDIC character that corresponds to an ASCII LF is assumed to have - the value 0x15 by default. However, in some EBCDIC environments, 0x25 + the value 0x15 by default. However, in some EBCDIC environments, 0x25 is used. In such an environment you should use --enable-ebcdic-nl25 as well as, or instead of, --enable-ebcdic. The EBCDIC character for CR - has the same value as in ASCII, namely, 0x0d. Whichever of 0x15 and + has the same value as in ASCII, namely, 0x0d. Whichever of 0x15 and 0x25 is not chosen as LF is made to correspond to the Unicode NEL char- acter (which, in Unicode, is 0x85). @@ -3146,34 +3506,48 @@ USING EBCDIC CODE an EBCDIC environment. 
+PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS + + By default, on non-Windows systems, pcre2grep supports the use of call- + outs with string arguments within the patterns it is matching, in order + to run external scripts. For details, see the pcre2grep documentation. + This support can be disabled by adding --disable-pcre2grep-callout to + the configure command. + + PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT - By default, pcre2grep reads all files as plain text. You can build it - so that it recognizes files whose names end in .gz or .bz2, and reads + By default, pcre2grep reads all files as plain text. You can build it + so that it recognizes files whose names end in .gz or .bz2, and reads them with libz or libbz2, respectively, by adding one or both of --enable-pcre2grep-libz --enable-pcre2grep-libbz2 to the configure command. These options naturally require that the rel- - evant libraries are installed on your system. Configuration will fail + evant libraries are installed on your system. Configuration will fail if they are not. PCRE2GREP BUFFER SIZE - pcre2grep uses an internal buffer to hold a "window" on the file it is + pcre2grep uses an internal buffer to hold a "window" on the file it is scanning, in order to be able to output "before" and "after" lines when - it finds a match. The size of the buffer is controlled by a parameter - whose default value is 20K. The buffer itself is three times this size, - but because of the way it is used for holding "before" lines, the long- - est line that is guaranteed to be processable is the parameter size. - You can change the default parameter value by adding, for example, + it finds a match. The starting size of the buffer is controlled by a + parameter whose default value is 20K. The buffer itself is three times + this size, but because of the way it is used for holding "before" + lines, the longest line that is guaranteed to be processable is the + parameter size. 
If a longer line is encountered, pcre2grep automati- + cally expands the buffer, up to a specified maximum size, whose default + is 1M or the starting size, whichever is the larger. You can change the + default parameter values by adding, for example, - --with-pcre2grep-bufsize=50K + --with-pcre2grep-bufsize=51200 + --with-pcre2grep-max-bufsize=2097152 - to the configure command. The caller of pcre2grep can override this - value by using --buffer-size on the command line.. + to the configure command. The caller of pcre2grep can override these + values by using --buffer-size and --max-buffer-size on the command + line. PCRE2TEST OPTION FOR LIBREADLINE SUPPORT @@ -3183,26 +3557,26 @@ PCRE2TEST OPTION FOR LIBREADLINE SUPPORT --enable-pcre2test-libreadline --enable-pcre2test-libedit - to the configure command, pcre2test is linked with the libreadline + to the configure command, pcre2test is linked with the libreadline orlibedit library, respectively, and when its input is from a terminal, - it reads it using the readline() function. This provides line-editing - and history facilities. Note that libreadline is GPL-licensed, so if - you distribute a binary of pcre2test linked in this way, there may be + it reads it using the readline() function. This provides line-editing + and history facilities. Note that libreadline is GPL-licensed, so if + you distribute a binary of pcre2test linked in this way, there may be licensing issues. These can be avoided by linking instead with libedit, which has a BSD licence. - Setting --enable-pcre2test-libreadline causes the -lreadline option to - be added to the pcre2test build. In many operating environments with a - sytem-installed readline library this is sufficient. However, in some + Setting --enable-pcre2test-libreadline causes the -lreadline option to + be added to the pcre2test build. In many operating environments with a + sytem-installed readline library this is sufficient. However, in some environments (e.g. 
if an unmodified distribution version of readline is - in use), some extra configuration may be necessary. The INSTALL file + in use), some extra configuration may be necessary. The INSTALL file for libreadline says this: "Readline uses the termcap functions, but does not link with the termcap or curses library itself, allowing applications which link with readline the to choose an appropriate library." - If your environment has not been set up so that an appropriate library + If your environment has not been set up so that an appropriate library is automatically included, you may need to add something like LIBS="-ncurses" @@ -3216,7 +3590,7 @@ INCLUDING DEBUGGING CODE --enable-debug - to the configure command, additional debugging code is included in the + to the configure command, additional debugging code is included in the build. This feature is intended for use by the PCRE2 maintainers. @@ -3226,15 +3600,15 @@ DEBUGGING WITH VALGRIND SUPPORT --enable-valgrind - to the configure command, PCRE2 will use valgrind annotations to mark - certain memory regions as unaddressable. This allows it to detect - invalid memory accesses, and is mostly useful for debugging PCRE2 + to the configure command, PCRE2 will use valgrind annotations to mark + certain memory regions as unaddressable. This allows it to detect + invalid memory accesses, and is mostly useful for debugging PCRE2 itself. CODE COVERAGE REPORTING - If your C compiler is gcc, you can build a version of PCRE2 that can + If your C compiler is gcc, you can build a version of PCRE2 that can generate a code coverage report for its test suite. To enable this, you must install lcov version 1.6 or above. Then specify @@ -3243,20 +3617,20 @@ CODE COVERAGE REPORTING to the configure command and build PCRE2 in the usual way. Note that using ccache (a caching C compiler) is incompatible with code - coverage reporting. If you have configured ccache to run automatically + coverage reporting. 
If you have configured ccache to run automatically on your system, you must set the environment variable CCACHE_DISABLE=1 before running make to build PCRE2, so that ccache is not used. - When --enable-coverage is used, the following addition targets are + When --enable-coverage is used, the following addition targets are added to the Makefile: make coverage - This creates a fresh coverage report for the PCRE2 test suite. It is - equivalent to running "make coverage-reset", "make coverage-baseline", + This creates a fresh coverage report for the PCRE2 test suite. It is + equivalent to running "make coverage-reset", "make coverage-baseline", "make check", and then "make coverage-report". make coverage-reset @@ -3273,21 +3647,44 @@ CODE COVERAGE REPORTING make coverage-clean-report - This removes the generated coverage report without cleaning the cover- + This removes the generated coverage report without cleaning the cover- age data itself. make coverage-clean-data - This removes the captured coverage data without removing the coverage + This removes the captured coverage data without removing the coverage files created at compile time (*.gcno). make coverage-clean - This cleans all coverage data including the generated coverage report. - For more information about code coverage, see the gcov and lcov docu- + This cleans all coverage data including the generated coverage report. + For more information about code coverage, see the gcov and lcov docu- mentation. +SUPPORT FOR FUZZERS + + There is a special option for use by people who want to run fuzzing + tests on PCRE2: + + --enable-fuzz-support + + At present this applies only to the 8-bit library. If set, it causes an + extra library called libpcre2-fuzzsupport.a to be built, but not + installed. This contains a single function called LLVMFuzzerTestOneIn- + put() whose arguments are a pointer to a string and the length of the + string. 
When called, this function tries to compile the string as a + pattern, and if that succeeds, to match it. This is done both with no + options and with some random options bits that are generated from the + string. Setting --enable-fuzz-support also causes a binary called + pcre2fuzzcheck to be created. This is normally run under valgrind or + used when PCRE2 is compiled with address sanitizing enabled. It calls + the fuzzing function and outputs information about what it is doing. The + input strings are specified by arguments: if an argument starts with + "=" the rest of it is a literal input string. Otherwise, it is assumed + to be a file name, and the contents of the file are the test string. + + SEE ALSO pcre2api(3), pcre2-config(3). @@ -3302,8 +3699,8 @@ AUTHOR REVISION - Last updated: 24 April 2015 - Copyright (c) 1997-2015 University of Cambridge. + Last updated: 01 November 2016 + Copyright (c) 1997-2016 University of Cambridge. ------------------------------------------------------------------------------ @@ -3347,45 +3744,54 @@ DESCRIPTION If the PCRE2_AUTO_CALLOUT option bit is set when a pattern is compiled, PCRE2 automatically inserts callouts, all with number 255, before each - item in the pattern. For example, if PCRE2_AUTO_CALLOUT is used with + item in the pattern except for immediately before or after a callout + item in the pattern. For example, if PCRE2_AUTO_CALLOUT is used with the pattern - A(\d{2}|--) + A(?C3)B it is processed as if it were + (?C255)A(?C3)B(?C255) + + Here is a more complicated example: + + A(\d{2}|--) + + With PCRE2_AUTO_CALLOUT, this pattern is processed as if it were + (?C255)A(?C255)((?C255)\d{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255) - Notice that there is a callout before and after each parenthesis and + Notice that there is a callout before and after each parenthesis and alternation bar.
If the pattern contains a conditional group whose con- - dition is an assertion, an automatic callout is inserted immediately - before the condition. Such a callout may also be inserted explicitly, + dition is an assertion, an automatic callout is inserted immediately + before the condition. Such a callout may also be inserted explicitly, for example: (?(?C9)(?=a)ab|de) (?(?C%text%)(?!=d)ab|de) - This applies only to assertion conditions (because they are themselves + This applies only to assertion conditions (because they are themselves independent groups). - Callouts can be useful for tracking the progress of pattern matching. + Callouts can be useful for tracking the progress of pattern matching. The pcre2test program has a pattern qualifier (/auto_callout) that sets - automatic callouts. When any callouts are present, the output from - pcre2test indicates how the pattern is being matched. This is useful - information when you are trying to optimize the performance of a par- + automatic callouts. When any callouts are present, the output from + pcre2test indicates how the pattern is being matched. This is useful + information when you are trying to optimize the performance of a par- ticular pattern. MISSING CALLOUTS - You should be aware that, because of optimizations in the way PCRE2 + You should be aware that, because of optimizations in the way PCRE2 compiles and matches patterns, callouts sometimes do not happen exactly as you might expect. Auto-possessification At compile time, PCRE2 "auto-possessifies" repeated items when it knows - that what follows cannot be part of the repeat. For example, a+[bc] is - compiled as if it were a++[bc]. The pcre2test output when this pattern + that what follows cannot be part of the repeat. For example, a+[bc] is + compiled as if it were a++[bc]. 
The pcre2test output when this pattern is compiled with PCRE2_ANCHORED and PCRE2_AUTO_CALLOUT and then applied to the string "aaaa" is: @@ -3394,11 +3800,12 @@ MISSING CALLOUTS +2 ^ ^ [bc] No match - This indicates that when matching [bc] fails, there is no backtracking - into a+ and therefore the callouts that would be taken for the back- - tracks do not occur. You can disable the auto-possessify feature by - passing PCRE2_NO_AUTO_POSSESS to pcre2_compile(), or starting the pat- - tern with (*NO_AUTO_POSSESS). In this case, the output changes to this: + This indicates that when matching [bc] fails, there is no backtracking + into a+ (because it is being treated as a++) and therefore the callouts + that would be taken for the backtracks do not occur. You can disable + the auto-possessify feature by passing PCRE2_NO_AUTO_POSSESS to + pcre2_compile(), or starting the pattern with (*NO_AUTO_POSSESS). In + this case, the output changes to this: --->aaaa +0 ^ a+ @@ -3517,8 +3924,8 @@ THE CALLOUT INTERFACE For a numerical callout, callout_string is NULL, and callout_number contains the number of the callout, in the range 0-255. This is the - number that follows (?C for manual callouts; it is 255 for automati- - cally generated callouts. + number that follows (?C for callouts that are part of the pattern; it is + 255 for automatically generated callouts. Fields for string callouts @@ -3579,10 +3986,16 @@ THE CALLOUT INTERFACE the next item to be matched. The next_item_length field contains the length of the next item to be - matched in the pattern string. When the callout immediately precedes an - alternation bar, a closing parenthesis, or the end of the pattern, the - length is zero. When the callout precedes an opening parenthesis, the - length is that of the entire subpattern. + processed in the pattern string. When the callout is at the end of the + pattern, the length is zero.
When the callout precedes an opening + parenthesis, the length includes meta characters that follow the paren- + thesis. For example, in a callout before an assertion such as (?=ab) + the length is 3. For an alternation bar or a closing parenthesis, + the length is one, unless a closing parenthesis is followed by a quan- + tifier, in which case its length is included. (This changed in release + 10.23. In earlier releases, before an opening parenthesis the length + was that of the entire subpattern, and before an alternation bar or a + closing parenthesis the length was zero.) The pattern_position and next_item_length fields are intended to help in distinguishing between different automatic callouts, which all have @@ -3666,8 +4079,8 @@ AUTHOR REVISION - Last updated: 23 March 2015 - Copyright (c) 1997-2015 University of Cambridge. + Last updated: 29 September 2016 + Copyright (c) 1997-2016 University of Cambridge. ------------------------------------------------------------------------------ @@ -3761,7 +4174,7 @@ DIFFERENCES BETWEEN PCRE2 AND PERL first one that is backtracked onto acts. For example, in the pattern A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C triggers (*PRUNE). Perl's behaviour is more complex; in many cases - it is the same as PCRE2, but there are examples where it differs. + it is the same as PCRE2, but there are cases where it differs. 11. Most backtracking verbs in assertions have their normal actions. They are not confined to the assertion. @@ -3775,18 +4188,18 @@ DIFFERENCES BETWEEN PCRE2 AND PERL pattern names is not as general as Perl's. This is a consequence of the fact the PCRE2 works internally just with numbers, using an external table to translate between numbers and names. In particular, a pattern - such as (?|(?<a>A)|(?<b)B), where the two capturing parentheses have + such as (?|(?<a>A)|(?<b>B), where the two capturing parentheses have the same number but different names, is not supported, and causes an error at compile time.
If it were allowed, it would not be possible to distinguish which parentheses matched, because both names map to cap- turing subpattern number 1. To avoid this confusing situation, an error is given at compile time. - 14. Perl recognizes comments in some places that PCRE2 does not, for - example, between the ( and ? at the start of a subpattern. If the /x - modifier is set, Perl allows white space between ( and ? (though cur- - rent Perls warn that this is deprecated) but PCRE2 never does, even if - the PCRE2_EXTENDED option is set. + 14. Perl used to recognize comments in some places that PCRE2 does not, + for example, between the ( and ? at the start of a subpattern. If the + /x modifier is set, Perl allowed white space between ( and ? though the + latest Perls give an error (for a while it was just deprecated). There + may still be some cases where Perl behaves differently. 15. Perl, when in warning mode, gives warnings for character classes such as [A-\d] or [a-[:digit:]]. It then treats the hyphens as liter- @@ -3810,34 +4223,39 @@ DIFFERENCES BETWEEN PCRE2 AND PERL different length of string. Perl requires them all to have the same length. - (b) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the + (b) From PCRE2 10.23, back references to groups of fixed length are + supported in lookbehinds, provided that there is no possibility of ref- + erencing a non-unique number or name. Perl does not support backrefer- + ences in lookbehinds. + + (c) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the $ meta-character matches only at the very end of the string. - (c) A backslash followed by a letter with no special meaning is + (d) A backslash followed by a letter with no special meaning is faulted. (Perl can be made to issue a warning.) 
- (d) If PCRE2_UNGREEDY is set, the greediness of the repetition quanti- + (e) If PCRE2_UNGREEDY is set, the greediness of the repetition quanti- fiers is inverted, that is, by default they are not greedy, but if fol- lowed by a question mark they are. - (e) PCRE2_ANCHORED can be used at matching time to force a pattern to + (f) PCRE2_ANCHORED can be used at matching time to force a pattern to be tried only at the first matching position in the subject string. - (f) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, - PCRE2_NOTEMPTY_ATSTART, and PCRE2_NO_AUTO_CAPTURE options have no Perl + (g) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, + PCRE2_NOTEMPTY_ATSTART, and PCRE2_NO_AUTO_CAPTURE options have no Perl equivalents. - (g) The \R escape sequence can be restricted to match only CR, LF, or + (h) The \R escape sequence can be restricted to match only CR, LF, or CRLF by the PCRE2_BSR_ANYCRLF option. - (h) The callout facility is PCRE2-specific. + (i) The callout facility is PCRE2-specific. - (i) The partial matching facility is PCRE2-specific. + (j) The partial matching facility is PCRE2-specific. - (j) The alternative matching function (pcre2_dfa_match()) matches in a + (k) The alternative matching function (pcre2_dfa_match()) matches in a different way and is not Perl-compatible. - (k) PCRE2 recognizes some special sequences such as (*CR) at the start + (l) PCRE2 recognizes some special sequences such as (*CR) at the start of a pattern that set overall options that cannot be changed within the pattern. @@ -3851,8 +4269,8 @@ AUTHOR REVISION - Last updated: 15 March 2015 - Copyright (c) 1997-2015 University of Cambridge. + Last updated: 18 October 2016 + Copyright (c) 1997-2016 University of Cambridge. ------------------------------------------------------------------------------ @@ -3922,6 +4340,12 @@ SIMPLE USE OF JIT exactly the same results. The returned value from pcre2_jit_compile() is zero on success, or a negative error code.
+ There is a limit to the size of pattern that JIT supports, imposed by + the size of machine stack that it uses. The exact rules are not docu- + mented because they may change at any time, in particular, when new + optimizations are introduced. If a pattern is too big, a call to + pcre2_jit_compile() returns PCRE2_ERROR_NOMEMORY. + PCRE2_JIT_COMPLETE requests the JIT compiler to generate code for com- plete matches. If you want to run partial matches using the PCRE2_PAR- TIAL_HARD or PCRE2_PARTIAL_SOFT options of pcre2_match(), you should @@ -3975,49 +4399,52 @@ UNSUPPORTED OPTIONS AND PATTERN ITEMS PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. The PCRE2_ANCHORED option is not supported at match time. - The only unsupported pattern items are \C (match a single data unit) - when running in a UTF mode, and a callout immediately before an asser- + If the PCRE2_NO_JIT option is passed to pcre2_match() it disables the + use of JIT, forcing matching by the interpreter code. + + The only unsupported pattern items are \C (match a single data unit) + when running in a UTF mode, and a callout immediately before an asser- tion condition in a conditional group. RETURN VALUES FROM JIT MATCHING When a pattern is matched using JIT matching, the return values are the - same as those given by the interpretive pcre2_match() code, with the - addition of one new error code: PCRE2_ERROR_JIT_STACKLIMIT. This means - that the memory used for the JIT stack was insufficient. See "Control- + same as those given by the interpretive pcre2_match() code, with the + addition of one new error code: PCRE2_ERROR_JIT_STACKLIMIT. This means + that the memory used for the JIT stack was insufficient. See "Control- ling the JIT stack" below for a discussion of JIT stack usage. 
- The error code PCRE2_ERROR_MATCHLIMIT is returned by the JIT code if - searching a very large pattern tree goes on for too long, as it is in - the same circumstance when JIT is not used, but the details of exactly - what is counted are not the same. The PCRE2_ERROR_RECURSIONLIMIT error + The error code PCRE2_ERROR_MATCHLIMIT is returned by the JIT code if + searching a very large pattern tree goes on for too long, as it is in + the same circumstance when JIT is not used, but the details of exactly + what is counted are not the same. The PCRE2_ERROR_RECURSIONLIMIT error code is never returned when JIT matching is used. CONTROLLING THE JIT STACK When the compiled JIT code runs, it needs a block of memory to use as a - stack. By default, it uses 32K on the machine stack. However, some - large or complicated patterns need more than this. The error - PCRE2_ERROR_JIT_STACKLIMIT is given when there is not enough stack. - Three functions are provided for managing blocks of memory for use as - JIT stacks. There is further discussion about the use of JIT stacks in + stack. By default, it uses 32K on the machine stack. However, some + large or complicated patterns need more than this. The error + PCRE2_ERROR_JIT_STACKLIMIT is given when there is not enough stack. + Three functions are provided for managing blocks of memory for use as + JIT stacks. There is further discussion about the use of JIT stacks in the section entitled "JIT stack FAQ" below. - The pcre2_jit_stack_create() function creates a JIT stack. Its argu- - ments are a starting size, a maximum size, and a general context (for - memory allocation functions, or NULL for standard memory allocation). + The pcre2_jit_stack_create() function creates a JIT stack. Its argu- + ments are a starting size, a maximum size, and a general context (for + memory allocation functions, or NULL for standard memory allocation). It returns a pointer to an opaque structure of type pcre2_jit_stack, or - NULL if there is an error. 
The pcre2_jit_stack_free() function is used - to free a stack that is no longer needed. (For the technically minded: + NULL if there is an error. The pcre2_jit_stack_free() function is used + to free a stack that is no longer needed. (For the technically minded: the address space is allocated by mmap or VirtualAlloc.) - JIT uses far less memory for recursion than the interpretive code, and - a maximum stack size of 512K to 1M should be more than enough for any + JIT uses far less memory for recursion than the interpretive code, and + a maximum stack size of 512K to 1M should be more than enough for any pattern. - The pcre2_jit_stack_assign() function specifies which stack JIT code + The pcre2_jit_stack_assign() function specifies which stack JIT code should use. Its arguments are as follows: pcre2_match_context *mcontext @@ -4026,7 +4453,7 @@ CONTROLLING THE JIT STACK The first argument is a pointer to a match context. When this is subse- quently passed to a matching function, its information determines which - JIT stack is used. There are three cases for the values of the other + JIT stack is used. There are three cases for the values of the other two options: (1) If callback is NULL and data is NULL, an internal 32K block @@ -4044,30 +4471,34 @@ CONTROLLING THE JIT STACK return value must be a valid JIT stack, the result of calling pcre2_jit_stack_create(). - A callback function is obeyed whenever JIT code is about to be run; it + A callback function is obeyed whenever JIT code is about to be run; it is not obeyed when pcre2_match() is called with options that are incom- - patible for JIT matching. A callback function can therefore be used to - determine whether a match operation was executed by JIT or by the + patible for JIT matching. A callback function can therefore be used to + determine whether a match operation was executed by JIT or by the interpreter. 
You may safely use the same JIT stack for more than one pattern (either - by assigning directly or by callback), as long as the patterns are all - matched sequentially in the same thread. In a multithread application, - if you do not specify a JIT stack, or if you assign or pass back NULL - from a callback, that is thread-safe, because each thread has its own - machine stack. However, if you assign or pass back a non-NULL JIT - stack, this must be a different stack for each thread so that the - application is thread-safe. + by assigning directly or by callback), as long as the patterns are + matched sequentially in the same thread. Currently, the only way to set + up non-sequential matches in one thread is to use callouts: if a call- + out function starts another match, that match must use a different JIT + stack to the one used for currently suspended match(es). - Strictly speaking, even more is allowed. You can assign the same non- - NULL stack to a match context that is used by any number of patterns, - as long as they are not used for matching by multiple threads at the - same time. For example, you could use the same stack in all compiled - patterns, with a global mutex in the callback to wait until the stack + In a multithread application, if you do not specify a JIT stack, or if + you assign or pass back NULL from a callback, that is thread-safe, + because each thread has its own machine stack. However, if you assign + or pass back a non-NULL JIT stack, this must be a different stack for + each thread so that the application is thread-safe. + + Strictly speaking, even more is allowed. You can assign the same non- + NULL stack to a match context that is used by any number of patterns, + as long as they are not used for matching by multiple threads at the + same time. For example, you could use the same stack in all compiled + patterns, with a global mutex in the callback to wait until the stack is available for use. 
However, this is an inefficient solution, and not recommended. - This is a suggestion for how a multithreaded program that needs to set + This is a suggestion for how a multithreaded program that needs to set up non-default JIT stacks might operate: During thread initialization @@ -4079,7 +4510,7 @@ CONTROLLING THE JIT STACK Use a one-line callback function return thread_local_var - All the functions described in this section do nothing if JIT is not + All the functions described in this section do nothing if JIT is not available. @@ -4088,20 +4519,20 @@ JIT STACK FAQ (1) Why do we need JIT stacks? PCRE2 (and JIT) is a recursive, depth-first engine, so it needs a stack - where the local data of the current node is pushed before checking its + where the local data of the current node is pushed before checking its child nodes. Allocating real machine stack on some platforms is diffi- cult. For example, the stack chain needs to be updated every time if we - extend the stack on PowerPC. Although it is possible, its updating + extend the stack on PowerPC. Although it is possible, its updating time overhead decreases performance. So we do the recursion in memory. (2) Why don't we simply allocate blocks of memory with malloc()? - Modern operating systems have a nice feature: they can reserve an + Modern operating systems have a nice feature: they can reserve an address space instead of allocating memory. We can safely allocate mem- - ory pages inside this address space, so the stack could grow without + ory pages inside this address space, so the stack could grow without moving memory data (this is important because of pointers). Thus we can - allocate 1M address space, and use only a single memory page (usually - 4K) if that is enough. However, we can still grow up to 1M anytime if + allocate 1M address space, and use only a single memory page (usually + 4K) if that is enough. However, we can still grow up to 1M anytime if needed. (3) Who "owns" a JIT stack?
@@ -4109,8 +4540,8 @@ JIT STACK FAQ The owner of the stack is the user program, not the JIT studied pattern or anything else. The user program must ensure that if a stack is being used by pcre2_match(), (that is, it is assigned to a match context that - is passed to the pattern currently running), that stack must not be - used by any other threads (to avoid overwriting the same memory area). + is passed to the pattern currently running), that stack must not be + used by any other threads (to avoid overwriting the same memory area). The best practice for multithreaded programs is to allocate a stack for each thread, and return this stack through the JIT callback function. @@ -4118,36 +4549,36 @@ JIT STACK FAQ You can free a JIT stack at any time, as long as it will not be used by pcre2_match() again. When you assign the stack to a match context, only - a pointer is set. There is no reference counting or any other magic. + a pointer is set. There is no reference counting or any other magic. You can free compiled patterns, contexts, and stacks in any order, any- - time. Just do not call pcre2_match() with a match context pointing to + time. Just do not call pcre2_match() with a match context pointing to an already freed stack, as that will cause SEGFAULT. (Also, do not free - a stack currently used by pcre2_match() in another thread). You can - also replace the stack in a context at any time when it is not in use. + a stack currently used by pcre2_match() in another thread). You can + also replace the stack in a context at any time when it is not in use. You should free the previous stack before assigning a replacement. - (5) Should I allocate/free a stack every time before/after calling + (5) Should I allocate/free a stack every time before/after calling pcre2_match()? - No, because this is too costly in terms of resources. However, you - could implement some clever idea which release the stack if it is not - used in let's say two minutes. 
The JIT callback can help to achieve + No, because this is too costly in terms of resources. However, you + could implement some clever idea which release the stack if it is not + used in let's say two minutes. The JIT callback can help to achieve this without keeping a list of patterns. - (6) OK, the stack is for long term memory allocation. But what happens - if a pattern causes stack overflow with a stack of 1M? Is that 1M kept + (6) OK, the stack is for long term memory allocation. But what happens + if a pattern causes stack overflow with a stack of 1M? Is that 1M kept until the stack is freed? - Especially on embedded systems, it might be a good idea to release mem- - ory sometimes without freeing the stack. There is no API for this at - the moment. Probably a function call which returns with the currently - allocated memory for any stack and another which allows releasing mem- + Especially on embedded systems, it might be a good idea to release mem- + ory sometimes without freeing the stack. There is no API for this at + the moment. Probably a function call which returns with the currently + allocated memory for any stack and another which allows releasing mem- ory (shrinking the stack) would be a good idea if someone needs this. (7) This is too much of a headache. Isn't there any better solution for JIT stack handling? - No, thanks to Windows. If POSIX threads were used everywhere, we could + No, thanks to Windows. If POSIX threads were used everywhere, we could throw out this complicated API. @@ -4156,18 +4587,18 @@ FREEING JIT SPECULATIVE MEMORY void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext); The JIT executable allocator does not free all memory when it is possi- - ble. It expects new allocations, and keeps some free memory around to - improve allocation speed. However, in low memory conditions, it might - be better to free all possible memory. You can cause this to happen by - calling pcre2_jit_free_unused_memory().
Its argument is a general con- + ble. It expects new allocations, and keeps some free memory around to + improve allocation speed. However, in low memory conditions, it might + be better to free all possible memory. You can cause this to happen by + calling pcre2_jit_free_unused_memory(). Its argument is a general con- text, for custom memory management, or NULL for standard memory manage- ment. EXAMPLE CODE - This is a single-threaded example that specifies a JIT stack without - using a callback. A real program should include error checking after + This is a single-threaded example that specifies a JIT stack without + using a callback. A real program should include error checking after all the function calls. int rc; @@ -4195,19 +4626,20 @@ EXAMPLE CODE JIT FAST PATH API Because the API described above falls back to interpreted matching when - JIT is not available, it is convenient for programs that are written + JIT is not available, it is convenient for programs that are written for general use in many environments. However, calling JIT via pcre2_match() does have a performance impact. Programs that are written - for use where JIT is known to be available, and which need the best - possible performance, can instead use a "fast path" API to call JIT - matching directly instead of calling pcre2_match() (obviously only for + for use where JIT is known to be available, and which need the best + possible performance, can instead use a "fast path" API to call JIT + matching directly instead of calling pcre2_match() (obviously only for patterns that have been successfully processed by pcre2_jit_compile()). - The fast path function is called pcre2_jit_match(), and it takes + The fast path function is called pcre2_jit_match(), and it takes exactly the same arguments as pcre2_match(). The return values are also the same, plus PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or - complete) is requested that was not compiled. 
Unsupported option bits - (for example, PCRE2_ANCHORED) are ignored. + complete) is requested that was not compiled. Unsupported option bits + (for example, PCRE2_ANCHORED) are ignored, as is the PCRE2_NO_JIT + option. When you call pcre2_match(), as well as testing for invalid options, a number of other sanity checks are performed on the arguments. For exam- @@ -4234,8 +4666,8 @@ AUTHOR REVISION - Last updated: 27 November 2014 - Copyright (c) 1997-2014 University of Cambridge. + Last updated: 05 June 2016 + Copyright (c) 1997-2016 University of Cambridge. ------------------------------------------------------------------------------ @@ -4262,6 +4694,10 @@ SIZE AND OTHER LIMITATIONS of execution is slower. In the 32-bit library, the internal linkage size is always 4. + The maximum length of a source pattern string is essentially unlimited; + it is the largest number a PCRE2_SIZE variable can hold. However, the + program that calls pcre2_compile() can specify a smaller limit. + The maximum length (in code units) of a subject string is one less than the largest number a PCRE2_SIZE variable can hold. PCRE2_SIZE is an unsigned integer type, usually defined as size_t. Its maximum value @@ -4276,25 +4712,26 @@ SIZE AND OTHER LIMITATIONS All values in repeating quantifiers must be less than 65536. + The maximum length of a lookbehind assertion is 65535 characters. + There is no limit to the number of parenthesized subpatterns, but there can be no more than 65535 capturing subpatterns. There is, however, a limit to the depth of nesting of parenthesized subpatterns of all kinds. This is imposed in order to limit the amount of system stack - used at compile time. The limit can be specified when PCRE2 is built; - the default is 250. - - There is a limit to the number of forward references to subsequent sub- - patterns of around 200,000. 
Repeated forward references with fixed - upper limits, for example, (?2){0,100} when subpattern number 2 is to - the right, are included in the count. There is no limit to the number - of backward references. + used at compile time. The default limit can be specified when PCRE2 is + built; the default default is 250. An application can change this limit + by calling pcre2_set_parens_nest_limit() to set the limit in a compile + context. The maximum length of name for a named subpattern is 32 code units, and the maximum number of named subpatterns is 10000. The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or - (*THEN) verb is 255 for the 8-bit library and 65535 for the 16-bit and - 32-bit libraries. + (*THEN) verb is 255 code units for the 8-bit library and 65535 code + units for the 16-bit and 32-bit libraries. + + The maximum length of a string argument to a callout is the largest + number a 32-bit unsigned integer can hold. AUTHOR @@ -4306,8 +4743,8 @@ AUTHOR REVISION - Last updated: 25 November 2014 - Copyright (c) 1997-2014 University of Cambridge. + Last updated: 26 October 2016 + Copyright (c) 1997-2016 University of Cambridge. ------------------------------------------------------------------------------ @@ -4970,6 +5407,4432 @@ REVISION ------------------------------------------------------------------------------ +PCRE2PATTERN(3) Library Functions Manual PCRE2PATTERN(3) + + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + +PCRE2 REGULAR EXPRESSION DETAILS + + The syntax and semantics of the regular expressions that are supported + by PCRE2 are described in detail below. There is a quick-reference syn- + tax summary in the pcre2syntax page. PCRE2 tries to match Perl syntax + and semantics as closely as it can. PCRE2 also supports some alterna- + tive regular expression syntax (which does not conflict with the Perl + syntax) in order to provide some compatibility with regular expressions + in Python, .NET, and Oniguruma. 
+ + Perl's regular expressions are described in its own documentation, and + regular expressions in general are covered in a number of books, some + of which have copious examples. Jeffrey Friedl's "Mastering Regular + Expressions", published by O'Reilly, covers regular expressions in + great detail. This description of PCRE2's regular expressions is + intended as reference material. + + This document discusses the patterns that are supported by PCRE2 when + its main matching function, pcre2_match(), is used. PCRE2 also has an + alternative matching function, pcre2_dfa_match(), which matches using a + different algorithm that is not Perl-compatible. Some of the features + discussed below are not available when DFA matching is used. The advan- + tages and disadvantages of the alternative function, and how it differs + from the normal function, are discussed in the pcre2matching page. + + +SPECIAL START-OF-PATTERN ITEMS + + A number of options that can be passed to pcre2_compile() can also be + set by special items at the start of a pattern. These are not Perl-com- + patible, but are provided to make these options accessible to pattern + writers who are not able to change the program that processes the pat- + tern. Any number of these items may appear, but they must all be + together right at the start of the pattern string, and the letters must + be in upper case. + + UTF support + + In the 8-bit and 16-bit PCRE2 libraries, characters may be coded either + as single code units, or as multiple UTF-8 or UTF-16 code units. UTF-32 + can be specified for the 32-bit library, in which case it constrains + the character values to valid Unicode code points. To process UTF + strings, PCRE2 must be built to include Unicode support (which is the + default). When using UTF strings you must either call the compiling + function with the PCRE2_UTF option, or the pattern must start with the + special sequence (*UTF), which is equivalent to setting the relevant + option. 
How setting a UTF mode affects pattern matching is mentioned in + several places below. There is also a summary of features in the + pcre2unicode page. + + Some applications that allow their users to supply patterns may wish to + restrict them to non-UTF data for security reasons. If the + PCRE2_NEVER_UTF option is passed to pcre2_compile(), (*UTF) is not + allowed, and its appearance in a pattern causes an error. + + Unicode property support + + Another special sequence that may appear at the start of a pattern is + (*UCP). This has the same effect as setting the PCRE2_UCP option: it + causes sequences such as \d and \w to use Unicode properties to deter- + mine character types, instead of recognizing only characters with codes + less than 256 via a lookup table. + + Some applications that allow their users to supply patterns may wish to + restrict them for security reasons. If the PCRE2_NEVER_UCP option is + passed to pcre2_compile(), (*UCP) is not allowed, and its appearance in + a pattern causes an error. + + Locking out empty string matching + + Starting a pattern with (*NOTEMPTY) or (*NOTEMPTY_ATSTART) has the same + effect as passing the PCRE2_NOTEMPTY or PCRE2_NOTEMPTY_ATSTART option + to whichever matching function is subsequently called to match the pat- + tern. These options lock out the matching of empty strings, either + entirely, or only at the start of the subject. + + Disabling auto-possessification + + If a pattern starts with (*NO_AUTO_POSSESS), it has the same effect as + setting the PCRE2_NO_AUTO_POSSESS option. This stops PCRE2 from making + quantifiers possessive when what follows cannot match the repeated + item. For example, by default a+b is treated as a++b. For more details, + see the pcre2api documentation. + + Disabling start-up optimizations + + If a pattern starts with (*NO_START_OPT), it has the same effect as + setting the PCRE2_NO_START_OPTIMIZE option. This disables several opti- + mizations for quickly reaching "no match" results. 
For more details, + see the pcre2api documentation. + + Disabling automatic anchoring + + If a pattern starts with (*NO_DOTSTAR_ANCHOR), it has the same effect + as setting the PCRE2_NO_DOTSTAR_ANCHOR option. This disables optimiza- + tions that apply to patterns whose top-level branches all start with .* + (match any number of arbitrary characters). For more details, see the + pcre2api documentation. + + Disabling JIT compilation + + If a pattern that starts with (*NO_JIT) is successfully compiled, an + attempt by the application to apply the JIT optimization by calling + pcre2_jit_compile() is ignored. + + Setting match and recursion limits + + The caller of pcre2_match() can set a limit on the number of times the + internal match() function is called and on the maximum depth of recur- + sive calls. These facilities are provided to catch runaway matches that + are provoked by patterns with huge matching trees (a typical example is + a pattern with nested unlimited repeats) and to avoid running out of + system stack by too much recursion. When one of these limits is + reached, pcre2_match() gives an error return. The limits can also be + set by items at the start of the pattern of the form + + (*LIMIT_MATCH=d) + (*LIMIT_RECURSION=d) + + where d is any number of decimal digits. However, the value of the set- + ting must be less than the value set (or defaulted) by the caller of + pcre2_match() for it to have any effect. In other words, the pattern + writer can lower the limits set by the programmer, but not raise them. + If there is more than one setting of one of these limits, the lower + value is used. + + The match limit is used (but in a different way) when JIT is being + used, but it is not relevant, and is ignored, when matching with + pcre2_dfa_match(). However, the recursion limit is relevant for DFA + matching, which does use some function recursion, in particular, for + recursions within the pattern. 
+ + Newline conventions + + PCRE2 supports five different conventions for indicating line breaks in + strings: a single CR (carriage return) character, a single LF (line- + feed) character, the two-character sequence CRLF, any of the three pre- + ceding, or any Unicode newline sequence. The pcre2api page has further + discussion about newlines, and shows how to set the newline convention + when calling pcre2_compile(). + + It is also possible to specify a newline convention by starting a pat- + tern string with one of the following five sequences: + + (*CR) carriage return + (*LF) linefeed + (*CRLF) carriage return, followed by linefeed + (*ANYCRLF) any of the three above + (*ANY) all Unicode newline sequences + + These override the default and the options given to the compiling func- + tion. For example, on a Unix system where LF is the default newline + sequence, the pattern + + (*CR)a.b + + changes the convention to CR. That pattern matches "a\nb" because LF is + no longer a newline. If more than one of these settings is present, the + last one is used. + + The newline convention affects where the circumflex and dollar asser- + tions are true. It also affects the interpretation of the dot metachar- + acter when PCRE2_DOTALL is not set, and the behaviour of \N. However, + it does not affect what the \R escape sequence matches. By default, + this is any Unicode newline sequence, for Perl compatibility. However, + this can be changed; see the description of \R in the section entitled + "Newline sequences" below. A change of \R setting can be combined with + a change of newline convention. + + Specifying what \R matches + + It is possible to restrict \R to match only CR, LF, or CRLF (instead of + the complete set of Unicode line endings) by setting the option + PCRE2_BSR_ANYCRLF at compile time. This effect can also be achieved by + starting a pattern with (*BSR_ANYCRLF). For completeness, (*BSR_UNI- + CODE) is also recognized, corresponding to PCRE2_BSR_UNICODE. 
+ + +EBCDIC CHARACTER CODES + + PCRE2 can be compiled to run in an environment that uses EBCDIC as its + character code rather than ASCII or Unicode (typically a mainframe sys- + tem). In the sections below, character code values are ASCII or Uni- + code; in an EBCDIC environment these characters may have different code + values, and there are no code points greater than 255. + + +CHARACTERS AND METACHARACTERS + + A regular expression is a pattern that is matched against a subject + string from left to right. Most characters stand for themselves in a + pattern, and match the corresponding characters in the subject. As a + trivial example, the pattern + + The quick brown fox + + matches a portion of a subject string that is identical to itself. When + caseless matching is specified (the PCRE2_CASELESS option), letters are + matched independently of case. + + The power of regular expressions comes from the ability to include + alternatives and repetitions in the pattern. These are encoded in the + pattern by the use of metacharacters, which do not stand for themselves + but instead are interpreted in some special way. + + There are two different sets of metacharacters: those that are recog- + nized anywhere in the pattern except within square brackets, and those + that are recognized within square brackets. Outside square brackets, + the metacharacters are as follows: + + \ general escape character with several uses + ^ assert start of string (or line, in multiline mode) + $ assert end of string (or line, in multiline mode) + . match any character except newline (by default) + [ start character class definition + | start of alternative branch + ( start subpattern + ) end subpattern + ? extends the meaning of ( + also 0 or 1 quantifier + also quantifier minimizer + * 0 or more quantifier + + 1 or more quantifier + also "possessive quantifier" + { start min/max quantifier + + Part of a pattern that is in square brackets is called a "character + class". 
In a character class the only metacharacters are: + + \ general escape character + ^ negate the class, but only if the first character + - indicates character range + [ POSIX character class (only if followed by POSIX + syntax) + ] terminates the character class + + The following sections describe the use of each of the metacharacters. + + +BACKSLASH + + The backslash character has several uses. Firstly, if it is followed by + a character that is not a number or a letter, it takes away any special + meaning that character may have. This use of backslash as an escape + character applies both inside and outside character classes. + + For example, if you want to match a * character, you write \* in the + pattern. This escaping action applies whether or not the following + character would otherwise be interpreted as a metacharacter, so it is + always safe to precede a non-alphanumeric with backslash to specify + that it stands for itself. In particular, if you want to match a back- + slash, you write \\. + + In a UTF mode, only ASCII numbers and letters have any special meaning + after a backslash. All other characters (in particular, those whose + codepoints are greater than 127) are treated as literals. + + If a pattern is compiled with the PCRE2_EXTENDED option, most white + space in the pattern (other than in a character class), and characters + between a # outside a character class and the next newline, inclusive, + are ignored. An escaping backslash can be used to include a white space + or # character as part of the pattern. + + If you want to remove the special meaning from a sequence of charac- + ters, you can do so by putting them between \Q and \E. This is differ- + ent from Perl in that $ and @ are handled as literals in \Q...\E + sequences in PCRE2, whereas in Perl, $ and @ cause variable interpola- + tion. 
Note the following examples: + + Pattern PCRE2 matches Perl matches + + \Qabc$xyz\E abc$xyz abc followed by the + contents of $xyz + \Qabc\$xyz\E abc\$xyz abc\$xyz + \Qabc\E\$\Qxyz\E abc$xyz abc$xyz + + The \Q...\E sequence is recognized both inside and outside character + classes. An isolated \E that is not preceded by \Q is ignored. If \Q + is not followed by \E later in the pattern, the literal interpretation + continues to the end of the pattern (that is, \E is assumed at the + end). If the isolated \Q is inside a character class, this causes an + error, because the character class is not terminated. + + Non-printing characters + + A second use of backslash provides a way of encoding non-printing char- + acters in patterns in a visible manner. There is no restriction on the + appearance of non-printing characters in a pattern, but when a pattern + is being prepared by text editing, it is often easier to use one of the + following escape sequences than the binary character it represents. In + an ASCII or Unicode environment, these escapes are as follows: + + \a alarm, that is, the BEL character (hex 07) + \cx "control-x", where x is any printable ASCII character + \e escape (hex 1B) + \f form feed (hex 0C) + \n linefeed (hex 0A) + \r carriage return (hex 0D) + \t tab (hex 09) + \0dd character with octal code 0dd + \ddd character with octal code ddd, or back reference + \o{ddd..} character with octal code ddd.. + \xhh character with hex code hh + \x{hhh..} character with hex code hhh.. (default mode) + \uhhhh character with hex code hhhh (when PCRE2_ALT_BSUX is set) + + The precise effect of \cx on ASCII characters is as follows: if x is a + lower case letter, it is converted to upper case. Then bit 6 of the + character (hex 40) is inverted. Thus \cA to \cZ become hex 01 to hex 1A + (A is 41, Z is 5A), but \c{ becomes hex 3B ({ is 7B), and \c; becomes + hex 7B (; is 3B). 
If the code unit following \c has a value less than + 32 or greater than 126, a compile-time error occurs. + + When PCRE2 is compiled in EBCDIC mode, \a, \e, \f, \n, \r, and \t gen- + erate the appropriate EBCDIC code values. The \c escape is processed as + specified for Perl in the perlebcdic document. The only characters that + are allowed after \c are A-Z, a-z, or one of @, [, \, ], ^, _, or ?. + Any other character provokes a compile-time error. The sequence \c@ + encodes character code 0; after \c the letters (in either case) encode + characters 1-26 (hex 01 to hex 1A); [, \, ], ^, and _ encode characters + 27-31 (hex 1B to hex 1F), and \c? becomes either 255 (hex FF) or 95 + (hex 5F). + + Thus, apart from \c?, these escapes generate the same character code + values as they do in an ASCII environment, though the meanings of the + values mostly differ. For example, \cG always generates code value 7, + which is BEL in ASCII but DEL in EBCDIC. + + The sequence \c? generates DEL (127, hex 7F) in an ASCII environment, + but because 127 is not a control character in EBCDIC, Perl makes it + generate the APC character. Unfortunately, there are several variants + of EBCDIC. In most of them the APC character has the value 255 (hex + FF), but in the one Perl calls POSIX-BC its value is 95 (hex 5F). If + certain other characters have POSIX-BC values, PCRE2 makes \c? generate + 95; otherwise it generates 255. + + After \0 up to two further octal digits are read. If there are fewer + than two digits, just those that are present are used. Thus the + sequence \0\x\015 specifies two binary zeros followed by a CR character + (code value 13). Make sure you supply two digits after the initial zero + if the pattern character that follows is itself an octal digit. + + The escape \o must be followed by a sequence of octal digits, enclosed + in braces. An error occurs if this is not the case. 
This escape is a + recent addition to Perl; it provides a way of specifying character code + points as octal numbers greater than 0777, and it also allows octal + numbers and back references to be unambiguously specified. + + For greater clarity and unambiguity, it is best to avoid following \ by + a digit greater than zero. Instead, use \o{} or \x{} to specify charac- + ter numbers, and \g{} to specify back references. The following para- + graphs describe the old, ambiguous syntax. + + The handling of a backslash followed by a digit other than 0 is compli- + cated, and Perl has changed over time, causing PCRE2 also to change. + + Outside a character class, PCRE2 reads the digit and any following dig- + its as a decimal number. If the number is less than 10, begins with the + digit 8 or 9, or if there are at least that many previous capturing + left parentheses in the expression, the entire sequence is taken as a + back reference. A description of how this works is given later, follow- + ing the discussion of parenthesized subpatterns. Otherwise, up to + three octal digits are read to form a character code. + + Inside a character class, PCRE2 handles \8 and \9 as the literal char- + acters "8" and "9", and otherwise reads up to three octal digits fol- + lowing the backslash, using them to generate a data character. Any sub- + sequent digits stand for themselves.
For example, outside a character + class: + + \040 is another way of writing an ASCII space + \40 is the same, provided there are fewer than 40 + previous capturing subpatterns + \7 is always a back reference + \11 might be a back reference, or another way of + writing a tab + \011 is always a tab + \0113 is a tab followed by the character "3" + \113 might be a back reference, otherwise the + character with octal code 113 + \377 might be a back reference, otherwise + the value 255 (decimal) + \81 is always a back reference + + Note that octal values of 100 or greater that are specified using this + syntax must not be introduced by a leading zero, because no more than + three octal digits are ever read. + + By default, after \x that is not followed by {, from zero to two hexa- + decimal digits are read (letters can be in upper or lower case). Any + number of hexadecimal digits may appear between \x{ and }. If a charac- + ter other than a hexadecimal digit appears between \x{ and }, or if + there is no terminating }, an error occurs. + + If the PCRE2_ALT_BSUX option is set, the interpretation of \x is as + just described only when it is followed by two hexadecimal digits. Oth- + erwise, it matches a literal "x" character. In this mode, support + for code points greater than 256 is provided by \u, which must be fol- + lowed by four hexadecimal digits; otherwise it matches a literal "u" + character. + + Characters whose value is less than 256 can be defined by either of the + two syntaxes for \x (or by \u in PCRE2_ALT_BSUX mode). There is no dif- + ference in the way they are handled. For example, \xdc is exactly the + same as \x{dc} (or \u00dc in PCRE2_ALT_BSUX mode).
+ + Constraints on character values + + Characters that are specified using octal or hexadecimal numbers are + limited to certain values, as follows: + + 8-bit non-UTF mode less than 0x100 + 8-bit UTF-8 mode less than 0x10ffff and a valid codepoint + 16-bit non-UTF mode less than 0x10000 + 16-bit UTF-16 mode less than 0x10ffff and a valid codepoint + 32-bit non-UTF mode less than 0x100000000 + 32-bit UTF-32 mode less than 0x10ffff and a valid codepoint + + Invalid Unicode codepoints are the range 0xd800 to 0xdfff (the so- + called "surrogate" codepoints), and 0xffef. + + Escape sequences in character classes + + All the sequences that define a single character value can be used both + inside and outside character classes. In addition, inside a character + class, \b is interpreted as the backspace character (hex 08). + + \N is not allowed in a character class. \B, \R, and \X are not special + inside a character class. Like other unrecognized alphabetic escape + sequences, they cause an error. Outside a character class, these + sequences have different meanings. + + Unsupported escape sequences + + In Perl, the sequences \l, \L, \u, and \U are recognized by its string + handler and used to modify the case of following characters. By + default, PCRE2 does not support these escape sequences. However, if the + PCRE2_ALT_BSUX option is set, \U matches a "U" character, and \u can be + used to define a character by code point, as described in the previous + section. + + Absolute and relative back references + + The sequence \g followed by a signed or unsigned number, optionally + enclosed in braces, is an absolute or relative back reference. A named + back reference can be coded as \g{name}. Back references are discussed + later, following the discussion of parenthesized subpatterns. 
+ + Absolute and relative subroutine calls + + For compatibility with Oniguruma, the non-Perl syntax \g followed by a + name or a number enclosed either in angle brackets or single quotes, is + an alternative syntax for referencing a subpattern as a "subroutine". + Details are discussed later. Note that \g{...} (Perl syntax) and + \g<...> (Oniguruma syntax) are not synonymous. The former is a back + reference; the latter is a subroutine call. + + Generic character types + + Another use of backslash is for specifying generic character types: + + \d any decimal digit + \D any character that is not a decimal digit + \h any horizontal white space character + \H any character that is not a horizontal white space character + \s any white space character + \S any character that is not a white space character + \v any vertical white space character + \V any character that is not a vertical white space character + \w any "word" character + \W any "non-word" character + + There is also the single sequence \N, which matches a non-newline char- + acter. This is the same as the "." metacharacter when PCRE2_DOTALL is + not set. Perl also uses \N to match characters by name; PCRE2 does not + support this. + + Each pair of lower and upper case escape sequences partitions the com- + plete set of characters into two disjoint sets. Any given character + matches one, and only one, of each pair. The sequences can appear both + inside and outside character classes. They each match one character of + the appropriate type. If the current matching point is at the end of + the subject string, all of them fail, because there is no character to + match. + + The default \s characters are HT (9), LF (10), VT (11), FF (12), CR + (13), and space (32), which are defined as white space in the "C" + locale. This list may vary if locale-specific matching is taking place. 
+ For example, in some locales the "non-breaking space" character (\xA0) + is recognized as white space, and in others the VT character is not. + + A "word" character is an underscore or any character that is a letter + or digit. By default, the definition of letters and digits is con- + trolled by PCRE2's low-valued character tables, and may vary if locale- + specific matching is taking place (see "Locale support" in the pcre2api + page). For example, in a French locale such as "fr_FR" in Unix-like + systems, or "french" in Windows, some character codes greater than 127 + are used for accented letters, and these are then matched by \w. The + use of locales with Unicode is discouraged. + + By default, characters whose code points are greater than 127 never + match \d, \s, or \w, and always match \D, \S, and \W, although this may + be different for characters in the range 128-255 when locale-specific + matching is happening. These escape sequences retain their original + meanings from before Unicode support was available, mainly for effi- + ciency reasons. If the PCRE2_UCP option is set, the behaviour is + changed so that Unicode properties are used to determine character + types, as follows: + + \d any character that matches \p{Nd} (decimal digit) + \s any character that matches \p{Z} or \h or \v + \w any character that matches \p{L} or \p{N}, plus underscore + + The upper case escapes match the inverse sets of characters. Note that + \d matches only decimal digits, whereas \w matches any Unicode digit, + as well as any Unicode letter, and underscore. Note also that PCRE2_UCP + affects \b, and \B because they are defined in terms of \w and \W. + Matching these sequences is noticeably slower when PCRE2_UCP is set. + + The sequences \h, \H, \v, and \V, in contrast to the other sequences, + which match only ASCII characters by default, always match a specific + list of code points, whether or not PCRE2_UCP is set. 
The horizontal + space characters are: + + U+0009 Horizontal tab (HT) + U+0020 Space + U+00A0 Non-break space + U+1680 Ogham space mark + U+180E Mongolian vowel separator + U+2000 En quad + U+2001 Em quad + U+2002 En space + U+2003 Em space + U+2004 Three-per-em space + U+2005 Four-per-em space + U+2006 Six-per-em space + U+2007 Figure space + U+2008 Punctuation space + U+2009 Thin space + U+200A Hair space + U+202F Narrow no-break space + U+205F Medium mathematical space + U+3000 Ideographic space + + The vertical space characters are: + + U+000A Linefeed (LF) + U+000B Vertical tab (VT) + U+000C Form feed (FF) + U+000D Carriage return (CR) + U+0085 Next line (NEL) + U+2028 Line separator + U+2029 Paragraph separator + + In 8-bit, non-UTF-8 mode, only the characters with code points less + than 256 are relevant. + + Newline sequences + + Outside a character class, by default, the escape sequence \R matches + any Unicode newline sequence. In 8-bit non-UTF-8 mode \R is equivalent + to the following: + + (?>\r\n|\n|\x0b|\f|\r|\x85) + + This is an example of an "atomic group", details of which are given + below. This particular group matches either the two-character sequence + CR followed by LF, or one of the single characters LF (linefeed, + U+000A), VT (vertical tab, U+000B), FF (form feed, U+000C), CR (car- + riage return, U+000D), or NEL (next line, U+0085). Because this is an + atomic group, the two-character sequence is treated as a single unit + that cannot be split. + + In other modes, two additional characters whose codepoints are greater + than 255 are added: LS (line separator, U+2028) and PS (paragraph sepa- + rator, U+2029). Unicode support is not needed for these characters to + be recognized. + + It is possible to restrict \R to match only CR, LF, or CRLF (instead of + the complete set of Unicode line endings) by setting the option + PCRE2_BSR_ANYCRLF at compile time. (BSR is an abbreviation for "back- + slash R".)
This can be made the default when PCRE2 is built; if this is + the case, the other behaviour can be requested via the PCRE2_BSR_UNI- + CODE option. It is also possible to specify these settings by starting + a pattern string with one of the following sequences: + + (*BSR_ANYCRLF) CR, LF, or CRLF only + (*BSR_UNICODE) any Unicode newline sequence + + These override the default and the options given to the compiling func- + tion. Note that these special settings, which are not Perl-compatible, + are recognized only at the very start of a pattern, and that they must + be in upper case. If more than one of them is present, the last one is + used. They can be combined with a change of newline convention; for + example, a pattern can start with: + + (*ANY)(*BSR_ANYCRLF) + + They can also be combined with the (*UTF) or (*UCP) special sequences. + Inside a character class, \R is treated as an unrecognized escape + sequence, and causes an error. + + Unicode character properties + + When PCRE2 is built with Unicode support (the default), three addi- + tional escape sequences that match characters with specific properties + are available. In 8-bit non-UTF-8 mode, these sequences are of course + limited to testing characters whose codepoints are less than 256, but + they do work in this mode. The extra escape sequences are: + + \p{xx} a character with the xx property + \P{xx} a character without the xx property + \X a Unicode extended grapheme cluster + + The property names represented by xx above are limited to the Unicode + script names, the general category properties, "Any", which matches any + character (including newline), and some special PCRE2 properties + (described in the next section). Other Perl properties such as "InMu- + sicalSymbols" are not supported by PCRE2. Note that \P{Any} does not + match any characters, so always causes a match failure. + + Sets of Unicode characters are defined as belonging to certain scripts. 
+ A character from one of these sets can be matched using a script name. + For example: + + \p{Greek} + \P{Han} + + Those that are not part of an identified script are lumped together as + "Common". The current list of scripts is: + + Ahom, Anatolian_Hieroglyphs, Arabic, Armenian, Avestan, Balinese, + Bamum, Bassa_Vah, Batak, Bengali, Bopomofo, Brahmi, Braille, Buginese, + Buhid, Canadian_Aboriginal, Carian, Caucasian_Albanian, Chakma, Cham, + Cherokee, Common, Coptic, Cuneiform, Cypriot, Cyrillic, Deseret, + Devanagari, Duployan, Egyptian_Hieroglyphs, Elbasan, Ethiopic, Geor- + gian, Glagolitic, Gothic, Grantha, Greek, Gujarati, Gurmukhi, Han, + Hangul, Hanunoo, Hatran, Hebrew, Hiragana, Imperial_Aramaic, Inherited, + Inscriptional_Pahlavi, Inscriptional_Parthian, Javanese, Kaithi, Kan- + nada, Katakana, Kayah_Li, Kharoshthi, Khmer, Khojki, Khudawadi, Lao, + Latin, Lepcha, Limbu, Linear_A, Linear_B, Lisu, Lycian, Lydian, Maha- + jani, Malayalam, Mandaic, Manichaean, Meetei_Mayek, Mende_Kikakui, + Meroitic_Cursive, Meroitic_Hieroglyphs, Miao, Modi, Mongolian, Mro, + Multani, Myanmar, Nabataean, New_Tai_Lue, Nko, Ogham, Ol_Chiki, + Old_Hungarian, Old_Italic, Old_North_Arabian, Old_Permic, Old_Persian, + Old_South_Arabian, Old_Turkic, Oriya, Osmanya, Pahawh_Hmong, Palmyrene, + Pau_Cin_Hau, Phags_Pa, Phoenician, Psalter_Pahlavi, Rejang, Runic, + Samaritan, Saurashtra, Sharada, Shavian, Siddham, SignWriting, Sinhala, + Sora_Sompeng, Sundanese, Syloti_Nagri, Syriac, Tagalog, Tagbanwa, + Tai_Le, Tai_Tham, Tai_Viet, Takri, Tamil, Telugu, Thaana, Thai, + Tibetan, Tifinagh, Tirhuta, Ugaritic, Vai, Warang_Citi, Yi. + + Each character has exactly one Unicode general category property, spec- + ified by a two-letter abbreviation. For compatibility with Perl, nega- + tion can be specified by including a circumflex between the opening + brace and the property name. For example, \p{^Lu} is the same as + \P{Lu}. 
+ + If only one letter is specified with \p or \P, it includes all the gen- + eral category properties that start with that letter. In this case, in + the absence of negation, the curly brackets in the escape sequence are + optional; these two examples have the same effect: + + \p{L} + \pL + + The following general category property codes are supported: + + C Other + Cc Control + Cf Format + Cn Unassigned + Co Private use + Cs Surrogate + + L Letter + Ll Lower case letter + Lm Modifier letter + Lo Other letter + Lt Title case letter + Lu Upper case letter + + M Mark + Mc Spacing mark + Me Enclosing mark + Mn Non-spacing mark + + N Number + Nd Decimal number + Nl Letter number + No Other number + + P Punctuation + Pc Connector punctuation + Pd Dash punctuation + Pe Close punctuation + Pf Final punctuation + Pi Initial punctuation + Po Other punctuation + Ps Open punctuation + + S Symbol + Sc Currency symbol + Sk Modifier symbol + Sm Mathematical symbol + So Other symbol + + Z Separator + Zl Line separator + Zp Paragraph separator + Zs Space separator + + The special property L& is also supported: it matches a character that + has the Lu, Ll, or Lt property, in other words, a letter that is not + classified as a modifier or "other". + + The Cs (Surrogate) property applies only to characters in the range + U+D800 to U+DFFF. Such characters are not valid in Unicode strings and + so cannot be tested by PCRE2, unless UTF validity checking has been + turned off (see the discussion of PCRE2_NO_UTF_CHECK in the pcre2api + page). Perl does not support the Cs property. + + The long synonyms for property names that Perl supports (such as + \p{Letter}) are not supported by PCRE2, nor is it permitted to prefix + any of these properties with "Is". + + No character that is in the Unicode table has the Cn (unassigned) prop- + erty. Instead, this property is assumed for any code point that is not + in the Unicode table. 
+ + Specifying caseless matching does not affect these escape sequences. + For example, \p{Lu} always matches only upper case letters. This is + different from the behaviour of current versions of Perl. + + Matching characters by Unicode property is not fast, because PCRE2 has + to do a multistage table lookup in order to find a character's prop- + erty. That is why the traditional escape sequences such as \d and \w do + not use Unicode properties in PCRE2 by default, though you can make + them do so by setting the PCRE2_UCP option or by starting the pattern + with (*UCP). + + Extended grapheme clusters + + The \X escape matches any number of Unicode characters that form an + "extended grapheme cluster", and treats the sequence as an atomic group + (see below). Unicode supports various kinds of composite character by + giving each character a grapheme breaking property, and having rules + that use these properties to define the boundaries of extended grapheme + clusters. \X always matches at least one character. Then it decides + whether to add additional characters according to the following rules + for ending a cluster: + + 1. End at the end of the subject string. + + 2. Do not end between CR and LF; otherwise end after any control char- + acter. + + 3. Do not break Hangul (a Korean script) syllable sequences. Hangul + characters are of five types: L, V, T, LV, and LVT. An L character may + be followed by an L, V, LV, or LVT character; an LV or V character may + be followed by a V or T character; an LVT or T character may be followed + only by a T character. + + 4. Do not end before extending characters or spacing marks. Characters + with the "mark" property always have the "extend" grapheme breaking + property. + + 5. Do not end after prepend characters. + + 6. Otherwise, end the cluster.
+ + PCRE2's additional properties + + As well as the standard Unicode properties described above, PCRE2 sup- + ports four more that make it possible to convert traditional escape + sequences such as \w and \s to use Unicode properties. PCRE2 uses these + non-standard, non-Perl properties internally when PCRE2_UCP is set. + However, they may also be used explicitly. These properties are: + + Xan Any alphanumeric character + Xps Any POSIX space character + Xsp Any Perl space character + Xwd Any Perl "word" character + + Xan matches characters that have either the L (letter) or the N (num- + ber) property. Xps matches the characters tab, linefeed, vertical tab, + form feed, or carriage return, and any other character that has the Z + (separator) property. Xsp is the same as Xps; in PCRE1 it used to + exclude vertical tab, for Perl compatibility, but Perl changed. Xwd + matches the same characters as Xan, plus underscore. + + There is another non-standard property, Xuc, which matches any charac- + ter that can be represented by a Universal Character Name in C++ and + other programming languages. These are the characters $, @, ` (grave + accent), and all characters with Unicode code points greater than or + equal to U+00A0, except for the surrogates U+D800 to U+DFFF. Note that + most base (ASCII) characters are excluded. (Universal Character Names + are of the form \uHHHH or \UHHHHHHHH where H is a hexadecimal digit. + Note that the Xuc property does not match these sequences but the char- + acters that they represent.) + + Resetting the match start + + The escape sequence \K causes any previously matched characters not to + be included in the final matched sequence. For example, the pattern: + + foo\Kbar + + matches "foobar", but reports that it has matched "bar". This feature + is similar to a lookbehind assertion (described below). 
However, in + this case, the part of the subject before the real match does not have + to be of fixed length, as lookbehind assertions do. The use of \K does + not interfere with the setting of captured substrings. For example, + when the pattern + + (foo)\Kbar + + matches "foobar", the first substring is still set to "foo". + + Perl documents that the use of \K within assertions is "not well + defined". In PCRE2, \K is acted upon when it occurs inside positive + assertions, but is ignored in negative assertions. Note that when a + pattern such as (?=ab\K) matches, the reported start of the match can + be greater than the end of the match. + + Simple assertions + + The final use of backslash is for certain simple assertions. An asser- + tion specifies a condition that has to be met at a particular point in + a match, without consuming any characters from the subject string. The + use of subpatterns for more complicated assertions is described below. + The backslashed assertions are: + + \b matches at a word boundary + \B matches when not at a word boundary + \A matches at the start of the subject + \Z matches at the end of the subject + also matches before a newline at the end of the subject + \z matches only at the end of the subject + \G matches at the first matching position in the subject + + Inside a character class, \b has a different meaning; it matches the + backspace character. If any other of these assertions appears in a + character class, an "invalid escape sequence" error is generated. + + A word boundary is a position in the subject string where the current + character and the previous character do not both match \w or \W (i.e. + one matches \w and the other matches \W), or the start or end of the + string if the first or last character matches \w, respectively. In a + UTF mode, the meanings of \w and \W can be changed by setting the + PCRE2_UCP option. When this is done, it also affects \b and \B. 
Neither + PCRE2 nor Perl has a separate "start of word" or "end of word" metase- + quence. However, whatever follows \b normally determines which it is. + For example, the fragment \ba matches "a" at the start of a word. + + The \A, \Z, and \z assertions differ from the traditional circumflex + and dollar (described in the next section) in that they only ever match + at the very start and end of the subject string, whatever options are + set. Thus, they are independent of multiline mode. These three asser- + tions are not affected by the PCRE2_NOTBOL or PCRE2_NOTEOL options, + which affect only the behaviour of the circumflex and dollar metachar- + acters. However, if the startoffset argument of pcre2_match() is non- + zero, indicating that matching is to start at a point other than the + beginning of the subject, \A can never match. The difference between + \Z and \z is that \Z matches before a newline at the end of the string + as well as at the very end, whereas \z matches only at the end. + + The \G assertion is true only when the current matching position is at + the start point of the match, as specified by the startoffset argument + of pcre2_match(). It differs from \A when the value of startoffset is + non-zero. By calling pcre2_match() multiple times with appropriate + arguments, you can mimic Perl's /g option, and it is in this kind of + implementation where \G can be useful. + + Note, however, that PCRE2's interpretation of \G, as the start of the + current match, is subtly different from Perl's, which defines it as the + end of the previous match. In Perl, these can be different when the + previously matched string was empty. Because PCRE2 does just one match + at a time, it cannot reproduce this behaviour. + + If all the alternatives of a pattern begin with \G, the expression is + anchored to the starting match position, and the "anchored" flag is set + in the compiled regular expression. 
+ + +CIRCUMFLEX AND DOLLAR + + The circumflex and dollar metacharacters are zero-width assertions. + That is, they test for a particular condition being true without con- + suming any characters from the subject string. These two metacharacters + are concerned with matching the starts and ends of lines. If the new- + line convention is set so that only the two-character sequence CRLF is + recognized as a newline, isolated CR and LF characters are treated as + ordinary data characters, and are not recognized as newlines. + + Outside a character class, in the default matching mode, the circumflex + character is an assertion that is true only if the current matching + point is at the start of the subject string. If the startoffset argu- + ment of pcre2_match() is non-zero, or if PCRE2_NOTBOL is set, circum- + flex can never match if the PCRE2_MULTILINE option is unset. Inside a + character class, circumflex has an entirely different meaning (see + below). + + Circumflex need not be the first character of the pattern if a number + of alternatives are involved, but it should be the first thing in each + alternative in which it appears if the pattern is ever to match that + branch. If all possible alternatives start with a circumflex, that is, + if the pattern is constrained to match only at the start of the sub- + ject, it is said to be an "anchored" pattern. (There are also other + constructs that can cause a pattern to be anchored.) + + The dollar character is an assertion that is true only if the current + matching point is at the end of the subject string, or immediately + before a newline at the end of the string (by default), unless + PCRE2_NOTEOL is set. Note, however, that it does not actually match the + newline. Dollar need not be the last character of the pattern if a num- + ber of alternatives are involved, but it should be the last item in any + branch in which it appears. Dollar has no special meaning in a charac- + ter class. 
+ + The meaning of dollar can be changed so that it matches only at the + very end of the string, by setting the PCRE2_DOLLAR_ENDONLY option at + compile time. This does not affect the \Z assertion. + + The meanings of the circumflex and dollar metacharacters are changed if + the PCRE2_MULTILINE option is set. When this is the case, a dollar + character matches before any newlines in the string, as well as at the + very end, and a circumflex matches immediately after internal newlines + as well as at the start of the subject string. It does not match after + a newline that ends the string, for compatibility with Perl. However, + this can be changed by setting the PCRE2_ALT_CIRCUMFLEX option. + + For example, the pattern /^abc$/ matches the subject string "def\nabc" + (where \n represents a newline) in multiline mode, but not otherwise. + Consequently, patterns that are anchored in single line mode because + all branches start with ^ are not anchored in multiline mode, and a + match for circumflex is possible when the startoffset argument of + pcre2_match() is non-zero. The PCRE2_DOLLAR_ENDONLY option is ignored + if PCRE2_MULTILINE is set. + + When the newline convention (see "Newline conventions" below) recog- + nizes the two-character sequence CRLF as a newline, this is preferred, + even if the single characters CR and LF are also recognized as new- + lines. For example, if the newline convention is "any", a multiline + mode circumflex matches before "xyz" in the string "abc\r\nxyz" rather + than after CR, even though CR on its own is a valid newline. (It also + matches at the very start of the string, of course.) + + Note that the sequences \A, \Z, and \z can be used to match the start + and end of the subject in both modes, and if all branches of a pattern + start with \A it is always anchored, whether or not PCRE2_MULTILINE is + set. 
+ + +FULL STOP (PERIOD, DOT) AND \N + + Outside a character class, a dot in the pattern matches any one charac- + ter in the subject string except (by default) a character that signi- + fies the end of a line. + + When a line ending is defined as a single character, dot never matches + that character; when the two-character sequence CRLF is used, dot does + not match CR if it is immediately followed by LF, but otherwise it + matches all characters (including isolated CRs and LFs). When any Uni- + code line endings are being recognized, dot does not match CR or LF or + any of the other line ending characters. + + The behaviour of dot with regard to newlines can be changed. If the + PCRE2_DOTALL option is set, a dot matches any one character, without + exception. If the two-character sequence CRLF is present in the sub- + ject string, it takes two dots to match it. + + The handling of dot is entirely independent of the handling of circum- + flex and dollar, the only relationship being that they both involve + newlines. Dot has no special meaning in a character class. + + The escape sequence \N behaves like a dot, except that it is not + affected by the PCRE2_DOTALL option. In other words, it matches any + character except one that signifies the end of a line. Perl also uses + \N to match characters by name; PCRE2 does not support this. + + +MATCHING A SINGLE CODE UNIT + + Outside a character class, the escape sequence \C matches any one code + unit, whether or not a UTF mode is set. In the 8-bit library, one code + unit is one byte; in the 16-bit library it is a 16-bit unit; in the + 32-bit library it is a 32-bit unit. Unlike a dot, \C always matches + line-ending characters. The feature is provided in Perl in order to + match individual bytes in UTF-8 mode, but it is unclear how it can use- + fully be used. 
+ + Because \C breaks up characters into individual code units, matching + one unit with \C in UTF-8 or UTF-16 mode means that the rest of the + string may start with a malformed UTF character. This has undefined + results, because PCRE2 assumes that it is matching character by charac- + ter in a valid UTF string (by default it checks the subject string's + validity at the start of processing unless the PCRE2_NO_UTF_CHECK + option is used). + + An application can lock out the use of \C by setting the + PCRE2_NEVER_BACKSLASH_C option when compiling a pattern. It is also + possible to build PCRE2 with the use of \C permanently disabled. + + PCRE2 does not allow \C to appear in lookbehind assertions (described + below) in UTF-8 or UTF-16 modes, because this would make it impossible + to calculate the length of the lookbehind. Neither the alternative + matching function pcre2_dfa_match() nor the JIT optimizer support \C in + these UTF modes. The former gives a match-time error; the latter fails + to optimize and so the match is always run using the interpreter. + + In the 32-bit library, however, \C is always supported (when not + explicitly locked out) because it always matches a single code unit, + whether or not UTF-32 is specified. + + In general, the \C escape sequence is best avoided. However, one way of + using it that avoids the problem of malformed UTF-8 or UTF-16 charac- + ters is to use a lookahead to check the length of the next character, + as in this pattern, which could be used with a UTF-8 string (ignore + white space and line breaks): + + (?| (?=[\x00-\x7f])(\C) | + (?=[\x80-\x{7ff}])(\C)(\C) | + (?=[\x{800}-\x{ffff}])(\C)(\C)(\C) | + (?=[\x{10000}-\x{1fffff}])(\C)(\C)(\C)(\C)) + + In this example, a group that starts with (?| resets the capturing + parentheses numbers in each alternative (see "Duplicate Subpattern Num- + bers" below). 
The assertions at the start of each branch check the next + UTF-8 character for values whose encoding uses 1, 2, 3, or 4 bytes, + respectively. The character's individual bytes are then captured by the + appropriate number of \C groups. + + +SQUARE BRACKETS AND CHARACTER CLASSES + + An opening square bracket introduces a character class, terminated by a + closing square bracket. A closing square bracket on its own is not spe- + cial by default. If a closing square bracket is required as a member + of the class, it should be the first data character in the class (after + an initial circumflex, if present) or escaped with a backslash. This + means that, by default, an empty class cannot be defined. However, if + the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing square bracket at + the start does end the (empty) class. + + A character class matches a single character in the subject. A matched + character must be in the set of characters defined by the class, unless + the first character in the class definition is a circumflex, in which + case the subject character must not be in the set defined by the class. + If a circumflex is actually required as a member of the class, ensure + it is not the first character, or escape it with a backslash. + + For example, the character class [aeiou] matches any lower case vowel, + while [^aeiou] matches any character that is not a lower case vowel. + Note that a circumflex is just a convenient notation for specifying the + characters that are in the class by enumerating those that are not. A + class that starts with a circumflex is not an assertion; it still con- + sumes a character from the subject string, and therefore it fails if + the current pointer is at the end of the string. 
+ + When caseless matching is set, any letters in a class represent both + their upper case and lower case versions, so for example, a caseless + [aeiou] matches "A" as well as "a", and a caseless [^aeiou] does not + match "A", whereas a caseful version would. + + Characters that might indicate line breaks are never treated in any + special way when matching character classes, whatever line-ending + sequence is in use, and whatever setting of the PCRE2_DOTALL and + PCRE2_MULTILINE options is used. A class such as [^a] always matches + one of these characters. + + The character escape sequences \d, \D, \h, \H, \p, \P, \s, \S, \v, \V, + \w, and \W may appear in a character class, and add the characters that + they match to the class. For example, [\dABCDEF] matches any hexadeci- + mal digit. In UTF modes, the PCRE2_UCP option affects the meanings of + \d, \s, \w and their upper case partners, just as it does when they + appear outside a character class, as described in the section entitled + "Generic character types" above. The escape sequence \b has a different + meaning inside a character class; it matches the backspace character. + The sequences \B, \N, \R, and \X are not special inside a character + class. Like any other unrecognized escape sequences, they cause an + error. + + The minus (hyphen) character can be used to specify a range of charac- + ters in a character class. For example, [d-m] matches any letter + between d and m, inclusive. If a minus character is required in a + class, it must be escaped with a backslash or appear in a position + where it cannot be interpreted as indicating a range, typically as the + first or last character in the class, or immediately after a range. For + example, [b-d-z] matches letters in the range b to d, a hyphen charac- + ter, or z. 
+ + Perl treats a hyphen as a literal if it appears before or after a POSIX + class (see below) or a character type escape such as \d, but gives a + warning in its warning mode, as this is most likely a user error. As + PCRE2 has no facility for warning, an error is given in these cases. + + It is not possible to have the literal character "]" as the end charac- + ter of a range. A pattern such as [W-]46] is interpreted as a class of + two characters ("W" and "-") followed by a literal string "46]", so it + would match "W46]" or "-46]". However, if the "]" is escaped with a + backslash it is interpreted as the end of range, so [W-\]46] is inter- + preted as a class containing a range followed by two other characters. + The octal or hexadecimal representation of "]" can also be used to end + a range. + + Ranges normally include all code points between the start and end char- + acters, inclusive. They can also be used for code points specified + numerically, for example [\000-\037]. Ranges can include any characters + that are valid for the current mode. + + There is a special case in EBCDIC environments for ranges whose end + points are both specified as literal letters in the same case. For com- + patibility with Perl, EBCDIC code points within the range that are not + letters are omitted. For example, [h-k] matches only four characters, + even though the codes for h and k are 0x88 and 0x92, a range of 11 code + points. However, if the range is specified numerically, for example, + [\x88-\x92] or [h-\x92], all code points are included. + + If a range that includes letters is used when caseless matching is set, + it matches the letters in either case. For example, [W-c] is equivalent + to [][\\^_`wxyzabc], matched caselessly, and in a non-UTF mode, if + character tables for a French locale are in use, [\xc8-\xcb] matches + accented E characters in both cases.
+ + A circumflex can conveniently be used with the upper case character + types to specify a more restricted set of characters than the matching + lower case type. For example, the class [^\W_] matches any letter or + digit, but not underscore, whereas [\w] includes underscore. A positive + character class should be read as "something OR something OR ..." and a + negative class as "NOT something AND NOT something AND NOT ...". + + The only metacharacters that are recognized in character classes are + backslash, hyphen (only where it can be interpreted as specifying a + range), circumflex (only at the start), opening square bracket (only + when it can be interpreted as introducing a POSIX class name, or for a + special compatibility feature - see the next two sections), and the + terminating closing square bracket. However, escaping other non- + alphanumeric characters does no harm. + + +POSIX CHARACTER CLASSES + + Perl supports the POSIX notation for character classes. This uses names + enclosed by [: and :] within the enclosing square brackets. PCRE2 also + supports this notation. For example, + + [01[:alpha:]%] + + matches "0", "1", any alphabetic character, or "%". The supported class + names are: + + alnum letters and digits + alpha letters + ascii character codes 0 - 127 + blank space or tab only + cntrl control characters + digit decimal digits (same as \d) + graph printing characters, excluding space + lower lower case letters + print printing characters, including space + punct printing characters, excluding letters and digits and space + space white space (the same as \s from PCRE2 8.34) + upper upper case letters + word "word" characters (same as \w) + xdigit hexadecimal digits + + The default "space" characters are HT (9), LF (10), VT (11), FF (12), + CR (13), and space (32). If locale-specific matching is taking place, + the list of space characters may be different; there may be fewer or + more of them. "Space" and \s match the same set of characters. 
+ + The name "word" is a Perl extension, and "blank" is a GNU extension + from Perl 5.8. Another Perl extension is negation, which is indicated + by a ^ character after the colon. For example, + + [12[:^digit:]] + + matches "1", "2", or any non-digit. PCRE2 (and Perl) also recognize the + POSIX syntax [.ch.] and [=ch=] where "ch" is a "collating element", but + these are not supported, and an error is given if they are encountered. + + By default, characters with values greater than 127 do not match any of + the POSIX character classes, although this may be different for charac- + ters in the range 128-255 when locale-specific matching is happening. + However, if the PCRE2_UCP option is passed to pcre2_compile(), some of + the classes are changed so that Unicode character properties are used. + This is achieved by replacing certain POSIX classes with other + sequences, as follows: + + [:alnum:] becomes \p{Xan} + [:alpha:] becomes \p{L} + [:blank:] becomes \h + [:cntrl:] becomes \p{Cc} + [:digit:] becomes \p{Nd} + [:lower:] becomes \p{Ll} + [:space:] becomes \p{Xps} + [:upper:] becomes \p{Lu} + [:word:] becomes \p{Xwd} + + Negated versions, such as [:^alpha:] use \P instead of \p. Three other + POSIX classes are handled specially in UCP mode: + + [:graph:] This matches characters that have glyphs that mark the page + when printed. In Unicode property terms, it matches all char- + acters with the L, M, N, P, S, or Cf properties, except for: + + U+061C Arabic Letter Mark + U+180E Mongolian Vowel Separator + U+2066 - U+2069 Various "isolate"s + + + [:print:] This matches the same characters as [:graph:] plus space + characters that are not controls, that is, characters with + the Zs property. + + [:punct:] This matches all characters that have the Unicode P (punctua- + tion) property, plus those characters with code points less + than 256 that have the S (Symbol) property. 
+ + The other POSIX classes are unchanged, and match only characters with + code points less than 256. + + +COMPATIBILITY FEATURE FOR WORD BOUNDARIES + + In the POSIX.2 compliant library that was included in 4.4BSD Unix, the + ugly syntax [[:<:]] and [[:>:]] is used for matching "start of word" + and "end of word". PCRE2 treats these items as follows: + + [[:<:]] is converted to \b(?=\w) + [[:>:]] is converted to \b(?<=\w) + + Only these exact character sequences are recognized. A sequence such as + [a[:<:]b] provokes error for an unrecognized POSIX class name. This + support is not compatible with Perl. It is provided to help migrations + from other environments, and is best not used in any new patterns. Note + that \b matches at the start and the end of a word (see "Simple asser- + tions" above), and in a Perl-style pattern the preceding or following + character normally shows which is wanted, without the need for the + assertions that are used above in order to give exactly the POSIX be- + haviour. + + +VERTICAL BAR + + Vertical bar characters are used to separate alternative patterns. For + example, the pattern + + gilbert|sullivan + + matches either "gilbert" or "sullivan". Any number of alternatives may + appear, and an empty alternative is permitted (matching the empty + string). The matching process tries each alternative in turn, from left + to right, and the first one that succeeds is used. If the alternatives + are within a subpattern (defined below), "succeeds" means matching the + rest of the main pattern as well as the alternative in the subpattern. + + +INTERNAL OPTION SETTING + + The settings of the PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL, and + PCRE2_EXTENDED options (which are Perl-compatible) can be changed from + within the pattern by a sequence of Perl option letters enclosed + between "(?" and ")". 
The option letters are + + i for PCRE2_CASELESS + m for PCRE2_MULTILINE + s for PCRE2_DOTALL + x for PCRE2_EXTENDED + + For example, (?im) sets caseless, multiline matching. It is also possi- + ble to unset these options by preceding the letter with a hyphen, and a + combined setting and unsetting such as (?im-sx), which sets PCRE2_CASE- + LESS and PCRE2_MULTILINE while unsetting PCRE2_DOTALL and + PCRE2_EXTENDED, is also permitted. If a letter appears both before and + after the hyphen, the option is unset. An empty options setting "(?)" + is allowed. Needless to say, it has no effect. + + The PCRE2-specific options PCRE2_DUPNAMES and PCRE2_UNGREEDY can be + changed in the same way as the Perl-compatible options by using the + characters J and U respectively. + + When one of these option changes occurs at top level (that is, not + inside subpattern parentheses), the change applies to the remainder of + the pattern that follows. An option change within a subpattern (see + below for a description of subpatterns) affects only that part of the + subpattern that follows it, so + + (a(?i)b)c + + matches abc and aBc and no other strings (assuming PCRE2_CASELESS is + not used). By this means, options can be made to have different set- + tings in different parts of the pattern. Any changes made in one alter- + native do carry on into subsequent branches within the same subpattern. + For example, + + (a(?i)b|c) + + matches "ab", "aB", "c", and "C", even though when matching "C" the + first branch is abandoned before the option setting. This is because + the effects of option settings happen at compile time. There would be + some very weird behaviour otherwise. + + As a convenient shorthand, if any option settings are required at the + start of a non-capturing subpattern (see the next section), the option + letters may appear between the "?" and the ":". Thus the two patterns + + (?i:saturday|sunday) + (?:(?i)saturday|sunday) + + match exactly the same set of strings. 
+ + Note: There are other PCRE2-specific options that can be set by the + application when the compiling function is called. The pattern can con- + tain special leading sequences such as (*CRLF) to override what the + application has set or what has been defaulted. Details are given in + the section entitled "Newline sequences" above. There are also the + (*UTF) and (*UCP) leading sequences that can be used to set UTF and + Unicode property modes; they are equivalent to setting the PCRE2_UTF + and PCRE2_UCP options, respectively. However, the application can set + the PCRE2_NEVER_UTF and PCRE2_NEVER_UCP options, which lock out the use + of the (*UTF) and (*UCP) sequences. + + +SUBPATTERNS + + Subpatterns are delimited by parentheses (round brackets), which can be + nested. Turning part of a pattern into a subpattern does two things: + + 1. It localizes a set of alternatives. For example, the pattern + + cat(aract|erpillar|) + + matches "cataract", "caterpillar", or "cat". Without the parentheses, + it would match "cataract", "erpillar" or an empty string. + + 2. It sets up the subpattern as a capturing subpattern. This means + that, when the whole pattern matches, the portion of the subject string + that matched the subpattern is passed back to the caller, separately + from the portion that matched the whole pattern. (This applies only to + the traditional matching function; the DFA matching function does not + support capturing.) + + Opening parentheses are counted from left to right (starting from 1) to + obtain numbers for the capturing subpatterns. For example, if the + string "the red king" is matched against the pattern + + the ((red|white) (king|queen)) + + the captured substrings are "red king", "red", and "king", and are num- + bered 1, 2, and 3, respectively. + + The fact that plain parentheses fulfil two functions is not always + helpful. There are often times when a grouping subpattern is required + without a capturing requirement. 
If an opening parenthesis is followed + by a question mark and a colon, the subpattern does not do any captur- + ing, and is not counted when computing the number of any subsequent + capturing subpatterns. For example, if the string "the white queen" is + matched against the pattern + + the ((?:red|white) (king|queen)) + + the captured substrings are "white queen" and "queen", and are numbered + 1 and 2. The maximum number of capturing subpatterns is 65535. + + As a convenient shorthand, if any option settings are required at the + start of a non-capturing subpattern, the option letters may appear + between the "?" and the ":". Thus the two patterns + + (?i:saturday|sunday) + (?:(?i)saturday|sunday) + + match exactly the same set of strings. Because alternative branches are + tried from left to right, and options are not reset until the end of + the subpattern is reached, an option setting in one branch does affect + subsequent branches, so the above patterns match "SUNDAY" as well as + "Saturday". + + +DUPLICATE SUBPATTERN NUMBERS + + Perl 5.10 introduced a feature whereby each alternative in a subpattern + uses the same numbers for its capturing parentheses. Such a subpattern + starts with (?| and is itself a non-capturing subpattern. For example, + consider this pattern: + + (?|(Sat)ur|(Sun))day + + Because the two alternatives are inside a (?| group, both sets of cap- + turing parentheses are numbered one. Thus, when the pattern matches, + you can look at captured substring number one, whichever alternative + matched. This construct is useful when you want to capture part, but + not all, of one of a number of alternatives. Inside a (?| group, paren- + theses are numbered as usual, but the number is reset at the start of + each branch. The numbers of any capturing parentheses that follow the + subpattern start after the highest number used in any branch. The fol- + lowing example is taken from the Perl documentation. 
The numbers under- + neath show in which buffer the captured content will be stored. + + # before ---------------branch-reset----------- after + / ( a ) (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x + # 1 2 2 3 2 3 4 + + A back reference to a numbered subpattern uses the most recent value + that is set for that number by any subpattern. The following pattern + matches "abcabc" or "defdef": + + /(?|(abc)|(def))\1/ + + In contrast, a subroutine call to a numbered subpattern always refers + to the first one in the pattern with the given number. The following + pattern matches "abcabc" or "defabc": + + /(?|(abc)|(def))(?1)/ + + A relative reference such as (?-1) is no different: it is just a conve- + nient way of computing an absolute group number. + + If a condition test for a subpattern's having matched refers to a non- + unique number, the test is true if any of the subpatterns of that num- + ber have matched. + + An alternative approach to using this "branch reset" feature is to use + duplicate named subpatterns, as described in the next section. + + +NAMED SUBPATTERNS + + Identifying capturing parentheses by number is simple, but it can be + very hard to keep track of the numbers in complicated regular expres- + sions. Furthermore, if an expression is modified, the numbers may + change. To help with this difficulty, PCRE2 supports the naming of sub- + patterns. This feature was not added to Perl until release 5.10. Python + had the feature earlier, and PCRE1 introduced it at release 4.0, using + the Python syntax. PCRE2 supports both the Perl and the Python syntax. + Perl allows identically numbered subpatterns to have different names, + but PCRE2 does not. + + In PCRE2, a subpattern can be named in one of three ways: (?<name>...) + or (?'name'...) as in Perl, or (?P<name>...) as in Python. References + to capturing parentheses from other parts of the pattern, such as back + references, recursion, and conditions, can be made by name as well as + by number.
+ + Names consist of up to 32 alphanumeric characters and underscores, but + must start with a non-digit. Named capturing parentheses are still + allocated numbers as well as names, exactly as if the names were not + present. The PCRE2 API provides function calls for extracting the name- + to-number translation table from a compiled pattern. There are also + convenience functions for extracting a captured substring by name. + + By default, a name must be unique within a pattern, but it is possible + to relax this constraint by setting the PCRE2_DUPNAMES option at com- + pile time. (Duplicate names are also always permitted for subpatterns + with the same number, set up as described in the previous section.) + Duplicate names can be useful for patterns where only one instance of + the named parentheses can match. Suppose you want to match the name of + a weekday, either as a 3-letter abbreviation or as the full name, and + in both cases you want to extract the abbreviation. This pattern + (ignoring the line breaks) does the job: + + (?<DN>Mon|Fri|Sun)(?:day)?| + (?<DN>Tue)(?:sday)?| + (?<DN>Wed)(?:nesday)?| + (?<DN>Thu)(?:rsday)?| + (?<DN>Sat)(?:urday)? + + There are five capturing substrings, but only one is ever set after a + match. (An alternative way of solving this problem is to use a "branch + reset" subpattern, as described in the previous section.) + + The convenience functions for extracting the data by name return the + substring for the first (and in this example, the only) subpattern of + that name that matched. This saves searching to find which numbered + subpattern it was. + + If you make a back reference to a non-unique named subpattern from + elsewhere in the pattern, the subpatterns to which the name refers are + checked in the order in which they appear in the overall pattern. The + first one that is set is used for the reference.
For example, this pat- + tern matches both "foofoo" and "barbar" but not "foobar" or "barfoo": + + (?:(?<n>foo)|(?<n>bar))\k<n> + + + If you make a subroutine call to a non-unique named subpattern, the one + that corresponds to the first occurrence of the name is used. In the + absence of duplicate numbers (see the previous section) this is the one + with the lowest number. + + If you use a named reference in a condition test (see the section about + conditions below), either to check whether a subpattern has matched, or + to check for recursion, all subpatterns with the same name are tested. + If the condition is true for any one of them, the overall condition is + true. This is the same behaviour as testing by number. For further + details of the interfaces for handling named subpatterns, see the + pcre2api documentation. + + Warning: You cannot use different names to distinguish between two sub- + patterns with the same number because PCRE2 uses only the numbers when + matching. For this reason, an error is given at compile time if differ- + ent names are given to subpatterns with the same number. However, you + can always give the same name to subpatterns with the same number, even + when PCRE2_DUPNAMES is not set. + + +REPETITION + + Repetition is specified by quantifiers, which can follow any of the + following items: + + a literal data character + the dot metacharacter + the \C escape sequence + the \X escape sequence + the \R escape sequence + an escape such as \d or \pL that matches a single character + a character class + a back reference + a parenthesized subpattern (including most assertions) + a subroutine call to a subpattern (recursive or otherwise) + + The general repetition quantifier specifies a minimum and maximum num- + ber of permitted matches, by giving the two numbers in curly brackets + (braces), separated by a comma. The numbers must be less than 65536, + and the first must be less than or equal to the second.
For example: + + z{2,4} + + matches "zz", "zzz", or "zzzz". A closing brace on its own is not a + special character. If the second number is omitted, but the comma is + present, there is no upper limit; if the second number and the comma + are both omitted, the quantifier specifies an exact number of required + matches. Thus + + [aeiou]{3,} + + matches at least 3 successive vowels, but may match many more, whereas + + \d{8} + + matches exactly 8 digits. An opening curly bracket that appears in a + position where a quantifier is not allowed, or one that does not match + the syntax of a quantifier, is taken as a literal character. For exam- + ple, {,6} is not a quantifier, but a literal string of four characters. + + In UTF modes, quantifiers apply to characters rather than to individual + code units. Thus, for example, \x{100}{2} matches two characters, each + of which is represented by a two-byte sequence in a UTF-8 string. Simi- + larly, \X{3} matches three Unicode extended grapheme clusters, each of + which may be several code units long (and they may be of different + lengths). + + The quantifier {0} is permitted, causing the expression to behave as if + the previous item and the quantifier were not present. This may be use- + ful for subpatterns that are referenced as subroutines from elsewhere + in the pattern (but see also the section entitled "Defining subpatterns + for use by reference only" below). Items other than subpatterns that + have a {0} quantifier are omitted from the compiled pattern. + + For convenience, the three most common quantifiers have single-charac- + ter abbreviations: + + * is equivalent to {0,} + + is equivalent to {1,} + ? is equivalent to {0,1} + + It is possible to construct infinite loops by following a subpattern + that can match no characters with a quantifier that has no upper limit, + for example: + + (a?)* + + Earlier versions of Perl and PCRE1 used to give an error at compile + time for such patterns. 
However, because there are cases where this can + be useful, such patterns are now accepted, but if any repetition of the + subpattern does in fact match no characters, the loop is forcibly bro- + ken. + + By default, the quantifiers are "greedy", that is, they match as much + as possible (up to the maximum number of permitted times), without + causing the rest of the pattern to fail. The classic example of where + this gives problems is in trying to match comments in C programs. These + appear between /* and */ and within the comment, individual * and / + characters may appear. An attempt to match C comments by applying the + pattern + + /\*.*\*/ + + to the string + + /* first comment */ not comment /* second comment */ + + fails, because it matches the entire string owing to the greediness of + the .* item. + + If a quantifier is followed by a question mark, it ceases to be greedy, + and instead matches the minimum number of times possible, so the pat- + tern + + /\*.*?\*/ + + does the right thing with the C comments. The meaning of the various + quantifiers is not otherwise changed, just the preferred number of + matches. Do not confuse this use of question mark with its use as a + quantifier in its own right. Because it has two uses, it can sometimes + appear doubled, as in + + \d??\d + + which matches one digit by preference, but can match two if that is the + only way the rest of the pattern matches. + + If the PCRE2_UNGREEDY option is set (an option that is not available in + Perl), the quantifiers are not greedy by default, but individual ones + can be made greedy by following them with a question mark. In other + words, it inverts the default behaviour. + + When a parenthesized subpattern is quantified with a minimum repeat + count that is greater than 1 or with a limited maximum, more memory is + required for the compiled pattern, in proportion to the size of the + minimum or maximum. 
+ + If a pattern starts with .* or .{0,} and the PCRE2_DOTALL option + (equivalent to Perl's /s) is set, thus allowing the dot to match new- + lines, the pattern is implicitly anchored, because whatever follows + will be tried against every character position in the subject string, + so there is no point in retrying the overall match at any position + after the first. PCRE2 normally treats such a pattern as though it were + preceded by \A. + + In cases where it is known that the subject string contains no new- + lines, it is worth setting PCRE2_DOTALL in order to obtain this opti- + mization, or alternatively, using ^ to indicate anchoring explicitly. + + However, there are some cases where the optimization cannot be used. + When .* is inside capturing parentheses that are the subject of a back + reference elsewhere in the pattern, a match at the start may fail where + a later one succeeds. Consider, for example: + + (.*)abc\1 + + If the subject is "xyz123abc123" the match point is the fourth charac- + ter. For this reason, such a pattern is not implicitly anchored. + + Another case where implicit anchoring is not applied is when the lead- + ing .* is inside an atomic group. Once again, a match at the start may + fail where a later one succeeds. Consider this pattern: + + (?>.*?a)b + + It matches "ab" in the subject "aab". The use of the backtracking con- + trol verbs (*PRUNE) and (*SKIP) also disables this optimization, and + there is an option, PCRE2_NO_DOTSTAR_ANCHOR, to do so explicitly. + + When a capturing subpattern is repeated, the value captured is the sub- + string that matched the final iteration. For example, after + + (tweedle[dume]{3}\s*)+ + + has matched "tweedledum tweedledee" the value of the captured substring + is "tweedledee". However, if there are nested capturing subpatterns, + the corresponding captured values may have been set in previous itera- + tions.
For example, after + + (a|(b))+ + + matches "aba" the value of the second captured substring is "b". + + +ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS + + With both maximizing ("greedy") and minimizing ("ungreedy" or "lazy") + repetition, failure of what follows normally causes the repeated item + to be re-evaluated to see if a different number of repeats allows the + rest of the pattern to match. Sometimes it is useful to prevent this, + either to change the nature of the match, or to cause it to fail earlier + than it otherwise might, when the author of the pattern knows there is + no point in carrying on. + + Consider, for example, the pattern \d+foo when applied to the subject + line + + 123456bar + + After matching all 6 digits and then failing to match "foo", the normal + action of the matcher is to try again with only 5 digits matching the + \d+ item, and then with 4, and so on, before ultimately failing. + "Atomic grouping" (a term taken from Jeffrey Friedl's book) provides + the means for specifying that once a subpattern has matched, it is not + to be re-evaluated in this way. + + If we use atomic grouping for the previous example, the matcher gives + up immediately on failing to match "foo" the first time. The notation + is a kind of special parenthesis, starting with (?> as in this example: + + (?>\d+)foo + + This kind of parenthesis "locks up" the part of the pattern it con- + tains once it has matched, and a failure further into the pattern is + prevented from backtracking into it. Backtracking past it to previous + items, however, works as normal. + + An alternative description is that a subpattern of this type matches + exactly the string of characters that an identical standalone pattern + would match, if anchored at the current point in the subject string. + + Atomic grouping subpatterns are not capturing subpatterns. Simple cases + such as the above example can be thought of as a maximizing repeat that + must swallow everything it can.
So, while both \d+ and \d+? are pre- + pared to adjust the number of digits they match in order to make the + rest of the pattern match, (?>\d+) can only match an entire sequence of + digits. + + Atomic groups in general can of course contain arbitrarily complicated + subpatterns, and can be nested. However, when the subpattern for an + atomic group is just a single repeated item, as in the example above, a + simpler notation, called a "possessive quantifier" can be used. This + consists of an additional + character following a quantifier. Using + this notation, the previous example can be rewritten as + + \d++foo + + Note that a possessive quantifier can be used with an entire group, for + example: + + (abc|xyz){2,3}+ + + Possessive quantifiers are always greedy; the setting of the + PCRE2_UNGREEDY option is ignored. They are a convenient notation for + the simpler forms of atomic group. However, there is no difference in + the meaning of a possessive quantifier and the equivalent atomic group, + though there may be a performance difference; possessive quantifiers + should be slightly faster. + + The possessive quantifier syntax is an extension to the Perl 5.8 syn- + tax. Jeffrey Friedl originated the idea (and the name) in the first + edition of his book. Mike McCloskey liked it, so implemented it when he + built Sun's Java package, and PCRE1 copied it from there. It ultimately + found its way into Perl at release 5.10. + + PCRE2 has an optimization that automatically "possessifies" certain + simple pattern constructs. For example, the sequence A+B is treated as + A++B because there is no point in backtracking into a sequence of A's + when B must follow. This feature can be disabled by the PCRE2_NO_AUTO- + POSSESS option, or starting the pattern with (*NO_AUTO_POSSESS). 
+ + When a pattern contains an unlimited repeat inside a subpattern that + can itself be repeated an unlimited number of times, the use of an + atomic group is the only way to avoid some failing matches taking a + very long time indeed. The pattern + + (\D+|<\d+>)*[!?] + + matches an unlimited number of substrings that either consist of non- + digits, or digits enclosed in <>, followed by either ! or ?. When it + matches, it runs quickly. However, if it is applied to + + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + + it takes a long time before reporting failure. This is because the + string can be divided between the internal \D+ repeat and the external + * repeat in a large number of ways, and all have to be tried. (The + example uses [!?] rather than a single character at the end, because + both PCRE2 and Perl have an optimization that allows for fast failure + when a single character is used. They remember the last single charac- + ter that is required for a match, and fail early if it is not present + in the string.) If the pattern is changed so that it uses an atomic + group, like this: + + ((?>\D+)|<\d+>)*[!?] + + sequences of non-digits cannot be broken, and failure happens quickly. + + +BACK REFERENCES + + Outside a character class, a backslash followed by a digit greater than + 0 (and possibly further digits) is a back reference to a capturing sub- + pattern earlier (that is, to its left) in the pattern, provided there + have been that many previous capturing left parentheses. + + However, if the decimal number following the backslash is less than 8, + it is always taken as a back reference, and causes an error only if + there are not that many capturing left parentheses in the entire pat- + tern. In other words, the parentheses that are referenced need not be + to the left of the reference for numbers less than 8. 
A "forward back + reference" of this type can make sense when a repetition is involved + and the subpattern to the right has participated in an earlier itera- + tion. + + It is not possible to have a numerical "forward back reference" to a + subpattern whose number is 8 or more using this syntax because a + sequence such as \50 is interpreted as a character defined in octal. + See the subsection entitled "Non-printing characters" above for further + details of the handling of digits following a backslash. There is no + such problem when named parentheses are used. A back reference to any + subpattern is possible using named parentheses (see below). + + Another way of avoiding the ambiguity inherent in the use of digits + following a backslash is to use the \g escape sequence. This escape + must be followed by a signed or unsigned number, optionally enclosed in + braces. These examples are all identical: + + (ring), \1 + (ring), \g1 + (ring), \g{1} + + An unsigned number specifies an absolute reference without the ambigu- + ity that is present in the older syntax. It is also useful when literal + digits follow the reference. A signed number is a relative reference. + Consider this example: + + (abc(def)ghi)\g{-1} + + The sequence \g{-1} is a reference to the most recently started captur- + ing subpattern before \g, that is, it is equivalent to \2 in this exam- + ple. Similarly, \g{-2} would be equivalent to \1. The use of relative + references can be helpful in long patterns, and also in patterns that + are created by joining together fragments that contain references + within themselves. + + The sequence \g{+1} is a reference to the next capturing subpattern. + This kind of forward reference can be useful in patterns that repeat. + Perl does not support the use of + in this way.
+ + A back reference matches whatever actually matched the capturing sub- + pattern in the current subject string, rather than anything matching + the subpattern itself (see "Subpatterns as subroutines" below for a way + of doing that). So the pattern + + (sens|respons)e and \1ibility + + matches "sense and sensibility" and "response and responsibility", but + not "sense and responsibility". If caseful matching is in force at the + time of the back reference, the case of letters is relevant. For exam- + ple, + + ((?i)rah)\s+\1 + + matches "rah rah" and "RAH RAH", but not "RAH rah", even though the + original capturing subpattern is matched caselessly. + + There are several different ways of writing back references to named + subpatterns. The .NET syntax \k{name} and the Perl syntax \k<name> or + \k'name' are supported, as is the Python syntax (?P=name). Perl 5.10's + unified back reference syntax, in which \g can be used for both numeric + and named references, is also supported. We could rewrite the above + example in any of the following ways: + + (?<p1>(?i)rah)\s+\k<p1> + (?'p1'(?i)rah)\s+\k{p1} + (?P<p1>(?i)rah)\s+(?P=p1) + (?<p1>(?i)rah)\s+\g{p1} + + A subpattern that is referenced by name may appear in the pattern + before or after the reference. + + There may be more than one back reference to the same subpattern. If a + subpattern has not actually been used in a particular match, any back + references to it always fail by default. For example, the pattern + + (a|(bc))\2 + + always fails if it starts to match "a" rather than "bc". However, if + the PCRE2_MATCH_UNSET_BACKREF option is set at compile time, a back + reference to an unset value matches an empty string. + + Because there may be many capturing parentheses in a pattern, all dig- + its following a backslash are taken as part of a potential back refer- + ence number. If the pattern continues with a digit character, some + delimiter must be used to terminate the back reference.
If the + PCRE2_EXTENDED option is set, this can be white space. Otherwise, the + \g{ syntax or an empty comment (see "Comments" below) can be used. + + Recursive back references + + A back reference that occurs inside the parentheses to which it refers + fails when the subpattern is first used, so, for example, (a\1) never + matches. However, such references can be useful inside repeated sub- + patterns. For example, the pattern + + (a|b\1)+ + + matches any number of "a"s and also "aba", "ababbaa" etc. At each iter- + ation of the subpattern, the back reference matches the character + string corresponding to the previous iteration. In order for this to + work, the pattern must be such that the first iteration does not need + to match the back reference. This can be done using alternation, as in + the example above, or by a quantifier with a minimum of zero. + + Back references of this type cause the group that they reference to be + treated as an atomic group. Once the whole group has been matched, a + subsequent matching failure cannot cause backtracking into the middle + of the group. + + +ASSERTIONS + + An assertion is a test on the characters following or preceding the + current matching point that does not consume any characters. The simple + assertions coded as \b, \B, \A, \G, \Z, \z, ^ and $ are described + above. + + More complicated assertions are coded as subpatterns. There are two + kinds: those that look ahead of the current position in the subject + string, and those that look behind it. An assertion subpattern is + matched in the normal way, except that it does not cause the current + matching position to be changed. + + Assertion subpatterns are not capturing subpatterns. If such an asser- + tion contains capturing subpatterns within it, these are counted for + the purposes of numbering the capturing subpatterns in the whole pat- + tern. However, substring capturing is carried out only for positive + assertions. 
(Perl sometimes, but not always, does do capturing in nega- + tive assertions.) + + WARNING: If a positive assertion containing one or more capturing sub- + patterns succeeds, but failure to match later in the pattern causes + backtracking over this assertion, the captures within the assertion are + reset only if no higher numbered captures are already set. This is, + unfortunately, a fundamental limitation of the current implementation; + it may get removed in a future reworking. + + For compatibility with Perl, most assertion subpatterns may be + repeated; though it makes no sense to assert the same thing several + times, the side effect of capturing parentheses may occasionally be + useful. However, an assertion that forms the condition for a condi- + tional subpattern may not be quantified. In practice, for other asser- + tions, there are only three cases: + + (1) If the quantifier is {0}, the assertion is never obeyed during + matching. However, it may contain internal capturing parenthesized + groups that are called from elsewhere via the subroutine mechanism. + + (2) If the quantifier is {0,n} where n is greater than zero, it is treated + as if it were {0,1}. At run time, the rest of the pattern match is + tried with and without the assertion, the order depending on the greed- + iness of the quantifier. + + (3) If the minimum repetition is greater than zero, the quantifier is + ignored. The assertion is obeyed just once when encountered during + matching. + + Lookahead assertions + + Lookahead assertions start with (?= for positive assertions and (?! for + negative assertions. For example, + + \w+(?=;) + + matches a word followed by a semicolon, but does not include the semi- + colon in the match, and + + foo(?!bar) + + matches any occurrence of "foo" that is not followed by "bar".
Note + that the apparently similar pattern + + (?!foo)bar + + does not find an occurrence of "bar" that is preceded by something + other than "foo"; it finds any occurrence of "bar" whatsoever, because + the assertion (?!foo) is always true when the next three characters are + "bar". A lookbehind assertion is needed to achieve the other effect. + + If you want to force a matching failure at some point in a pattern, the + most convenient way to do it is with (?!) because an empty string + always matches, so an assertion that requires there not to be an empty + string must always fail. The backtracking control verb (*FAIL) or (*F) + is a synonym for (?!). + + Lookbehind assertions + + Lookbehind assertions start with (?<= for positive assertions and (?<! for + negative assertions. + + Checking for a used subpattern by name + + Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a + used subpattern by name. For compatibility with earlier versions of + PCRE1, which had this facility before Perl, the syntax (?(name)...) is + also recognized. Note, however, that undelimited names consisting of + the letter R followed by digits are ambiguous (see the following sec- + tion). + + Rewriting the above example to use a named subpattern gives this: + + (?<OPEN> \( )? [^()]+ (?(<OPEN>) \) ) + + If the name used in a condition of this kind is a duplicate, the test + is applied to all subpatterns of the same name, and is true if any one + of them has matched. + + Checking for pattern recursion + + "Recursion" in this sense refers to any subroutine-like call from one + part of the pattern to another, whether or not it is actually recur- + sive. See the sections entitled "Recursive patterns" and "Subpatterns + as subroutines" below for details of recursion and subpattern calls. + + If a condition is the string (R), and there is no subpattern with the + name R, the condition is true if matching is currently in a recursion + or subroutine call to the whole pattern or any subpattern.
If digits + follow the letter R, and there is no subpattern with that name, the + condition is true if the most recent call is into a subpattern with the + given number, which must exist somewhere in the overall pattern. This + is a contrived example that is equivalent to a+b: + + ((?(R1)a+|(?1)b)) + + However, in both cases, if there is a subpattern with a matching name, + the condition tests for its being set, as described in the section + above, instead of testing for recursion. For example, creating a group + with the name R1 by adding (?<R1>) to the above pattern completely + changes its meaning. + + If a name preceded by ampersand follows the letter R, for example: + + (?(R&name)...) + + the condition is true if the most recent recursion is into a subpattern + of that name (which must exist within the pattern). + + This condition does not check the entire recursion stack. It tests only + the current level. If the name used in a condition of this kind is a + duplicate, the test is applied to all subpatterns of the same name, and + is true if any one of them is the most recent recursion. + + At "top level", all these recursion test conditions are false. + + Defining subpatterns for use by reference only + + If the condition is the string (DEFINE), the condition is always false, + even if there is a group with the name DEFINE. In this case, there may + be only one alternative in the subpattern. It is always skipped if con- + trol reaches this point in the pattern; the idea of DEFINE is that it + can be used to define subroutines that can be referenced from else- + where. (The use of subroutines is described below.) For example, a pat- + tern to match an IPv4 address such as "192.168.23.245" could be written + like this (ignore white space and line breaks): + + (?(DEFINE) (?<byte> 2[0-4]\d | 25[0-5] | 1\d\d | [1-9]?\d) ) + \b (?&byte) (\.(?&byte)){3} \b + + The first part of the pattern is a DEFINE group inside which another + group named "byte" is defined.
This matches an individual component of + an IPv4 address (a number less than 256). When matching takes place, + this part of the pattern is skipped because DEFINE acts like a false + condition. The rest of the pattern uses references to the named group + to match the four dot-separated components of an IPv4 address, insist- + ing on a word boundary at each end. + + Checking the PCRE2 version + + Programs that link with a PCRE2 library can check the version by call- + ing pcre2_config() with appropriate arguments. Users of applications + that do not have access to the underlying code cannot do this. A spe- + cial "condition" called VERSION exists to allow such users to discover + which version of PCRE2 they are dealing with by using this condition to + match a string such as "yesno". VERSION must be followed either by "=" + or ">=" and a version number. For example: + + (?(VERSION>=10.4)yes|no) + + This pattern matches "yes" if the PCRE2 version is greater or equal to + 10.4, or "no" otherwise. The fractional part of the version number may + not contain more than two digits. + + Assertion conditions + + If the condition is not in any of the above formats, it must be an + assertion. This may be a positive or negative lookahead or lookbehind + assertion. Consider this pattern, again containing non-significant + white space, and with the two alternatives on the second line: + + (?(?=[^a-z]*[a-z]) + \d{2}-[a-z]{3}-\d{2} | \d{2}-\d{2}-\d{2} ) + + The condition is a positive lookahead assertion that matches an + optional sequence of non-letters followed by a letter. In other words, + it tests for the presence of at least one letter in the subject. If a + letter is found, the subject is matched against the first alternative; + otherwise it is matched against the second. This pattern matches + strings in one of the two forms dd-aaa-dd or dd-dd-dd, where aaa are + letters and dd are digits. 
+ + +COMMENTS + + There are two ways of including comments in patterns that are processed + by PCRE2. In both cases, the start of the comment must not be in a + character class, nor in the middle of any other sequence of related + characters such as (?: or a subpattern name or number. The characters + that make up a comment play no part in the pattern matching. + + The sequence (?# marks the start of a comment that continues up to the + next closing parenthesis. Nested parentheses are not permitted. If the + PCRE2_EXTENDED option is set, an unescaped # character also introduces + a comment, which in this case continues to immediately after the next + newline character or character sequence in the pattern. Which charac- + ters are interpreted as newlines is controlled by an option passed to + the compiling function or by a special sequence at the start of the + pattern, as described in the section entitled "Newline conventions" + above. Note that the end of this type of comment is a literal newline + sequence in the pattern; escape sequences that happen to represent a + newline do not count. For example, consider this pattern when + PCRE2_EXTENDED is set, and the default newline convention (a single + linefeed character) is in force: + + abc #comment \n still comment + + On encountering the # character, pcre2_compile() skips along, looking + for a newline in the pattern. The sequence \n is still literal at this + stage, so it does not terminate the comment. Only an actual character + with the code value 0x0a (the default newline) does so. + + +RECURSIVE PATTERNS + + Consider the problem of matching a string in parentheses, allowing for + unlimited nested parentheses. Without the use of recursion, the best + that can be done is to use a pattern that matches up to some fixed + depth of nesting. It is not possible to handle an arbitrary nesting + depth. 
+ + For some time, Perl has provided a facility that allows regular expres- + sions to recurse (amongst other things). It does this by interpolating + Perl code in the expression at run time, and the code can refer to the + expression itself. A Perl pattern using code interpolation to solve the + parentheses problem can be created like this: + + $re = qr{\( (?: (?>[^()]+) | (?p{$re}) )* \)}x; + + The (?p{...}) item interpolates Perl code at run time, and in this case + refers recursively to the pattern in which it appears. + + Obviously, PCRE2 cannot support the interpolation of Perl code. + Instead, it supports special syntax for recursion of the entire pat- + tern, and also for individual subpattern recursion. After its introduc- + tion in PCRE1 and Python, this kind of recursion was subsequently + introduced into Perl at release 5.10. + + A special item that consists of (? followed by a number greater than + zero and a closing parenthesis is a recursive subroutine call of the + subpattern of the given number, provided that it occurs inside that + subpattern. (If not, it is a non-recursive subroutine call, which is + described in the next section.) The special item (?R) or (?0) is a + recursive call of the entire regular expression. + + This PCRE2 pattern solves the nested parentheses problem (assume the + PCRE2_EXTENDED option is set so that white space is ignored): + + \( ( [^()]++ | (?R) )* \) + + First it matches an opening parenthesis. Then it matches any number of + substrings which can either be a sequence of non-parentheses, or a + recursive match of the pattern itself (that is, a correctly parenthe- + sized substring). Finally there is a closing parenthesis. Note the use + of a possessive quantifier to avoid backtracking into sequences of non- + parentheses. 
+ + If this were part of a larger pattern, you would not want to recurse + the entire pattern, so instead you could use this: + + ( \( ( [^()]++ | (?1) )* \) ) + + We have put the pattern into parentheses, and caused the recursion to + refer to them instead of the whole pattern. + + In a larger pattern, keeping track of parenthesis numbers can be + tricky. This is made easier by the use of relative references. Instead + of (?1) in the pattern above you can write (?-2) to refer to the second + most recently opened parentheses preceding the recursion. In other + words, a negative number counts capturing parentheses leftwards from + the point at which it is encountered. + + Be aware however, that if duplicate subpattern numbers are in use, rel- + ative references refer to the earliest subpattern with the appropriate + number. Consider, for example: + + (?|(a)|(b)) (c) (?-2) + + The first two capturing groups (a) and (b) are both numbered 1, and + group (c) is number 2. When the reference (?-2) is encountered, the + second most recently opened parentheses has the number 1, but it is the + first such group (the (a) group) to which the recursion refers. This + would be the same if an absolute reference (?1) was used. In other + words, relative references are just a shorthand for computing a group + number. + + It is also possible to refer to subsequently opened parentheses, by + writing references such as (?+2). However, these cannot be recursive + because the reference is not inside the parentheses that are refer- + enced. They are always non-recursive subroutine calls, as described in + the next section. + + An alternative approach is to use named parentheses. The Perl syntax + for this is (?&name); PCRE1's earlier syntax (?P>name) is also sup- + ported. We could rewrite the above example as follows: + + (?<pn> \( ( [^()]++ | (?&pn) )* \) ) + + If there is more than one subpattern with the same name, the earliest + one is used.
+ + The example pattern that we have been looking at contains nested unlim- + ited repeats, and so the use of a possessive quantifier for matching + strings of non-parentheses is important when applying the pattern to + strings that do not match. For example, when this pattern is applied to + + (aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa() + + it yields "no match" quickly. However, if a possessive quantifier is + not used, the match runs for a very long time indeed because there are + so many different ways the + and * repeats can carve up the subject, + and all have to be tested before failure can be reported. + + At the end of a match, the values of capturing parentheses are those + from the outermost level. If you want to obtain intermediate values, a + callout function can be used (see below and the pcre2callout documenta- + tion). If the pattern above is matched against + + (ab(cd)ef) + + the value for the inner capturing parentheses (numbered 2) is "ef", + which is the last value taken on at the top level. If a capturing sub- + pattern is not matched at the top level, its final captured value is + unset, even if it was (temporarily) set at a deeper level during the + matching process. + + If there are more than 15 capturing parentheses in a pattern, PCRE2 has + to obtain extra memory from the heap to store data during a recursion. + If no memory can be obtained, the match fails with the + PCRE2_ERROR_NOMEMORY error. + + Do not confuse the (?R) item with the condition (R), which tests for + recursion. Consider this pattern, which matches text in angle brack- + ets, allowing for arbitrary nesting. Only digits are allowed in nested + brackets (that is, when recursing), whereas any characters are permit- + ted at the outer level. + + < (?: (?(R) \d++ | [^<>]*+) | (?R)) * > + + In this pattern, (?(R) is the start of a conditional subpattern, with + two different alternatives for the recursive and non-recursive cases. 
+ The (?R) item is the actual recursive call. + + Differences in recursion processing between PCRE2 and Perl + + Recursion processing in PCRE2 differs from Perl in two important ways. + In PCRE2 (like Python, but unlike Perl), a recursive subpattern call is + always treated as an atomic group. That is, once it has matched some of + the subject string, it is never re-entered, even if it contains untried + alternatives and there is a subsequent matching failure. This can be + illustrated by the following pattern, which purports to match a palin- + dromic string that contains an odd number of characters (for example, + "a", "aba", "abcba", "abcdcba"): + + ^(.|(.)(?1)\2)$ + + The idea is that it either matches a single character, or two identical + characters surrounding a sub-palindrome. In Perl, this pattern works; + in PCRE2 it does not if the pattern is longer than three characters. + Consider the subject string "abcba": + + At the top level, the first character is matched, but as it is not at + the end of the string, the first alternative fails; the second alterna- + tive is taken and the recursion kicks in. The recursive call to subpat- + tern 1 successfully matches the next character ("b"). (Note that the + beginning and end of line tests are not part of the recursion). + + Back at the top level, the next character ("c") is compared with what + subpattern 2 matched, which was "a". This fails. Because the recursion + is treated as an atomic group, there are now no backtracking points, + and so the entire match fails. (Perl is able, at this point, to re- + enter the recursion and try the second alternative.) However, if the + pattern is written with the alternatives in the other order, things are + different: + + ^((.)(?1)\2|.)$ + + This time, the recursing alternative is tried first, and continues to + recurse until it runs out of characters, at which point the recursion + fails. But this time we do have another alternative to try at the + higher level. 
That is the big difference: in the previous case the + remaining alternative is at a deeper recursion level, which PCRE2 can- + not use. + + To change the pattern so that it matches all palindromic strings, not + just those with an odd number of characters, it is tempting to change + the pattern to this: + + ^((.)(?1)\2|.?)$ + + Again, this works in Perl, but not in PCRE2, and for the same reason. + When a deeper recursion has matched a single character, it cannot be + entered again in order to match an empty string. The solution is to + separate the two cases, and write out the odd and even cases as alter- + natives at the higher level: + + ^(?:((.)(?1)\2|)|((.)(?3)\4|.)) + + If you want to match typical palindromic phrases, the pattern has to + ignore all non-word characters, which can be done like this: + + ^\W*+(?:((.)\W*+(?1)\W*+\2|)|((.)\W*+(?3)\W*+\4|\W*+.\W*+))\W*+$ + + If run with the PCRE2_CASELESS option, this pattern matches phrases + such as "A man, a plan, a canal: Panama!" and it works in both PCRE2 + and Perl. Note the use of the possessive quantifier *+ to avoid back- + tracking into sequences of non-word characters. Without this, PCRE2 + takes a great deal longer (ten times or more) to match typical phrases, + and Perl takes so long that you think it has gone into a loop. + + WARNING: The palindrome-matching patterns above work only if the sub- + ject string does not start with a palindrome that is shorter than the + entire string. For example, although "abcba" is correctly matched, if + the subject is "ababa", PCRE2 finds the palindrome "aba" at the start, + then fails at top level because the end of the string does not follow. + Once again, it cannot jump back into the recursion to try other alter- + natives, so the entire match fails. + + The second way in which PCRE2 and Perl differ in their recursion pro- + cessing is in the handling of captured values. 
In Perl, when a subpat- + tern is called recursively or as a subpattern (see the next section), + it has no access to any values that were captured outside the recur- + sion, whereas in PCRE2 these values can be referenced. Consider this + pattern: + + ^(.)(\1|a(?2)) + + In PCRE2, this pattern matches "bab". The first capturing parentheses + match "b", then in the second group, when the back reference \1 fails + to match "b", the second alternative matches "a" and then recurses. In + the recursion, \1 does now match "b" and so the whole match succeeds. + In Perl, the pattern fails to match because inside the recursive call + \1 cannot access the externally set value. + + +SUBPATTERNS AS SUBROUTINES + + If the syntax for a recursive subpattern call (either by number or by + name) is used outside the parentheses to which it refers, it operates + like a subroutine in a programming language. The called subpattern may + be defined before or after the reference. A numbered reference can be + absolute or relative, as in these examples: + + (...(absolute)...)...(?2)... + (...(relative)...)...(?-1)... + (...(?+1)...(relative)... + + An earlier example pointed out that the pattern + + (sens|respons)e and \1ibility + + matches "sense and sensibility" and "response and responsibility", but + not "sense and responsibility". If instead the pattern + + (sens|respons)e and (?1)ibility + + is used, it does match "sense and responsibility" as well as the other + two strings. Another example is given in the discussion of DEFINE + above. + + All subroutine calls, whether recursive or not, are always treated as + atomic groups. That is, once a subroutine has matched some of the sub- + ject string, it is never re-entered, even if it contains untried alter- + natives and there is a subsequent matching failure. Any capturing + parentheses that are set during the subroutine call revert to their + previous values afterwards. 
+ + Processing options such as case-independence are fixed when a subpat- + tern is defined, so if it is used as a subroutine, such options cannot + be changed for different calls. For example, consider this pattern: + + (abc)(?i:(?-1)) + + It matches "abcabc". It does not match "abcABC" because the change of + processing option does not affect the called subpattern. + + +ONIGURUMA SUBROUTINE SYNTAX + + For compatibility with Oniguruma, the non-Perl syntax \g followed by a + name or a number enclosed either in angle brackets or single quotes, is + an alternative syntax for referencing a subpattern as a subroutine, + possibly recursively. Here are two of the examples used above, rewrit- + ten using this syntax: + + (?<pn> \( ( (?>[^()]+) | \g<pn> )* \) ) + (sens|respons)e and \g'1'ibility + + PCRE2 supports an extension to Oniguruma: if a number is preceded by a + plus or a minus sign it is taken as a relative reference. For example: + + (abc)(?i:\g<-1>) + + Note that \g{...} (Perl syntax) and \g<...> (Oniguruma syntax) are not + synonymous. The former is a back reference; the latter is a subroutine + call. + + +CALLOUTS + + Perl has a feature whereby using the sequence (?{...}) causes arbitrary + Perl code to be obeyed in the middle of matching a regular expression. + This makes it possible, amongst other things, to extract different sub- + strings that match the same pair of parentheses when there is a repeti- + tion. + + PCRE2 provides a similar feature, but of course it cannot obey arbi- + trary Perl code. The feature is called "callout". The caller of PCRE2 + provides an external function by putting its entry point in a match + context using the function pcre2_set_callout(), and then passing that + context to pcre2_match() or pcre2_dfa_match(). If no match context is + passed, or if the callout entry point is set to NULL, callouts are dis- + abled. + + Within a regular expression, (?C) indicates a point at which the + external function is to be called.
There are two kinds of callout: + those with a numerical argument and those with a string argument. (?C) + on its own with no argument is treated as (?C0). A numerical argument + allows the application to distinguish between different callouts. + String arguments were added for release 10.20 to make it possible for + script languages that use PCRE2 to embed short scripts within patterns + in a similar way to Perl. + + During matching, when PCRE2 reaches a callout point, the external func- + tion is called. It is provided with the number or string argument of + the callout, the position in the pattern, and one item of data that is + also set in the match block. The callout function may cause matching to + proceed, to backtrack, or to fail. + + By default, PCRE2 implements a number of optimizations at matching + time, and one side-effect is that sometimes callouts are skipped. If + you need all possible callouts to happen, you need to set options that + disable the relevant optimizations. More details, including a complete + description of the programming interface to the callout function, are + given in the pcre2callout documentation. + + Callouts with numerical arguments + + If you just want to have a means of identifying different callout + points, put a number less than 256 after the letter C. For example, + this pattern has two callout points: + + (?C1)abc(?C2)def + + If the PCRE2_AUTO_CALLOUT flag is passed to pcre2_compile(), numerical + callouts are automatically installed before each item in the pattern. + They are all numbered 255. If there is a conditional group in the pat- + tern whose condition is an assertion, an additional callout is inserted + just before the condition. An explicit callout may also be set at this + position, as in this example: + + (?(?C9)(?=a)abc|def) + + Note that this applies only to assertion conditions, not to other types + of condition. 
+ + Callouts with string arguments + + A delimited string may be used instead of a number as a callout argu- + ment. The starting delimiter must be one of ` ' " ^ % # $ { and the + ending delimiter is the same as the start, except for {, where the end- + ing delimiter is }. If the ending delimiter is needed within the + string, it must be doubled. For example: + + (?C'ab ''c'' d')xyz(?C{any text})pqr + + The doubling is removed before the string is passed to the callout + function. + + +BACKTRACKING CONTROL + + Perl 5.10 introduced a number of "Special Backtracking Control Verbs", + which are still described in the Perl documentation as "experimental + and subject to change or removal in a future version of Perl". It goes + on to say: "Their usage in production code should be noted to avoid + problems during upgrades." The same remarks apply to the PCRE2 features + described in this section. + + The new verbs make use of what was previously invalid syntax: an open- + ing parenthesis followed by an asterisk. They are generally of the form + (*VERB) or (*VERB:NAME). Some verbs take either form, possibly behaving + differently depending on whether or not a name is present. + + By default, for compatibility with Perl, a name is any sequence of + characters that does not include a closing parenthesis. The name is not + processed in any way, and it is not possible to include a closing + parenthesis in the name. This can be changed by setting the + PCRE2_ALT_VERBNAMES option, but the result is no longer Perl-compati- + ble. + + When PCRE2_ALT_VERBNAMES is set, backslash processing is applied to + verb names and only an unescaped closing parenthesis terminates the + name. However, the only backslash items that are permitted are \Q, \E, + and sequences such as \x{100} that define character code points. Char- + acter type escapes such as \d are faulted. + + A closing parenthesis can be included in a name either as \) or between + \Q and \E. 
In addition to backslash processing, if the PCRE2_EXTENDED + option is also set, unescaped whitespace in verb names is skipped, and + #-comments are recognized, exactly as in the rest of the pattern. + PCRE2_EXTENDED does not affect verb names unless PCRE2_ALT_VERBNAMES is + also set. + + The maximum length of a name is 255 in the 8-bit library and 65535 in + the 16-bit and 32-bit libraries. If the name is empty, that is, if the + closing parenthesis immediately follows the colon, the effect is as if + the colon were not there. Any number of these verbs may occur in a pat- + tern. + + Since these verbs are specifically related to backtracking, most of + them can be used only when the pattern is to be matched using the tra- + ditional matching function, because these use a backtracking algorithm. + With the exception of (*FAIL), which behaves like a failing negative + assertion, the backtracking control verbs cause an error if encountered + by the DFA matching function. + + The behaviour of these verbs in repeated groups, assertions, and in + subpatterns called as subroutines (whether or not recursively) is docu- + mented below. + + Optimizations that affect backtracking verbs + + PCRE2 contains some optimizations that are used to speed up matching by + running some checks at the start of each match attempt. For example, it + may know the minimum length of matching subject, or that a particular + character must be present. When one of these optimizations bypasses the + running of a match, any included backtracking verbs will not, of + course, be processed. You can suppress the start-of-match optimizations + by setting the PCRE2_NO_START_OPTIMIZE option when calling pcre2_com- + pile(), or by starting the pattern with (*NO_START_OPT). There is more + discussion of this option in the section entitled "Compiling a pattern" + in the pcre2api documentation. + + Experiments with Perl suggest that it too has similar optimizations, + sometimes leading to anomalous results. 
+ + Verbs that act immediately + + The following verbs act as soon as they are encountered. They may not + be followed by a name. + + (*ACCEPT) + + This verb causes the match to end successfully, skipping the remainder + of the pattern. However, when it is inside a subpattern that is called + as a subroutine, only that subpattern is ended successfully. Matching + then continues at the outer level. If (*ACCEPT) is triggered in a posi- + tive assertion, the assertion succeeds; in a negative assertion, the + assertion fails. + + If (*ACCEPT) is inside capturing parentheses, the data so far is cap- + tured. For example: + + A((?:A|B(*ACCEPT)|C)D) + + This matches "AB", "AAD", or "ACD"; when it matches "AB", "B" is cap- + tured by the outer parentheses. + + (*FAIL) or (*F) + + This verb causes a matching failure, forcing backtracking to occur. It + is equivalent to (?!) but easier to read. The Perl documentation notes + that it is probably useful only when combined with (?{}) or (??{}). + Those are, of course, Perl features that are not present in PCRE2. The + nearest equivalent is the callout feature, as for example in this pat- + tern: + + a+(?C)(*FAIL) + + A match with the string "aaaa" always fails, but the callout is taken + before each backtrack happens (in this example, 10 times). + + Recording which path was taken + + There is one verb whose main purpose is to track how a match was + arrived at, though it also has a secondary use in conjunction with + advancing the match starting point (see (*SKIP) below). + + (*MARK:NAME) or (*:NAME) + + A name is always required with this verb. There may be as many + instances of (*MARK) as you like in a pattern, and their names do not + have to be unique. + + When a match succeeds, the name of the last-encountered (*MARK:NAME), + (*PRUNE:NAME), or (*THEN:NAME) on the matching path is passed back to + the caller as described in the section entitled "Other information + about the match" in the pcre2api documentation.
Here is an example of + pcre2test output, where the "mark" modifier requests the retrieval and + outputting of (*MARK) data: + + re> /X(*MARK:A)Y|X(*MARK:B)Z/mark + data> XY + 0: XY + MK: A + XZ + 0: XZ + MK: B + + The (*MARK) name is tagged with "MK:" in this output, and in this exam- + ple it indicates which of the two alternatives matched. This is a more + efficient way of obtaining this information than putting each alterna- + tive in its own capturing parentheses. + + If a verb with a name is encountered in a positive assertion that is + true, the name is recorded and passed back if it is the last-encoun- + tered. This does not happen for negative assertions or failing positive + assertions. + + After a partial match or a failed match, the last encountered name in + the entire match process is returned. For example: + + re> /X(*MARK:A)Y|X(*MARK:B)Z/mark + data> XP + No match, mark = B + + Note that in this unanchored example the mark is retained from the + match attempt that started at the letter "X" in the subject. Subsequent + match attempts starting at "P" and then with an empty string do not get + as far as the (*MARK) item, but nevertheless do not reset it. + + If you are interested in (*MARK) values after failed matches, you + should probably set the PCRE2_NO_START_OPTIMIZE option (see above) to + ensure that the match is always attempted. + + Verbs that act after backtracking + + The following verbs do nothing when they are encountered. Matching con- + tinues with what follows, but if there is no subsequent match, causing + a backtrack to the verb, a failure is forced. That is, backtracking + cannot pass to the left of the verb. However, when one of these verbs + appears inside an atomic group (which includes any group that is called + as a subroutine) or in an assertion that is true, its effect is con- + fined to that group, because once the group has been matched, there is + never any backtracking into it. 
In this situation, backtracking has to + jump to the left of the entire atomic group or assertion. + + These verbs differ in exactly what kind of failure occurs when back- + tracking reaches them. The behaviour described below is what happens + when the verb is not in a subroutine or an assertion. Subsequent sec- + tions cover these special cases. + + (*COMMIT) + + This verb, which may not be followed by a name, causes the whole match + to fail outright if there is a later matching failure that causes back- + tracking to reach it. Even if the pattern is unanchored, no further + attempts to find a match by advancing the starting point take place. If + (*COMMIT) is the only backtracking verb that is encountered, once it + has been passed pcre2_match() is committed to finding a match at the + current starting point, or not at all. For example: + + a+(*COMMIT)b + + This matches "xxaab" but not "aacaab". It can be thought of as a kind + of dynamic anchor, or "I've started, so I must finish." The name of the + most recently passed (*MARK) in the path is passed back when (*COMMIT) + forces a match failure. + + If there is more than one backtracking verb in a pattern, a different + one that follows (*COMMIT) may be triggered first, so merely passing + (*COMMIT) during a match does not always guarantee that a match must be + at this starting point. + + Note that (*COMMIT) at the start of a pattern is not the same as an + anchor, unless PCRE2's start-of-match optimizations are turned off, as + shown in this output from pcre2test: + + re> /(*COMMIT)abc/ + data> xyzabc + 0: abc + data> + re> /(*COMMIT)abc/no_start_optimize + data> xyzabc + No match + + For the first pattern, PCRE2 knows that any match must start with "a", + so the optimization skips along the subject to "a" before applying the + pattern to the first set of data. The match attempt then succeeds. The + second pattern disables the optimization that skips along to the first + character. 
The pattern is now applied starting at "x", and so the + (*COMMIT) causes the match to fail without trying any other starting + points. + + (*PRUNE) or (*PRUNE:NAME) + + This verb causes the match to fail at the current starting position in + the subject if there is a later matching failure that causes backtrack- + ing to reach it. If the pattern is unanchored, the normal "bumpalong" + advance to the next starting character then happens. Backtracking can + occur as usual to the left of (*PRUNE), before it is reached, or when + matching to the right of (*PRUNE), but if there is no match to the + right, backtracking cannot cross (*PRUNE). In simple cases, the use of + (*PRUNE) is just an alternative to an atomic group or possessive quan- + tifier, but there are some uses of (*PRUNE) that cannot be expressed in + any other way. In an anchored pattern (*PRUNE) has the same effect as + (*COMMIT). + + The behaviour of (*PRUNE:NAME) is not the same as + (*MARK:NAME)(*PRUNE). It is like (*MARK:NAME) in that the name is + remembered for passing back to the caller. However, (*SKIP:NAME) + searches only for names set with (*MARK), ignoring those set by + (*PRUNE) or (*THEN). + + (*SKIP) + + This verb, when given without a name, is like (*PRUNE), except that if + the pattern is unanchored, the "bumpalong" advance is not to the next + character, but to the position in the subject where (*SKIP) was encoun- + tered. (*SKIP) signifies that whatever text was matched leading up to + it cannot be part of a successful match. Consider: + + a+(*SKIP)b + + If the subject is "aaaac...", after the first match attempt fails + (starting at the first character in the string), the starting point + skips on to start the next attempt at "c". Note that a possessive quan- + tifier does not have the same effect as this example; although it would + suppress backtracking during the first match attempt, the second + attempt would start at the second character instead of skipping on to + "c".
+ + (*SKIP:NAME) + + When (*SKIP) has an associated name, its behaviour is modified. When it + is triggered, the previous path through the pattern is searched for the + most recent (*MARK) that has the same name. If one is found, the + "bumpalong" advance is to the subject position that corresponds to that + (*MARK) instead of to where (*SKIP) was encountered. If no (*MARK) with + a matching name is found, the (*SKIP) is ignored. + + Note that (*SKIP:NAME) searches only for names set by (*MARK:NAME). It + ignores names that are set by (*PRUNE:NAME) or (*THEN:NAME). + + (*THEN) or (*THEN:NAME) + + This verb causes a skip to the next innermost alternative when back- + tracking reaches it. That is, it cancels any further backtracking + within the current alternative. Its name comes from the observation + that it can be used for a pattern-based if-then-else block: + + ( COND1 (*THEN) FOO | COND2 (*THEN) BAR | COND3 (*THEN) BAZ ) ... + + If the COND1 pattern matches, FOO is tried (and possibly further items + after the end of the group if FOO succeeds); on failure, the matcher + skips to the second alternative and tries COND2, without backtracking + into COND1. If that succeeds and BAR fails, COND3 is tried. If subse- + quently BAZ fails, there are no more alternatives, so there is a back- + track to whatever came before the entire group. If (*THEN) is not + inside an alternation, it acts like (*PRUNE). + + The behaviour of (*THEN:NAME) is not the same as + (*MARK:NAME)(*THEN). It is like (*MARK:NAME) in that the name is + remembered for passing back to the caller. However, (*SKIP:NAME) + searches only for names set with (*MARK), ignoring those set by + (*PRUNE) and (*THEN). + + A subpattern that does not contain a | character is just a part of the + enclosing alternative; it is not a nested alternation with only one + alternative. The effect of (*THEN) extends beyond such a subpattern to + the enclosing alternative. Consider this pattern, where A, B, etc.
are + complex pattern fragments that do not contain any | characters at this + level: + + A (B(*THEN)C) | D + + If A and B are matched, but there is a failure in C, matching does not + backtrack into A; instead it moves to the next alternative, that is, D. + However, if the subpattern containing (*THEN) is given an alternative, + it behaves differently: + + A (B(*THEN)C | (*FAIL)) | D + + The effect of (*THEN) is now confined to the inner subpattern. After a + failure in C, matching moves to (*FAIL), which causes the whole subpat- + tern to fail because there are no more alternatives to try. In this + case, matching does now backtrack into A. + + Note that a conditional subpattern is not considered as having two + alternatives, because only one is ever used. In other words, the | + character in a conditional subpattern has a different meaning. Ignoring + white space, consider: + + ^.*? (?(?=a) a | b(*THEN)c ) + + If the subject is "ba", this pattern does not match. Because .*? is + ungreedy, it initially matches zero characters. The condition (?=a) + then fails, the character "b" is matched, but "c" is not. At this + point, matching does not backtrack to .*? as might perhaps be expected + from the presence of the | character. The conditional subpattern is + part of the single alternative that comprises the whole pattern, and so + the match fails. (If there was a backtrack into .*?, allowing it to + match "b", the match would succeed.) + + The verbs just described provide four different "strengths" of control + when subsequent matching fails. (*THEN) is the weakest, carrying on the + match at the next alternative. (*PRUNE) comes next, failing the match + at the current starting position, but allowing an advance to the next + character (for an unanchored pattern). (*SKIP) is similar, except that + the advance may be more than one character. (*COMMIT) is the strongest, + causing the entire match to fail. 
+ + More than one backtracking verb + + If more than one backtracking verb is present in a pattern, the one + that is backtracked onto first acts. For example, consider this pat- + tern, where A, B, etc. are complex pattern fragments: + + (A(*COMMIT)B(*THEN)C|ABD) + + If A matches but B fails, the backtrack to (*COMMIT) causes the entire + match to fail. However, if A and B match, but C fails, the backtrack to + (*THEN) causes the next alternative (ABD) to be tried. This behaviour + is consistent, but is not always the same as Perl's. It means that if + two or more backtracking verbs appear in succession, all but the last + of them have no effect. Consider this example: + + ...(*COMMIT)(*PRUNE)... + + If there is a matching failure to the right, backtracking onto (*PRUNE) + causes it to be triggered, and its action is taken. There can never be + a backtrack onto (*COMMIT). + + Backtracking verbs in repeated groups + + PCRE2 differs from Perl in its handling of backtracking verbs in + repeated groups. For example, consider: + + /(a(*COMMIT)b)+ac/ + + If the subject is "abac", Perl matches, but PCRE2 fails because the + (*COMMIT) in the second repeat of the group acts. + + Backtracking verbs in assertions + + (*FAIL) in an assertion has its normal effect: it forces an immediate + backtrack. + + (*ACCEPT) in a positive assertion causes the assertion to succeed with- + out any further processing. In a negative assertion, (*ACCEPT) causes + the assertion to fail without any further processing. + + The other backtracking verbs are not treated specially if they appear + in a positive assertion. In particular, (*THEN) skips to the next + alternative in the innermost enclosing group that has alternations, + whether or not this is within the assertion. + + Negative assertions are, however, different, in order to ensure that + changing a positive assertion into a negative assertion changes its + result.
Backtracking into (*COMMIT), (*SKIP), or (*PRUNE) causes a neg- + ative assertion to be true, without considering any further alternative + branches in the assertion. Backtracking into (*THEN) causes it to skip + to the next enclosing alternative within the assertion (the normal be- + haviour), but if the assertion does not have such an alternative, + (*THEN) behaves like (*PRUNE). + + Backtracking verbs in subroutines + + These behaviours occur whether or not the subpattern is called recur- + sively. Perl's treatment of subroutines is different in some cases. + + (*FAIL) in a subpattern called as a subroutine has its normal effect: + it forces an immediate backtrack. + + (*ACCEPT) in a subpattern called as a subroutine causes the subroutine + match to succeed without any further processing. Matching then contin- + ues after the subroutine call. + + (*COMMIT), (*SKIP), and (*PRUNE) in a subpattern called as a subroutine + cause the subroutine match to fail. + + (*THEN) skips to the next alternative in the innermost enclosing group + within the subpattern that has alternatives. If there is no such group + within the subpattern, (*THEN) causes the subroutine match to fail. + + +SEE ALSO + + pcre2api(3), pcre2callout(3), pcre2matching(3), pcre2syntax(3), + pcre2(3). + + +AUTHOR + + Philip Hazel + University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 27 December 2016 + Copyright (c) 1997-2016 University of Cambridge. +------------------------------------------------------------------------------ + + +PCRE2PERFORM(3) Library Functions Manual PCRE2PERFORM(3) + + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + +PCRE2 PERFORMANCE + + Two aspects of performance are discussed below: memory usage and pro- + cessing time. The way you express your pattern as a regular expression + can affect both of them. 
+ + +COMPILED PATTERN MEMORY USAGE + + Patterns are compiled by PCRE2 into a reasonably efficient interpretive + code, so that most simple patterns do not use much memory. However, + there is one case where the memory usage of a compiled pattern can be + unexpectedly large. If a parenthesized subpattern has a quantifier with + a minimum greater than 1 and/or a limited maximum, the whole subpattern + is repeated in the compiled code. For example, the pattern + + (abc|def){2,4} + + is compiled as if it were + + (abc|def)(abc|def)((abc|def)(abc|def)?)? + + (Technical aside: It is done this way so that backtrack points within + each of the repetitions can be independently maintained.) + + For regular expressions whose quantifiers use only small numbers, this + is not usually a problem. However, if the numbers are large, and par- + ticularly if such repetitions are nested, the memory usage can become + an embarrassment. For example, the very simple pattern + + ((ab){1,1000}c){1,3} + + uses 51K bytes when compiled using the 8-bit library. When PCRE2 is + compiled with its default internal pointer size of two bytes, the size + limit on a compiled pattern is 64K code units in the 8-bit and 16-bit + libraries, and this is reached with the above pattern if the outer rep- + etition is increased from 3 to 4. PCRE2 can be compiled to use larger + internal pointers and thus handle larger compiled patterns, but it is + better to try to rewrite your pattern to use less memory if you can. + + One way of reducing the memory usage for such patterns is to make use + of PCRE2's "subroutine" facility. Re-writing the above pattern as + + ((ab)(?2){0,999}c)(?1){0,2} + + reduces the memory requirements to 18K, and indeed it remains under 20K + even with the outer repetition increased to 100. However, this pattern + is not exactly equivalent, because the "subroutine" calls are treated + as atomic groups into which there can be no backtracking if there is a + subsequent matching failure. 
Therefore, PCRE2 cannot do this kind of + rewriting automatically. Furthermore, there is a noticeable loss of + speed when executing the modified pattern. Nevertheless, if the atomic + grouping is not a problem and the loss of speed is acceptable, this + kind of rewriting will allow you to process patterns that PCRE2 cannot + otherwise handle. + + +STACK USAGE AT RUN TIME + + When pcre2_match() is used for matching, certain kinds of pattern can + cause it to use large amounts of the process stack. In some environ- + ments the default process stack is quite small, and if it runs out the + result is often SIGSEGV. Rewriting your pattern can often help. The + pcre2stack documentation discusses this issue in detail. + + +PROCESSING TIME + + Certain items in regular expression patterns are processed more effi- + ciently than others. It is more efficient to use a character class like + [aeiou] than a set of single-character alternatives such as + (a|e|i|o|u). In general, the simplest construction that provides the + required behaviour is usually the most efficient. Jeffrey Friedl's book + contains a lot of useful general discussion about optimizing regular + expressions for efficient performance. This document contains a few + observations about PCRE2. + + Using Unicode character properties (the \p, \P, and \X escapes) is + slow, because PCRE2 has to use a multi-stage table lookup whenever it + needs a character's property. If you can find an alternative pattern + that does not use character properties, it will probably be faster. + + By default, the escape sequences \b, \d, \s, and \w, and the POSIX + character classes such as [:alpha:] do not use Unicode properties, + partly for backwards compatibility, and partly for performance reasons. + However, you can set the PCRE2_UCP option or start the pattern with + (*UCP) if you want Unicode character properties to be used. 
This can + double the matching time for items such as \d, when matched with + pcre2_match(); the performance loss is less with a DFA matching func- + tion, and in both cases there is not much difference for \b. + + When a pattern begins with .* not in atomic parentheses, nor in paren- + theses that are the subject of a backreference, and the PCRE2_DOTALL + option is set, the pattern is implicitly anchored by PCRE2, since it + can match only at the start of a subject string. If the pattern has + multiple top-level branches, they must all be anchorable. The optimiza- + tion can be disabled by the PCRE2_NO_DOTSTAR_ANCHOR option, and is + automatically disabled if the pattern contains (*PRUNE) or (*SKIP). + + If PCRE2_DOTALL is not set, PCRE2 cannot make this optimization, + because the dot metacharacter does not then match a newline, and if the + subject string contains newlines, the pattern may match from the char- + acter immediately following one of them instead of from the very start. + For example, the pattern + + .*second + + matches the subject "first\nand second" (where \n stands for a newline + character), with the match starting at the seventh character. In order + to do this, PCRE2 has to retry the match starting after every newline + in the subject. + + If you are using such a pattern with subject strings that do not con- + tain newlines, the best performance is obtained by setting + PCRE2_DOTALL, or starting the pattern with ^.* or ^.*? to indicate + explicit anchoring. That saves PCRE2 from having to scan along the sub- + ject looking for a newline to restart at. + + Beware of patterns that contain nested indefinite repeats. These can + take a long time to run when applied to a string that does not match. + Consider the pattern fragment + + ^(a+)* + + This can match "aaaa" in 16 different ways, and this number increases + very rapidly as the string gets longer. 
(The * repeat can match 0, 1, + 2, 3, or 4 times, and for each of those cases other than 0 or 4, the + + repeats can match different numbers of times.) When the remainder of + the pattern is such that the entire match is going to fail, PCRE2 has + in principle to try every possible variation, and this can take an + extremely long time, even for relatively short strings. + + An optimization catches some of the more simple cases such as + + (a+)*b + + where a literal character follows. Before embarking on the standard + matching procedure, PCRE2 checks that there is a "b" later in the sub- + ject string, and if there is not, it fails the match immediately. How- + ever, when there is no following literal this optimization cannot be + used. You can see the difference by comparing the behaviour of + + (a+)*\d + + with the pattern above. The former gives a failure almost instantly + when applied to a whole line of "a" characters, whereas the latter + takes an appreciable time with strings longer than about 20 characters. + + In many cases, the solution to this kind of performance issue is to use + an atomic group or a possessive quantifier. + + +AUTHOR + + Philip Hazel + University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 02 January 2015 + Copyright (c) 1997-2015 University of Cambridge. +------------------------------------------------------------------------------ + + +PCRE2POSIX(3) Library Functions Manual PCRE2POSIX(3) + + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + +SYNOPSIS + + #include <pcre2posix.h> + + int regcomp(regex_t *preg, const char *pattern, + int cflags); + + int regexec(const regex_t *preg, const char *string, + size_t nmatch, regmatch_t pmatch[], int eflags); + + size_t regerror(int errcode, const regex_t *preg, + char *errbuf, size_t errbuf_size); + + void regfree(regex_t *preg); + + +DESCRIPTION + + This set of functions provides a POSIX-style API for the PCRE2 regular + expression 8-bit library.
See the pcre2api documentation for a descrip- + tion of PCRE2's native API, which contains much additional functional- + ity. There are no POSIX-style wrappers for PCRE2's 16-bit and 32-bit + libraries. + + The functions described here are just wrapper functions that ultimately + call the PCRE2 native API. Their prototypes are defined in the + pcre2posix.h header file, and on Unix systems the library itself is + called libpcre2-posix.a, so can be accessed by adding -lpcre2-posix to + the command for linking an application that uses them. Because the + POSIX functions call the native ones, it is also necessary to add + -lpcre2-8. + + Those POSIX option bits that can reasonably be mapped to PCRE2 native + options have been implemented. In addition, the option REG_EXTENDED is + defined with the value zero. This has no effect, but since programs + that are written to the POSIX interface often use it, this makes it + easier to slot in PCRE2 as a replacement library. Other POSIX options + are not even defined. + + There are also some options that are not defined by POSIX. These have + been added at the request of users who want to make use of certain + PCRE2-specific features via the POSIX calling interface. + + When PCRE2 is called via these functions, it is only the API that is + POSIX-like in style. The syntax and semantics of the regular expres- + sions themselves are still those of Perl, subject to the setting of + various PCRE2 options, as described below. "POSIX-like in style" means + that the API approximates to the POSIX definition; it is not fully + POSIX-compatible, and in multi-unit encoding domains it is probably + even less compatible. + + The header for these functions is supplied as pcre2posix.h to avoid any + potential clash with other POSIX libraries. It can, of course, be + renamed or aliased as regex.h, which is the "correct" name. 
It provides + two structure types, regex_t for compiled internal forms, and reg- + match_t for returning captured substrings. It also defines some con- + stants whose names start with "REG_"; these are used for setting + options and identifying error codes. + + +COMPILING A PATTERN + + The function regcomp() is called to compile a pattern into an internal + form. The pattern is a C string terminated by a binary zero, and is + passed in the argument pattern. The preg argument is a pointer to a + regex_t structure that is used as a base for storing information about + the compiled regular expression. + + The argument cflags is either zero, or contains one or more of the bits + defined by the following macros: + + REG_DOTALL + + The PCRE2_DOTALL option is set when the regular expression is passed + for compilation to the native function. Note that REG_DOTALL is not + part of the POSIX standard. + + REG_ICASE + + The PCRE2_CASELESS option is set when the regular expression is passed + for compilation to the native function. + + REG_NEWLINE + + The PCRE2_MULTILINE option is set when the regular expression is passed + for compilation to the native function. Note that this does not mimic + the defined POSIX behaviour for REG_NEWLINE (see the following sec- + tion). + + REG_NOSUB + + When a pattern that is compiled with this flag is passed to regexec() + for matching, the nmatch and pmatch arguments are ignored, and no cap- + tured strings are returned. Versions of the PCRE library prior to 10.22 + used to set the PCRE2_NO_AUTO_CAPTURE compile option, but this no + longer happens because it disables the use of back references. + + REG_UCP + + The PCRE2_UCP option is set when the regular expression is passed for + compilation to the native function. This causes PCRE2 to use Unicode + properties when matching \d, \w, etc., instead of just recognizing + ASCII values. Note that REG_UCP is not part of the POSIX standard.
+ + REG_UNGREEDY + + The PCRE2_UNGREEDY option is set when the regular expression is passed + for compilation to the native function. Note that REG_UNGREEDY is not + part of the POSIX standard. + + REG_UTF + + The PCRE2_UTF option is set when the regular expression is passed for + compilation to the native function. This causes the pattern itself and + all data strings used for matching it to be treated as UTF-8 strings. + Note that REG_UTF is not part of the POSIX standard. + + In the absence of these flags, no options are passed to the native + function. This means that the regex is compiled with PCRE2 default + semantics. In particular, the way it handles newline characters in the + subject string is the Perl way, not the POSIX way. Note that setting + PCRE2_MULTILINE has only some of the effects specified for REG_NEWLINE. + It does not affect the way newlines are matched by the dot metacharac- + ter (they are not) or by a negative class such as [^a] (they are). + + The yield of regcomp() is zero on success, and non-zero otherwise. The + preg structure is filled in on success, and one member of the structure + is public: re_nsub contains the number of capturing subpatterns in the + regular expression. Various error codes are defined in the header file. + + NOTE: If the yield of regcomp() is non-zero, you must not attempt to + use the contents of the preg structure. If, for example, you pass it to + regexec(), the result is undefined and your program is likely to crash. + + +MATCHING NEWLINE CHARACTERS + + This area is not simple, because POSIX and Perl take different views of + things. It is not possible to get PCRE2 to obey POSIX semantics, but + then PCRE2 was never intended to be a POSIX engine. The following table + lists the different possibilities for matching newline characters in + Perl and PCRE2: + + Default Change with + + .
matches newline no PCRE2_DOTALL + newline matches [^a] yes not changeable + $ matches \n at end yes PCRE2_DOLLAR_ENDONLY + $ matches \n in middle no PCRE2_MULTILINE + ^ matches \n in middle no PCRE2_MULTILINE + + This is the equivalent table for a POSIX-compatible pattern matcher: + + Default Change with + + . matches newline yes REG_NEWLINE + newline matches [^a] yes REG_NEWLINE + $ matches \n at end no REG_NEWLINE + $ matches \n in middle no REG_NEWLINE + ^ matches \n in middle no REG_NEWLINE + + This behaviour is not what happens when PCRE2 is called via its POSIX + API. By default, PCRE2's behaviour is the same as Perl's, except that + there is no equivalent for PCRE2_DOLLAR_ENDONLY in Perl. In both PCRE2 + and Perl, there is no way to stop newline from matching [^a]. + + Default POSIX newline handling can be obtained by setting PCRE2_DOTALL + and PCRE2_DOLLAR_ENDONLY when calling pcre2_compile() directly, but + there is no way to make PCRE2 behave exactly as for the REG_NEWLINE + action. When using the POSIX API, passing REG_NEWLINE to PCRE2's reg- + comp() function causes PCRE2_MULTILINE to be passed to pcre2_compile(), + and REG_DOTALL passes PCRE2_DOTALL. There is no way to pass PCRE2_DOL- + LAR_ENDONLY. + + +MATCHING A PATTERN + + The function regexec() is called to match a compiled pattern preg + against a given string, which is by default terminated by a zero byte + (but see REG_STARTEND below), subject to the options in eflags. These + can be: + + REG_NOTBOL + + The PCRE2_NOTBOL option is set when calling the underlying PCRE2 match- + ing function. + + REG_NOTEMPTY + + The PCRE2_NOTEMPTY option is set when calling the underlying PCRE2 + matching function. Note that REG_NOTEMPTY is not part of the POSIX + standard. However, setting this option can give more POSIX-like behav- + iour in some situations. + + REG_NOTEOL + + The PCRE2_NOTEOL option is set when calling the underlying PCRE2 match- + ing function. 
+ + REG_STARTEND + + The string is considered to start at string + pmatch[0].rm_so and to + have a terminating NUL located at string + pmatch[0].rm_eo (there need + not actually be a NUL at that location), regardless of the value of + nmatch. This is a BSD extension, compatible with but not specified by + IEEE Standard 1003.2 (POSIX.2), and should be used with caution in + software intended to be portable to other systems. Note that a non-zero + rm_so does not imply REG_NOTBOL; REG_STARTEND affects only the location + of the string, not how it is matched. Setting REG_STARTEND and passing + pmatch as NULL are mutually exclusive; the error REG_INVARG is + returned. + + If the pattern was compiled with the REG_NOSUB flag, no data about any + matched strings is returned. The nmatch and pmatch arguments of + regexec() are ignored (except possibly as input for REG_STARTEND). + + The value of nmatch may be zero, and the value pmatch may be NULL + (unless REG_STARTEND is set); in both these cases no data about any + matched strings is returned. + + Otherwise, the portion of the string that was matched, and also any + captured substrings, are returned via the pmatch argument, which points + to an array of nmatch structures of type regmatch_t, containing the + members rm_so and rm_eo. These contain the byte offset to the first + character of each substring and the offset to the first character after + the end of each substring, respectively. The 0th element of the vector + relates to the entire portion of string that was matched; subsequent + elements relate to the capturing subpatterns of the regular expression. + Unused entries in the array have both structure members set to -1. + + A successful match yields a zero return; various error codes are + defined in the header file, of which REG_NOMATCH is the "expected" + failure code. + + +ERROR MESSAGES + + The regerror() function maps a non-zero errorcode from either regcomp() + or regexec() to a printable message. 
If preg is not NULL, the error + should have arisen from the use of that structure. A message terminated + by a binary zero is placed in errbuf. If the buffer is too short, only + the first errbuf_size - 1 characters of the error message are used. The + yield of the function is the size of buffer needed to hold the whole + message, including the terminating zero. This value is greater than + errbuf_size if the message was truncated. + + +MEMORY USAGE + + Compiling a regular expression causes memory to be allocated and asso- + ciated with the preg structure. The function regfree() frees all such + memory, after which preg may no longer be used as a compiled expres- + sion. + + +AUTHOR + + Philip Hazel + University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 31 January 2016 + Copyright (c) 1997-2016 University of Cambridge. +------------------------------------------------------------------------------ + + +PCRE2SAMPLE(3) Library Functions Manual PCRE2SAMPLE(3) + + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + +PCRE2 SAMPLE PROGRAM + + A simple, complete demonstration program to get you started with using + PCRE2 is supplied in the file pcre2demo.c in the src directory in the + PCRE2 distribution. A listing of this program is given in the pcre2demo + documentation. If you do not have a copy of the PCRE2 distribution, you + can save this listing to re-create the contents of pcre2demo.c. + + The demonstration program compiles the regular expression that is its + first argument, and matches it against the subject string in its second + argument. No PCRE2 options are set, and default character tables are + used. If matching succeeds, the program outputs the portion of the sub- + ject that matched, together with the contents of any captured sub- + strings. + + If the -g option is given on the command line, the program then goes on + to check for further matches of the same regular expression in the same + subject string. 
The logic is a little bit tricky because of the possi- + bility of matching an empty string. Comments in the code explain what + is going on. + + The code in pcre2demo.c is an 8-bit program that uses the PCRE2 8-bit + library. It handles strings and characters that are stored in 8-bit + code units. By default, one character corresponds to one code unit, + but if the pattern starts with "(*UTF)", both it and the subject are + treated as UTF-8 strings, where characters may occupy multiple code + units. + + If PCRE2 is installed in the standard include and library directories + for your operating system, you should be able to compile the demonstra- + tion program using a command like this: + + cc -o pcre2demo pcre2demo.c -lpcre2-8 + + If PCRE2 is installed elsewhere, you may need to add additional options + to the command line. For example, on a Unix-like system that has PCRE2 + installed in /usr/local, you can compile the demonstration program + using a command like this: + + cc -o pcre2demo -I/usr/local/include pcre2demo.c \ + -L/usr/local/lib -lpcre2-8 + + Once you have built the demonstration program, you can run simple tests + like this: + + ./pcre2demo 'cat|dog' 'the cat sat on the mat' + ./pcre2demo -g 'cat|dog' 'the dog sat on the cat' + + Note that there is a much more comprehensive test program, called + pcre2test, which supports many more facilities for testing regular + expressions using all three PCRE2 libraries (8-bit, 16-bit, and 32-bit, + though not all three need be installed). The pcre2demo program is pro- + vided as a relatively simple coding example. + + If you try to run pcre2demo when PCRE2 is not installed in the standard + library directory, you may get an error like this on some operating + systems (e.g. Solaris): + + ld.so.1: pcre2demo: fatal: libpcre2-8.so.0: open failed: No such file + or directory + + This is caused by the way shared library support works on those sys- + tems. 
You need to add + + -R/usr/local/lib + + (for example) to the compile command to get round this problem. + + +AUTHOR + + Philip Hazel + University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 02 February 2016 + Copyright (c) 1997-2016 University of Cambridge. +------------------------------------------------------------------------------ +PCRE2SERIALIZE(3) Library Functions Manual PCRE2SERIALIZE(3) + + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + +SAVING AND RE-USING PRECOMPILED PCRE2 PATTERNS + + int32_t pcre2_serialize_decode(pcre2_code **codes, + int32_t number_of_codes, const uint32_t *bytes, + pcre2_general_context *gcontext); + + int32_t pcre2_serialize_encode(pcre2_code **codes, + int32_t number_of_codes, uint32_t **serialized_bytes, + PCRE2_SIZE *serialized_size, pcre2_general_context *gcontext); + + void pcre2_serialize_free(uint8_t *bytes); + + int32_t pcre2_serialize_get_number_of_codes(const uint8_t *bytes); + + If you are running an application that uses a large number of regular + expression patterns, it may be useful to store them in a precompiled + form instead of having to compile them every time the application is + run. However, if you are using the just-in-time optimization feature, + it is not possible to save and reload the JIT data, because it is posi- + tion-dependent. The host on which the patterns are reloaded must be + running the same version of PCRE2, with the same code unit width, and + must also have the same endianness, pointer width and PCRE2_SIZE type. + For example, patterns compiled on a 32-bit system using PCRE2's 16-bit + library cannot be reloaded on a 64-bit system, nor can they be reloaded + using the 8-bit library. + + +SECURITY CONCERNS + + The facility for saving and restoring compiled patterns is intended for + use within individual applications. 
As such, the data supplied to + pcre2_serialize_decode() is expected to be trusted data, not data from + arbitrary external sources. There is only some simple consistency + checking, not complete validation of what is being re-loaded. + + +SAVING COMPILED PATTERNS + + Before compiled patterns can be saved they must be serialized, that is, + converted to a stream of bytes. A single byte stream may contain any + number of compiled patterns, but they must all use the same character + tables. A single copy of the tables is included in the byte stream (its + size is 1088 bytes). For more details of character tables, see the sec- + tion on locale support in the pcre2api documentation. + + The function pcre2_serialize_encode() creates a serialized byte stream + from a list of compiled patterns. Its first two arguments specify the + list, being a pointer to a vector of pointers to compiled patterns, and + the length of the vector. The third and fourth arguments point to vari- + ables which are set to point to the created byte stream and its length, + respectively. The final argument is a pointer to a general context, + which can be used to specify custom memory management functions. If + this argument is NULL, malloc() is used to obtain memory for the byte + stream. The yield of the function is the number of serialized patterns, + or one of the following negative error codes: + + PCRE2_ERROR_BADDATA the number of patterns is zero or less + PCRE2_ERROR_BADMAGIC mismatch of id bytes in one of the patterns + PCRE2_ERROR_MEMORY memory allocation failed + PCRE2_ERROR_MIXEDTABLES the patterns do not all use the same tables + PCRE2_ERROR_NULL the 1st, 3rd, or 4th argument is NULL + + PCRE2_ERROR_BADMAGIC means either that a pattern's code has been cor- + rupted, or that a slot in the vector does not point to a compiled pat- + tern. + + Once a set of patterns has been serialized you can save the data in any + appropriate manner.
Here is sample code that compiles two patterns and + writes them to a file. It assumes that the variable fd refers to a file + that is open for output. The error checking that should be present in a + real application has been omitted for simplicity. + + int errorcode; + uint8_t *bytes; + PCRE2_SIZE erroroffset; + PCRE2_SIZE bytescount; + pcre2_code *list_of_codes[2]; + list_of_codes[0] = pcre2_compile("first pattern", + PCRE2_ZERO_TERMINATED, 0, &errorcode, &erroroffset, NULL); + list_of_codes[1] = pcre2_compile("second pattern", + PCRE2_ZERO_TERMINATED, 0, &errorcode, &erroroffset, NULL); + errorcode = pcre2_serialize_encode(list_of_codes, 2, &bytes, + &bytescount, NULL); + errorcode = fwrite(bytes, 1, bytescount, fd); + + Note that the serialized data is binary data that may contain any of + the 256 possible byte values. On systems that make a distinction + between binary and non-binary data, be sure that the file is opened for + binary output. + + Serializing a set of patterns leaves the original data untouched, so + they can still be used for matching. Their memory must eventually be + freed in the usual way by calling pcre2_code_free(). When you have fin- + ished with the byte stream, it too must be freed by calling pcre2_seri- + alize_free(). + + +RE-USING PRECOMPILED PATTERNS + + In order to re-use a set of saved patterns you must first make the + serialized byte stream available in main memory (for example, by read- + ing from a file). The management of this memory block is up to the + application. You can use the pcre2_serialize_get_number_of_codes() + function to find out how many compiled patterns are in the serialized + data without actually decoding the patterns: + + uint8_t *bytes = ; + int32_t number_of_codes = pcre2_serialize_get_number_of_codes(bytes); + + The pcre2_serialize_decode() function reads a byte stream and recreates + the compiled patterns in new memory blocks, setting pointers to them in + a vector. 
The first two arguments are a pointer to a suitable vector + and its length, and the third argument points to a byte stream. The + final argument is a pointer to a general context, which can be used to + specify custom memory management functions for the decoded patterns. + If this argument is NULL, malloc() and free() are used. After deserial- + ization, the byte stream is no longer needed and can be discarded. + + int32_t number_of_codes; + pcre2_code *list_of_codes[2]; + uint8_t *bytes = ; + int32_t number_of_codes = + pcre2_serialize_decode(list_of_codes, 2, bytes, NULL); + + If the vector is not large enough for all the patterns in the byte + stream, it is filled with those that fit, and the remainder are + ignored. The yield of the function is the number of decoded patterns, + or one of the following negative error codes: + + PCRE2_ERROR_BADDATA second argument is zero or less + PCRE2_ERROR_BADMAGIC mismatch of id bytes in the data + PCRE2_ERROR_BADMODE mismatch of code unit size or PCRE2 version + PCRE2_ERROR_BADSERIALIZEDDATA other sanity check failure + PCRE2_ERROR_MEMORY memory allocation failed + PCRE2_ERROR_NULL first or third argument is NULL + + PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was + compiled on a system with different endianness. + + Decoded patterns can be used for matching in the usual way, and must be + freed by calling pcre2_code_free(). However, be aware that there is a + potential race issue if you are using multiple patterns that were + decoded from a single byte stream in a multithreaded application. A + single copy of the character tables is used by all the decoded patterns + and a reference count is used to arrange for its memory to be automati- + cally freed when the last pattern is freed, but there is no locking on + this reference count.
Therefore, if you want to call pcre2_code_free() + for these patterns in different threads, you must arrange your own + locking, and ensure that pcre2_code_free() cannot be called by two + threads at the same time. + + If a pattern was processed by pcre2_jit_compile() before being serial- + ized, the JIT data is discarded and so is no longer available after a + save/restore cycle. You can, however, process a restored pattern with + pcre2_jit_compile() if you wish. + + +AUTHOR + + Philip Hazel + University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 24 May 2016 + Copyright (c) 1997-2016 University of Cambridge. +------------------------------------------------------------------------------ + + +PCRE2STACK(3) Library Functions Manual PCRE2STACK(3) + + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + +PCRE2 DISCUSSION OF STACK USAGE + + When you call pcre2_match(), it makes use of an internal function + called match(). This calls itself recursively at branch points in the + pattern, in order to remember the state of the match so that it can + back up and try a different alternative after a failure. As matching + proceeds deeper and deeper into the tree of possibilities, the recur- + sion depth increases. The match() function is also called in other cir- + cumstances, for example, whenever a parenthesized sub-pattern is + entered, and in certain cases of repetition. + + Not all calls of match() increase the recursion depth; for an item such + as a* it may be called several times at the same level, after matching + different numbers of a's. Furthermore, in a number of cases where the + result of the recursive call would immediately be passed back as the + result of the current call (a "tail recursion"), the function is just + restarted instead. + + Each time the internal match() function is called recursively, it uses + memory from the process stack. 
For certain kinds of pattern and data, + very large amounts of stack may be needed, despite the recognition of + "tail recursion". Note that if PCRE2 is compiled with the -fsani- + tize=address option of the GCC compiler, the stack requirements are + greatly increased. + + The above comments apply when pcre2_match() is run in its normal inter- + pretive manner. If the compiled pattern was processed by pcre2_jit_com- + pile(), and just-in-time compiling was successful, and the options + passed to pcre2_match() were not incompatible, the matching process + uses the JIT-compiled code instead of the match() function. In this + case, the memory requirements are handled entirely differently. See the + pcre2jit documentation for details. + + The pcre2_dfa_match() function operates in a different way to + pcre2_match(), and uses recursion only when there is a regular expres- + sion recursion or subroutine call in the pattern. This includes the + processing of assertion and "once-only" subpatterns, which are handled + like subroutine calls. Normally, these are never very deep, and the + limit on the complexity of pcre2_dfa_match() is controlled by the + amount of workspace it is given. However, it is possible to write pat- + terns with runaway infinite recursions; such patterns will cause + pcre2_dfa_match() to run out of stack unless a limit is applied (see + below). + + The comments in the next three sections do not apply to + pcre2_dfa_match(); they are relevant only for pcre2_match() without the + JIT optimization. + + Reducing pcre2_match()'s stack usage + + You can often reduce the amount of recursion, and therefore the amount + of stack used, by modifying the pattern that is being matched. Con- + sider, for example, this pattern: + + ([^<]|<(?!inet))+ + + It matches from wherever it starts until it encounters "...) named capturing group (Perl) + (?'name'...) named capturing group (Perl) + (?P...) named capturing group (Python) + (?:...) non-capturing group + (?|...) 
non-capturing group; reset group numbers for + capturing groups in each alternative + + +ATOMIC GROUPS + + (?>...) atomic, non-capturing group + + +COMMENT + + (?#....) comment (not nestable) + + +OPTION SETTING + + (?i) caseless + (?J) allow duplicate names + (?m) multiline + (?s) single line (dotall) + (?U) default ungreedy (lazy) + (?x) extended (ignore white space) + (?-...) unset option(s) + + The following are recognized only at the very start of a pattern or + after one of the newline or \R options with similar syntax. More than + one of them may appear. + + (*LIMIT_MATCH=d) set the match limit to d (decimal number) + (*LIMIT_RECURSION=d) set the recursion limit to d (decimal number) + (*NOTEMPTY) set PCRE2_NOTEMPTY when matching + (*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching + (*NO_AUTO_POSSESS) no auto-possessification (PCRE2_NO_AUTO_POSSESS) + (*NO_DOTSTAR_ANCHOR) no .* anchoring (PCRE2_NO_DOTSTAR_ANCHOR) + (*NO_JIT) disable JIT optimization + (*NO_START_OPT) no start-match optimization (PCRE2_NO_START_OPTIMIZE) + (*UTF) set appropriate UTF mode for the library in use + (*UCP) set PCRE2_UCP (use Unicode properties for \d etc) + + Note that LIMIT_MATCH and LIMIT_RECURSION can only reduce the value of + the limits set by the caller of pcre2_match() or pcre2_dfa_match(), not + increase them. The application can lock out the use of (*UTF) and + (*UCP) by setting the PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options, + respectively, at compile time. + + +NEWLINE CONVENTION + + These are recognized only at the very start of the pattern or after + option settings with a similar syntax. + + (*CR) carriage return only + (*LF) linefeed only + (*CRLF) carriage return followed by linefeed + (*ANYCRLF) all three of the above + (*ANY) any Unicode newline sequence + + +WHAT \R MATCHES + + These are recognized only at the very start of the pattern or after + option setting with a similar syntax. 
+ + (*BSR_ANYCRLF) CR, LF, or CRLF + (*BSR_UNICODE) any Unicode newline sequence + + +LOOKAHEAD AND LOOKBEHIND ASSERTIONS + + (?=...) positive look ahead + (?!...) negative look ahead + (?<=...) positive look behind + (? reference by name (Perl) + \k'name' reference by name (Perl) + \g{name} reference by name (Perl) + \k{name} reference by name (.NET) + (?P=name) reference by name (Python) + + +SUBROUTINE REFERENCES (POSSIBLY RECURSIVE) + + (?R) recurse whole pattern + (?n) call subpattern by absolute number + (?+n) call subpattern by relative number + (?-n) call subpattern by relative number + (?&name) call subpattern by name (Perl) + (?P>name) call subpattern by name (Python) + \g call subpattern by name (Oniguruma) + \g'name' call subpattern by name (Oniguruma) + \g call subpattern by absolute number (Oniguruma) + \g'n' call subpattern by absolute number (Oniguruma) + \g<+n> call subpattern by relative number (PCRE2 extension) + \g'+n' call subpattern by relative number (PCRE2 extension) + \g<-n> call subpattern by relative number (PCRE2 extension) + \g'-n' call subpattern by relative number (PCRE2 extension) + + +CONDITIONAL PATTERNS + + (?(condition)yes-pattern) + (?(condition)yes-pattern|no-pattern) + + (?(n) absolute reference condition + (?(+n) relative reference condition + (?(-n) relative reference condition + (?() named reference condition (Perl) + (?('name') named reference condition (Perl) + (?(name) named reference condition (PCRE2, deprecated) + (?(R) overall recursion condition + (?(Rn) specific numbered group recursion condition + (?(R&name) specific named group recursion condition + (?(DEFINE) define subpattern for reference + (?(VERSION[>]=n.m) test PCRE2 version + (?(assert) assertion condition + + Note the ambiguity of (?(R) and (?(Rn) which might be named reference + conditions or recursion tests. Such a condition is interpreted as a + reference condition if the relevant named group exists. 
+ + +BACKTRACKING CONTROL + + The following act immediately they are reached: + + (*ACCEPT) force successful match + (*FAIL) force backtrack; synonym (*F) + (*MARK:NAME) set name to be passed back; synonym (*:NAME) + + The following act only when a subsequent match failure causes a back- + track to reach them. They all force a match failure, but they differ in + what happens afterwards. Those that advance the start-of-match point do + so only if the pattern is not anchored. + + (*COMMIT) overall failure, no advance of starting point + (*PRUNE) advance to next starting character + (*PRUNE:NAME) equivalent to (*MARK:NAME)(*PRUNE) + (*SKIP) advance to current matching position + (*SKIP:NAME) advance to position corresponding to an earlier + (*MARK:NAME); if not found, the (*SKIP) is ignored + (*THEN) local failure, backtrack to next alternation + (*THEN:NAME) equivalent to (*MARK:NAME)(*THEN) + + +CALLOUTS + + (?C) callout (assumed number 0) + (?Cn) callout with numerical data n + (?C"text") callout with string data + + The allowed string delimiters are ` ' " ^ % # $ (which are the same for + the start and the end), and the starting delimiter { matched with the + ending delimiter }. To encode the ending delimiter within the string, + double it. + + +SEE ALSO + + pcre2pattern(3), pcre2api(3), pcre2callout(3), pcre2matching(3), + pcre2(3). + + +AUTHOR + + Philip Hazel + University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 23 December 2016 + Copyright (c) 1997-2016 University of Cambridge. +------------------------------------------------------------------------------ + + PCRE2UNICODE(3) Library Functions Manual PCRE2UNICODE(3) @@ -5021,58 +9884,74 @@ WIDE CHARACTERS AND UTF MODES In UTF modes, the dot metacharacter matches one UTF character instead of a single code unit. 
- The escape sequence \C can be used to match a single code unit, in a - UTF mode, but its use can lead to some strange effects because it - breaks up multi-unit characters (see the description of \C in the - pcre2pattern documentation). The use of \C is not supported in the - alternative matching function pcre2_dfa_match(), nor is it supported in - UTF mode by the JIT optimization. If JIT optimization is requested for - a UTF pattern that contains \C, it will not succeed, and so the match- - ing will be carried out by the normal interpretive function. + The escape sequence \C can be used to match a single code unit in a UTF + mode, but its use can lead to some strange effects because it breaks up + multi-unit characters (see the description of \C in the pcre2pattern + documentation). + + The use of \C is not supported by the alternative matching function + pcre2_dfa_match() when in UTF-8 or UTF-16 mode, that is, when a charac- + ter may consist of more than one code unit. The use of \C in these + modes provokes a match-time error. Also, the JIT optimization does not + support \C in these modes. If JIT optimization is requested for a UTF-8 + or UTF-16 pattern that contains \C, it will not succeed, and so when + pcre2_match() is called, the matching will be carried out by the normal + interpretive function. The character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly test - characters of any code value, but, by default, the characters that - PCRE2 recognizes as digits, spaces, or word characters remain the same - set as in non-UTF mode, all with code points less than 256. This - remains true even when PCRE2 is built to include Unicode support, - because to do otherwise would slow down matching in many common cases. - Note that this also applies to \b and \B, because they are defined in - terms of \w and \W. If you want to test for a wider sense of, say, - "digit", you can use explicit Unicode property tests such as \p{Nd}. 
- Alternatively, if you set the PCRE2_UCP option, the way that the char- - acter escapes work is changed so that Unicode properties are used to + characters of any code value, but, by default, the characters that + PCRE2 recognizes as digits, spaces, or word characters remain the same + set as in non-UTF mode, all with code points less than 256. This + remains true even when PCRE2 is built to include Unicode support, + because to do otherwise would slow down matching in many common cases. + Note that this also applies to \b and \B, because they are defined in + terms of \w and \W. If you want to test for a wider sense of, say, + "digit", you can use explicit Unicode property tests such as \p{Nd}. + Alternatively, if you set the PCRE2_UCP option, the way that the char- + acter escapes work is changed so that Unicode properties are used to determine which characters match. There are more details in the section on generic character types in the pcre2pattern documentation. - Similarly, characters that match the POSIX named character classes are + Similarly, characters that match the POSIX named character classes are all low-valued characters, unless the PCRE2_UCP option is set. - However, the special horizontal and vertical white space matching + However, the special horizontal and vertical white space matching escapes (\h, \H, \v, and \V) do match all the appropriate Unicode char- acters, whether or not PCRE2_UCP is set. - Case-insensitive matching in UTF mode makes use of Unicode properties. - A few Unicode characters such as Greek sigma have more than two code- + Case-insensitive matching in UTF mode makes use of Unicode properties. + A few Unicode characters such as Greek sigma have more than two code- points that are case-equivalent, and these are treated as such. 
VALIDITY OF UTF STRINGS - When the PCRE2_UTF option is set, the strings passed as patterns and + When the PCRE2_UTF option is set, the strings passed as patterns and subjects are (by default) checked for validity on entry to the relevant - functions. If an invalid UTF string is passed, an negative error code - is returned. The code unit offset to the offending character can be - extracted from the match data block by calling pcre2_get_startchar(), + functions. If an invalid UTF string is passed, a negative error code + is returned. The code unit offset to the offending character can be + extracted from the match data block by calling pcre2_get_startchar(), which is used for this purpose after a UTF error. UTF-16 and UTF-32 strings can indicate their endianness by special code - knows as a byte-order mark (BOM). The PCRE2 functions do not handle + known as a byte-order mark (BOM). The PCRE2 functions do not handle this, expecting strings to be in host byte order. - The entire string is checked before any other processing takes place. + A UTF string is checked before any other processing takes place. In the + case of pcre2_match() and pcre2_dfa_match() calls with a non-zero + starting offset, the check is applied only to that part of the subject + that could be inspected during matching, and there is a check that the + starting offset points to the first code unit of a character or to the + end of the subject. If there are no lookbehind assertions in the pat- + tern, the check starts at the starting offset. Otherwise, it starts at + the length of the longest lookbehind before the starting offset, or at + the start of the subject if there are not that many characters before + the starting offset. Note that the sequences \b and \B are one-charac- + ter lookbehinds. + In addition to checking the format of the string, there is a check to ensure that all code points lie in the range U+0 to U+10FFFF, excluding - the surrogate area.
The so-called "non-character" code points are not + the surrogate area. The so-called "non-character" code points are not excluded because Unicode corrigendum #9 makes it clear that they should not be. @@ -5169,9 +10048,9 @@ VALIDITY OF UTF STRINGS The following negative error codes are given for invalid UTF-16 strings: - PCRE_UTF16_ERR1 Missing low surrogate at end of string - PCRE_UTF16_ERR2 Invalid low surrogate follows high surrogate - PCRE_UTF16_ERR3 Isolated low surrogate + PCRE2_ERROR_UTF16_ERR1 Missing low surrogate at end of string + PCRE2_ERROR_UTF16_ERR2 Invalid low surrogate follows high surrogate + PCRE2_ERROR_UTF16_ERR3 Isolated low surrogate Errors in UTF-32 strings @@ -5179,8 +10058,8 @@ VALIDITY OF UTF STRINGS The following negative error codes are given for invalid UTF-32 strings: - PCRE_UTF32_ERR1 Surrogate character (range from 0xd800 to 0xdfff) - PCRE_UTF32_ERR2 Code point is greater than 0x10ffff + PCRE2_ERROR_UTF32_ERR1 Surrogate character (0xd800 to 0xdfff) + PCRE2_ERROR_UTF32_ERR2 Code point is greater than 0x10ffff AUTHOR @@ -5192,8 +10071,8 @@ AUTHOR REVISION - Last updated: 23 November 2014 - Copyright (c) 1997-2014 University of Cambridge. + Last updated: 03 July 2016 + Copyright (c) 1997-2016 University of Cambridge. ------------------------------------------------------------------------------ diff --git a/pcre2/doc/pcre2_code_copy.3 b/pcre2/doc/pcre2_code_copy.3 new file mode 100644 index 000000000..09b47054d --- /dev/null +++ b/pcre2/doc/pcre2_code_copy.3 @@ -0,0 +1,31 @@ +.TH PCRE2_CODE_COPY 3 "22 November 2016" "PCRE2 10.23" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B pcre2_code *pcre2_code_copy(const pcre2_code *\fIcode\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This function makes a copy of the memory used for a compiled pattern, excluding +any memory used by the JIT compiler. 
Without a subsequent call to +\fBpcre2_jit_compile()\fP, the copy can be used only for non-JIT matching. The +pointer to the character tables is copied, not the tables themselves (see +\fBpcre2_code_copy_with_tables()\fP). The yield of the function is NULL if +\fIcode\fP is NULL or if sufficient memory cannot be obtained. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/pcre2/doc/pcre2_code_copy_with_tables.3 b/pcre2/doc/pcre2_code_copy_with_tables.3 new file mode 100644 index 000000000..cfbddb330 --- /dev/null +++ b/pcre2/doc/pcre2_code_copy_with_tables.3 @@ -0,0 +1,32 @@ +.TH PCRE2_CODE_COPY_WITH_TABLES 3 "22 November 2016" "PCRE2 10.23" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *\fIcode\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This function makes a copy of the memory used for a compiled pattern, excluding +any memory used by the JIT compiler. Without a subsequent call to +\fBpcre2_jit_compile()\fP, the copy can be used only for non-JIT matching. +Unlike \fBpcre2_code_copy()\fP, a separate copy of the character tables is also +made, with the new code pointing to it. This memory will be automatically freed +when \fBpcre2_code_free()\fP is called. The yield of the function is NULL if +\fIcode\fP is NULL or if sufficient memory cannot be obtained. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page.
diff --git a/pcre2/doc/pcre2_code_free.3 b/pcre2/doc/pcre2_code_free.3 index 3a1c7d885..5127081e3 100644 --- a/pcre2/doc/pcre2_code_free.3 +++ b/pcre2/doc/pcre2_code_free.3 @@ -1,4 +1,4 @@ -.TH PCRE2_CODE_FREE 3 "21 October 2014" "PCRE2 10.00" +.TH PCRE2_CODE_FREE 3 "29 July 2015" "PCRE2 10.21" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS @@ -7,7 +7,7 @@ PCRE2 - Perl-compatible regular expressions (revised API) .B #include .PP .nf -.B pcre2_code_free(pcre2_code *\fIcode\fP); +.B void pcre2_code_free(pcre2_code *\fIcode\fP); .fi . .SH DESCRIPTION diff --git a/pcre2/doc/pcre2_dfa_match.3 b/pcre2/doc/pcre2_dfa_match.3 index f45da0df7..d2132d514 100644 --- a/pcre2/doc/pcre2_dfa_match.3 +++ b/pcre2/doc/pcre2_dfa_match.3 @@ -1,4 +1,4 @@ -.TH PCRE2_DFA_MATCH 3 "12 May 2013" "PCRE2 10.00" +.TH PCRE2_DFA_MATCH 3 "23 December 2016" "PCRE2 10.23" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS @@ -33,8 +33,8 @@ is \fBpcre2_match()\fP.) The arguments for this function are: \fIwscount\fP Number of elements in the vector .sp For \fBpcre2_dfa_match()\fP, a match context is needed only if you want to set -up a callout function. The \fIlength\fP and \fIstartoffset\fP values are code -units, not characters. The options are: +up a callout function or specify the recursion limit. The \fIlength\fP and +\fIstartoffset\fP values are code units, not characters. 
The options are: .sp PCRE2_ANCHORED Match only at the first position PCRE2_NOTBOL Subject is not the beginning of a line diff --git a/pcre2/doc/pcre2_get_error_message.3 b/pcre2/doc/pcre2_get_error_message.3 index 9ff53420d..9378b1835 100644 --- a/pcre2/doc/pcre2_get_error_message.3 +++ b/pcre2/doc/pcre2_get_error_message.3 @@ -1,4 +1,4 @@ -.TH PCRE2_GET_ERROR_MESSAGE 3 "21 October 2014" "PCRE2 10.00" +.TH PCRE2_GET_ERROR_MESSAGE 3 "17 June 2016" "PCRE2 10.22" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS @@ -23,7 +23,10 @@ errors are negative numbers. The arguments are: \fIbufflen\fP the length of the buffer (code units) .sp The function returns the length of the message, excluding the trailing zero, or -a negative error code if the buffer is too small. +the negative error code PCRE2_ERROR_NOMEMORY if the buffer is too small. In +this case, the returned message is truncated (but still with a trailing zero). +If \fIerrorcode\fP does not contain a recognized error code number, the +negative value PCRE2_ERROR_BADDATA is returned. .P There is a complete description of the PCRE2 native API in the .\" HREF diff --git a/pcre2/doc/pcre2_match_data_create.3 b/pcre2/doc/pcre2_match_data_create.3 index 2a92f0bcb..3b0a29e19 100644 --- a/pcre2/doc/pcre2_match_data_create.3 +++ b/pcre2/doc/pcre2_match_data_create.3 @@ -1,4 +1,4 @@ -.TH PCRE2_MATCH_DATA_CREATE 3 "22 October 2014" "PCRE2 10.00" +.TH PCRE2_MATCH_DATA_CREATE 3 "29 July 2015" "PCRE2 10.21" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS @@ -7,7 +7,7 @@ PCRE2 - Perl-compatible regular expressions (revised API) .B #include .PP .nf -.B pcre2_match_data_create(uint32_t \fIovecsize\fP, +.B pcre2_match_data *pcre2_match_data_create(uint32_t \fIovecsize\fP, .B " pcre2_general_context *\fIgcontext\fP);" .fi . 
diff --git a/pcre2/doc/pcre2_match_data_create_from_pattern.3 b/pcre2/doc/pcre2_match_data_create_from_pattern.3 index 83267d6f7..60bf77cc6 100644 --- a/pcre2/doc/pcre2_match_data_create_from_pattern.3 +++ b/pcre2/doc/pcre2_match_data_create_from_pattern.3 @@ -1,4 +1,4 @@ -.TH PCRE2_MATCH_DATA_CREATE_FROM_PATTERN 3 "24 October 2014" "PCRE2 10.00" +.TH PCRE2_MATCH_DATA_CREATE_FROM_PATTERN 3 "29 July 2015" "PCRE2 10.21" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS @@ -7,8 +7,8 @@ PCRE2 - Perl-compatible regular expressions (revised API) .B #include .PP .nf -.B pcre2_match_data_create_from_pattern(const pcre2_code *\fIcode\fP, -.B " pcre2_general_context *\fIgcontext\fP);" +.B pcre2_match_data *pcre2_match_data_create_from_pattern( +.B " const pcre2_code *\fIcode\fP, pcre2_general_context *\fIgcontext\fP);" .fi . .SH DESCRIPTION diff --git a/pcre2/doc/pcre2_pattern_info.3 b/pcre2/doc/pcre2_pattern_info.3 index 8424e6f58..575840bbf 100644 --- a/pcre2/doc/pcre2_pattern_info.3 +++ b/pcre2/doc/pcre2_pattern_info.3 @@ -1,4 +1,4 @@ -.TH PCRE2_PATTERN_INFO 3 "01 December 2014" "PCRE2 10.00" +.TH PCRE2_PATTERN_INFO 3 "21 November 2015" "PCRE2 10.21" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS @@ -30,19 +30,20 @@ request are as follows: PCRE2_BSR_ANYCRLF: CR, LF, or CRLF only PCRE2_INFO_CAPTURECOUNT Number of capturing subpatterns PCRE2_INFO_FIRSTBITMAP Bitmap of first code units, or NULL - PCRE2_INFO_FIRSTCODEUNIT First code unit when type is 1 PCRE2_INFO_FIRSTCODETYPE Type of start-of-match information 0 nothing set 1 first code unit is set 2 start of string or after newline + PCRE2_INFO_FIRSTCODEUNIT First code unit when type is 1 + PCRE2_INFO_HASBACKSLASHC Return 1 if pattern contains \eC PCRE2_INFO_HASCRORLF Return 1 if explicit CR or LF matches exist in the pattern PCRE2_INFO_JCHANGED Return 1 if (?J) or (?-J) was used PCRE2_INFO_JITSIZE Size of JIT compiled code, or 0 - PCRE2_INFO_LASTCODEUNIT Last 
code unit when type is 1 PCRE2_INFO_LASTCODETYPE Type of must-be-present information 0 nothing set 1 code unit is set + PCRE2_INFO_LASTCODEUNIT Last code unit when type is 1 PCRE2_INFO_MATCHEMPTY 1 if the pattern can match an empty string, 0 otherwise PCRE2_INFO_MATCHLIMIT Match limit if set, @@ -50,8 +51,8 @@ request are as follows: PCRE2_INFO_MAXLOOKBEHIND Length (in characters) of the longest lookbehind assertion PCRE2_INFO_MINLENGTH Lower bound length of matching strings - PCRE2_INFO_NAMEENTRYSIZE Size of name table entries PCRE2_INFO_NAMECOUNT Number of named subpatterns + PCRE2_INFO_NAMEENTRYSIZE Size of name table entries PCRE2_INFO_NAMETABLE Pointer to name table PCRE2_INFO_NEWLINE Code for the newline sequence: PCRE2_NEWLINE_CR diff --git a/pcre2/doc/pcre2_serialize_decode.3 b/pcre2/doc/pcre2_serialize_decode.3 index b362fcdff..57304a59c 100644 --- a/pcre2/doc/pcre2_serialize_decode.3 +++ b/pcre2/doc/pcre2_serialize_decode.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SERIALIZE_DECODE 3 "19 January 2015" "PCRE2 10.10" +.TH PCRE2_SERIALIZE_DECODE 3 "02 September 2015" "PCRE2 10.21" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS @@ -8,7 +8,7 @@ PCRE2 - Perl-compatible regular expressions (revised API) .PP .nf .B int32_t pcre2_serialize_decode(pcre2_code **\fIcodes\fP, -.B " int32_t \fInumber_of_codes\fP, const uint32_t *\fIbytes\fP," +.B " int32_t \fInumber_of_codes\fP, const uint8_t *\fIbytes\fP," .B " pcre2_general_context *\fIgcontext\fP);" .fi .
diff --git a/pcre2/doc/pcre2_serialize_encode.3 b/pcre2/doc/pcre2_serialize_encode.3 index 57077eb1e..9c2963318 100644 --- a/pcre2/doc/pcre2_serialize_encode.3 +++ b/pcre2/doc/pcre2_serialize_encode.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SERIALIZE_ENCODE 3 "19 January 2015" "PCRE2 10.10" +.TH PCRE2_SERIALIZE_ENCODE 3 "02 September 2015" "PCRE2 10.21" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS @@ -7,8 +7,8 @@ PCRE2 - Perl-compatible regular expressions (revised API) .B #include .PP .nf -.B int32_t pcre2_serialize_encode(pcre2_code **\fIcodes\fP, -.B " int32_t \fInumber_of_codes\fP, uint32_t **\fIserialized_bytes\fP," +.B int32_t pcre2_serialize_encode(const pcre2_code **\fIcodes\fP, +.B " int32_t \fInumber_of_codes\fP, uint8_t **\fIserialized_bytes\fP," .B " PCRE2_SIZE *\fIserialized_size\fP, pcre2_general_context *\fIgcontext\fP);" .fi . diff --git a/pcre2/doc/pcre2_set_max_pattern_length.3 b/pcre2/doc/pcre2_set_max_pattern_length.3 new file mode 100644 index 000000000..7aa01c775 --- /dev/null +++ b/pcre2/doc/pcre2_set_max_pattern_length.3 @@ -0,0 +1,31 @@ +.TH PCRE2_SET_MAX_PATTERN_LENGTH 3 "05 October 2016" "PCRE2 10.23" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_max_pattern_length(pcre2_compile_context *\fIccontext\fP, +.B " PCRE2_SIZE \fIvalue\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function sets, in a compile context, the maximum text length (in code +units) of the pattern that can be compiled. The result is always zero. If a +longer pattern is passed to \fBpcre2_compile()\fP there is an immediate error +return. The default is effectively unlimited, being the largest value a +PCRE2_SIZE variable can hold. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/pcre2/doc/pcre2_set_offset_limit.3 b/pcre2/doc/pcre2_set_offset_limit.3 new file mode 100644 index 000000000..20fa1045d --- /dev/null +++ b/pcre2/doc/pcre2_set_offset_limit.3 @@ -0,0 +1,28 @@ +.TH PCRE2_SET_OFFSET_LIMIT 3 "22 September 2015" "PCRE2 10.21" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_offset_limit(pcre2_match_context *\fImcontext\fP, +.B " PCRE2_SIZE \fIvalue\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function sets the offset limit field in a match context. The result is +always zero. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/pcre2/doc/pcre2_substitute.3 b/pcre2/doc/pcre2_substitute.3 index edfcb0432..e69e0ccc0 100644 --- a/pcre2/doc/pcre2_substitute.3 +++ b/pcre2/doc/pcre2_substitute.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SUBSTITUTE 3 "11 November 2014" "PCRE2 10.00" +.TH PCRE2_SUBSTITUTE 3 "12 December 2015" "PCRE2 10.21" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS @@ -47,20 +47,25 @@ units, not characters, as is the contents of the variable pointed at by \fIoutlengthptr\fP, which is updated to the actual length of the new string. 
The options are: .sp - PCRE2_ANCHORED Match only at the first position - PCRE2_NOTBOL Subject string is not the beginning of a line - PCRE2_NOTEOL Subject string is not the end of a line - PCRE2_NOTEMPTY An empty string is not a valid match - PCRE2_NOTEMPTY_ATSTART An empty string at the start of the subject - is not a valid match - PCRE2_NO_UTF_CHECK Do not check the subject or replacement for - UTF validity (only relevant if PCRE2_UTF - was set at compile time) - PCRE2_SUBSTITUTE_GLOBAL Replace all occurrences in the subject + PCRE2_ANCHORED Match only at the first position + PCRE2_NOTBOL Subject is not the beginning of a line + PCRE2_NOTEOL Subject is not the end of a line + PCRE2_NOTEMPTY An empty string is not a valid match + PCRE2_NOTEMPTY_ATSTART An empty string at the start of the + subject is not a valid match + PCRE2_NO_UTF_CHECK Do not check the subject or replacement + for UTF validity (only relevant if + PCRE2_UTF was set at compile time) + PCRE2_SUBSTITUTE_EXTENDED Do extended replacement processing + PCRE2_SUBSTITUTE_GLOBAL Replace all occurrences in the subject + PCRE2_SUBSTITUTE_OVERFLOW_LENGTH If overflow, compute needed length + PCRE2_SUBSTITUTE_UNKNOWN_UNSET Treat unknown group as unset + PCRE2_SUBSTITUTE_UNSET_EMPTY Simple unset insert = empty string .sp The function returns the number of substitutions, which may be zero if there were no matches. The result can be greater than one only when -PCRE2_SUBSTITUTE_GLOBAL is set. +PCRE2_SUBSTITUTE_GLOBAL is set. In the event of an error, a negative error code +is returned. 
.P There is a complete description of the PCRE2 native API in the .\" HREF diff --git a/pcre2/doc/pcre2api.3 b/pcre2/doc/pcre2api.3 index 1147f89d4..e0a434af4 100644 --- a/pcre2/doc/pcre2api.3 +++ b/pcre2/doc/pcre2api.3 @@ -1,4 +1,4 @@ -.TH PCRE2API 3 "22 April 2015" "PCRE2 10.20" +.TH PCRE2API 3 "24 December 2016" "PCRE2 10.23" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .sp @@ -20,13 +20,13 @@ document for an overview of all the PCRE2 documentation. .B " uint32_t \fIoptions\fP, int *\fIerrorcode\fP, PCRE2_SIZE *\fIerroroffset,\fP" .B " pcre2_compile_context *\fIccontext\fP);" .sp -.B pcre2_code_free(pcre2_code *\fIcode\fP); +.B void pcre2_code_free(pcre2_code *\fIcode\fP); .sp -.B pcre2_match_data_create(uint32_t \fIovecsize\fP, +.B pcre2_match_data *pcre2_match_data_create(uint32_t \fIovecsize\fP, .B " pcre2_general_context *\fIgcontext\fP);" .sp -.B pcre2_match_data_create_from_pattern(const pcre2_code *\fIcode\fP, -.B " pcre2_general_context *\fIgcontext\fP);" +.B pcre2_match_data *pcre2_match_data_create_from_pattern( +.B " const pcre2_code *\fIcode\fP, pcre2_general_context *\fIgcontext\fP);" .sp .B int pcre2_match(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP, .B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP," @@ -90,6 +90,9 @@ document for an overview of all the PCRE2 documentation. .B int pcre2_set_character_tables(pcre2_compile_context *\fIccontext\fP, .B " const unsigned char *\fItables\fP);" .sp +.B int pcre2_set_max_pattern_length(pcre2_compile_context *\fIccontext\fP, +.B " PCRE2_SIZE \fIvalue\fP);" +.sp .B int pcre2_set_newline(pcre2_compile_context *\fIccontext\fP, .B " uint32_t \fIvalue\fP);" .sp @@ -120,6 +123,9 @@ document for an overview of all the PCRE2 documentation. 
.B int pcre2_set_match_limit(pcre2_match_context *\fImcontext\fP, .B " uint32_t \fIvalue\fP);" .sp +.B int pcre2_set_offset_limit(pcre2_match_context *\fImcontext\fP, +.B " PCRE2_SIZE \fIvalue\fP);" +.sp .B int pcre2_set_recursion_limit(pcre2_match_context *\fImcontext\fP, .B " uint32_t \fIvalue\fP);" .sp @@ -210,11 +216,11 @@ document for an overview of all the PCRE2 documentation. .sp .nf .B int32_t pcre2_serialize_decode(pcre2_code **\fIcodes\fP, -.B " int32_t \fInumber_of_codes\fP, const uint32_t *\fIbytes\fP," +.B " int32_t \fInumber_of_codes\fP, const uint8_t *\fIbytes\fP," .B " pcre2_general_context *\fIgcontext\fP);" .sp -.B int32_t pcre2_serialize_encode(pcre2_code **\fIcodes\fP, -.B " int32_t \fInumber_of_codes\fP, uint32_t **\fIserialized_bytes\fP," +.B int32_t pcre2_serialize_encode(const pcre2_code **\fIcodes\fP, +.B " int32_t \fInumber_of_codes\fP, uint8_t **\fIserialized_bytes\fP," .B " PCRE2_SIZE *\fIserialized_size\fP, pcre2_general_context *\fIgcontext\fP);" .sp .B void pcre2_serialize_free(uint8_t *\fIbytes\fP); @@ -227,6 +233,10 @@ document for an overview of all the PCRE2 documentation. .rs .sp .nf +.B pcre2_code *pcre2_code_copy(const pcre2_code *\fIcode\fP); +.sp +.B pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *\fIcode\fP); +.sp .B int pcre2_get_error_message(int \fIerrorcode\fP, PCRE2_UCHAR *\fIbuffer\fP, .B " PCRE2_SIZE \fIbufflen\fP);" .sp @@ -346,9 +356,10 @@ More complicated programs might need to make use of the specialist functions \fBpcre2_jit_stack_create()\fP, \fBpcre2_jit_stack_free()\fP, and \fBpcre2_jit_stack_assign()\fP in order to control the JIT code's memory usage. .P -JIT matching is automatically used by \fBpcre2_match()\fP if it is available. -There is also a direct interface for JIT matching, which gives improved -performance. The JIT-specific functions are discussed in the +JIT matching is automatically used by \fBpcre2_match()\fP if it is available, +unless the PCRE2_NO_JIT option is set. 
There is also a direct interface for JIT +matching, which gives improved performance. The JIT-specific functions are +discussed in the .\" HREF \fBpcre2jit\fP .\" @@ -387,9 +398,16 @@ The function \fBpcre2_substitute()\fP can be called to match a pattern and return a copy of the subject string with substitutions for parts that were matched. .P +Functions whose names begin with \fBpcre2_serialize_\fP are used for saving +compiled patterns on disc or elsewhere, and reloading them later. +.P Finally, there are functions for finding out information about a compiled pattern (\fBpcre2_pattern_info()\fP) and about the configuration with which PCRE2 was built (\fBpcre2_config()\fP). +.P +Functions with names ending with \fB_free()\fP are used for freeing memory +blocks of various sorts. In all cases, if one of these functions is called with +a NULL argument, it does nothing. . . .SH "STRING LENGTHS AND OFFSETS" @@ -455,21 +473,53 @@ time ensuring that multithreaded applications can use it. .P There are several different blocks of data that are used to pass information between the application and the PCRE2 libraries. -.P -(1) A pointer to the compiled form of a pattern is returned to the user when +. +. +.SS "The compiled pattern" +.rs +.sp +A pointer to the compiled form of a pattern is returned to the user when \fBpcre2_compile()\fP is successful. The data in the compiled pattern is fixed, and does not change when the pattern is matched. Therefore, it is thread-safe, that is, the same compiled pattern can be used by more than one thread -simultaneously. An application can compile all its patterns at the start, -before forking off multiple threads that use them. However, if the just-in-time -optimization feature is being used, it needs separate memory stack areas for -each thread. See the +simultaneously. For example, an application can compile all its patterns at the +start, before forking off multiple threads that use them. 
However, if the +just-in-time optimization feature is being used, it needs separate memory stack +areas for each thread. See the .\" HREF \fBpcre2jit\fP .\" documentation for more details. .P -(2) The next section below introduces the idea of "contexts" in which PCRE2 +In a more complicated situation, where patterns are compiled only when they are +first needed, but are still shared between threads, pointers to compiled +patterns must be protected from simultaneous writing by multiple threads, at +least until a pattern has been compiled. The logic can be something like this: +.sp + Get a read-only (shared) lock (mutex) for pointer + if (pointer == NULL) + { + Get a write (unique) lock for pointer + pointer = pcre2_compile(... + } + Release the lock + Use pointer in pcre2_match() +.sp +Of course, testing for compilation errors should also be included in the code. +.P +If JIT is being used, but the JIT compilation is not being done immediately, +(perhaps waiting to see if the pattern is used often enough) similar logic is +required. JIT compilation updates a pointer within the compiled code block, so +a thread must gain unique write access to the pointer before calling +\fBpcre2_jit_compile()\fP. Alternatively, \fBpcre2_code_copy()\fP or +\fBpcre2_code_copy_with_tables()\fP can be used to obtain a private copy of the +compiled code. +. +. +.SS "Context blocks" +.rs +.sp +The next main section below introduces the idea of "contexts" in which PCRE2 functions are called. A context is nothing more than a collection of parameters that control the way PCRE2 operates. Grouping a number of parameters together in a context is a convenient way of passing them to a PCRE2 function without @@ -481,11 +531,15 @@ In a multithreaded application, if the parameters in a context are values that are never changed, the same context can be used by all the threads. However, if any thread needs to change any value in a context, it must make its own thread-specific copy. 
-.P -(3) The matching functions need a block of memory for working space and for -storing the results of a match. This includes details of what was matched, as -well as additional information such as the name of a (*MARK) setting. Each -thread must provide its own version of this memory. +. +. +.SS "Match blocks" +.rs +.sp +The matching functions need a block of memory for working space and for storing +the results of a match. This includes details of what was matched, as well as +additional information such as the name of a (*MARK) setting. Each thread must +provide its own copy of this memory. . . .SH "PCRE2 CONTEXTS" @@ -564,6 +618,7 @@ of the following compile-time parameters: PCRE2's character tables The newline character sequence The compile time nested parentheses limit + The maximum length of the pattern string An external function for stack checking .sp A compile context is also required if you are using custom memory management. @@ -607,6 +662,17 @@ argument is a general context. This function builds a set of character tables in the current locale. .sp .nf +.B int pcre2_set_max_pattern_length(pcre2_compile_context *\fIccontext\fP, +.B " PCRE2_SIZE \fIvalue\fP);" +.fi +.sp +This sets a maximum length, in code units, for the pattern string that is to be +compiled. If the pattern is longer, an error is generated. This facility is +provided so that applications that accept patterns from external sources can +limit their size. The default is the largest number that a PCRE2_SIZE variable +can hold, which is effectively unlimited. +.sp +.nf .B int pcre2_set_newline(pcre2_compile_context *\fIccontext\fP, .B " uint32_t \fIvalue\fP);" .fi @@ -630,7 +696,8 @@ functions, \fIpcre2_match()\fP and \fIpcre2_dfa_match()\fP. .sp This parameter adjusts the limit, set when PCRE2 is built (default 250), on the depth of parenthesis nesting in a pattern. This limit stops rogue patterns -using up too much system stack when being compiled.
+using up too much system stack when being compiled. The limit applies to +parentheses of all kinds, not just capturing parentheses. .sp .nf .B int pcre2_set_compile_recursion_guard(pcre2_compile_context *\fIccontext\fP, @@ -659,8 +726,9 @@ A match context is required if you want to change the default values of any of the following match-time parameters: .sp A callout function - The limit for calling \fImatch()\fP - The limit for calling \fImatch()\fP recursively + The offset limit for matching an unanchored pattern + The limit for calling \fBmatch()\fP (see below) + The limit for calling \fBmatch()\fP recursively .sp A match context is also required if you are using custom memory management. If none of these apply, just pass NULL as the context argument of @@ -696,6 +764,32 @@ during a matching operation. Details are given in the documentation. .sp .nf +.B int pcre2_set_offset_limit(pcre2_match_context *\fImcontext\fP, +.B " PCRE2_SIZE \fIvalue\fP);" +.fi +.sp +The \fIoffset_limit\fP parameter limits how far an unanchored search can +advance in the subject string. The default value is PCRE2_UNSET. The +\fBpcre2_match()\fP and \fBpcre2_dfa_match()\fP functions return +PCRE2_ERROR_NOMATCH if a match with a starting point before or at the given +offset is not found. For example, if the pattern /abc/ is matched against +"123abc" with an offset limit less than 3, the result is PCRE2_ERROR_NOMATCH. +A match can never be found if the \fIstartoffset\fP argument of +\fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP is greater than the offset +limit. +.P +When using this facility, you must set PCRE2_USE_OFFSET_LIMIT when calling +\fBpcre2_compile()\fP so that when JIT is in use, different code can be +compiled. If a match is started with a non-default offset limit when +PCRE2_USE_OFFSET_LIMIT is not set, an error is generated. +.P +The offset limit facility can be used to track progress when searching large +subject strings.
See also the PCRE2_FIRSTLINE option, which requires a match to +start within the first line of the subject. If this is set with an offset +limit, a match must occur in the first line and also within the offset limit. +In other words, whichever limit comes first is used. +.sp +.nf .B int pcre2_set_match_limit(pcre2_match_context *\fImcontext\fP, .B " uint32_t \fIvalue\fP);" .fi @@ -746,20 +840,22 @@ This limit is of use only if it is set smaller than \fImatch_limit\fP. Limiting the recursion depth limits the amount of system stack that can be used, or, when PCRE2 has been compiled to use memory on the heap instead of the stack, the amount of heap memory that can be used. This limit is not relevant, -and is ignored, when matching is done using JIT compiled code or by the -\fBpcre2_dfa_match()\fP function. +and is ignored, when matching is done using JIT compiled code. However, it is +supported by \fBpcre2_dfa_match()\fP, which uses recursive function calls less +frequently than \fBpcre2_match()\fP, but which can be caused to use a lot of +stack by a recursive pattern such as /(.)(?1)/ matched to a very long string. .P The default value for \fIrecursion_limit\fP can be set when PCRE2 is built; the default default is the same value as the default for \fImatch_limit\fP. If the -limit is exceeded, \fBpcre2_match()\fP returns PCRE2_ERROR_RECURSIONLIMIT. A -value for the recursion limit may also be supplied by an item at the start of a -pattern of the form +limit is exceeded, \fBpcre2_match()\fP and \fBpcre2_dfa_match()\fP return +PCRE2_ERROR_RECURSIONLIMIT. A value for the recursion limit may also be +supplied by an item at the start of a pattern of the form .sp (*LIMIT_RECURSION=ddd) .sp where ddd is a decimal number. However, such a setting is ignored unless ddd is -less than the limit set by the caller of \fBpcre2_match()\fP or, if no such -limit is set, less than the default. 
+less than the limit set by the caller of \fBpcre2_match()\fP or +\fBpcre2_dfa_match()\fP or, if no such limit is set, less than the default. .sp .nf .B int pcre2_set_recursion_memory_management( @@ -905,7 +1001,7 @@ The \fIwhere\fP argument should point to a buffer that is at least 24 code units long. (The exact length required can be found by calling \fBpcre2_config()\fP with \fBwhere\fP set to NULL.) If PCRE2 has been compiled without Unicode support, the buffer is filled with the text "Unicode not -supported". Otherwise, the Unicode version string (for example, "7.0.0") is +supported". Otherwise, the Unicode version string (for example, "8.0.0") is inserted. The number of code units used is returned. This is the length of the string plus one unit for the terminating zero. .sp @@ -933,35 +1029,69 @@ zero. .B " uint32_t \fIoptions\fP, int *\fIerrorcode\fP, PCRE2_SIZE *\fIerroroffset,\fP" .B " pcre2_compile_context *\fIccontext\fP);" .sp -.B pcre2_code_free(pcre2_code *\fIcode\fP); +.B void pcre2_code_free(pcre2_code *\fIcode\fP); +.sp +.B pcre2_code *pcre2_code_copy(const pcre2_code *\fIcode\fP); +.sp +.B pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *\fIcode\fP); .fi .P The \fBpcre2_compile()\fP function compiles a pattern into an internal form. -The pattern is defined by a pointer to a string of code units and a length, If +The pattern is defined by a pointer to a string of code units and a length. If the pattern is zero-terminated, the length can be specified as PCRE2_ZERO_TERMINATED. The function returns a pointer to a block of memory that -contains the compiled pattern and related data. The caller must free the memory -by calling \fBpcre2_code_free()\fP when it is no longer needed. +contains the compiled pattern and related data, or NULL if an error occurred. +.P +If the compile context argument \fIccontext\fP is NULL, memory for the compiled +pattern is obtained by calling \fBmalloc()\fP. 
Otherwise, it is obtained from +the same memory function that was used for the compile context. The caller must +free the memory by calling \fBpcre2_code_free()\fP when it is no longer needed. +.P +The function \fBpcre2_code_copy()\fP makes a copy of the compiled code in new +memory, using the same memory allocator as was used for the original. However, +if the code has been processed by the JIT compiler (see +.\" HTML +.\" +below), +.\" +the JIT information cannot be copied (because it is position-dependent). +The new copy can initially be used only for non-JIT matching, though it can be +passed to \fBpcre2_jit_compile()\fP if required. +.P +The \fBpcre2_code_copy()\fP function provides a way for individual threads in a +multithreaded application to acquire a private copy of shared compiled code. +However, it does not make a copy of the character tables used by the compiled +pattern; the new pattern code points to the same tables as the original code. +(See +.\" HTML +.\" +"Locale Support" +.\" +below for details of these character tables.) In many applications the same +tables are used throughout, so this behaviour is appropriate. Nevertheless, +there are occasions when a copy of a compiled pattern and the relevant tables +are needed. The \fBpcre2_code_copy_with_tables()\fP provides this facility. +Copies of both the code and the tables are made, with the new code pointing to +the new tables. The memory for the new tables is automatically freed when +\fBpcre2_code_free()\fP is called for the new copy of the compiled code. .P NOTE: When one of the matching functions is called, pointers to the compiled pattern and the subject string are set in the match data block so that they can -be referenced by the extraction functions. After running a match, you must not -free a compiled pattern (or a subject string) until after all operations on the +be referenced by the substring extraction functions. 
After running a match, you +must not free a compiled pattern (or a subject string) until after all +operations on the .\" HTML .\" match data block .\" have taken place. .P -If the compile context argument \fIccontext\fP is NULL, memory for the compiled -pattern is obtained by calling \fBmalloc()\fP. Otherwise, it is obtained from -the same memory function that was used for the compile context. -.P -The \fIoptions\fP argument contains various bit settings that affect the -compilation. It should be zero if no options are required. The available -options are described below. Some of them (in particular, those that are -compatible with Perl, but some others as well) can also be set and unset from -within the pattern (see the detailed description in the +The \fIoptions\fP argument for \fBpcre2_compile()\fP contains various bit +settings that affect the compilation. It should be zero if no options are +required. The available options are described below. Some of them (in +particular, those that are compatible with Perl, but some others as well) can +also be set and unset from within the pattern (see the detailed description in +the .\" HREF \fBpcre2pattern\fP .\" @@ -980,13 +1110,28 @@ above). .\" .P If \fIerrorcode\fP or \fIerroroffset\fP is NULL, \fBpcre2_compile()\fP returns -NULL immediately. Otherwise, if compilation of a pattern fails, -\fBpcre2_compile()\fP returns NULL, having set these variables to an error code -and an offset (number of code units) within the pattern, respectively. The -\fBpcre2_get_error_message()\fP function provides a textual message for each -error code. Compilation errors are positive numbers, but UTF formatting errors -are negative numbers. For an invalid UTF-8 or UTF-16 string, the offset is that -of the first code unit of the failing character. +NULL immediately. 
Otherwise, the variables to which these point are set to an +error code and an offset (number of code units) within the pattern, +respectively, when \fBpcre2_compile()\fP returns NULL because a compilation +error has occurred. The values are not defined when compilation is successful +and \fBpcre2_compile()\fP returns a non-NULL value. +.P +The value returned in \fIerroroffset\fP is an indication of where in the +pattern the error occurred. It is not necessarily the furthest point in the +pattern that was read. For example, after the error "lookbehind assertion is +not fixed length", the error offset points to the start of the failing +assertion. +.P +The \fBpcre2_get_error_message()\fP function (see "Obtaining a textual error +message" +.\" HTML +.\" +below) +.\" +provides a textual message for each error code. Compilation errors have +positive error codes; UTF formatting error codes are negative. For an invalid +UTF-8 or UTF-16 string, the offset is that of the first code unit of the +failing character. .P Some errors are not detected until the whole pattern has been scanned; in these cases, the offset passed back is the length of the pattern. Note that the @@ -1052,12 +1197,24 @@ after any internal newline. However, it does not match after a newline at the end of the subject, for compatibility with Perl. If you want a multiline circumflex also to match after a terminating newline, you must set PCRE2_ALT_CIRCUMFLEX. +.sp + PCRE2_ALT_VERBNAMES +.sp +By default, for compatibility with Perl, the name in any verb sequence such as +(*MARK:NAME) is any sequence of characters that does not include a closing +parenthesis. The name is not processed in any way, and it is not possible to +include a closing parenthesis in the name. However, if the PCRE2_ALT_VERBNAMES +option is set, normal backslash processing is applied to verb names and only an +unescaped closing parenthesis terminates the name. 
A closing parenthesis can be +included in a name either as \e) or between \eQ and \eE. If the PCRE2_EXTENDED +option is set, unescaped whitespace in verb names is skipped and #-comments are +recognized, exactly as in the rest of the pattern. .sp PCRE2_AUTO_CALLOUT .sp If this bit is set, \fBpcre2_compile()\fP automatically inserts callout items, -all with number 255, before each pattern item. For discussion of the callout -facility, see the +all with number 255, before each pattern item, except immediately before or +after a callout in the pattern. For discussion of the callout facility, see the .\" HREF \fBpcre2callout\fP .\" @@ -1130,7 +1287,10 @@ built. .sp If this option is set, an unanchored pattern is required to match before or at the first newline in the subject string, though the matched text may continue -over the newline. +over the newline. See also PCRE2_USE_OFFSET_LIMIT, which provides a more +general limiting facility. If PCRE2_FIRSTLINE is set with an offset limit, a +match must occur in the first line and also within the offset limit. In other +words, whichever limit comes first is used. .sp PCRE2_MATCH_UNSET_BACKREF .sp @@ -1168,7 +1328,8 @@ This option locks out the use of \eC in the pattern that is being compiled. This escape can cause unpredictable behaviour in UTF-8 or UTF-16 modes, because it may leave the current matching point in the middle of a multi-code-unit character. This option may be useful in applications that process patterns from -external sources. +external sources. Note that there is also a build-time option that permanently +locks out the use of \eC. .sp PCRE2_NEVER_UCP .sp @@ -1194,7 +1355,9 @@ If this option is set, it disables the use of numbered capturing parentheses in the pattern. Any opening parenthesis that is not followed by ? behaves as if it were followed by ?: but named parentheses can still be used for capturing (and they acquire numbers in the usual way). There is no equivalent of this option -in Perl. +in Perl. 
Note that, if this option is set, references to capturing groups (back +references or recursion/subroutine calls) may only refer to named groups, +though the reference can be by name or by number. .sp PCRE2_NO_AUTO_POSSESS .sp @@ -1323,6 +1486,20 @@ support. This option inverts the "greediness" of the quantifiers so that they are not greedy by default, but become greedy if followed by "?". It is not compatible with Perl. It can also be set by a (?U) option setting within the pattern. +.sp + PCRE2_USE_OFFSET_LIMIT +.sp +This option must be set for \fBpcre2_compile()\fP if +\fBpcre2_set_offset_limit()\fP is going to be used to set a non-default offset +limit in a match context for matches that use this pattern. An error is +generated if an offset limit is set without this option. For more details, see +the description of \fBpcre2_set_offset_limit()\fP in the +.\" HTML +.\" +section +.\" +that describes match contexts. See also the PCRE2_FIRSTLINE +option above. .sp PCRE2_UTF .sp @@ -1341,17 +1518,24 @@ page. .SH "COMPILATION ERROR CODES" .rs .sp -There are over 80 positive error codes that \fBpcre2_compile()\fP may return if -it finds an error in the pattern. There are also some negative error codes that -are used for invalid UTF strings. These are the same as given by -\fBpcre2_match()\fP and \fBpcre2_dfa_match()\fP, and are described in the +There are over 80 positive error codes that \fBpcre2_compile()\fP may return +(via \fIerrorcode\fP) if it finds an error in the pattern. There are also some +negative error codes that are used for invalid UTF strings. These are the same +as given by \fBpcre2_match()\fP and \fBpcre2_dfa_match()\fP, and are described +in the .\" HREF \fBpcre2unicode\fP .\" -page. The \fBpcre2_get_error_message()\fP function can be called to obtain a -textual error message from any error code. +page. 
The \fBpcre2_get_error_message()\fP function (see "Obtaining a textual +error message" +.\" HTML +.\" +below) +.\" +can be called to obtain a textual error message from any error code. . . +.\" HTML .SH "JUST-IN-TIME (JIT) COMPILATION" .rs .sp @@ -1490,11 +1674,14 @@ are as follows: Return a copy of the pattern's options. The third argument should point to a \fBuint32_t\fP variable. PCRE2_INFO_ARGOPTIONS returns exactly the options that were passed to \fBpcre2_compile()\fP, whereas PCRE2_INFO_ALLOPTIONS returns -the compile options as modified by any top-level option settings at the start -of the pattern itself. In other words, they are the options that will be in -force when matching starts. For example, if the pattern /(?im)abc(?-i)d/ is -compiled with the PCRE2_EXTENDED option, the result is PCRE2_CASELESS, -PCRE2_MULTILINE, and PCRE2_EXTENDED. +the compile options as modified by any top-level (*XXX) option settings such as +(*UTF) at the start of the pattern itself. +.P +For example, if the pattern /(*UTF)abc/ is compiled with the PCRE2_EXTENDED +option, the result for PCRE2_INFO_ALLOPTIONS is PCRE2_EXTENDED and PCRE2_UTF. +Option settings such as (?i) that can change within a pattern do not affect the +result of PCRE2_INFO_ALLOPTIONS, even if they appear right at the start of the +pattern. (This was different in some earlier releases.) .P A pattern compiled without PCRE2_ANCHORED is automatically anchored by PCRE2 if the first significant item in every top-level branch is one of the following: @@ -1537,17 +1724,27 @@ matches only CR, LF, or CRLF. .sp PCRE2_INFO_CAPTURECOUNT .sp -Return the number of capturing subpatterns in the pattern. The third argument -should point to an \fBuint32_t\fP variable. +Return the highest capturing subpattern number in the pattern. In patterns +where (?| is not used, this is also the total number of capturing subpatterns. +The third argument should point to an \fBuint32_t\fP variable. 
+.sp + PCRE2_INFO_FIRSTBITMAP +.sp +In the absence of a single first code unit for a non-anchored pattern, +\fBpcre2_compile()\fP may construct a 256-bit table that defines a fixed set of +values for the first code unit in any match. For example, a pattern that starts +with [abc] results in a table with three bits set. When code unit values +greater than 255 are supported, the flag bit for 255 means "any code unit of +value 255 or above". If such a table was constructed, a pointer to it is +returned. Otherwise NULL is returned. The third argument should point to an +\fBconst uint8_t *\fP variable. .sp PCRE2_INFO_FIRSTCODETYPE .sp Return information about the first code unit of any matched string, for a non-anchored pattern. The third argument should point to an \fBuint32_t\fP -variable. -.P -If there is a fixed first value, for example, the letter "c" from a pattern -such as (cat|cow|coyote), 1 is returned, and the character value can be +variable. If there is a fixed first value, for example, the letter "c" from a +pattern such as (cat|cow|coyote), 1 is returned, and the character value can be retrieved using PCRE2_INFO_FIRSTCODEUNIT. If there is no fixed first value, but it is known that a match can occur only at the start of the subject or following a newline in the subject, 2 is returned. Otherwise, and for anchored @@ -1562,16 +1759,10 @@ value is always less than 256. In the 16-bit library the value can be up to 0xffff. In the 32-bit library in UTF-32 mode the value can be up to 0x10ffff, and up to 0xffffffff when not using UTF-32 mode. .sp - PCRE2_INFO_FIRSTBITMAP + PCRE2_INFO_HASBACKSLASHC .sp -In the absence of a single first code unit for a non-anchored pattern, -\fBpcre2_compile()\fP may construct a 256-bit table that defines a fixed set of -values for the first code unit in any match. For example, a pattern that starts -with [abc] results in a table with three bits set. 
When code unit values -greater than 255 are supported, the flag bit for 255 means "any code unit of -value 255 or above". If such a table was constructed, a pointer to it is -returned. Otherwise NULL is returned. The third argument should point to an -\fBconst uint8_t *\fP variable. +Return 1 if the pattern contains any instances of \eC, otherwise 0. The third +argument should point to an \fBuint32_t\fP variable. .sp PCRE2_INFO_HASCRORLF .sp @@ -1597,12 +1788,10 @@ Returns 1 if there is a rightmost literal code unit that must exist in any matched string, other than at its start. The third argument should point to an \fBuint32_t\fP variable. If there is no such value, 0 is returned. When 1 is returned, the code unit value itself can be retrieved using -PCRE2_INFO_LASTCODEUNIT. -.P -For anchored patterns, a last literal value is recorded only if it follows -something of variable length. For example, for the pattern /^a\ed+z\ed+/ the -returned value is 1 (with "z" returned from PCRE2_INFO_LASTCODEUNIT), but for -/^a\edz\ed/ the returned value is 0. +PCRE2_INFO_LASTCODEUNIT. For anchored patterns, a last literal value is +recorded only if it follows something of variable length. For example, for the +pattern /^a\ed+z\ed+/ the returned value is 1 (with "z" returned from +PCRE2_INFO_LASTCODEUNIT), but for /^a\edz\ed/ the returned value is 0. .sp PCRE2_INFO_LASTCODEUNIT .sp @@ -1613,8 +1802,11 @@ value, 0 is returned. .sp PCRE2_INFO_MATCHEMPTY .sp -Return 1 if the pattern can match an empty string, otherwise 0. The third -argument should point to an \fBuint32_t\fP variable. +Return 1 if the pattern might match an empty string, otherwise 0. The third +argument should point to an \fBuint32_t\fP variable. When a pattern contains +recursive subroutine calls it is not always possible to determine whether or +not it can match an empty string. PCRE2 takes a cautious approach and returns 1 +in such cases. .sp PCRE2_INFO_MATCHLIMIT .sp @@ -1788,11 +1980,11 @@ documentation. 
.rs .sp .nf -.B pcre2_match_data_create(uint32_t \fIovecsize\fP, +.B pcre2_match_data *pcre2_match_data_create(uint32_t \fIovecsize\fP, .B " pcre2_general_context *\fIgcontext\fP);" .sp -.B pcre2_match_data_create_from_pattern(const pcre2_code *\fIcode\fP, -.B " pcre2_general_context *\fIgcontext\fP);" +.B pcre2_match_data *pcre2_match_data_create_from_pattern( +.B " const pcre2_code *\fIcode\fP, pcre2_general_context *\fIgcontext\fP);" .sp .B void pcre2_match_data_free(pcre2_match_data *\fImatch_data\fP); .fi @@ -1801,7 +1993,7 @@ Information about a successful or unsuccessful match is placed in a match data block, which is an opaque structure that is accessed by function calls. In particular, the match data block contains a vector of offsets into the subject string that define the matched part of the subject and any substrings that were -captured. This is know as the \fIovector\fP. +captured. This is known as the \fIovector\fP. .P Before calling \fBpcre2_match()\fP, \fBpcre2_dfa_match()\fP, or \fBpcre2_jit_match()\fP you must create a match data block by calling one of @@ -1964,13 +2156,14 @@ pattern does not require the match to be at the start of the subject. .sp The unused bits of the \fIoptions\fP argument for \fBpcre2_match()\fP must be zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_NOTBOL, -PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, -PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Their action is described below. +PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_JIT, +PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Their action is +described below. .P Setting PCRE2_ANCHORED at match time is not supported by the just-in-time (JIT) compiler. If it is set, JIT matching is disabled and the normal interpretive -code in \fBpcre2_match()\fP is run. The remaining options are supported for JIT -matching. +code in \fBpcre2_match()\fP is run. 
Apart from PCRE2_NO_JIT (obviously), the +remaining options are supported for JIT matching. .sp PCRE2_ANCHORED .sp @@ -2017,17 +2210,31 @@ only at the first matching position, that is, at the start of the subject plus the starting offset. An empty string match later in the subject is permitted. If the pattern is anchored, such a match can occur only if the pattern contains \eK. +.sp + PCRE2_NO_JIT +.sp +By default, if a pattern has been successfully processed by +\fBpcre2_jit_compile()\fP, JIT is automatically used when \fBpcre2_match()\fP +is called with options that JIT supports. Setting PCRE2_NO_JIT disables the use +of JIT; it forces matching to be done by the interpreter. .sp PCRE2_NO_UTF_CHECK .sp When PCRE2_UTF is set at compile time, the validity of the subject as a UTF string is checked by default when \fBpcre2_match()\fP is subsequently called. -The entire string is checked before any other processing takes place, and a +If a non-zero starting offset is given, the check is applied only to that part +of the subject that could be inspected during matching, and there is a check +that the starting offset points to the first code unit of a character or to the +end of the subject. If there are no lookbehind assertions in the pattern, the +check starts at the starting offset. Otherwise, it starts at the length of the +longest lookbehind before the starting offset, or at the start of the subject +if there are not that many characters before the starting offset. Note that the +sequences \eb and \eB are one-character lookbehinds. +.P +The check is carried out before any other processing takes place, and a negative error code is returned if the check fails. There are several UTF error codes for each code unit width, corresponding to different problems with the -code unit sequence. The value of \fIstartoffset\fP is also checked, to ensure -that it points to the start of a character or to the end of the subject. 
There -are discussions about the validity of +code unit sequence. There are discussions about the validity of .\" HTML .\" UTF-8 strings, @@ -2092,9 +2299,19 @@ standard convention for the operating system. The default can be overridden in a .\" HTML .\" -compile context. +compile context .\" -During matching, the newline choice affects the behaviour of the dot, +by calling \fBpcre2_set_newline()\fP. It can also be overridden by starting a +pattern string with, for example, (*CRLF), as described in the +.\" HTML +.\" +section on newline conventions +.\" +in the +.\" HREF +\fBpcre2pattern\fP +.\" +page. During matching, the newline choice affects the behaviour of the dot, circumflex, and dollar metacharacters. It may also alter the way the match starting position is advanced after a match failure for an unanchored pattern. .P @@ -2140,18 +2357,7 @@ that do not cause substrings to be captured. The \fBpcre2_pattern_info()\fP function can be used to find out how many capturing subpatterns there are in a compiled pattern. .P -A successful match returns the overall matched string and any captured -substrings to the caller via a vector of PCRE2_SIZE values. This is called the -\fBovector\fP, and is contained within the -.\" HTML -.\" -match data block. -.\" -You can obtain direct access to the ovector by calling -\fBpcre2_get_ovector_pointer()\fP to find its address, and -\fBpcre2_get_ovector_count()\fP to find the number of pairs of values it -contains. Alternatively, you can use the auxiliary functions for accessing -captured substrings +You can use auxiliary functions for accessing captured substrings .\" HTML .\" by number @@ -2159,9 +2365,20 @@ by number or .\" HTML .\" -by name +by name, .\" -(see below). +as described in sections below. +.P +Alternatively, you can make direct use of the vector of PCRE2_SIZE values, +called the \fBovector\fP, which contains the offsets of captured strings. It is +part of the +.\" HTML +.\" +match data block. 
+.\" +The function \fBpcre2_get_ovector_pointer()\fP returns the address of the +ovector, and \fBpcre2_get_ovector_count()\fP returns the number of pairs of +values it contains. .P Within the ovector, the first in each pair of values is set to the offset of the first code unit of a substring, and the second is set to the offset of the @@ -2245,7 +2462,12 @@ After a successful match, a partial match (PCRE2_ERROR_PARTIAL), or a failure to match (PCRE2_ERROR_NOMATCH), a (*MARK) name may be available, and \fBpcre2_get_mark()\fP can be called. It returns a pointer to the zero-terminated name, which is within the compiled pattern. Otherwise NULL is -returned. After a successful match, the (*MARK) name that is returned is the +returned. The length of the (*MARK) name (excluding the terminating zero) is +stored in the code unit that precedes the name. You should use this instead of +relying on the terminating zero if the (*MARK) name might contain a binary +zero. +.P +After a successful match, the (*MARK) name that is returned is the last one encountered on the matching path through the pattern. After a "no match" or a partial match, the last encountered (*MARK) name is returned. For example, consider this pattern: @@ -2264,7 +2486,7 @@ different to the value of \fIovector[0]\fP if the pattern contains the \eK escape sequence. After a partial match, however, this value is always the same as \fIovector[0]\fP because \eK does not affect the result of a partial match. .P -After a UTF check failure, \fBpcre2_get_startchar()\fB can be used to obtain +After a UTF check failure, \fBpcre2_get_startchar()\fP can be used to obtain the code unit offset of the invalid UTF character. Details are given in the .\" HREF \fBpcre2unicode\fP @@ -2277,11 +2499,16 @@ page. .rs .sp If \fBpcre2_match()\fP fails, it returns a negative number. This can be -converted to a text string by calling \fBpcre2_get_error_message()\fP.
Negative -error codes are also returned by other functions, and are documented with them. -The codes are given names in the header file. If UTF checking is in force and -an invalid UTF subject string is detected, one of a number of UTF-specific -negative error codes is returned. Details are given in the +converted to a text string by calling the \fBpcre2_get_error_message()\fP +function (see "Obtaining a textual error message" +.\" HTML +.\" +below). +.\" +Negative error codes are also returned by other functions, and are documented +with them. The codes are given names in the header file. If UTF checking is in +force and an invalid UTF subject string is detected, one of a number of +UTF-specific negative error codes is returned. Details are given in the .\" HREF \fBpcre2unicode\fP .\" @@ -2394,6 +2621,30 @@ is attempted. The internal recursion limit was reached. . . +.\" HTML +.SH "OBTAINING A TEXTUAL ERROR MESSAGE" +.rs +.sp +.nf +.B int pcre2_get_error_message(int \fIerrorcode\fP, PCRE2_UCHAR *\fIbuffer\fP, +.B " PCRE2_SIZE \fIbufflen\fP);" +.fi +.P +A text message for an error code from any PCRE2 function (compile, match, or +auxiliary) can be obtained by calling \fBpcre2_get_error_message()\fP. The code +is passed as the first argument, with the remaining two arguments specifying a +code unit buffer and its length, into which the text message is placed. Note +that the message is returned in code units of the appropriate width for the +library that is being used. +.P +The returned message is terminated with a trailing zero, and the function +returns the number of code units used, excluding the trailing zero. If the +error number is unknown, the negative error code PCRE2_ERROR_BADDATA is +returned. If the buffer is too small, the message is truncated (but still with +a trailing zero), and the negative error code PCRE2_ERROR_NOMEMORY is returned. +None of the messages are very long; a buffer size of 120 code units is ample. +. +. 
.\" HTML .SH "EXTRACTING CAPTURED SUBSTRINGS BY NUMBER" .rs @@ -2595,32 +2846,17 @@ same number causes an error at compile time. .B int pcre2_substitute(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP, .B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP," .B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP," -.B " pcre2_match_context *\fImcontext\fP, PCRE2_SPTR \fIreplacementzfP," +.B " pcre2_match_context *\fImcontext\fP, PCRE2_SPTR \fIreplacement\fP," .B " PCRE2_SIZE \fIrlength\fP, PCRE2_UCHAR *\fIoutputbuffer\zfP," .B " PCRE2_SIZE *\fIoutlengthptr\fP);" .fi +.P This function calls \fBpcre2_match()\fP and then makes a copy of the subject string in \fIoutputbuffer\fP, replacing the part that was matched with the \fIreplacement\fP string, whose length is supplied in \fBrlength\fP. This can -be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. -.P -In the replacement string, which is interpreted as a UTF string in UTF mode, -and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a -dollar character is an escape character that can specify the insertion of -characters from capturing groups in the pattern. The following forms are -recognized: -.sp - $$ insert a dollar character - $ insert the contents of group - ${} insert the contents of group -.sp -Either a group number or a group name can be given for . Curly brackets are -required only if the following character would be interpreted as part of the -number or name. The number may be zero to include the entire matched string. -For example, if the pattern a(b)c is matched with "=abc=" and the replacement -string "+$1$0$1+", the result is "=+babcb+=". Group insertion is done by -calling \fBpcre2_copy_byname()\fP or \fBpcre2_copy_bynumber()\fP as -appropriate. +be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. 
Matches in +which a \eK item in a lookahead in the pattern causes the match to end before +it starts are not supported, and give rise to an error return. .P The first seven arguments of \fBpcre2_substitute()\fP are the same as for \fBpcre2_match()\fP, except that the partial matching options are not @@ -2629,23 +2865,169 @@ data block is obtained and freed within this function, using memory management functions from the match context, if provided, or else those that were used to allocate memory for the compiled code. .P -There is one additional option, PCRE2_SUBSTITUTE_GLOBAL, which causes the -function to iterate over the subject string, replacing every matching -substring. If this is not set, only the first matching substring is replaced. -.P The \fIoutlengthptr\fP argument must point to a variable that contains the -length, in code units, of the output buffer. It is updated to contain the -length of the new string, excluding the trailing zero that is automatically -added. +length, in code units, of the output buffer. If the function is successful, the +value is updated to contain the length of the new string, excluding the +trailing zero that is automatically added. .P -The function returns the number of replacements that were made. This may be -zero if no matches were found, and is never greater than 1 unless -PCRE2_SUBSTITUTE_GLOBAL is set. In the event of an error, a negative error code -is returned. Except for PCRE2_ERROR_NOMATCH (which is never returned), any -errors from \fBpcre2_match()\fP or the substring copying functions are passed -straight back. PCRE2_ERROR_BADREPLACEMENT is returned for an invalid -replacement string (unrecognized sequence following a dollar sign), and -PCRE2_ERROR_NOMEMORY is returned if the output buffer is not big enough. +If the function is not successful, the value set via \fIoutlengthptr\fP depends +on the type of error. 
For syntax errors in the replacement string, the value is +the offset in the replacement string where the error was detected. For other +errors, the value is PCRE2_UNSET by default. This includes the case of the +output buffer being too small, unless PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set +(see below), in which case the value is the minimum length needed, including +space for the trailing zero. Note that in order to compute the required length, +\fBpcre2_substitute()\fP has to simulate all the matching and copying, instead +of giving an error return as soon as the buffer overflows. Note also that the +length is in code units, not bytes. +.P +In the replacement string, which is interpreted as a UTF string in UTF mode, +and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a +dollar character is an escape character that can specify the insertion of +characters from capturing groups or (*MARK) items in the pattern. The following +forms are always recognized: +.sp + $$ insert a dollar character + $<n> or ${<n>} insert the contents of group <n> + $*MARK or ${*MARK} insert the name of the last (*MARK) encountered +.sp +Either a group number or a group name can be given for <n>. Curly brackets are +required only if the following character would be interpreted as part of the +number or name. The number may be zero to include the entire matched string. +For example, if the pattern a(b)c is matched with "=abc=" and the replacement +string "+$1$0$1+", the result is "=+babcb+=". +.P +The facility for inserting a (*MARK) name can be used to perform simple +simultaneous substitutions, as this \fBpcre2test\fP example shows: +.sp + /(*:pear)apple|(*:orange)lemon/g,replace=${*MARK} + apple lemon + 2: pear orange +.sp +As well as the usual options for \fBpcre2_match()\fP, a number of additional +options can be set in the \fIoptions\fP argument. +.P +PCRE2_SUBSTITUTE_GLOBAL causes the function to iterate over the subject string, +replacing every matching substring.
If this is not set, only the first matching +substring is replaced. If any matched substring has zero length, after the +substitution has happened, an attempt to find a non-empty match at the same +position is performed. If this is not successful, the current position is +advanced by one character except when CRLF is a valid newline sequence and the +next two characters are CR, LF. In this case, the current position is advanced +by two characters. +.P +PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output buffer is +too small. The default action is to return PCRE2_ERROR_NOMEMORY immediately. If +this option is set, however, \fBpcre2_substitute()\fP continues to go through +the motions of matching and substituting (without, of course, writing anything) +in order to compute the size of buffer that is needed. This value is passed +back via the \fIoutlengthptr\fP variable, with the result of the function still +being PCRE2_ERROR_NOMEMORY. +.P +Passing a buffer size of zero is a permitted way of finding out how much memory +is needed for a given substitution. However, this does mean that the entire +operation is carried out twice. Depending on the application, it may be more +efficient to allocate a large buffer and free the excess afterwards, instead of +using PCRE2_SUBSTITUTE_OVERFLOW_LENGTH. +.P +PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capturing groups that do +not appear in the pattern to be treated as unset groups. This option should be +used with care, because it means that a typo in a group name or number no +longer causes the PCRE2_ERROR_NOSUBSTRING error. +.P +PCRE2_SUBSTITUTE_UNSET_EMPTY causes unset capturing groups (including unknown +groups when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) to be treated as empty +strings when inserted as described above. If this option is not set, an attempt +to insert an unset group causes the PCRE2_ERROR_UNSET error. This option does +not influence the extended substitution syntax described below.
+.P +PCRE2_SUBSTITUTE_EXTENDED causes extra processing to be applied to the +replacement string. Without this option, only the dollar character is special, +and only the group insertion forms listed above are valid. When +PCRE2_SUBSTITUTE_EXTENDED is set, two things change: +.P +Firstly, backslash in a replacement string is interpreted as an escape +character. The usual forms such as \en or \ex{ddd} can be used to specify +particular character codes, and backslash followed by any non-alphanumeric +character quotes that character. Extended quoting can be coded using \eQ...\eE, +exactly as in pattern strings. +.P +There are also four escape sequences for forcing the case of inserted letters. +The insertion mechanism has three states: no case forcing, force upper case, +and force lower case. The escape sequences change the current state: \eU and +\eL change to upper or lower case forcing, respectively, and \eE (when not +terminating a \eQ quoted sequence) reverts to no case forcing. The sequences +\eu and \el force the next character (if it is a letter) to upper or lower +case, respectively, and then the state automatically reverts to no case +forcing. Case forcing applies to all inserted characters, including those from +captured groups and letters within \eQ...\eE quoted sequences. +.P +Note that case forcing sequences such as \eU...\eE do not nest. For example, +the result of processing "\eUaa\eLBB\eEcc\eE" is "AAbbcc"; the final \eE has no +effect. +.P +The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more +flexibility to group substitution. The syntax is similar to that used by Bash: +.sp + ${<n>:-<string>} + ${<n>:+<string1>:<string2>} +.sp +As before, <n> may be a group number or a name. The first form specifies a +default value. If group <n> is set, its value is inserted; if not, <string> is +expanded and the result inserted. The second form specifies strings that are +expanded and inserted when group <n> is set or unset, respectively.
The first +form is just a convenient shorthand for +.sp + ${<n>:+${<n>}:<string>} +.sp +Backslash can be used to escape colons and closing curly brackets in the +replacement strings. A change of the case forcing state within a replacement +string remains in force afterwards, as shown in this \fBpcre2test\fP example: +.sp + /(some)?(body)/substitute_extended,replace=${1:+\eU:\eL}HeLLo + body + 1: hello + somebody + 1: HELLO +.sp +The PCRE2_SUBSTITUTE_UNSET_EMPTY option does not affect these extended +substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause unknown +groups in the extended syntax forms to be treated as unset. +.P +If successful, \fBpcre2_substitute()\fP returns the number of replacements that +were made. This may be zero if no matches were found, and is never greater than +1 unless PCRE2_SUBSTITUTE_GLOBAL is set. +.P +In the event of an error, a negative error code is returned. Except for +PCRE2_ERROR_NOMATCH (which is never returned), errors from \fBpcre2_match()\fP +are passed straight back. +.P +PCRE2_ERROR_NOSUBSTRING is returned for a non-existent substring insertion, +unless PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set. +.P +PCRE2_ERROR_UNSET is returned for an unset substring insertion (including an +unknown substring when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) when the simple +(non-extended) syntax is used and PCRE2_SUBSTITUTE_UNSET_EMPTY is not set. +.P +PCRE2_ERROR_NOMEMORY is returned if the output buffer is not big enough. If the +PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set, the size of buffer that is +needed is returned via \fIoutlengthptr\fP. Note that this does not happen by +default.
+.P +PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in the +replacement string, with more particular errors being PCRE2_ERROR_BADREPESCAPE +(invalid escape sequence), PCRE2_ERROR_REPMISSINGBRACE (closing curly bracket +not found), PCRE2_ERROR_BADSUBSTITUTION (syntax error in extended group +substitution), and PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before it +started, which can happen if \eK is used in an assertion). +.P +As for all PCRE2 errors, a text message that describes the error can be +obtained by calling the \fBpcre2_get_error_message()\fP function (see +"Obtaining a textual error message" +.\" HTML +.\" +above). +.\" . . .SH "DUPLICATE SUBPATTERN NAMES" @@ -2686,14 +3068,14 @@ first and last entries in the name-to-number table for the given name, and the function returns the length of each entry in code units. In both cases, PCRE2_ERROR_NOSUBSTRING is returned if there are no entries for the given name. .P -The format of the name table is described above in the section entitled -\fIInformation about a pattern\fP +The format of the name table is described .\" HTML .\" -above. +above .\" -Given all the relevant entries for the name, you can extract each of their -numbers, and hence the captured data. +in the section entitled \fIInformation about a pattern\fP. Given all the +relevant entries for the name, you can extract each of their numbers, and hence +the captured data. . . .SH "FINDING ALL POSSIBLE MATCHES AT ONE POSITION" @@ -2888,8 +3270,8 @@ There are in addition the following errors that are specific to PCRE2_ERROR_DFA_UITEM .sp This return is given if \fBpcre2_dfa_match()\fP encounters an item in the -pattern that it does not support, for instance, the use of \eC or a back -reference. +pattern that it does not support, for instance, the use of \eC in a UTF mode or +a back reference. .sp PCRE2_ERROR_DFA_UCOND .sp @@ -2939,6 +3321,6 @@ Cambridge, England.
.rs .sp .nf -Last updated: 22 April 2015 -Copyright (c) 1997-2015 University of Cambridge. +Last updated: 23 December 2016 +Copyright (c) 1997-2016 University of Cambridge. .fi diff --git a/pcre2/doc/pcre2build.3 b/pcre2/doc/pcre2build.3 index 8f74e9b6b..ea9d8a97b 100644 --- a/pcre2/doc/pcre2build.3 +++ b/pcre2/doc/pcre2build.3 @@ -1,4 +1,4 @@ -.TH PCRE2BUILD 3 "23 April 2015" "PCRE2 10.20" +.TH PCRE2BUILD 3 "01 November 2016" "PCRE2 10.23" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) . @@ -132,11 +132,20 @@ Pattern escapes such as \ed and \ew do not by default make use of Unicode properties. The application can request that they do by setting the PCRE2_UCP option. Unless the application has set PCRE2_NEVER_UCP, a pattern may also request this by starting with (*UCP). -.P +. +. +.SH "DISABLING THE USE OF \eC" +.rs +.sp The \eC escape sequence, which matches a single code unit, even in a UTF mode, can cause unpredictable behaviour because it may leave the current matching -point in the middle of a multi-code-unit character. It can be locked out by -setting the PCRE2_NEVER_BACKSLASH_C option. +point in the middle of a multi-code-unit character. The application can lock it +out by setting the PCRE2_NEVER_BACKSLASH_C option when calling +\fBpcre2_compile()\fP. There is also a build-time option +.sp + --enable-never-backslash-C +.sp +(note the upper case C) which locks out the use of \eC entirely. . . .SH "JUST-IN-TIME COMPILER SUPPORT" @@ -343,6 +352,19 @@ and equivalent run-time options, refer to these character values in an EBCDIC environment. . . +.SH "PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS" +.rs +.sp +By default, on non-Windows systems, \fBpcre2grep\fP supports the use of +callouts with string arguments within the patterns it is matching, in order to +run external scripts. For details, see the +.\" HREF +\fBpcre2grep\fP +.\" +documentation. This support can be disabled by adding +--disable-pcre2grep-callout to the \fBconfigure\fP command. +. 
+. .SH "PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT" .rs .sp @@ -363,16 +385,19 @@ they are not. .sp \fBpcre2grep\fP uses an internal buffer to hold a "window" on the file it is scanning, in order to be able to output "before" and "after" lines when it -finds a match. The size of the buffer is controlled by a parameter whose -default value is 20K. The buffer itself is three times this size, but because -of the way it is used for holding "before" lines, the longest line that is -guaranteed to be processable is the parameter size. You can change the default -parameter value by adding, for example, +finds a match. The starting size of the buffer is controlled by a parameter +whose default value is 20K. The buffer itself is three times this size, but +because of the way it is used for holding "before" lines, the longest line that +is guaranteed to be processable is the parameter size. If a longer line is +encountered, \fBpcre2grep\fP automatically expands the buffer, up to a +specified maximum size, whose default is 1M or the starting size, whichever is +the larger. You can change the default parameter values by adding, for example, .sp - --with-pcre2grep-bufsize=50K + --with-pcre2grep-bufsize=51200 + --with-pcre2grep-max-bufsize=2097152 .sp -to the \fBconfigure\fP command. The caller of \fPpcre2grep\fP can override this -value by using --buffer-size on the command line.. +to the \fBconfigure\fP command. The caller of \fBpcre2grep\fP can override +these values by using --buffer-size and --max-buffer-size on the command line. . . .SH "PCRE2TEST OPTION FOR LIBREADLINE SUPPORT" @@ -490,6 +515,28 @@ information about code coverage, see the \fBgcov\fP and \fBlcov\fP documentation. . . +.SH "SUPPORT FOR FUZZERS" +.rs +.sp +There is a special option for use by people who want to run fuzzing tests on +PCRE2: +.sp + --enable-fuzz-support +.sp +At present this applies only to the 8-bit library.
If set, it causes an extra +library called libpcre2-fuzzsupport.a to be built, but not installed. This +contains a single function called LLVMFuzzerTestOneInput() whose arguments are +a pointer to a string and the length of the string. When called, this function +tries to compile the string as a pattern, and if that succeeds, to match it. +This is done both with no options and with some random options bits that are +generated from the string. Setting --enable-fuzz-support also causes a binary +called \fBpcre2fuzzcheck\fP to be created. This is normally run under valgrind +or used when PCRE2 is compiled with address sanitizing enabled. It calls the +fuzzing function and outputs information about what it is doing. The input strings +are specified by arguments: if an argument starts with "=" the rest of it is a +literal input string. Otherwise, it is assumed to be a file name, and the +contents of the file are the test string. +. .SH "SEE ALSO" .rs .sp @@ -510,6 +557,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 24 April 2015 -Copyright (c) 1997-2015 University of Cambridge. +Last updated: 01 November 2016 +Copyright (c) 1997-2016 University of Cambridge. .fi diff --git a/pcre2/doc/pcre2callout.3 b/pcre2/doc/pcre2callout.3 index 6919f5a61..001796d68 100644 --- a/pcre2/doc/pcre2callout.3 +++ b/pcre2/doc/pcre2callout.3 @@ -1,4 +1,4 @@ -.TH PCRE2CALLOUT 3 "23 March 2015" "PCRE2 10.20" +.TH PCRE2CALLOUT 3 "29 September 2016" "PCRE2 10.23" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS @@ -40,11 +40,20 @@ two callout points: .sp If the PCRE2_AUTO_CALLOUT option bit is set when a pattern is compiled, PCRE2 automatically inserts callouts, all with number 255, before each item in the -pattern. For example, if PCRE2_AUTO_CALLOUT is used with the pattern +pattern except for immediately before or after a callout item in the pattern.
+For example, if PCRE2_AUTO_CALLOUT is used with the pattern +.sp + A(?C3)B +.sp +it is processed as if it were +.sp + (?C255)A(?C3)B(?C255) +.sp +Here is a more complicated example: .sp A(\ed{2}|--) .sp -it is processed as if it were +With PCRE2_AUTO_CALLOUT, this pattern is processed as if it were .sp (?C255)A(?C255)((?C255)\ed{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255) .sp @@ -91,10 +100,10 @@ with PCRE2_ANCHORED and PCRE2_AUTO_CALLOUT and then applied to the string No match .sp This indicates that when matching [bc] fails, there is no backtracking into a+ -and therefore the callouts that would be taken for the backtracks do not occur. -You can disable the auto-possessify feature by passing PCRE2_NO_AUTO_POSSESS to -\fBpcre2_compile()\fP, or starting the pattern with (*NO_AUTO_POSSESS). In this -case, the output changes to this: +(because it is being treated as a++) and therefore the callouts that would be +taken for the backtracks do not occur. You can disable the auto-possessify +feature by passing PCRE2_NO_AUTO_POSSESS to \fBpcre2_compile()\fP, or starting +the pattern with (*NO_AUTO_POSSESS). In this case, the output changes to this: .sp --->aaaa +0 ^ a+ @@ -220,8 +229,8 @@ but the intention is never to remove any of the existing fields. .sp For a numerical callout, \fIcallout_string\fP is NULL, and \fIcallout_number\fP contains the number of the callout, in the range 0-255. This is the number -that follows (?C for manual callouts; it is 255 for automatically generated -callouts. +that follows (?C for callouts that are part of the pattern; it is 255 for +automatically generated callouts. . . .SS "Fields for string callouts" @@ -286,10 +295,15 @@ The \fIpattern_position\fP field contains the offset in the pattern string to the next item to be matched. .P The \fInext_item_length\fP field contains the length of the next item to be -matched in the pattern string.
When the callout immediately precedes an -alternation bar, a closing parenthesis, or the end of the pattern, the length -is zero. When the callout precedes an opening parenthesis, the length is that -of the entire subpattern. +processed in the pattern string. When the callout is at the end of the pattern, +the length is zero. When the callout precedes an opening parenthesis, the +length includes meta characters that follow the parenthesis. For example, in a +callout before an assertion such as (?=ab) the length is 3. For an +alternation bar or a closing parenthesis, the length is one, unless a closing +parenthesis is followed by a quantifier, in which case its length is included. +(This changed in release 10.23. In earlier releases, before an opening +parenthesis the length was that of the entire subpattern, and before an +alternation bar or a closing parenthesis the length was zero.) .P The \fIpattern_position\fP and \fInext_item_length\fP fields are intended to help in distinguishing between different automatic callouts, which all have the @@ -382,6 +396,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 23 March 2015 -Copyright (c) 1997-2015 University of Cambridge. +Last updated: 29 September 2016 +Copyright (c) 1997-2016 University of Cambridge. .fi diff --git a/pcre2/doc/pcre2compat.3 b/pcre2/doc/pcre2compat.3 index a3306d782..64d0df3d3 100644 --- a/pcre2/doc/pcre2compat.3 +++ b/pcre2/doc/pcre2compat.3 @@ -1,4 +1,4 @@ -.TH PCRE2COMPAT 3 "15 March 2015" "PCRE2 10.20" +.TH PCRE2COMPAT 3 "18 October 2016" "PCRE2 10.23" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "DIFFERENCES BETWEEN PCRE2 AND PERL" @@ -96,7 +96,7 @@ processed as anchored at the point where they are tested. one that is backtracked onto acts. For example, in the pattern A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C triggers (*PRUNE).
Perl's behaviour is more complex; in many cases it is the -same as PCRE2, but there are examples where it differs. +same as PCRE2, but there are cases where it differs. .P 11. Most backtracking verbs in assertions have their normal actions. They are not confined to the assertion. @@ -109,17 +109,18 @@ the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE2 it is set to 13. PCRE2's handling of duplicate subpattern numbers and duplicate subpattern names is not as general as Perl's. This is a consequence of the fact the PCRE2 works internally just with numbers, using an external table to translate -between numbers and names. In particular, a pattern such as (?|(?A)|(?A)|(?B), where the two capturing parentheses have the same number but different names, is not supported, and causes an error at compile time. If it were allowed, it would not be possible to distinguish which parentheses matched, because both names map to capturing subpattern number 1. To avoid this confusing situation, an error is given at compile time. .P -14. Perl recognizes comments in some places that PCRE2 does not, for example, -between the ( and ? at the start of a subpattern. If the /x modifier is set, -Perl allows white space between ( and ? (though current Perls warn that this is -deprecated) but PCRE2 never does, even if the PCRE2_EXTENDED option is set. +14. Perl used to recognize comments in some places that PCRE2 does not, for +example, between the ( and ? at the start of a subpattern. If the /x modifier +is set, Perl allowed white space between ( and ? though the latest Perls give +an error (for a while it was just deprecated). There may still be some cases +where Perl behaves differently. .P 15. Perl, when in warning mode, gives warnings for character classes such as [A-\ed] or [a-[:digit:]]. It then treats the hyphens as literals. 
PCRE2 has no @@ -141,33 +142,37 @@ list is with respect to Perl 5.10: each alternative branch of a lookbehind assertion can match a different length of string. Perl requires them all to have the same length. .sp -(b) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the $ +(b) From PCRE2 10.23, back references to groups of fixed length are supported +in lookbehinds, provided that there is no possibility of referencing a +non-unique number or name. Perl does not support backreferences in lookbehinds. +.sp +(c) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the $ meta-character matches only at the very end of the string. .sp -(c) A backslash followed by a letter with no special meaning is faulted. (Perl +(d) A backslash followed by a letter with no special meaning is faulted. (Perl can be made to issue a warning.) .sp -(d) If PCRE2_UNGREEDY is set, the greediness of the repetition quantifiers is +(e) If PCRE2_UNGREEDY is set, the greediness of the repetition quantifiers is inverted, that is, by default they are not greedy, but if followed by a question mark they are. .sp -(e) PCRE2_ANCHORED can be used at matching time to force a pattern to be tried +(f) PCRE2_ANCHORED can be used at matching time to force a pattern to be tried only at the first matching position in the subject string. .sp -(f) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, and +(g) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, and PCRE2_NO_AUTO_CAPTURE options have no Perl equivalents. .sp -(g) The \eR escape sequence can be restricted to match only CR, LF, or CRLF +(h) The \eR escape sequence can be restricted to match only CR, LF, or CRLF by the PCRE2_BSR_ANYCRLF option. .sp -(h) The callout facility is PCRE2-specific. +(i) The callout facility is PCRE2-specific. .sp -(i) The partial matching facility is PCRE2-specific. +(j) The partial matching facility is PCRE2-specific. 
.sp -(j) The alternative matching function (\fBpcre2_dfa_match()\fP matches in a +(k) The alternative matching function (\fBpcre2_dfa_match()\fP) matches in a different way and is not Perl-compatible. .sp -(k) PCRE2 recognizes some special sequences such as (*CR) at the start of +(l) PCRE2 recognizes some special sequences such as (*CR) at the start of a pattern that set overall options that cannot be changed within the pattern. . . @@ -185,6 +190,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 15 March 2015 -Copyright (c) 1997-2015 University of Cambridge. +Last updated: 18 October 2016 +Copyright (c) 1997-2016 University of Cambridge. .fi diff --git a/pcre2/doc/pcre2demo.3 b/pcre2/doc/pcre2demo.3 index 5deed0a05..c02dcd95c 100644 --- a/pcre2/doc/pcre2demo.3 +++ b/pcre2/doc/pcre2demo.3 @@ -20,28 +20,31 @@ *************************************************/ /* This is a demonstration program to illustrate a straightforward way of -calling the PCRE2 regular expression library from a C program. See the +using the PCRE2 regular expression library from a C program. See the pcre2sample documentation for a short discussion ("man pcre2sample" if you have the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is incompatible with the original PCRE API. There are actually three libraries, each supporting a different code unit -width. This demonstration program uses the 8-bit library. +width. This demonstration program uses the 8-bit library. The default is to +process each code unit as a separate character, but if the pattern begins with +"(*UTF)", both it and the subject are treated as UTF-8 strings, where +characters may occupy multiple code units.
In Unix-like environments, if PCRE2 is installed in your standard system libraries, you should be able to compile this program using this command: -gcc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo +cc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo If PCRE2 is not installed in a standard place, it is likely to be installed with support for the pkg-config mechanism. If you have pkg-config, you can compile this program using this command: -gcc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo +cc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo -If you do not have pkg-config, you may have to use this: +If you do not have pkg-config, you may have to use something like this: -gcc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \e +cc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \e -R/usr/local/lib -lpcre2-8 -o pcre2demo Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and @@ -56,9 +59,14 @@ the following line. */ /* #define PCRE2_STATIC */ -/* This macro must be defined before including pcre2.h. For a program that uses -only one code unit width, it makes it possible to use generic function names -such as pcre2_compile(). */ +/* The PCRE2_CODE_UNIT_WIDTH macro must be defined before including pcre2.h. +For a program that uses only one code unit width, setting it to 8, 16, or 32 +makes it possible to use generic function names such as pcre2_compile(). Note +that just changing 8 to 16 (for example) is not sufficient to convert this +program to process 16-bit characters. Even in a fully 16-bit environment, where +string-handling functions such as strcmp() and printf() work with 16-bit +characters, the code for handling the table of named substrings will still need +to be modified. 
*/ #define PCRE2_CODE_UNIT_WIDTH 8 @@ -79,19 +87,19 @@ int main(int argc, char **argv) { pcre2_code *re; PCRE2_SPTR pattern; /* PCRE2_SPTR is a pointer to unsigned code units of */ -PCRE2_SPTR subject; /* the appropriate width (8, 16, or 32 bits). */ +PCRE2_SPTR subject; /* the appropriate width (in this case, 8 bits). */ PCRE2_SPTR name_table; int crlf_is_newline; int errornumber; int find_all; int i; -int namecount; -int name_entry_size; int rc; int utf8; uint32_t option_bits; +uint32_t namecount; +uint32_t name_entry_size; uint32_t newline; PCRE2_SIZE erroroffset; @@ -106,15 +114,19 @@ pcre2_match_data *match_data; * First, sort out the command line. There is only one possible option at * * the moment, "-g" to request repeated matching to find all occurrences, * * like Perl's /g option. We set the variable find_all to a non-zero value * -* if the -g option is present. Apart from that, there must be exactly two * -* arguments. * +* if the -g option is present. * **************************************************************************/ find_all = 0; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-g") == 0) find_all = 1; - else break; + else if (argv[i][0] == '-') + { + printf("Unrecognised option %s\en", argv[i]); + return 1; + } + else break; } /* After the options, we require exactly two arguments, which are the pattern, @@ -122,7 +134,7 @@ and the subject string. */ if (argc - i != 2) { - printf("Two arguments required: a regex and a subject string\en"); + printf("Exactly two arguments required: a regex and a subject string\en"); return 1; } @@ -201,7 +213,7 @@ if (rc < 0) stored. */ ovector = pcre2_get_ovector_pointer(match_data); -printf("\enMatch succeeded at offset %d\en", (int)ovector[0]); +printf("Match succeeded at offset %d\en", (int)ovector[0]); /************************************************************************* @@ -242,7 +254,7 @@ we have to extract the count of named parentheses from the pattern. 
*/ PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */ &namecount); /* where to put the answer */ -if (namecount <= 0) printf("No named substrings\en"); else +if (namecount == 0) printf("No named substrings\en"); else { PCRE2_SPTR tabptr; printf("Named substrings\en"); @@ -330,8 +342,8 @@ crlf_is_newline = newline == PCRE2_NEWLINE_ANY || for (;;) { - uint32_t options = 0; /* Normally no options */ - PCRE2_SIZE start_offset = ovector[1]; /* Start at end of previous match */ + uint32_t options = 0; /* Normally no options */ + PCRE2_SIZE start_offset = ovector[1]; /* Start at end of previous match */ /* If the previous match was for an empty string, we are finished if we are at the end of the subject. Otherwise, arrange to run another match at the @@ -371,7 +383,7 @@ for (;;) { if (options == 0) break; /* All matches found */ ovector[1] = start_offset + 1; /* Advance one code unit */ - if (crlf_is_newline && /* If CRLF is newline & */ + if (crlf_is_newline && /* If CRLF is a newline & */ start_offset < subject_length - 1 && /* we are at CRLF, */ subject[start_offset] == '\er' && subject[start_offset + 1] == '\en') @@ -417,7 +429,7 @@ for (;;) printf("%2d: %.*s\en", i, (int)substring_length, (char *)substring_start); } - if (namecount <= 0) printf("No named substrings\en"); else + if (namecount == 0) printf("No named substrings\en"); else { PCRE2_SPTR tabptr = name_table; printf("Named substrings\en"); diff --git a/pcre2/doc/pcre2grep.1 b/pcre2/doc/pcre2grep.1 index 028a91e4e..80e8899e7 100644 --- a/pcre2/doc/pcre2grep.1 +++ b/pcre2/doc/pcre2grep.1 @@ -1,4 +1,4 @@ -.TH PCRE2GREP 1 "03 January 2015" "PCRE2 10.00" +.TH PCRE2GREP 1 "31 December 2016" "PCRE2 10.23" .SH NAME pcre2grep - a grep with Perl-compatible regular expressions. .SH SYNOPSIS @@ -52,11 +52,18 @@ span line boundaries. What defines a line boundary is controlled by the \fB-N\fP (\fB--newline\fP) option.
.P The amount of memory used for buffering files that are being scanned is -controlled by a parameter that can be set by the \fB--buffer-size\fP option. -The default value for this parameter is specified when \fBpcre2grep\fP is -built, with the default default being 20K. A block of memory three times this -size is used (to allow for buffering "before" and "after" lines). An error -occurs if a line overflows the buffer. +controlled by parameters that can be set by the \fB--buffer-size\fP and +\fB--max-buffer-size\fP options. The first of these sets the size of buffer +that is obtained at the start of processing. If an input file contains very +long lines, a larger buffer may be needed; this is handled by automatically +extending the buffer, up to the limit specified by \fB--max-buffer-size\fP. The +default values for these parameters are specified when \fBpcre2grep\fP is +built, with the default defaults being 20K and 1M respectively. An error occurs +if a line is too long and the buffer can no longer be expanded. +.P +The block of memory that is actually used is three times the "buffer size", to +allow for buffering "before" and "after" lines. If the buffer size is too +small, fewer than requested "before" and "after" lines may be output. .P Patterns can be no longer than 8K or BUFSIZ bytes, whichever is the greater. BUFSIZ is defined in \fB\fP. When there is more than one pattern @@ -126,24 +133,27 @@ command line starts with a hyphen but is not an option. This allows for the processing of patterns and file names that start with hyphens. .TP \fB-A\fP \fInumber\fP, \fB--after-context=\fP\fInumber\fP -Output \fInumber\fP lines of context after each matching line. If file names -and/or line numbers are being output, a hyphen separator is used instead of a -colon for the context lines. A line containing "--" is output between each -group of lines, unless they are in fact contiguous in the input file. The value -of \fInumber\fP is expected to be relatively small. 
However, \fBpcre2grep\fP -guarantees to have up to 8K of following text available for context output. +Output up to \fInumber\fP lines of context after each matching line. Fewer +lines are output if the next match or the end of the file is reached, or if the +processing buffer size has been set too small. If file names and/or line +numbers are being output, a hyphen separator is used instead of a colon for the +context lines. A line containing "--" is output between each group of lines, +unless they are in fact contiguous in the input file. The value of \fInumber\fP +is expected to be relatively small. When \fB-c\fP is used, \fB-A\fP is ignored. .TP \fB-a\fP, \fB--text\fP Treat binary files as text. This is equivalent to \fB--binary-files\fP=\fItext\fP. .TP \fB-B\fP \fInumber\fP, \fB--before-context=\fP\fInumber\fP -Output \fInumber\fP lines of context before each matching line. If file names -and/or line numbers are being output, a hyphen separator is used instead of a -colon for the context lines. A line containing "--" is output between each -group of lines, unless they are in fact contiguous in the input file. The value -of \fInumber\fP is expected to be relatively small. However, \fBpcre2grep\fP -guarantees to have up to 8K of preceding text available for context output. +Output up to \fInumber\fP lines of context before each matching line. Fewer +lines are output if the previous match or the start of the file is within +\fInumber\fP lines, or if the processing buffer size has been set too small. If +file names and/or line numbers are being output, a hyphen separator is used +instead of a colon for the context lines. A line containing "--" is output +between each group of lines, unless they are in fact contiguous in the input +file. The value of \fInumber\fP is expected to be relatively small. When +\fB-c\fP is used, \fB-B\fP is ignored. .TP \fB--binary-files=\fP\fIword\fP Specify how binary files are to be processed. 
If the word is "binary" (the @@ -158,8 +168,9 @@ be of interest and are skipped without causing any output or affecting the return code. .TP \fB--buffer-size=\fP\fInumber\fP -Set the parameter that controls how much memory is used for buffering files -that are being scanned. +Set the parameter that controls how much memory is obtained at the start of +processing for buffering files that are being scanned. See also +\fB--max-buffer-size\fP below. .TP \fB-C\fP \fInumber\fP, \fB--context=\fP\fInumber\fP Output \fInumber\fP lines of context both before and after each matching line. @@ -167,13 +178,15 @@ This is equivalent to setting both \fB-A\fP and \fB-B\fP to the same value. .TP \fB-c\fP, \fB--count\fP Do not output lines from the files that are being scanned; instead output the -number of matches (or non-matches if \fB-v\fP is used) that would otherwise -have caused lines to be shown. By default, this count is the same as the number -of suppressed lines, but if the \fB-M\fP (multiline) option is used (without -\fB-v\fP), there may be more suppressed lines than the number of matches. +number of lines that would have been shown, either because they matched, or, if +\fB-v\fP is set, because they failed to match. By default, this count is +exactly the same as the number of lines that would have been output, but if the +\fB-M\fP (multiline) option is used (without \fB-v\fP), there may be more +suppressed lines than the count (that is, the number of matches). .sp If no lines are selected, the number zero is output. If several files are are -being scanned, a count is output for each of them. However, if the +being scanned, a count is output for each of them and the \fB-t\fP option can +be used to cause a total to be output at the end. However, if the \fB--files-with-matches\fP option is also used, only those files whose counts are greater than zero are listed. When \fB-c\fP is used, the \fB-A\fP, \fB-B\fP, and \fB-C\fP options are ignored. 
@@ -192,12 +205,22 @@ connected to a terminal. More resources are used when colouring is enabled, because \fBpcre2grep\fP has to search for all possible matches in a line, not just one, in order to colour them all. .sp -The colour that is used can be specified by setting the environment variable -PCRE2GREP_COLOUR or PCRE2GREP_COLOR. The value of this variable should be a -string of two numbers, separated by a semicolon. They are copied directly into -the control string for setting colour on a terminal, so it is your -responsibility to ensure that they make sense. If neither of the environment -variables is set, the default is "1;31", which gives red. +The colour that is used can be specified by setting one of the environment +variables PCRE2GREP_COLOUR, PCRE2GREP_COLOR, PCREGREP_COLOUR, or +PCREGREP_COLOR, which are checked in that order. If none of these are set, +\fBpcre2grep\fP looks for GREP_COLORS or GREP_COLOR (in that order). The value +of the variable should be a string of two numbers, separated by a semicolon, +except in the case of GREP_COLORS, which must start with "ms=" or "mt=" +followed by two semicolon-separated colours, terminated by the end of the +string or by a colon. If GREP_COLORS does not start with "ms=" or "mt=" it is +ignored, and GREP_COLOR is checked. +.sp +If the string obtained from one of the above variables contains any characters +other than semicolon or digits, the setting is ignored and the default colour +is used. The string is copied directly into the control string for setting +colour on a terminal, so it is your responsibility to ensure that the values +make sense. If no relevant environment variable is set, the default is "1;31", +which gives red. .TP \fB-D\fP \fIaction\fP, \fB--devices=\fP\fIaction\fP If an input path is not a regular file or a directory, "action" specifies how @@ -273,17 +296,17 @@ files; it does not apply to patterns specified by any of the \fB--include\fP or \fB--exclude\fP options. 
.TP \fB-f\fP \fIfilename\fP, \fB--file=\fP\fIfilename\fP -Read patterns from the file, one per line, and match them against -each line of input. What constitutes a newline when reading the file is the -operating system's default. The \fB--newline\fP option has no effect on this -option. Trailing white space is removed from each line, and blank lines are -ignored. An empty file contains no patterns and therefore matches nothing. See -also the comments about multiple patterns versus a single pattern with -alternatives in the description of \fB-e\fP above. +Read patterns from the file, one per line, and match them against each line of +input. What constitutes a newline when reading the file is the operating +system's default. The \fB--newline\fP option has no effect on this option. +Trailing white space is removed from each line, and blank lines are ignored. An +empty file contains no patterns and therefore matches nothing. See also the +comments about multiple patterns versus a single pattern with alternatives in +the description of \fB-e\fP above. .sp -If this option is given more than once, all the specified files are -read. A data line is output if any of the patterns match it. A file name can -be given as "-" to refer to the standard input. When \fB-f\fP is used, patterns +If this option is given more than once, all the specified files are read. A +data line is output if any of the patterns match it. A file name can be given +as "-" to refer to the standard input. When \fB-f\fP is used, patterns specified on the command line using \fB-e\fP may also be present; they are tested before the file's patterns. However, no other pattern is taken from the command line; all arguments are treated as the names of paths to be searched. @@ -432,18 +455,25 @@ of use only if it is set smaller than \fB--match-limit\fP. There are no short forms for these options. The default settings are specified when the PCRE2 library is compiled, with the default default being 10 million. 
.TP +\fB--max-buffer-size=\fInumber\fP +This limits the expansion of the processing buffer, whose initial size can be +set by \fB--buffer-size\fP. The maximum buffer size is silently forced to be no +smaller than the starting buffer size. +.TP \fB-M\fP, \fB--multiline\fP -Allow patterns to match more than one line. When this option is given, patterns -may usefully contain literal newline characters and internal occurrences of ^ -and $ characters. The output for a successful match may consist of more than -one line. The first is the line in which the match started, and the last is the -line in which the match ended. If the matched string ends with a newline -sequence the output ends at the end of that line. +Allow patterns to match more than one line. When this option is set, the PCRE2 +library is called in "multiline" mode. This allows a matched string to extend +past the end of a line and continue on one or more subsequent lines. Patterns +used with \fB-M\fP may usefully contain literal newline characters and internal +occurrences of ^ and $ characters. The output for a successful match may +consist of more than one line. The first line is the line in which the match +started, and the last line is the line in which the match ended. If the matched +string ends with a newline sequence, the output ends at the end of that line. +If \fB-v\fP is set, none of the lines in a multi-line match are output. Once a +match has been handled, scanning restarts at the beginning of the line after +the one in which the match ended. .sp -When this option is set, the PCRE2 library is called in "multiline" mode. -However, \fBpcre2grep\fP still processes the input line by line. The difference -is that a matched string may extend past the end of a line and continue on -one or more subsequent lines. The newline sequence must be matched as part of +The newline sequence that separates multiple lines must be matched as part of the pattern. 
For example, to find the phrase "regular expression" in a file where "regular" might be at the end of a line and "expression" at the start of the next line, you could use this command: @@ -455,11 +485,8 @@ and is followed by + so as to match trailing white space on the first line as well as possibly handling a two-character newline sequence. .sp There is a limit to the number of lines that can be matched, imposed by the way -that \fBpcre2grep\fP buffers the input file as it scans it. However, -\fBpcre2grep\fP ensures that at least 8K characters or the rest of the file -(whichever is the shorter) are available for forward matching, and similarly -the previous 8K characters (or all the previous characters, if fewer than 8K) -are guaranteed to be available for lookbehind assertions. The \fB-M\fP option +that \fBpcre2grep\fP buffers the input file as it scans it. With a sufficiently +large processing buffer, this should not be a problem, but the \fB-M\fP option does not work when input is read line by line (see \fP--line-buffered\fP.) .TP \fB-N\fP \fInewline-type\fP, \fB--newline\fP=\fInewline-type\fP @@ -502,12 +529,13 @@ It should never be needed in normal use. Show only the part of the line that matched a pattern instead of the whole line. In this mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP options are ignored. If there is more than one match in a line, each -of them is shown separately. If \fB-o\fP is combined with \fB-v\fP (invert the -sense of the match to find non-matching lines), no output is generated, but the -return code is set appropriately. If the matched portion of the line is empty, -nothing is output unless the file name or line number are being printed, in -which case they are shown on an otherwise empty line. This option is mutually -exclusive with \fB--file-offsets\fP and \fB--line-offsets\fP. +of them is shown separately, on a separate line of output. 
If \fB-o\fP is +combined with \fB-v\fP (invert the sense of the match to find non-matching +lines), no output is generated, but the return code is set appropriately. If +the matched portion of the line is empty, nothing is output unless the file +name or line number are being printed, in which case they are shown on an +otherwise empty line. This option is mutually exclusive with +\fB--file-offsets\fP and \fB--line-offsets\fP. .TP \fB-o\fP\fInumber\fP, \fB--only-matching\fP=\fInumber\fP Show only the part of the line that matched the capturing parentheses of the @@ -519,10 +547,11 @@ for the non-argument case above also apply to this case. If the specified capturing parentheses do not exist in the pattern, or were not set in the match, nothing is output unless the file name or line number are being output. .sp -If this option is given multiple times, multiple substrings are output, in the -order the options are given. For example, -o3 -o1 -o3 causes the substrings -matched by capturing parentheses 3 and 1 and then 3 again to be output. By -default, there is no separator (but see the next option). +If this option is given multiple times, multiple substrings are output for each +match, in the order the options are given, and all on one line. For example, +-o3 -o1 -o3 causes the substrings matched by capturing parentheses 3 and 1 and +then 3 again to be output. By default, there is no separator (but see the next +option). .TP \fB--om-separator\fP=\fItext\fP Specify a separating string for multiple occurrences of \fB-o\fP. The default @@ -547,6 +576,17 @@ Suppress error messages about non-existent or unreadable files. Such files are quietly skipped. However, the return code is still 2, even if matches were found in other files. .TP +\fB-t\fP, \fB--total-count\fP +This option is useful when scanning more than one file. 
If used on its own, +\fB-t\fP suppresses all output except for a grand total number of matching +lines (or non-matching lines if \fB-v\fP is used) in all the files. If \fB-t\fP +is used with \fB-c\fP, a grand total is output except when the previous output +is just one line. In other words, it is not output when just one file's count +is listed. If file names are being output, the grand total is preceded by +"TOTAL:". Otherwise, it appears as just another number. The \fB-t\fP option is +ignored when used with \fB-L\fP (list files without matches), because the grand +total would always be zero. +.TP \fB-u\fP, \fB--utf-8\fP Operate in UTF-8 mode. This option is available only if PCRE2 has been compiled with UTF-8 support. All patterns (including those for any \fB--exclude\fP and @@ -570,11 +610,12 @@ specified by any of the \fB--include\fP or \fB--exclude\fP options. .TP \fB-x\fP, \fB--line-regex\fP, \fB--line-regexp\fP Force the patterns to be anchored (each must start matching at the beginning of -a line) and in addition, require them to match entire lines. This is equivalent -to having ^ and $ characters at the start and end of each alternative top-level -branch in every pattern. This option applies only to the patterns that are -matched against the contents of files; it does not apply to patterns specified -by any of the \fB--include\fP or \fB--exclude\fP options. +a line) and in addition, require them to match entire lines. In multiline mode +the match may be more than one line. This is equivalent to having \eA and \eZ +characters at the start and end of each alternative top-level branch in every +pattern. This option applies only to the patterns that are matched against the +contents of files; it does not apply to patterns specified by any of the +\fB--include\fP or \fB--exclude\fP options. . . .SH "ENVIRONMENT VARIABLES" @@ -653,6 +694,58 @@ options does have data, it must be given in the first form, using an equals character. 
Otherwise \fBpcre2grep\fP will assume that it has no data. . . +.SH "CALLING EXTERNAL SCRIPTS" +.rs +.sp +\fBpcre2grep\fP has, by default, support for calling external programs or +scripts during matching by making use of PCRE2's callout facility. However, +this support can be disabled when \fBpcre2grep\fP is built. You can find out +whether your binary has support for callouts by running it with the \fB--help\fP +option. If the support is not enabled, all callouts in patterns are ignored by +\fBpcre2grep\fP. +.P +A callout in a PCRE2 pattern is of the form (?C<arg>) where the argument is +either a number or a quoted string (see the +.\" HREF +\fBpcre2callout\fP +.\" +documentation for details). Numbered callouts are ignored by \fBpcre2grep\fP. +String arguments are parsed as a list of substrings separated by pipe (vertical +bar) characters. The first substring must be an executable name, with the +following substrings specifying arguments: +.sp + executable_name|arg1|arg2|... +.sp +Any substring (including the executable name) may contain escape sequences +started by a dollar character: $<digits> or ${<digits>} is replaced by the +captured substring of the given decimal number, which must be greater than +zero. If the number is greater than the number of capturing substrings, or if +the capture is unset, the replacement is empty. +.P +Any other character is substituted by itself. In particular, $$ is replaced by +a single dollar and $| is replaced by a pipe character. Here is an example: +.sp + echo -e "abcde\en12345" | pcre2grep \e + '(?x)(.)(..(.)) + (?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' - +.sp + Output: +.sp + Arg1: [a] [bcd] [d] Arg2: |a| () + abcde + Arg1: [1] [234] [4] Arg2: |1| () + 12345 +.sp +The parameters for the \fBexecv()\fP system call that is used to run the +program or script are zero-terminated strings.
This means that binary zero +characters in the callout argument will cause premature termination of their +substrings, and therefore should not be present. Any syntax errors in the +string (for example, a dollar not followed by another character) cause the +callout to be ignored. If running the program fails for any reason (including +the non-existence of the executable), a local matching failure occurs and the +matcher backtracks in the normal way. +. +. .SH "MATCHING ERRORS" .rs .sp @@ -683,7 +776,7 @@ affect the return code. .SH "SEE ALSO" .rs .sp -\fBpcre2pattern\fP(3), \fBpcre2syntax\fP(3). +\fBpcre2pattern\fP(3), \fBpcre2syntax\fP(3), \fBpcre2callout\fP(3). . . .SH AUTHOR @@ -700,6 +793,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 03 January 2015 -Copyright (c) 1997-2015 University of Cambridge. +Last updated: 31 December 2016 +Copyright (c) 1997-2016 University of Cambridge. .fi diff --git a/pcre2/doc/pcre2grep.txt b/pcre2/doc/pcre2grep.txt index 29cd75cfa..76c9cc1f3 100644 --- a/pcre2/doc/pcre2grep.txt +++ b/pcre2/doc/pcre2grep.txt @@ -51,103 +51,115 @@ DESCRIPTION boundary is controlled by the -N (--newline) option. The amount of memory used for buffering files that are being scanned is - controlled by a parameter that can be set by the --buffer-size option. - The default value for this parameter is specified when pcre2grep is - built, with the default default being 20K. A block of memory three - times this size is used (to allow for buffering "before" and "after" - lines). An error occurs if a line overflows the buffer. + controlled by parameters that can be set by the --buffer-size and + --max-buffer-size options. The first of these sets the size of buffer + that is obtained at the start of processing. If an input file contains + very long lines, a larger buffer may be needed; this is handled by + automatically extending the buffer, up to the limit specified by --max- + buffer-size. 
The default values for these parameters are specified when + pcre2grep is built, with the default defaults being 20K and 1M respec- + tively. An error occurs if a line is too long and the buffer can no + longer be expanded. - Patterns can be no longer than 8K or BUFSIZ bytes, whichever is the - greater. BUFSIZ is defined in <stdio.h>. When there is more than one + The block of memory that is actually used is three times the "buffer + size", to allow for buffering "before" and "after" lines. If the buffer + size is too small, fewer than requested "before" and "after" lines may + be output. + + Patterns can be no longer than 8K or BUFSIZ bytes, whichever is the + greater. BUFSIZ is defined in <stdio.h>. When there is more than one pattern (specified by the use of -e and/or -f), each pattern is applied - to each line in the order in which they are defined, except that all + to each line in the order in which they are defined, except that all the -e patterns are tried before the -f patterns. - By default, as soon as one pattern matches a line, no further patterns + By default, as soon as one pattern matches a line, no further patterns are considered. However, if --colour (or --color) is used to colour the - matching substrings, or if --only-matching, --file-offsets, or --line- - offsets is used to output only the part of the line that matched + matching substrings, or if --only-matching, --file-offsets, or --line- + offsets is used to output only the part of the line that matched (either shown literally, or as an offset), scanning resumes immediately - following the match, so that further matches on the same line can be - found.
If there are multiple patterns, they are all tried on the + remainder of the line, but patterns that follow the one that matched are not tried on the earlier part of the line. - This behaviour means that the order in which multiple patterns are - specified can affect the output when one of the above options is used. - This is no longer the same behaviour as GNU grep, which now manages to - display earlier matches for later patterns (as long as there is no + This behaviour means that the order in which multiple patterns are + specified can affect the output when one of the above options is used. + This is no longer the same behaviour as GNU grep, which now manages to + display earlier matches for later patterns (as long as there is no overlap). - Patterns that can match an empty string are accepted, but empty string + Patterns that can match an empty string are accepted, but empty string matches are never recognized. An example is the pattern - "(super)?(man)?", in which all components are optional. This pattern - finds all occurrences of both "super" and "man"; the output differs - from matching with "super|man" when only the matching substrings are + "(super)?(man)?", in which all components are optional. This pattern + finds all occurrences of both "super" and "man"; the output differs + from matching with "super|man" when only the matching substrings are being shown. - If the LC_ALL or LC_CTYPE environment variable is set, pcre2grep uses + If the LC_ALL or LC_CTYPE environment variable is set, pcre2grep uses the value to set a locale when calling the PCRE2 library. The --locale option can be used to override this. SUPPORT FOR COMPRESSED FILES - It is possible to compile pcre2grep so that it uses libz or libbz2 to - read files whose names end in .gz or .bz2, respectively. You can find + It is possible to compile pcre2grep so that it uses libz or libbz2 to + read files whose names end in .gz or .bz2, respectively. 
You can find out whether your binary has support for one or both of these file types by running it with the --help option. If the appropriate support is not - present, files are treated as plain text. The standard input is always + present, files are treated as plain text. The standard input is always so treated. BINARY FILES - By default, a file that contains a binary zero byte within the first - 1024 bytes is identified as a binary file, and is processed specially. - (GNU grep also identifies binary files in this manner.) See the - --binary-files option for a means of changing the way binary files are + By default, a file that contains a binary zero byte within the first + 1024 bytes is identified as a binary file, and is processed specially. + (GNU grep also identifies binary files in this manner.) See the + --binary-files option for a means of changing the way binary files are handled. OPTIONS - The order in which some of the options appear can affect the output. - For example, both the -h and -l options affect the printing of file - names. Whichever comes later in the command line will be the one that - takes effect. Similarly, except where noted below, if an option is - given twice, the later setting is used. Numerical values for options - may be followed by K or M, to signify multiplication by 1024 or + The order in which some of the options appear can affect the output. + For example, both the -h and -l options affect the printing of file + names. Whichever comes later in the command line will be the one that + takes effect. Similarly, except where noted below, if an option is + given twice, the later setting is used. Numerical values for options + may be followed by K or M, to signify multiplication by 1024 or 1024*1024 respectively. -- This terminates the list of options. It is useful if the next - item on the command line starts with a hyphen but is not an - option. 
This allows for the processing of patterns and file + item on the command line starts with a hyphen but is not an + option. This allows for the processing of patterns and file names that start with hyphens. -A number, --after-context=number - Output number lines of context after each matching line. If - file names and/or line numbers are being output, a hyphen - separator is used instead of a colon for the context lines. A - line containing "--" is output between each group of lines, - unless they are in fact contiguous in the input file. The - value of number is expected to be relatively small. However, - pcre2grep guarantees to have up to 8K of following text - available for context output. + Output up to number lines of context after each matching + line. Fewer lines are output if the next match or the end of + the file is reached, or if the processing buffer size has + been set too small. If file names and/or line numbers are + being output, a hyphen separator is used instead of a colon + for the context lines. A line containing "--" is output + between each group of lines, unless they are in fact contigu- + ous in the input file. The value of number is expected to be + relatively small. When -c is used, -A is ignored. -a, --text Treat binary files as text. This is equivalent to --binary- files=text. -B number, --before-context=number - Output number lines of context before each matching line. If - file names and/or line numbers are being output, a hyphen - separator is used instead of a colon for the context lines. A - line containing "--" is output between each group of lines, - unless they are in fact contiguous in the input file. The - value of number is expected to be relatively small. However, - pcre2grep guarantees to have up to 8K of preceding text - available for context output. + Output up to number lines of context before each matching + line. 
Fewer lines are output if the previous match or the + start of the file is within number lines, or if the process- + ing buffer size has been set too small. If file names and/or + line numbers are being output, a hyphen separator is used + instead of a colon for the context lines. A line containing + "--" is output between each group of lines, unless they are + in fact contiguous in the input file. The value of number is + expected to be relatively small. When -c is used, -B is + ignored. --binary-files=word Specify how binary files are to be processed. If the word is @@ -164,54 +176,68 @@ OPTIONS any output or affecting the return code. --buffer-size=number - Set the parameter that controls how much memory is used for - buffering files that are being scanned. + Set the parameter that controls how much memory is obtained + at the start of processing for buffering files that are being + scanned. See also --max-buffer-size below. -C number, --context=number - Output number lines of context both before and after each - matching line. This is equivalent to setting both -A and -B + Output number lines of context both before and after each + matching line. This is equivalent to setting both -A and -B to the same value. -c, --count - Do not output lines from the files that are being scanned; - instead output the number of matches (or non-matches if -v is - used) that would otherwise have caused lines to be shown. By - default, this count is the same as the number of suppressed - lines, but if the -M (multiline) option is used (without -v), - there may be more suppressed lines than the number of - matches. + Do not output lines from the files that are being scanned; + instead output the number of lines that would have been + shown, either because they matched, or, if -v is set, because + they failed to match. 
By default, this count is exactly the + same as the number of lines that would have been output, but + if the -M (multiline) option is used (without -v), there may + be more suppressed lines than the count (that is, the number + of matches). If no lines are selected, the number zero is output. If sev- eral files are are being scanned, a count is output for each - of them. However, if the --files-with-matches option is also - used, only those files whose counts are greater than zero are - listed. When -c is used, the -A, -B, and -C options are - ignored. + of them and the -t option can be used to cause a total to be + output at the end. However, if the --files-with-matches + option is also used, only those files whose counts are + greater than zero are listed. When -c is used, the -A, -B, + and -C options are ignored. --colour, --color If this option is given without any data, it is equivalent to - "--colour=auto". If data is required, it must be given in + "--colour=auto". If data is required, it must be given in the same shell item, separated by an equals sign. --colour=value, --color=value This option specifies under what circumstances the parts of a line that matched a pattern should be coloured in the output. - By default, the output is not coloured. The value (which is - optional, see above) may be "never", "always", or "auto". In - the latter case, colouring happens only if the standard out- - put is connected to a terminal. More resources are used when + By default, the output is not coloured. The value (which is + optional, see above) may be "never", "always", or "auto". In + the latter case, colouring happens only if the standard out- + put is connected to a terminal. More resources are used when colouring is enabled, because pcre2grep has to search for all - possible matches in a line, not just one, in order to colour + possible matches in a line, not just one, in order to colour them all. 
- The colour that is used can be specified by setting the envi- - ronment variable PCRE2GREP_COLOUR or PCRE2GREP_COLOR. The - value of this variable should be a string of two numbers, - separated by a semicolon. They are copied directly into the - control string for setting colour on a terminal, so it is - your responsibility to ensure that they make sense. If nei- - ther of the environment variables is set, the default is - "1;31", which gives red. + The colour that is used can be specified by setting one of + the environment variables PCRE2GREP_COLOUR, PCRE2GREP_COLOR, + PCREGREP_COLOUR, or PCREGREP_COLOR, which are checked in that + order. If none of these are set, pcre2grep looks for + GREP_COLORS or GREP_COLOR (in that order). The value of the + variable should be a string of two numbers, separated by a + semicolon, except in the case of GREP_COLORS, which must + start with "ms=" or "mt=" followed by two semicolon-separated + colours, terminated by the end of the string or by a colon. + If GREP_COLORS does not start with "ms=" or "mt=" it is + ignored, and GREP_COLOR is checked. + + If the string obtained from one of the above variables con- + tains any characters other than semicolon or digits, the set- + ting is ignored and the default colour is used. The string is + copied directly into the control string for setting colour on + a terminal, so it is your responsibility to ensure that the + values make sense. If no relevant environment variable is + set, the default is "1;31", which gives red. -D action, --devices=action If an input path is not a regular file or a directory, @@ -299,12 +325,12 @@ OPTIONS Read patterns from the file, one per line, and match them against each line of input. What constitutes a newline when reading the file is the operating system's default. The - --newline option has no effect on this option. Trailing white - space is removed from each line, and blank lines are ignored. 
- An empty file contains no patterns and therefore matches - nothing. See also the comments about multiple patterns versus - a single pattern with alternatives in the description of -e - above. + --newline option has no effect on this option. Trailing + white space is removed from each line, and blank lines are + ignored. An empty file contains no patterns and therefore + matches nothing. See also the comments about multiple pat- + terns versus a single pattern with alternatives in the + description of -e above. If this option is given more than once, all the specified files are read. A data line is output if any of the patterns @@ -482,96 +508,101 @@ OPTIONS tings are specified when the PCRE2 library is compiled, with the default default being 10 million. - -M, --multiline - Allow patterns to match more than one line. When this option - is given, patterns may usefully contain literal newline char- - acters and internal occurrences of ^ and $ characters. The - output for a successful match may consist of more than one - line. The first is the line in which the match started, and - the last is the line in which the match ended. If the matched - string ends with a newline sequence the output ends at the - end of that line. + --max-buffer-size=number + This limits the expansion of the processing buffer, whose + initial size can be set by --buffer-size. The maximum buffer + size is silently forced to be no smaller than the starting + buffer size. - When this option is set, the PCRE2 library is called in "mul- - tiline" mode. However, pcre2grep still processes the input - line by line. The difference is that a matched string may - extend past the end of a line and continue on one or more - subsequent lines. The newline sequence must be matched as - part of the pattern. 
For example, to find the phrase "regular - expression" in a file where "regular" might be at the end of - a line and "expression" at the start of the next line, you - could use this command: + -M, --multiline + Allow patterns to match more than one line. When this option + is set, the PCRE2 library is called in "multiline" mode. This + allows a matched string to extend past the end of a line and + continue on one or more subsequent lines. Patterns used with + -M may usefully contain literal newline characters and inter- + nal occurrences of ^ and $ characters. The output for a suc- + cessful match may consist of more than one line. The first + line is the line in which the match started, and the last + line is the line in which the match ended. If the matched + string ends with a newline sequence, the output ends at the + end of that line. If -v is set, none of the lines in a + multi-line match are output. Once a match has been handled, + scanning restarts at the beginning of the line after the one + in which the match ended. + + The newline sequence that separates multiple lines must be + matched as part of the pattern. For example, to find the + phrase "regular expression" in a file where "regular" might + be at the end of a line and "expression" at the start of the + next line, you could use this command: pcre2grep -M 'regular\s+expression' - The \s escape sequence matches any white space character, - including newlines, and is followed by + so as to match - trailing white space on the first line as well as possibly + The \s escape sequence matches any white space character, + including newlines, and is followed by + so as to match + trailing white space on the first line as well as possibly handling a two-character newline sequence. - There is a limit to the number of lines that can be matched, - imposed by the way that pcre2grep buffers the input file as - it scans it. 
However, pcre2grep ensures that at least 8K - characters or the rest of the file (whichever is the shorter) - are available for forward matching, and similarly the previ- - ous 8K characters (or all the previous characters, if fewer - than 8K) are guaranteed to be available for lookbehind asser- - tions. The -M option does not work when input is read line by - line (see --line-buffered.) + There is a limit to the number of lines that can be matched, + imposed by the way that pcre2grep buffers the input file as + it scans it. With a sufficiently large processing buffer, + this should not be a problem, but the -M option does not work + when input is read line by line (see --line-buffered.) -N newline-type, --newline=newline-type - The PCRE2 library supports five different conventions for - indicating the ends of lines. They are the single-character - sequences CR (carriage return) and LF (linefeed), the two- - character sequence CRLF, an "anycrlf" convention, which rec- - ognizes any of the preceding three types, and an "any" con- + The PCRE2 library supports five different conventions for + indicating the ends of lines. They are the single-character + sequences CR (carriage return) and LF (linefeed), the two- + character sequence CRLF, an "anycrlf" convention, which rec- + ognizes any of the preceding three types, and an "any" con- vention, in which any Unicode line ending sequence is assumed - to end a line. The Unicode sequences are the three just men- - tioned, plus VT (vertical tab, U+000B), FF (form feed, - U+000C), NEL (next line, U+0085), LS (line separator, + to end a line. The Unicode sequences are the three just men- + tioned, plus VT (vertical tab, U+000B), FF (form feed, + U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS (paragraph separator, U+2029). - When the PCRE2 library is built, a default line-ending - sequence is specified. 
This is normally the standard + When the PCRE2 library is built, a default line-ending + sequence is specified. This is normally the standard sequence for the operating system. Unless otherwise specified - by this option, pcre2grep uses the library's default. The + by this option, pcre2grep uses the library's default. The possible values for this option are CR, LF, CRLF, ANYCRLF, or - ANY. This makes it possible to use pcre2grep to scan files + ANY. This makes it possible to use pcre2grep to scan files that have come from other environments without having to mod- - ify their line endings. If the data that is being scanned - does not agree with the convention set by this option, - pcre2grep may behave in strange ways. Note that this option - does not apply to files specified by the -f, --exclude-from, - or --include-from options, which are expected to use the + ify their line endings. If the data that is being scanned + does not agree with the convention set by this option, + pcre2grep may behave in strange ways. Note that this option + does not apply to files specified by the -f, --exclude-from, + or --include-from options, which are expected to use the operating system's standard newline sequence. -n, --line-number Precede each output line by its line number in the file, fol- - lowed by a colon for matching lines or a hyphen for context + lowed by a colon for matching lines or a hyphen for context lines. If the file name is also being output, it precedes the - line number. When the -M option causes a pattern to match - more than one line, only the first is preceded by its line + line number. When the -M option causes a pattern to match + more than one line, only the first is preceded by its line number. This option is forced if --line-offsets is used. 
- --no-jit If the PCRE2 library is built with support for just-in-time + --no-jit If the PCRE2 library is built with support for just-in-time compiling (which speeds up matching), pcre2grep automatically makes use of this, unless it was explicitly disabled at build - time. This option can be used to disable the use of JIT at - run time. It is provided for testing and working round prob- + time. This option can be used to disable the use of JIT at + run time. It is provided for testing and working round prob- lems. It should never be needed in normal use. -o, --only-matching Show only the part of the line that matched a pattern instead - of the whole line. In this mode, no context is shown. That - is, the -A, -B, and -C options are ignored. If there is more - than one match in a line, each of them is shown separately. - If -o is combined with -v (invert the sense of the match to - find non-matching lines), no output is generated, but the - return code is set appropriately. If the matched portion of - the line is empty, nothing is output unless the file name or - line number are being printed, in which case they are shown - on an otherwise empty line. This option is mutually exclusive - with --file-offsets and --line-offsets. + of the whole line. In this mode, no context is shown. That + is, the -A, -B, and -C options are ignored. If there is more + than one match in a line, each of them is shown separately, + on a separate line of output. If -o is combined with -v + (invert the sense of the match to find non-matching lines), + no output is generated, but the return code is set appropri- + ately. If the matched portion of the line is empty, nothing + is output unless the file name or line number are being + printed, in which case they are shown on an otherwise empty + line. This option is mutually exclusive with --file-offsets + and --line-offsets. 
-onumber, --only-matching=number Show only the part of the line that matched the capturing @@ -587,65 +618,80 @@ OPTIONS put. If this option is given multiple times, multiple substrings - are output, in the order the options are given. For example, - -o3 -o1 -o3 causes the substrings matched by capturing paren- - theses 3 and 1 and then 3 again to be output. By default, - there is no separator (but see the next option). + are output for each match, in the order the options are + given, and all on one line. For example, -o3 -o1 -o3 causes + the substrings matched by capturing parentheses 3 and 1 and + then 3 again to be output. By default, there is no separator + (but see the next option). --om-separator=text - Specify a separating string for multiple occurrences of -o. - The default is an empty string. Separating strings are never + Specify a separating string for multiple occurrences of -o. + The default is an empty string. Separating strings are never coloured. -q, --quiet Work quietly, that is, display nothing except error messages. - The exit status indicates whether or not any matches were + The exit status indicates whether or not any matches were found. -r, --recursive - If any given path is a directory, recursively scan the files - it contains, taking note of any --include and --exclude set- - tings. By default, a directory is read as a normal file; in - some operating systems this gives an immediate end-of-file. - This option is a shorthand for setting the -d option to + If any given path is a directory, recursively scan the files + it contains, taking note of any --include and --exclude set- + tings. By default, a directory is read as a normal file; in + some operating systems this gives an immediate end-of-file. + This option is a shorthand for setting the -d option to "recurse". --recursion-limit=number See --match-limit above. -s, --no-messages - Suppress error messages about non-existent or unreadable - files. Such files are quietly skipped. 
However, the return + Suppress error messages about non-existent or unreadable + files. Such files are quietly skipped. However, the return code is still 2, even if matches were found in other files. + -t, --total-count + This option is useful when scanning more than one file. If + used on its own, -t suppresses all output except for a grand + total number of matching lines (or non-matching lines if -v + is used) in all the files. If -t is used with -c, a grand + total is output except when the previous output is just one + line. In other words, it is not output when just one file's + count is listed. If file names are being output, the grand + total is preceded by "TOTAL:". Otherwise, it appears as just + another number. The -t option is ignored when used with -L + (list files without matches), because the grand total would + always be zero. + -u, --utf-8 Operate in UTF-8 mode. This option is available only if PCRE2 has been compiled with UTF-8 support. All patterns (including - those for any --exclude and --include options) and all sub- - ject lines that are scanned must be valid strings of UTF-8 + those for any --exclude and --include options) and all sub- + ject lines that are scanned must be valid strings of UTF-8 characters. -V, --version - Write the version numbers of pcre2grep and the PCRE2 library - to the standard output and then exit. Anything else on the + Write the version numbers of pcre2grep and the PCRE2 library + to the standard output and then exit. Anything else on the command line is ignored. -v, --invert-match - Invert the sense of the match, so that lines which do not + Invert the sense of the match, so that lines which do not match any of the patterns are the ones that are found. -w, --word-regex, --word-regexp Force the patterns to match only whole words. This is equiva- - lent to having \b at the start and end of the pattern. 
This - option applies only to the patterns that are matched against - the contents of files; it does not apply to patterns speci- + lent to having \b at the start and end of the pattern. This + option applies only to the patterns that are matched against + the contents of files; it does not apply to patterns speci- fied by any of the --include or --exclude options. -x, --line-regex, --line-regexp - Force the patterns to be anchored (each must start matching - at the beginning of a line) and in addition, require them to - match entire lines. This is equivalent to having ^ and $ - characters at the start and end of each alternative top-level + Force the patterns to be anchored (each must start matching + at the beginning of a line) and in addition, require them to + match entire lines. In multiline mode the match may be more + than one line. This is equivalent to having \A and \Z charac- + ters at the start and end of each alternative top-level branch in every pattern. This option applies only to the pat- terns that are matched against the contents of files; it does not apply to patterns specified by any of the --include or @@ -725,35 +771,86 @@ OPTIONS WITH DATA equals character. Otherwise pcre2grep will assume that it has no data. +CALLING EXTERNAL SCRIPTS + + pcre2grep has, by default, support for calling external programs or + scripts during matching by making use of PCRE2's callout facility. How- + ever, this support can be disabled when pcre2grep is built. You can + find out whether your binary has support for callouts by running it + with the --help option. If the support is not enabled, all callouts in + patterns are ignored by pcre2grep. + + A callout in a PCRE2 pattern is of the form (?C<arg>) where the argu- + ment is either a number or a quoted string (see the pcre2callout docu- + mentation for details). Numbered callouts are ignored by pcre2grep. + String arguments are parsed as a list of substrings separated by pipe + (vertical bar) characters. 
The first substring must be an executable + name, with the following substrings specifying arguments: + + executable_name|arg1|arg2|... + + Any substring (including the executable name) may contain escape + sequences started by a dollar character: $<digits> or ${<digits>} is + replaced by the captured substring of the given decimal number, which + must be greater than zero. If the number is greater than the number of + capturing substrings, or if the capture is unset, the replacement is + empty. + + Any other character is substituted by itself. In particular, $$ is + replaced by a single dollar and $| is replaced by a pipe character. + Here is an example: + + echo -e "abcde\n12345" | pcre2grep \ + '(?x)(.)(..(.)) + (?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' - + + Output: + + Arg1: [a] [bcd] [d] Arg2: |a| () + abcde + Arg1: [1] [234] [4] Arg2: |1| () + 12345 + + The parameters for the execv() system call that is used to run the pro- + gram or script are zero-terminated strings. This means that binary zero + characters in the callout argument will cause premature termination of + their substrings, and therefore should not be present. Any syntax + errors in the string (for example, a dollar not followed by another + character) cause the callout to be ignored. If running the program + fails for any reason (including the non-existence of the executable), a + local matching failure occurs and the matcher backtracks in the normal + way. + + MATCHING ERRORS - It is possible to supply a regular expression that takes a very long - time to fail to match certain lines. Such patterns normally involve - nested indefinite repeats, for example: (a+)*\d when matched against a - line of a's with no final digit. The PCRE2 matching function has a - resource limit that causes it to abort in these circumstances. If this - happens, pcre2grep outputs an error message and the line that caused - the problem to the standard error stream. 
If there are more than 20 + It is possible to supply a regular expression that takes a very long + time to fail to match certain lines. Such patterns normally involve + nested indefinite repeats, for example: (a+)*\d when matched against a + line of a's with no final digit. The PCRE2 matching function has a + resource limit that causes it to abort in these circumstances. If this + happens, pcre2grep outputs an error message and the line that caused + the problem to the standard error stream. If there are more than 20 such errors, pcre2grep gives up. - The --match-limit option of pcre2grep can be used to set the overall - resource limit; there is a second option called --recursion-limit that - sets a limit on the amount of memory (usually stack) that is used (see + The --match-limit option of pcre2grep can be used to set the overall + resource limit; there is a second option called --recursion-limit that + sets a limit on the amount of memory (usually stack) that is used (see the discussion of these options above). DIAGNOSTICS Exit status is 0 if any matches were found, 1 if no matches were found, - and 2 for syntax errors, overlong lines, non-existent or inaccessible - files (even if matches were found in other files) or too many matching + and 2 for syntax errors, overlong lines, non-existent or inaccessible + files (even if matches were found in other files) or too many matching errors. Using the -s option to suppress error messages about inaccessi- ble files does not affect the return code. SEE ALSO - pcre2pattern(3), pcre2syntax(3). + pcre2pattern(3), pcre2syntax(3), pcre2callout(3). AUTHOR @@ -765,5 +862,5 @@ AUTHOR REVISION - Last updated: 03 January 2015 - Copyright (c) 1997-2015 University of Cambridge. + Last updated: 31 December 2016 + Copyright (c) 1997-2016 University of Cambridge. 
diff --git a/pcre2/doc/pcre2jit.3 b/pcre2/doc/pcre2jit.3 index 3f2071ba2..0b95b4dc6 100644 --- a/pcre2/doc/pcre2jit.3 +++ b/pcre2/doc/pcre2jit.3 @@ -1,4 +1,4 @@ -.TH PCRE2JIT 3 "27 November 2014" "PCRE2 10.00" +.TH PCRE2JIT 3 "05 June 2016" "PCRE2 10.22" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "PCRE2 JUST-IN-TIME COMPILER SUPPORT" @@ -61,6 +61,12 @@ much faster than the normal interpretive code, but yields exactly the same results. The returned value from \fBpcre2_jit_compile()\fP is zero on success, or a negative error code. .P +There is a limit to the size of pattern that JIT supports, imposed by the size +of machine stack that it uses. The exact rules are not documented because they +may change at any time, in particular, when new optimizations are introduced. +If a pattern is too big, a call to \fBpcre2_jit_compile()\fP returns +PCRE2_ERROR_NOMEMORY. +.P PCRE2_JIT_COMPLETE requests the JIT compiler to generate code for complete matches. If you want to run partial matches using the PCRE2_PARTIAL_HARD or PCRE2_PARTIAL_SOFT options of \fBpcre2_match()\fP, you should set one or both @@ -122,6 +128,9 @@ PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. The PCRE2_ANCHORED option is not supported at match time. .P +If the PCRE2_NO_JIT option is passed to \fBpcre2_match()\fP it disables the +use of JIT, forcing matching by the interpreter code. +.P The only unsupported pattern items are \eC (match a single data unit) when running in a UTF mode, and a callout immediately before an assertion condition in a conditional group. @@ -207,8 +216,13 @@ for JIT matching. A callback function can therefore be used to determine whether a match operation was executed by JIT or by the interpreter. 
.P You may safely use the same JIT stack for more than one pattern (either by -assigning directly or by callback), as long as the patterns are all matched -sequentially in the same thread. In a multithread application, if you do not +assigning directly or by callback), as long as the patterns are matched +sequentially in the same thread. Currently, the only way to set up +non-sequential matches in one thread is to use callouts: if a callout function +starts another match, that match must use a different JIT stack to the one used +for currently suspended match(es). +.P +In a multithread application, if you do not specify a JIT stack, or if you assign or pass back NULL from a callback, that is thread-safe, because each thread has its own machine stack. However, if you assign or pass back a non-NULL JIT stack, this must be a different stack for @@ -366,7 +380,7 @@ The fast path function is called \fBpcre2_jit_match()\fP, and it takes exactly the same arguments as \fBpcre2_match()\fP. The return values are also the same, plus PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or complete) is requested that was not compiled. Unsupported option bits (for example, -PCRE2_ANCHORED) are ignored. +PCRE2_ANCHORED) are ignored, as is the PCRE2_NO_JIT option. .P When you call \fBpcre2_match()\fP, as well as testing for invalid options, a number of other sanity checks are performed on the arguments. For example, if @@ -399,6 +413,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 27 November 2014 -Copyright (c) 1997-2014 University of Cambridge. +Last updated: 05 June 2016 +Copyright (c) 1997-2016 University of Cambridge. 
.fi diff --git a/pcre2/doc/pcre2limits.3 b/pcre2/doc/pcre2limits.3 index 898254618..805e42af7 100644 --- a/pcre2/doc/pcre2limits.3 +++ b/pcre2/doc/pcre2limits.3 @@ -1,4 +1,4 @@ -.TH PCRE2LIMITS 3 "25 November 2014" "PCRE2 10.00" +.TH PCRE2LIMITS 3 "26 October 2016" "PCRE2 10.23" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "SIZE AND OTHER LIMITATIONS" @@ -20,6 +20,10 @@ documentation for details. In these cases the limit is substantially larger. However, the speed of execution is slower. In the 32-bit library, the internal linkage size is always 4. .P +The maximum length of a source pattern string is essentially unlimited; it is +the largest number a PCRE2_SIZE variable can hold. However, the program that +calls \fBpcre2_compile()\fP can specify a smaller limit. +.P The maximum length (in code units) of a subject string is one less than the largest number a PCRE2_SIZE variable can hold. PCRE2_SIZE is an unsigned integer type, usually defined as size_t. Its maximum value (that is @@ -37,22 +41,25 @@ documentation. .P All values in repeating quantifiers must be less than 65536. .P +The maximum length of a lookbehind assertion is 65535 characters. +.P There is no limit to the number of parenthesized subpatterns, but there can be no more than 65535 capturing subpatterns. There is, however, a limit to the depth of nesting of parenthesized subpatterns of all kinds. This is imposed in -order to limit the amount of system stack used at compile time. The limit can -be specified when PCRE2 is built; the default is 250. -.P -There is a limit to the number of forward references to subsequent subpatterns -of around 200,000. Repeated forward references with fixed upper limits, for -example, (?2){0,100} when subpattern number 2 is to the right, are included in -the count. There is no limit to the number of backward references. +order to limit the amount of system stack used at compile time. 
The default +limit can be specified when PCRE2 is built; the default default is 250. An +application can change this limit by calling pcre2_set_parens_nest_limit() to +set the limit in a compile context. .P The maximum length of name for a named subpattern is 32 code units, and the maximum number of named subpatterns is 10000. .P The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb -is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit libraries. +is 255 code units for the 8-bit library and 65535 code units for the 16-bit and +32-bit libraries. +.P +The maximum length of a string argument to a callout is the largest number a +32-bit unsigned integer can hold. . . .SH AUTHOR @@ -69,6 +76,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 25 November 2014 -Copyright (c) 1997-2014 University of Cambridge. +Last updated: 26 October 2016 +Copyright (c) 1997-2016 University of Cambridge. .fi diff --git a/pcre2/doc/pcre2pattern.3 b/pcre2/doc/pcre2pattern.3 index 192859dd3..4c869c1b7 100644 --- a/pcre2/doc/pcre2pattern.3 +++ b/pcre2/doc/pcre2pattern.3 @@ -1,4 +1,4 @@ -.TH PCRE2PATTERN 3 "13 June 2015" "PCRE2 10.20" +.TH PCRE2PATTERN 3 "27 December 2016" "PCRE2 10.23" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "PCRE2 REGULAR EXPRESSION DETAILS" @@ -158,6 +158,11 @@ be less than the value set (or defaulted) by the caller of \fBpcre2_match()\fP for it to have any effect. In other words, the pattern writer can lower the limits set by the programmer, but not raise them. If there is more than one setting of one of these limits, the lower value is used. +.P +The match limit is used (but in a different way) when JIT is being used, but it +is not relevant, and is ignored, when matching with \fBpcre2_dfa_match()\fP. +However, the recursion limit is relevant for DFA matching, which does use some +function recursion, in particular, for recursions within the pattern. . . 
.\" HTML @@ -359,29 +364,28 @@ case letter, it is converted to upper case. Then bit 6 of the character (hex 40) is inverted. Thus \ecA to \ecZ become hex 01 to hex 1A (A is 41, Z is 5A), but \ec{ becomes hex 3B ({ is 7B), and \ec; becomes hex 7B (; is 3B). If the code unit following \ec has a value less than 32 or greater than 126, a -compile-time error occurs. This locks out non-printable ASCII characters in all -modes. +compile-time error occurs. .P When PCRE2 is compiled in EBCDIC mode, \ea, \ee, \ef, \en, \er, and \et generate the appropriate EBCDIC code values. The \ec escape is processed as specified for Perl in the \fBperlebcdic\fP document. The only characters that are allowed after \ec are A-Z, a-z, or one of @, [, \e, ], ^, _, or ?. Any -other character provokes a compile-time error. The sequence \e@ encodes -character code 0; the letters (in either case) encode characters 1-26 (hex 01 -to hex 1A); [, \e, ], ^, and _ encode characters 27-31 (hex 1B to hex 1F), and -\e? becomes either 255 (hex FF) or 95 (hex 5F). +other character provokes a compile-time error. The sequence \ec@ encodes +character code 0; after \ec the letters (in either case) encode characters 1-26 +(hex 01 to hex 1A); [, \e, ], ^, and _ encode characters 27-31 (hex 1B to hex +1F), and \ec? becomes either 255 (hex FF) or 95 (hex 5F). .P -Thus, apart from \e?, these escapes generate the same character code values as +Thus, apart from \ec?, these escapes generate the same character code values as they do in an ASCII environment, though the meanings of the values mostly -differ. For example, \eG always generates code value 7, which is BEL in ASCII +differ. For example, \ecG always generates code value 7, which is BEL in ASCII but DEL in EBCDIC. .P -The sequence \e? generates DEL (127, hex 7F) in an ASCII environment, but +The sequence \ec? generates DEL (127, hex 7F) in an ASCII environment, but because 127 is not a control character in EBCDIC, Perl makes it generate the APC character. 
Unfortunately, there are several variants of EBCDIC. In most of them the APC character has the value 255 (hex FF), but in the one Perl calls POSIX-BC its value is 95 (hex 5F). If certain other characters have POSIX-BC -values, PCRE2 makes \e? generate 95; otherwise it generates 255. +values, PCRE2 makes \ec? generate 95; otherwise it generates 255. .P After \e0 up to two further octal digits are read. If there are fewer than two digits, just those that are present are used. Thus the sequence \e0\ex\e015 @@ -508,9 +512,9 @@ by code point, as described in the previous section. .SS "Absolute and relative back references" .rs .sp -The sequence \eg followed by an unsigned or a negative number, optionally -enclosed in braces, is an absolute or relative back reference. A named back -reference can be coded as \eg{name}. Back references are discussed +The sequence \eg followed by a signed or unsigned number, optionally enclosed +in braces, is an absolute or relative back reference. A named back reference +can be coded as \eg{name}. Back references are discussed .\" HTML .\" later, @@ -671,8 +675,8 @@ below. This particular group matches either the two-character sequence CR followed by LF, or one of the single characters LF (linefeed, U+000A), VT (vertical tab, U+000B), FF (form feed, U+000C), CR (carriage return, U+000D), or NEL (next -line, U+0085). The two-character sequence is treated as a single unit that -cannot be split. +line, U+0085). Because this is an atomic group, the two-character sequence is +treated as a single unit that cannot be split. .P In other modes, two additional characters whose codepoints are greater than 255 are added: LS (line separator, U+2028) and PS (paragraph separator, U+2029). @@ -738,6 +742,8 @@ example: Those that are not part of an identified script are lumped together as "Common". 
The current list of scripts is: .P +Ahom, +Anatolian_Hieroglyphs, Arabic, Armenian, Avestan, @@ -778,6 +784,7 @@ Gurmukhi, Han, Hangul, Hanunoo, +Hatran, Hebrew, Hiragana, Imperial_Aramaic, @@ -814,12 +821,14 @@ Miao, Modi, Mongolian, Mro, +Multani, Myanmar, Nabataean, New_Tai_Lue, Nko, Ogham, Ol_Chiki, +Old_Hungarian, Old_Italic, Old_North_Arabian, Old_Permic, @@ -841,6 +850,7 @@ Saurashtra, Sharada, Shavian, Siddham, +SignWriting, Sinhala, Sora_Sompeng, Sundanese, @@ -1177,6 +1187,18 @@ patterns that are anchored in single line mode because all branches start with when the \fIstartoffset\fP argument of \fBpcre2_match()\fP is non-zero. The PCRE2_DOLLAR_ENDONLY option is ignored if PCRE2_MULTILINE is set. .P +When the newline convention (see +.\" HTML +.\" +"Newline conventions" +.\" +below) recognizes the two-character sequence CRLF as a newline, this is +preferred, even if the single characters CR and LF are also recognized as +newlines. For example, if the newline convention is "any", a multiline mode +circumflex matches before "xyz" in the string "abc\er\enxyz" rather than after +CR, even though CR on its own is a valid newline. (It also matches at the very +start of the string, of course.) +.P Note that the sequences \eA, \eZ, and \ez can be used to match the start and end of the subject in both modes, and if all branches of a pattern start with \eA it is always anchored, whether or not PCRE2_MULTILINE is set. @@ -1227,21 +1249,31 @@ with \eC in UTF-8 or UTF-16 mode means that the rest of the string may start with a malformed UTF character. This has undefined results, because PCRE2 assumes that it is matching character by character in a valid UTF string (by default it checks the subject string's validity at the start of processing -unless the PCRE2_NO_UTF_CHECK option is used). An application can lock out the -use of \eC by setting the PCRE2_NEVER_BACKSLASH_C option. +unless the PCRE2_NO_UTF_CHECK option is used). 
+.P +An application can lock out the use of \eC by setting the +PCRE2_NEVER_BACKSLASH_C option when compiling a pattern. It is also possible to +build PCRE2 with the use of \eC permanently disabled. .P PCRE2 does not allow \eC to appear in lookbehind assertions .\" HTML .\" (described below) .\" -in a UTF mode, because this would make it impossible to calculate the length of -the lookbehind. +in UTF-8 or UTF-16 modes, because this would make it impossible to calculate +the length of the lookbehind. Neither the alternative matching function +\fBpcre2_dfa_match()\fP nor the JIT optimizer support \eC in these UTF modes. +The former gives a match-time error; the latter fails to optimize and so the +match is always run using the interpreter. +.P +In the 32-bit library, however, \eC is always supported (when not explicitly +locked out) because it always matches a single code unit, whether or not UTF-32 +is specified. .P In general, the \eC escape sequence is best avoided. However, one way of using -it that avoids the problem of malformed UTF characters is to use a lookahead to -check the length of the next character, as in this pattern, which could be used -with a UTF-8 string (ignore white space and line breaks): +it that avoids the problem of malformed UTF-8 or UTF-16 characters is to use a +lookahead to check the length of the next character, as in this pattern, which +could be used with a UTF-8 string (ignore white space and line breaks): .sp (?| (?=[\ex00-\ex7f])(\eC) | (?=[\ex80-\ex{7ff}])(\eC)(\eC) | @@ -1297,37 +1329,6 @@ when matching character classes, whatever line-ending sequence is in use, and whatever setting of the PCRE2_DOTALL and PCRE2_MULTILINE options is used. A class such as [^a] always matches one of these characters. .P -The minus (hyphen) character can be used to specify a range of characters in a -character class. For example, [d-m] matches any letter between d and m, -inclusive. 
If a minus character is required in a class, it must be escaped with -a backslash or appear in a position where it cannot be interpreted as -indicating a range, typically as the first or last character in the class, or -immediately after a range. For example, [b-d-z] matches letters in the range b -to d, a hyphen character, or z. -.P -It is not possible to have the literal character "]" as the end character of a -range. A pattern such as [W-]46] is interpreted as a class of two characters -("W" and "-") followed by a literal string "46]", so it would match "W46]" or -"-46]". However, if the "]" is escaped with a backslash it is interpreted as -the end of range, so [W-\e]46] is interpreted as a class containing a range -followed by two other characters. The octal or hexadecimal representation of -"]" can also be used to end a range. -.P -An error is generated if a POSIX character class (see below) or an escape -sequence other than one that defines a single character appears at a point -where a range ending character is expected. For example, [z-\exff] is valid, -but [A-\ed] and [A-[:digit:]] are not. -.P -Ranges operate in the collating sequence of character values. They can also be -used for characters specified numerically, for example [\e000-\e037]. Ranges -can include any characters that are valid for the current mode. -.P -If a range that includes letters is used when caseless matching is set, it -matches the letters in either case. For example, [W-c] is equivalent to -[][\e\e^_`wxyzabc], matched caselessly, and in a non-UTF mode, if character -tables for a French locale are in use, [\exc8-\excb] matches accented E -characters in both cases. -.P The character escape sequences \ed, \eD, \eh, \eH, \ep, \eP, \es, \eS, \ev, \eV, \ew, and \eW may appear in a character class, and add the characters that they match to the class. For example, [\edABCDEF] matches any hexadecimal @@ -1343,6 +1344,46 @@ class; it matches the backspace character. 
The sequences \eB, \eN, \eR, and \eX are not special inside a character class. Like any other unrecognized escape sequences, they cause an error. .P +The minus (hyphen) character can be used to specify a range of characters in a +character class. For example, [d-m] matches any letter between d and m, +inclusive. If a minus character is required in a class, it must be escaped with +a backslash or appear in a position where it cannot be interpreted as +indicating a range, typically as the first or last character in the class, +or immediately after a range. For example, [b-d-z] matches letters in the range +b to d, a hyphen character, or z. +.P +Perl treats a hyphen as a literal if it appears before or after a POSIX class +(see below) or a character type escape such as \ed, but gives a warning in +its warning mode, as this is most likely a user error. As PCRE2 has no facility +for warning, an error is given in these cases. +.P +It is not possible to have the literal character "]" as the end character of a +range. A pattern such as [W-]46] is interpreted as a class of two characters +("W" and "-") followed by a literal string "46]", so it would match "W46]" or +"-46]". However, if the "]" is escaped with a backslash it is interpreted as +the end of range, so [W-\e]46] is interpreted as a class containing a range +followed by two other characters. The octal or hexadecimal representation of +"]" can also be used to end a range. +.P +Ranges normally include all code points between the start and end characters, +inclusive. They can also be used for code points specified numerically, for +example [\e000-\e037]. Ranges can include any characters that are valid for the +current mode. +.P +There is a special case in EBCDIC environments for ranges whose end points are +both specified as literal letters in the same case. For compatibility with +Perl, EBCDIC code points within the range that are not letters are omitted.
For +example, [h-k] matches only four characters, even though the codes for h and k +are 0x88 and 0x92, a range of 11 code points. However, if the range is +specified numerically, for example, [\ex88-\ex92] or [h-\ex92], all code points +are included. +.P +If a range that includes letters is used when caseless matching is set, it +matches the letters in either case. For example, [W-c] is equivalent to +[][\e\e^_`wxyzabc], matched caselessly, and in a non-UTF mode, if character +tables for a French locale are in use, [\exc8-\excb] matches accented E +characters in both cases. +.P A circumflex can conveniently be used with the upper case character types to specify a more restricted set of characters than the matching lower case type. For example, the class [^\eW_] matches any letter or digit, but not underscore, @@ -1514,12 +1555,8 @@ respectively. .P When one of these option changes occurs at top level (that is, not inside subpattern parentheses), the change applies to the remainder of the pattern -that follows. If the change is placed right at the start of a pattern, PCRE2 -extracts it into the global options (and it will therefore show up in data -extracted by the \fBpcre2_pattern_info()\fP function). -.P -An option change within a subpattern (see below for a description of -subpatterns) affects only that part of the subpattern that follows it, so +that follows. An option change within a subpattern (see below for a description +of subpatterns) affects only that part of the subpattern that follows it, so .sp (a(?i)b)c .sp @@ -1650,6 +1687,9 @@ first one in the pattern with the given number. The following pattern matches .sp /(?|(abc)|(def))(?1)/ .sp +A relative reference such as (?-1) is no different: it is just a convenient way +of computing an absolute group number. +.P If a .\" HTML .\" @@ -2056,9 +2096,9 @@ no such problem when named parentheses are used. A back reference to any subpattern is possible using named parentheses (see below). 
.P Another way of avoiding the ambiguity inherent in the use of digits following a -backslash is to use the \eg escape sequence. This escape must be followed by an -unsigned number or a negative number, optionally enclosed in braces. These -examples are all identical: +backslash is to use the \eg escape sequence. This escape must be followed by a +signed or unsigned number, optionally enclosed in braces. These examples are +all identical: .sp (ring), \e1 (ring), \eg1 @@ -2066,8 +2106,7 @@ examples are all identical: .sp An unsigned number specifies an absolute reference without the ambiguity that is present in the older syntax. It is also useful when literal digits follow -the reference. A negative number is a relative reference. Consider this -example: +the reference. A signed number is a relative reference. Consider this example: .sp (abc(def)ghi)\eg{-1} .sp @@ -2077,6 +2116,10 @@ Similarly, \eg{-2} would be equivalent to \e1. The use of relative references can be helpful in long patterns, and also in patterns that are created by joining together fragments that contain references within themselves. .P +The sequence \eg{+1} is a reference to the next capturing subpattern. This kind +of forward reference can be useful in patterns that repeat. Perl does not +support the use of + in this way. +.P A back reference matches whatever actually matched the capturing subpattern in the current subject string, rather than anything matching the subpattern itself (see @@ -2184,6 +2227,13 @@ numbering the capturing subpatterns in the whole pattern. However, substring capturing is carried out only for positive assertions. (Perl sometimes, but not always, does do capturing in negative assertions.) .P +WARNING: If a positive assertion containing one or more capturing subpatterns +succeeds, but failure to match later in the pattern causes backtracking over +this assertion, the captures within the assertion are reset only if no higher +numbered captures are already set.
This is, unfortunately, a fundamental +limitation of the current implementation; it may get removed in a future +reworking. +.P For compatibility with Perl, most assertion subpatterns may be repeated; though it makes no sense to assert the same thing several times, the side effect of capturing parentheses may occasionally be useful. However, an assertion that @@ -2281,23 +2331,34 @@ temporarily move the current position back by the fixed length and then try to match. If there are insufficient characters before the current position, the assertion fails. .P -In a UTF mode, PCRE2 does not allow the \eC escape (which matches a single code -unit even in a UTF mode) to appear in lookbehind assertions, because it makes -it impossible to calculate the length of the lookbehind. The \eX and \eR -escapes, which can match different numbers of code units, are also not -permitted. +In UTF-8 and UTF-16 modes, PCRE2 does not allow the \eC escape (which matches a +single code unit even in a UTF mode) to appear in lookbehind assertions, +because it makes it impossible to calculate the length of the lookbehind. The +\eX and \eR escapes, which can match different numbers of code units, are never +permitted in lookbehinds. .P .\" HTML .\" "Subroutine" .\" calls (see below) such as (?2) or (?&X) are permitted in lookbehinds, as long -as the subpattern matches a fixed-length string. +as the subpattern matches a fixed-length string. However, .\" HTML .\" -Recursion, +recursion, .\" -however, is not supported. +that is, a "subroutine" call into a group that is already active, +is not supported. +.P +Perl does not support back references in lookbehinds. PCRE2 does support them, +but only if certain conditions are met. The PCRE2_MATCH_UNSET_BACKREF option +must not be set, there must be no use of (?| in the pattern (it creates +duplicate subpattern numbers), and if the back reference is by name, the name +must be unique. Of course, the referenced subpattern must itself be of fixed +length. 
The following pattern matches words containing at least two characters +that begin and end with the same character: +.sp + \eb(\ew)\ew++(?<=\e1) .P Possessive quantifiers can be used in conjunction with lookbehind assertions to specify efficient matching of fixed-length strings at the end of subject @@ -2436,7 +2497,9 @@ This makes the fragment independent of the parentheses in the larger pattern. .sp Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a used subpattern by name. For compatibility with earlier versions of PCRE1, which had -this facility before Perl, the syntax (?(name)...) is also recognized. +this facility before Perl, the syntax (?(name)...) is also recognized. Note, +however, that undelimited names consisting of the letter R followed by digits +are ambiguous (see the following section). .P Rewriting the above example to use a named subpattern gives this: .sp @@ -2450,33 +2513,55 @@ matched. .SS "Checking for pattern recursion" .rs .sp -If the condition is the string (R), and there is no subpattern with the name R, -the condition is true if a recursive call to the whole pattern or any -subpattern has been made. If digits or a name preceded by ampersand follow the -letter R, for example: -.sp - (?(R3)...) or (?(R&name)...) -.sp -the condition is true if the most recent recursion is into a subpattern whose -number or name is given. This condition does not check the entire recursion -stack. If the name used in a condition of this kind is a duplicate, the test is -applied to all subpatterns of the same name, and is true if any one of them is -the most recent recursion. -.P -At "top level", all these recursion test conditions are false. +"Recursion" in this sense refers to any subroutine-like call from one part of +the pattern to another, whether or not it is actually recursive. See the +sections entitled .\" HTML .\" -The syntax for recursive patterns +"Recursive patterns" .\" -is described below.
+and +.\" HTML +.\" +"Subpatterns as subroutines" +.\" +below for details of recursion and subpattern calls. +.P +If a condition is the string (R), and there is no subpattern with the name R, +the condition is true if matching is currently in a recursion or subroutine +call to the whole pattern or any subpattern. If digits follow the letter R, and +there is no subpattern with that name, the condition is true if the most recent +call is into a subpattern with the given number, which must exist somewhere in +the overall pattern. This is a contrived example that is equivalent to a+b: +.sp + ((?(R1)a+|(?1)b)) +.sp +However, in both cases, if there is a subpattern with a matching name, the +condition tests for its being set, as described in the section above, instead +of testing for recursion. For example, creating a group with the name R1 by +adding (?<R1>) to the above pattern completely changes its meaning. +.P +If a name preceded by ampersand follows the letter R, for example: +.sp + (?(R&name)...) +.sp +the condition is true if the most recent recursion is into a subpattern of that +name (which must exist within the pattern). +.P +This condition does not check the entire recursion stack. It tests only the +current level. If the name used in a condition of this kind is a duplicate, the +test is applied to all subpatterns of the same name, and is true if any one of +them is the most recent recursion. +.P +At "top level", all these recursion test conditions are false. . . .\" HTML .SS "Defining subpatterns for use by reference only" .rs .sp -If the condition is the string (DEFINE), and there is no subpattern with the -name DEFINE, the condition is always false. In this case, there may be only one +If the condition is the string (DEFINE), the condition is always false, even if +there is a group with the name DEFINE. In this case, there may be only one alternative in the subpattern.
It is always skipped if control reaches this point in the pattern; the idea of DEFINE is that it can be used to define subroutines that can be referenced from elsewhere. (The use of @@ -2513,7 +2598,8 @@ For example: (?(VERSION>=10.4)yes|no) .sp This pattern matches "yes" if the PCRE2 version is greater or equal to 10.4, or -"no" otherwise. +"no" otherwise. The fractional part of the version number may not contain more +than two digits. . . .SS "Assertion conditions" @@ -2630,6 +2716,23 @@ pattern above you can write (?-2) to refer to the second most recently opened parentheses preceding the recursion. In other words, a negative number counts capturing parentheses leftwards from the point at which it is encountered. .P +Be aware however, that if +.\" HTML +.\" +duplicate subpattern numbers +.\" +are in use, relative references refer to the earliest subpattern with the +appropriate number. Consider, for example: +.sp + (?|(a)|(b)) (c) (?-2) +.sp +The first two capturing groups (a) and (b) are both numbered 1, and group (c) +is number 2. When the reference (?-2) is encountered, the second most recently +opened parentheses has the number 1, but it is the first such group (the (a) +group) to which the recursion refers. This would be the same if an absolute +reference (?1) was used. In other words, relative references are just a +shorthand for computing a group number. +.P It is also possible to refer to subsequently opened parentheses, by writing references such as (?+2). However, these cannot be recursive because the reference is not inside the parentheses that are referenced. They are always @@ -2929,14 +3032,32 @@ in production code should be noted to avoid problems during upgrades." The same remarks apply to the PCRE2 features described in this section. .P The new verbs make use of what was previously invalid syntax: an opening -parenthesis followed by an asterisk. They are generally of the form -(*VERB) or (*VERB:NAME). 
Some may take either form, possibly behaving -differently depending on whether or not a name is present. A name is any -sequence of characters that does not include a closing parenthesis. The maximum -length of name is 255 in the 8-bit library and 65535 in the 16-bit and 32-bit -libraries. If the name is empty, that is, if the closing parenthesis -immediately follows the colon, the effect is as if the colon were not there. -Any number of these verbs may occur in a pattern. +parenthesis followed by an asterisk. They are generally of the form (*VERB) or +(*VERB:NAME). Some verbs take either form, possibly behaving differently +depending on whether or not a name is present. +.P +By default, for compatibility with Perl, a name is any sequence of characters +that does not include a closing parenthesis. The name is not processed in +any way, and it is not possible to include a closing parenthesis in the name. +This can be changed by setting the PCRE2_ALT_VERBNAMES option, but the result +is no longer Perl-compatible. +.P +When PCRE2_ALT_VERBNAMES is set, backslash processing is applied to verb names +and only an unescaped closing parenthesis terminates the name. However, the +only backslash items that are permitted are \eQ, \eE, and sequences such as +\ex{100} that define character code points. Character type escapes such as \ed +are faulted. +.P +A closing parenthesis can be included in a name either as \e) or between \eQ +and \eE. In addition to backslash processing, if the PCRE2_EXTENDED option is +also set, unescaped whitespace in verb names is skipped, and #-comments are +recognized, exactly as in the rest of the pattern. PCRE2_EXTENDED does not +affect verb names unless PCRE2_ALT_VERBNAMES is also set. +.P +The maximum length of a name is 255 in the 8-bit library and 65535 in the +16-bit and 32-bit libraries. If the name is empty, that is, if the closing +parenthesis immediately follows the colon, the effect is as if the colon were +not there. 
Any number of these verbs may occur in a pattern. .P Since these verbs are specifically related to backtracking, most of them can be used only when the pattern is to be matched using the traditional matching @@ -3361,6 +3482,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 13 June 2015 -Copyright (c) 1997-2015 University of Cambridge. +Last updated: 27 December 2016 +Copyright (c) 1997-2016 University of Cambridge. .fi diff --git a/pcre2/doc/pcre2posix.3 b/pcre2/doc/pcre2posix.3 index b6669752f..70a86d81d 100644 --- a/pcre2/doc/pcre2posix.3 +++ b/pcre2/doc/pcre2posix.3 @@ -1,4 +1,4 @@ -.TH PCRE2POSIX 3 "20 October 2014" "PCRE2 10.00" +.TH PCRE2POSIX 3 "31 January 2016" "PCRE2 10.22" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "SYNOPSIS" @@ -28,7 +28,7 @@ expression 8-bit library. See the \fBpcre2api\fP .\" documentation for a description of PCRE2's native API, which contains much -additional functionality. There is no POSIX-style wrapper for PCRE2's 16-bit +additional functionality. There are no POSIX-style wrappers for PCRE2's 16-bit and 32-bit libraries. .P The functions described here are just wrapper functions that ultimately call @@ -44,9 +44,9 @@ value zero. This has no effect, but since programs that are written to the POSIX interface often use it, this makes it easier to slot in PCRE2 as a replacement library. Other POSIX options are not even defined. .P -There are also some other options that are not defined by POSIX. These have -been added at the request of users who want to make use of certain -PCRE2-specific features via the POSIX calling interface. +There are also some options that are not defined by POSIX. These have been +added at the request of users who want to make use of certain PCRE2-specific +features via the POSIX calling interface. .P When PCRE2 is called via these functions, it is only the API that is POSIX-like in style. 
The syntax and semantics of the regular expressions themselves are @@ -95,11 +95,11 @@ defined POSIX behaviour for REG_NEWLINE (see the following section). .sp REG_NOSUB .sp -The PCRE2_NO_AUTO_CAPTURE option is set when the regular expression is passed -for compilation to the native function. In addition, when a pattern that is -compiled with this flag is passed to \fBregexec()\fP for matching, the -\fInmatch\fP and \fIpmatch\fP arguments are ignored, and no captured strings -are returned. +When a pattern that is compiled with this flag is passed to \fBregexec()\fP for +matching, the \fInmatch\fP and \fIpmatch\fP arguments are ignored, and no +captured strings are returned. Versions of the PCRE library prior to 10.22 used +to set the PCRE2_NO_AUTO_CAPTURE compile option, but this no longer happens +because it disables the use of back references. .sp REG_UCP .sp @@ -145,7 +145,7 @@ use the contents of the \fIpreg\fP structure. If, for example, you pass it to This area is not simple, because POSIX and Perl take different views of things. It is not possible to get PCRE2 to obey POSIX semantics, but then PCRE2 was never intended to be a POSIX engine. The following table lists the different -possibilities for matching newline characters in PCRE2: +possibilities for matching newline characters in Perl and PCRE2: .sp Default Change with .sp @@ -155,7 +155,7 @@ possibilities for matching newline characters in PCRE2: $ matches \en in middle no PCRE2_MULTILINE ^ matches \en in middle no PCRE2_MULTILINE .sp -This is the equivalent table for POSIX: +This is the equivalent table for a POSIX-compatible pattern matcher: .sp Default Change with .sp @@ -165,13 +165,17 @@ This is the equivalent table for POSIX: $ matches \en in middle no REG_NEWLINE ^ matches \en in middle no REG_NEWLINE .sp -PCRE2's behaviour is the same as Perl's, except that there is no equivalent for -PCRE2_DOLLAR_ENDONLY in Perl. In both PCRE2 and Perl, there is no way to stop -newline from matching [^a]. 
+This behaviour is not what happens when PCRE2 is called via its POSIX +API. By default, PCRE2's behaviour is the same as Perl's, except that there is +no equivalent for PCRE2_DOLLAR_ENDONLY in Perl. In both PCRE2 and Perl, there +is no way to stop newline from matching [^a]. .P -The default POSIX newline handling can be obtained by setting PCRE2_DOTALL and -PCRE2_DOLLAR_ENDONLY, but there is no way to make PCRE2 behave exactly as for -the REG_NEWLINE action. +Default POSIX newline handling can be obtained by setting PCRE2_DOTALL and +PCRE2_DOLLAR_ENDONLY when calling \fBpcre2_compile()\fP directly, but there is +no way to make PCRE2 behave exactly as for the REG_NEWLINE action. When using +the POSIX API, passing REG_NEWLINE to PCRE2's \fBregcomp()\fP function +causes PCRE2_MULTILINE to be passed to \fBpcre2_compile()\fP, and REG_DOTALL +passes PCRE2_DOTALL. There is no way to pass PCRE2_DOLLAR_ENDONLY. . . .SH "MATCHING A PATTERN" @@ -207,16 +211,18 @@ to have a terminating NUL located at \fIstring\fP + \fIpmatch[0].rm_eo\fP IEEE Standard 1003.2 (POSIX.2), and should be used with caution in software intended to be portable to other systems. Note that a non-zero \fIrm_so\fP does not imply REG_NOTBOL; REG_STARTEND affects only the location of the string, not -how it is matched. +how it is matched. Setting REG_STARTEND and passing \fIpmatch\fP as NULL are +mutually exclusive; the error REG_INVARG is returned. .P If the pattern was compiled with the REG_NOSUB flag, no data about any matched strings is returned. The \fInmatch\fP and \fIpmatch\fP arguments of -\fBregexec()\fP are ignored. +\fBregexec()\fP are ignored (except possibly as input for REG_STARTEND). .P -If the value of \fInmatch\fP is zero, or if the value \fIpmatch\fP is NULL, -no data about any matched strings is returned. +The value of \fInmatch\fP may be zero, and the value \fIpmatch\fP may be NULL +(unless REG_STARTEND is set); in both these cases no data about any matched +strings is returned. 
.P -Otherwise,the portion of the string that was matched, and also any captured +Otherwise, the portion of the string that was matched, and also any captured substrings, are returned via the \fIpmatch\fP argument, which points to an array of \fInmatch\fP structures of type \fIregmatch_t\fP, containing the members \fIrm_so\fP and \fIrm_eo\fP. These contain the byte offset to the first @@ -236,9 +242,11 @@ header file, of which REG_NOMATCH is the "expected" failure code. The \fBregerror()\fP function maps a non-zero errorcode from either \fBregcomp()\fP or \fBregexec()\fP to a printable message. If \fIpreg\fP is not NULL, the error should have arisen from the use of that structure. A message -terminated by a binary zero is placed in \fIerrbuf\fP. The length of the -message, including the zero, is limited to \fIerrbuf_size\fP. The yield of the -function is the size of buffer needed to hold the whole message. +terminated by a binary zero is placed in \fIerrbuf\fP. If the buffer is too +short, only the first \fIerrbuf_size\fP - 1 characters of the error message are +used. The yield of the function is the size of buffer needed to hold the whole +message, including the terminating zero. This value is greater than +\fIerrbuf_size\fP if the message was truncated. . . .SH MEMORY USAGE @@ -263,6 +271,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 20 October 2014 -Copyright (c) 1997-2014 University of Cambridge. +Last updated: 31 January 2016 +Copyright (c) 1997-2016 University of Cambridge. .fi diff --git a/pcre2/doc/pcre2sample.3 b/pcre2/doc/pcre2sample.3 index 7b469356b..661e39274 100644 --- a/pcre2/doc/pcre2sample.3 +++ b/pcre2/doc/pcre2sample.3 @@ -1,4 +1,4 @@ -.TH PCRE2SAMPLE 3 "20 October 2014" "PCRE2 10.00" +.TH PCRE2SAMPLE 3 "02 February 2016" "PCRE2 10.22" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "PCRE2 SAMPLE PROGRAM" @@ -13,23 +13,28 @@ distribution. A listing of this program is given in the documentation. 
If you do not have a copy of the PCRE2 distribution, you can save this listing to re-create the contents of \fIpcre2demo.c\fP. .P -The demonstration program, which uses the PCRE2 8-bit library, compiles the -regular expression that is its first argument, and matches it against the -subject string in its second argument. No PCRE2 options are set, and default -character tables are used. If matching succeeds, the program outputs the -portion of the subject that matched, together with the contents of any captured -substrings. +The demonstration program compiles the regular expression that is its +first argument, and matches it against the subject string in its second +argument. No PCRE2 options are set, and default character tables are used. If +matching succeeds, the program outputs the portion of the subject that matched, +together with the contents of any captured substrings. .P If the -g option is given on the command line, the program then goes on to check for further matches of the same regular expression in the same subject string. The logic is a little bit tricky because of the possibility of matching an empty string. Comments in the code explain what is going on. .P +The code in \fBpcre2demo.c\fP is an 8-bit program that uses the PCRE2 8-bit +library. It handles strings and characters that are stored in 8-bit code units. +By default, one character corresponds to one code unit, but if the pattern +starts with "(*UTF)", both it and the subject are treated as UTF-8 strings, +where characters may occupy multiple code units. +.P If PCRE2 is installed in the standard include and library directories for your operating system, you should be able to compile the demonstration program using -this command: +a command like this: .sp - gcc -o pcre2demo pcre2demo.c -lpcre2-8 + cc -o pcre2demo pcre2demo.c -lpcre2-8 .sp If PCRE2 is installed elsewhere, you may need to add additional options to the command line. 
For example, on a Unix-like system that has PCRE2 installed in @@ -37,12 +42,11 @@ command line. For example, on a Unix-like system that has PCRE2 installed in like this: .sp .\" JOINSH - gcc -o pcre2demo -I/usr/local/include pcre2demo.c \e - -L/usr/local/lib -lpcre2-8 + cc -o pcre2demo -I/usr/local/include pcre2demo.c \e + -L/usr/local/lib -lpcre2-8 .sp -.P -Once you have compiled and linked the demonstration program, you can run simple -tests like this: +Once you have built the demonstration program, you can run simple tests like +this: .sp ./pcre2demo 'cat|dog' 'the cat sat on the mat' ./pcre2demo -g 'cat|dog' 'the dog sat on the cat' @@ -51,12 +55,13 @@ Note that there is a much more comprehensive test program, called .\" HREF \fBpcre2test\fP, .\" -which supports many more facilities for testing regular expressions using the -PCRE2 libraries. The +which supports many more facilities for testing regular expressions using all +three PCRE2 libraries (8-bit, 16-bit, and 32-bit, though not all three need be +installed). The .\" HREF \fBpcre2demo\fP .\" -program is provided as a simple coding example. +program is provided as a relatively simple coding example. .P If you try to run .\" HREF @@ -65,7 +70,7 @@ If you try to run when PCRE2 is not installed in the standard library directory, you may get an error like this on some operating systems (e.g. Solaris): .sp - ld.so.1: a.out: fatal: libpcre2.so.0: open failed: No such file or directory + ld.so.1: pcre2demo: fatal: libpcre2-8.so.0: open failed: No such file or directory .sp This is caused by the way shared library support works on those systems. You need to add @@ -89,6 +94,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 20 October 2014 -Copyright (c) 1997-2014 University of Cambridge. +Last updated: 02 February 2016 +Copyright (c) 1997-2016 University of Cambridge. 
.fi diff --git a/pcre2/doc/pcre2serialize.3 b/pcre2/doc/pcre2serialize.3 index a76272b70..664c1db56 100644 --- a/pcre2/doc/pcre2serialize.3 +++ b/pcre2/doc/pcre2serialize.3 @@ -1,4 +1,4 @@ -.TH PCRE2SERIALIZE 3 "20 January 2015" "PCRE2 10.10" +.TH PCRE2SERIALIZE 3 "24 May 2016" "PCRE2 10.22" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "SAVING AND RE-USING PRECOMPILED PCRE2 PATTERNS" @@ -22,12 +22,22 @@ If you are running an application that uses a large number of regular expression patterns, it may be useful to store them in a precompiled form instead of having to compile them every time the application is run. However, if you are using the just-in-time optimization feature, it is not possible to -save and reload the JIT data, because it is position-dependent. In addition, -the host on which the patterns are reloaded must be running the same version of -PCRE2, with the same code unit width, and must also have the same endianness, -pointer width and PCRE2_SIZE type. For example, patterns compiled on a 32-bit -system using PCRE2's 16-bit library cannot be reloaded on a 64-bit system, nor -can they be reloaded using the 8-bit library. +save and reload the JIT data, because it is position-dependent. The host on +which the patterns are reloaded must be running the same version of PCRE2, with +the same code unit width, and must also have the same endianness, pointer width +and PCRE2_SIZE type. For example, patterns compiled on a 32-bit system using +PCRE2's 16-bit library cannot be reloaded on a 64-bit system, nor can they be +reloaded using the 8-bit library. +. +. +.SH "SECURITY CONCERNS" +.rs +.sp +The facility for saving and restoring compiled patterns is intended for use +within individual applications. As such, the data supplied to +\fBpcre2_serialize_decode()\fP is expected to be trusted data, not data from +arbitrary external sources. There is only some simple consistency checking, not +complete validation of what is being re-loaded. . 
. .SH "SAVING COMPILED PATTERNS" @@ -129,20 +139,26 @@ is filled with those that fit, and the remainder are ignored. The yield of the function is the number of decoded patterns, or one of the following negative error codes: .sp - PCRE2_ERROR_BADDATA second argument is zero or less - PCRE2_ERROR_BADMAGIC mismatch of id bytes in the data - PCRE2_ERROR_BADMODE mismatch of variable unit size or PCRE2 version - PCRE2_ERROR_MEMORY memory allocation failed - PCRE2_ERROR_NULL first or third argument is NULL + PCRE2_ERROR_BADDATA second argument is zero or less + PCRE2_ERROR_BADMAGIC mismatch of id bytes in the data + PCRE2_ERROR_BADMODE mismatch of code unit size or PCRE2 version + PCRE2_ERROR_BADSERIALIZEDDATA other sanity check failure + PCRE2_ERROR_MEMORY memory allocation failed + PCRE2_ERROR_NULL first or third argument is NULL .sp PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was compiled on a system with different endianness. .P Decoded patterns can be used for matching in the usual way, and must be freed -by calling \fBpcre2_code_free()\fP as normal. A single copy of the character -tables is used by all the decoded patterns. A reference count is used to +by calling \fBpcre2_code_free()\fP. However, be aware that there is a potential +race issue if you are using multiple patterns that were decoded from a single +byte stream in a multithreaded application. A single copy of the character +tables is used by all the decoded patterns and a reference count is used to arrange for its memory to be automatically freed when the last pattern is -freed. +freed, but there is no locking on this reference count. Therefore, if you want +to call \fBpcre2_code_free()\fP for these patterns in different threads, you +must arrange your own locking, and ensure that \fBpcre2_code_free()\fP cannot +be called by two threads at the same time. 
.P If a pattern was processed by \fBpcre2_jit_compile()\fP before being serialized, the JIT data is discarded and so is no longer available after a @@ -165,6 +181,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 20 January 2015 -Copyright (c) 1997-2015 University of Cambridge. +Last updated: 24 May 2016 +Copyright (c) 1997-2016 University of Cambridge. .fi diff --git a/pcre2/doc/pcre2stack.3 b/pcre2/doc/pcre2stack.3 index 871126353..89d101bbc 100644 --- a/pcre2/doc/pcre2stack.3 +++ b/pcre2/doc/pcre2stack.3 @@ -1,4 +1,4 @@ -.TH PCRE2STACK 3 "21 November 2014" "PCRE2 10.00" +.TH PCRE2STACK 3 "23 December 2016" "PCRE2 10.23" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "PCRE2 DISCUSSION OF STACK USAGE" @@ -43,11 +43,12 @@ assertion and "once-only" subpatterns, which are handled like subroutine calls. Normally, these are never very deep, and the limit on the complexity of \fBpcre2_dfa_match()\fP is controlled by the amount of workspace it is given. However, it is possible to write patterns with runaway infinite recursions; -such patterns will cause \fBpcre2_dfa_match()\fP to run out of stack. At -present, there is no protection against this. +such patterns will cause \fBpcre2_dfa_match()\fP to run out of stack unless a +limit is applied (see below). .P -The comments that follow do NOT apply to \fBpcre2_dfa_match()\fP; they are -relevant only for \fBpcre2_match()\fP without the JIT optimization. +The comments in the next three sections do not apply to +\fBpcre2_dfa_match()\fP; they are relevant only for \fBpcre2_match()\fP without +the JIT optimization. . . .SS "Reducing \fBpcre2_match()\fP's stack usage" @@ -106,7 +107,7 @@ in the \fBpcre2api\fP .\" documentation. Since the block sizes are always the same, it may be possible to -implement customized a memory handler that is more efficient than the standard +implement a customized memory handler that is more efficient than the standard function. 
The memory blocks obtained for this purpose are retained and re-used if possible while \fBpcre2_match()\fP is running. They are all freed just before it exits. @@ -147,6 +148,15 @@ pattern to match. This is done by calling \fBpcre2_match()\fP repeatedly with different limits. . . +.SS "Limiting \fBpcre2_dfa_match()\fP's stack usage" +.rs +.sp +The recursion limit, as described above for \fBpcre2_match()\fP, also applies +to \fBpcre2_dfa_match()\fP, whose use of recursive function calls for +recursions in the pattern can lead to runaway stack usage. The non-recursive +match limit is not relevant for DFA matching, and is ignored. +. +. .SS "Changing stack size in Unix-like systems" .rs .sp @@ -197,6 +207,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 21 November 2014 -Copyright (c) 1997-2014 University of Cambridge. +Last updated: 23 December 2016 +Copyright (c) 1997-2016 University of Cambridge. .fi diff --git a/pcre2/doc/pcre2syntax.3 b/pcre2/doc/pcre2syntax.3 index dc34538cc..29a956255 100644 --- a/pcre2/doc/pcre2syntax.3 +++ b/pcre2/doc/pcre2syntax.3 @@ -1,4 +1,4 @@ -.TH PCRE2SYNTAX 3 "13 June 2015" "PCRE2 10.20" +.TH PCRE2SYNTAX 3 "23 December 2016" "PCRE2 10.23" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY" @@ -81,9 +81,10 @@ it matches a literal "u". \eW a "non-word" character \eX a Unicode extended grapheme cluster .sp -The application can lock out the use of \eC by setting the -PCRE2_NEVER_BACKSLASH_C option. It is dangerous because it may leave the -current matching point in the middle of a UTF-8 or UTF-16 character. +\eC is dangerous because it may leave the current matching point in the middle +of a UTF-8 or UTF-16 character. The application can lock out the use of \eC by +setting the PCRE2_NEVER_BACKSLASH_C option. It is also possible to build PCRE2 +with the use of \eC permanently disabled. 
.P By default, \ed, \es, and \ew match only ASCII characters, even in UTF-8 mode or in the 16-bit and 32-bit libraries. However, if locale-specific matching is @@ -159,6 +160,8 @@ at release 5.18. .SH "SCRIPT NAMES FOR \ep AND \eP" .rs .sp +Ahom, +Anatolian_Hieroglyphs, Arabic, Armenian, Avestan, @@ -199,6 +202,7 @@ Gurmukhi, Han, Hangul, Hanunoo, +Hatran, Hebrew, Hiragana, Imperial_Aramaic, @@ -235,12 +239,14 @@ Miao, Modi, Mongolian, Mro, +Multani, Myanmar, Nabataean, New_Tai_Lue, Nko, Ogham, Ol_Chiki, +Old_Hungarian, Old_Italic, Old_North_Arabian, Old_Permic, @@ -262,6 +268,7 @@ Saurashtra, Sharada, Shavian, Siddham, +SignWriting, Sinhala, Sora_Sompeng, Sundanese, @@ -421,9 +428,10 @@ appear. (*UCP) set PCRE2_UCP (use Unicode properties for \ed etc) .sp Note that LIMIT_MATCH and LIMIT_RECURSION can only reduce the value of the -limits set by the caller of pcre2_match(), not increase them. The application -can lock out the use of (*UTF) and (*UCP) by setting the PCRE2_NEVER_UTF or -PCRE2_NEVER_UCP options, respectively, at compile time. +limits set by the caller of \fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP, not +increase them. The application can lock out the use of (*UTF) and (*UCP) by +setting the PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options, respectively, at +compile time. . . .SH "NEWLINE CONVENTION" @@ -466,6 +474,9 @@ Each top-level branch of a look behind must be of a fixed length. \en reference by number (can be ambiguous) \egn reference by number \eg{n} reference by number + \eg+n relative reference by number (PCRE2 extension) + \eg-n relative reference by number + \eg{+n} relative reference by number (PCRE2 extension) \eg{-n} relative reference by number \ek reference by name (Perl) \ek'name' reference by name (Perl) @@ -504,13 +515,17 @@ Each top-level branch of a look behind must be of a fixed length. 
(?(-n) relative reference condition (?() named reference condition (Perl) (?('name') named reference condition (Perl) - (?(name) named reference condition (PCRE2) + (?(name) named reference condition (PCRE2, deprecated) (?(R) overall recursion condition - (?(Rn) specific group recursion condition - (?(R&name) specific recursion condition + (?(Rn) specific numbered group recursion condition + (?(R&name) specific named group recursion condition (?(DEFINE) define subpattern for reference (?(VERSION[>]=n.m) test PCRE2 version (?(assert) assertion condition +.sp +Note the ambiguity of (?(R) and (?(Rn) which might be named reference +conditions or recursion tests. Such a condition is interpreted as a reference +condition if the relevant named group exists. . . .SH "BACKTRACKING CONTROL" @@ -570,6 +585,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 13 June 2015 -Copyright (c) 1997-2015 University of Cambridge. +Last updated: 23 December 2016 +Copyright (c) 1997-2016 University of Cambridge. .fi diff --git a/pcre2/doc/pcre2test.1 b/pcre2/doc/pcre2test.1 index 857adc354..bd7383e69 100644 --- a/pcre2/doc/pcre2test.1 +++ b/pcre2/doc/pcre2test.1 @@ -1,4 +1,4 @@ -.TH PCRE2TEST 1 "20 May 2015" "PCRE 10.20" +.TH PCRE2TEST 1 "28 December 2016" "PCRE 10.23" .SH NAME pcre2test - a program for testing Perl-compatible regular expressions. .SH SYNOPSIS @@ -29,7 +29,7 @@ subject is processed, and what output is produced. .P As the original fairly simple PCRE library evolved, it acquired many different features, and as a result, the original \fBpcretest\fP program ended up with a -lot of options in a messy, arcane syntax, for testing all the features. The +lot of options in a messy, arcane syntax for testing all the features. The move to the new PCRE2 API provided an opportunity to re-implement the test program as \fBpcre2test\fP, with a cleaner modifier syntax. 
Nevertheless, there are still many obscure modifiers, some of which are specifically designed for @@ -47,31 +47,63 @@ strings that are encoded in 8-bit, 16-bit, or 32-bit code units. One, two, or all three of these libraries may be simultaneously installed. The \fBpcre2test\fP program can be used to test all the libraries. However, its own input and output are always in 8-bit format. When testing the 16-bit or 32-bit -libraries, patterns and subject strings are converted to 16- or 32-bit format -before being passed to the library functions. Results are converted back to -8-bit code units for output. +libraries, patterns and subject strings are converted to 16-bit or 32-bit +format before being passed to the library functions. Results are converted back +to 8-bit code units for output. .P In the rest of this document, the names of library functions and structures are given in generic form, for example, \fBpcre_compile()\fP. The actual names used in the libraries have a suffix _8, _16, or _32, as appropriate. . . +.\" HTML .SH "INPUT ENCODING" .rs .sp Input to \fBpcre2test\fP is processed line by line, either by calling the C -library's \fBfgets()\fP function, or via the \fBlibreadline\fP library (see -below). The input is processed using using C's string functions, so must not -contain binary zeroes, even though in Unix-like environments, \fBfgets()\fP -treats any bytes other than newline as data characters. In some Windows -environments character 26 (hex 1A) causes an immediate end of file, and no -further data is read. +library's \fBfgets()\fP function, or via the \fBlibreadline\fP library. In some +Windows environments character 26 (hex 1A) causes an immediate end of file, and +no further data is read, so this character should be avoided unless you really +want that action. .P -For maximum portability, therefore, it is safest to avoid non-printing -characters in \fBpcre2test\fP input files. 
There is a facility for specifying a -pattern's characters as hexadecimal pairs, thus making it possible to include -binary zeroes in a pattern for testing purposes. Subject lines are processed -for backslash escapes, which makes it possible to include any data value. +The input is processed using C's string functions, so must not +contain binary zeroes, even though in Unix-like environments, \fBfgets()\fP +treats any bytes other than newline as data characters. An error is generated +if a binary zero is encountered. Subject lines are processed for backslash +escapes, which makes it possible to include any data value in strings that are +passed to the library for matching. For patterns, there is a facility for +specifying some or all of the 8-bit input characters as hexadecimal pairs, +which makes it possible to include binary zeros. +. +. +.SS "Input for the 16-bit and 32-bit libraries" +.rs +.sp +When testing the 16-bit or 32-bit libraries, there is a need to be able to +generate character code points greater than 255 in the strings that are passed +to the library. For subject lines, backslash escapes can be used. In addition, +when the \fButf\fP modifier (see +.\" HTML +.\" +"Setting compilation options" +.\" +below) is set, the pattern and any following subject lines are interpreted as +UTF-8 strings and translated to UTF-16 or UTF-32 as appropriate. +.P +For non-UTF testing of wide characters, the \fButf8_input\fP modifier can be +used. This is mutually exclusive with \fButf\fP, and is allowed only in 16-bit +or 32-bit mode. It causes the pattern and following subject lines to be treated +as UTF-8 according to the original definition (RFC 2279), which allows for +character values up to 0x7fffffff. Each character is placed in one 16-bit or +32-bit code unit (in the 16-bit case, values greater than 0xffff cause an error +to occur). +.P +UTF-8 is not capable of encoding values greater than 0x7fffffff, but such +values can be handled by the 32-bit library.
When testing this library in +non-UTF mode with \fButf8_input\fP set, if any character is preceded by the +byte 0xff (which is an illegal byte in UTF-8) 0x80000000 is added to the +character's value. This is the only way of passing such code points in a +pattern string. For subject strings, using an escape sequence is preferable. . . .SH "COMMAND LINE OPTIONS" @@ -92,8 +124,12 @@ If the 32-bit library has been built, this option causes it to be used. If only the 32-bit library has been built, this is the default. If the 32-bit library has not been built, this option causes an error. .TP 10 +\fB-ac\fP +Behave as if each pattern has the \fBauto_callout\fP modifier, that is, insert +automatic callouts into every pattern that is compiled. +.TP 10 \fB-b\fP -Behave as if each pattern has the \fB/fullbincode\fP modifier; the full +Behave as if each pattern has the \fBfullbincode\fP modifier; the full internal binary form of the pattern is output after compilation. .TP 10 \fB-C\fP @@ -122,12 +158,13 @@ following options output the value and set the exit code as indicated: The following options output 1 for true or 0 for false, and set the exit code to the same value: .sp - ebcdic compiled for an EBCDIC environment - jit just-in-time support is available - pcre2-16 the 16-bit library was built - pcre2-32 the 32-bit library was built - pcre2-8 the 8-bit library was built - unicode Unicode support is available + backslash-C \eC is supported (not locked out) + ebcdic compiled for an EBCDIC environment + jit just-in-time support is available + pcre2-16 the 16-bit library was built + pcre2-32 the 32-bit library was built + pcre2-8 the 8-bit library was built + unicode Unicode support is available .sp If an unknown option is given, an error message is output; the exit code is 0. .TP 10 @@ -141,11 +178,17 @@ Behave as if each subject line has the \fBdfa\fP modifier; matching is done using the \fBpcre2_dfa_match()\fP function instead of the default \fBpcre2_match()\fP. 
.TP 10 +\fB-error\fP \fInumber[,number,...]\fP +Call \fBpcre2_get_error_message()\fP for each of the error numbers in the +comma-separated list, display the resulting messages on the standard output, +then exit with zero exit code. The numbers may be positive or negative. This is +a convenience facility for PCRE2 maintainers. +.TP 10 \fB-help\fP Output a brief summary these options and then exit. .TP 10 \fB-i\fP -Behave as if each pattern has the \fB/info\fP modifier; information about the +Behave as if each pattern has the \fBinfo\fP modifier; information about the compiled pattern is given after compilation. .TP 10 \fB-jit\fP @@ -217,9 +260,9 @@ Each subject line is matched separately and independently. If you want to do multi-line matches, you have to use the \en escape sequence (or \er or \er\en, etc., depending on the newline setting) in a single line of input to encode the newline sequences. There is no limit on the length of subject lines; the input -buffer is automatically extended if it is too small. There is a replication -feature that makes it possible to generate long subject lines without having to -supply them explicitly. +buffer is automatically extended if it is too small. There are replication +features that make it possible to generate long repetitive pattern or subject +lines without having to supply them explicitly. .P An empty line or the end of the file signals the end of the subject lines for a test, at which point a new pattern or command line is expected if there is @@ -259,6 +302,34 @@ described in the section entitled "Saving and restoring compiled patterns" .\" below. .\" +.sp + #newline_default [] +.sp +When PCRE2 is built, a default newline convention can be specified. This +determines which characters and/or character pairs are recognized as indicating +a newline in a pattern or subject string. The default can be overridden when a +pattern is compiled.
The standard test files contain tests of various newline +conventions, but the majority of the tests expect a single linefeed to be +recognized as a newline by default. Without special action the tests would fail +when PCRE2 is compiled with either CR or CRLF as the default newline. +.P +The #newline_default command specifies a list of newline types that are +acceptable as the default. The types must be one of CR, LF, CRLF, ANYCRLF, or +ANY (in upper or lower case), for example: +.sp + #newline_default LF Any anyCRLF +.sp +If the default newline is in the list, this command has no effect. Otherwise, +except when testing the POSIX API, a \fBnewline\fP modifier that specifies the +first newline convention in the list (LF in the above example) is added to any +pattern that does not already have a \fBnewline\fP modifier. If the newline +list is empty, the feature is turned off. This command is present in a number +of the standard test input files. +.P +When the POSIX API is being tested there is no way to override the default +newline convention, though it is possible to set the newline convention from +within the pattern. A warning is given if the \fBposix\fP modifier is used when +\fB#newline_default\fP would set a default for the non-POSIX API. .sp #pattern .sp @@ -276,9 +347,10 @@ test files that are also processed by \fBperltest.sh\fP. The \fB#perltest\fP command helps detect tests that are accidentally put in the wrong file. .sp #pop [] + #popcopy [] .sp -This command is used to manipulate the stack of compiled patterns, as described -in the section entitled "Saving and restoring compiled patterns" +These commands are used to manipulate the stack of compiled patterns, as +described in the section entitled "Saving and restoring compiled patterns" .\" HTML .\" below. @@ -303,12 +375,13 @@ subject lines. Modifiers on a subject line can change these settings. .rs .sp Modifier lists are used with both pattern and subject lines. 
Items in a list -are separated by commas and optional white space. Some modifiers may be given -for both patterns and subject lines, whereas others are valid for one or the -other only. Each modifier has a long name, for example "anchored", and some of -them must be followed by an equals sign and a value, for example, "offset=12". -Modifiers that do not take values may be preceded by a minus sign to turn off a -previous setting. +are separated by commas followed by optional white space. Trailing whitespace +in a modifier list is ignored. Some modifiers may be given for both patterns +and subject lines, whereas others are valid only for one or the other. Each +modifier has a long name, for example "anchored", and some of them must be +followed by an equals sign and a value, for example, "offset=12". Values cannot +contain comma characters, but may contain spaces. Modifiers that do not take +values may be preceded by a minus sign to turn off a previous setting. .P A few of the more common modifiers can also be specified as single letters, for example "i" for "caseless". In documentation, following the Perl convention, @@ -414,6 +487,12 @@ the start of a modifier list. For example: .sp abc\e=notbol,notempty .sp +If the subject string is empty and \e= is followed by whitespace, the line is +treated as a comment line, and is not used for matching. For example: +.sp + \e= This is a comment. + abc\e= This is an invalid modifier list. +.sp A backslash followed by any other non-alphanumeric character just escapes that character. A backslash followed by anything else causes an error. However, if the very last character in the line is a backslash (and there is no modifier @@ -424,10 +503,10 @@ a real empty line terminates the data input. .SH "PATTERN MODIFIERS" .rs .sp -There are three types of modifier that can appear in pattern lines, two of -which may also be used in a \fB#pattern\fP command. 
A pattern's modifier list -can add to or override default modifiers that were set by a previous -\fB#pattern\fP command. +There are several types of modifier that can appear in pattern lines. Except +where noted below, they may also be used in \fB#pattern\fP commands. A +pattern's modifier list can add to or override default modifiers that were set +by a previous \fB#pattern\fP command. . . .\" HTML @@ -437,13 +516,14 @@ can add to or override default modifiers that were set by a previous The following modifiers set options for \fBpcre2_compile()\fP. The most common ones have single-letter abbreviations. See .\" HREF -\fBpcreapi\fP +\fBpcre2api\fP .\" for a description of their effects. .sp allow_empty_class set PCRE2_ALLOW_EMPTY_CLASS alt_bsux set PCRE2_ALT_BSUX alt_circumflex set PCRE2_ALT_CIRCUMFLEX + alt_verbnames set PCRE2_ALT_VERBNAMES anchored set PCRE2_ANCHORED auto_callout set PCRE2_AUTO_CALLOUT /i caseless set PCRE2_CASELESS @@ -464,12 +544,15 @@ for a description of their effects. no_utf_check set PCRE2_NO_UTF_CHECK ucp set PCRE2_UCP ungreedy set PCRE2_UNGREEDY + use_offset_limit set PCRE2_USE_OFFSET_LIMIT utf set PCRE2_UTF .sp As well as turning on the PCRE2_UTF option, the \fButf\fP modifier causes all non-printing characters in output strings to be printed using the \ex{hh...} notation. Otherwise, those less than 0x100 are output in hex without the curly -brackets. +brackets. Setting \fButf\fP in 16-bit or 32-bit mode also causes pattern and +subject strings to be translated to UTF-16 or UTF-32, respectively, before +being passed to library functions. . . 
.\" HTML @@ -485,18 +568,24 @@ about the pattern: debug same as info,fullbincode fullbincode show binary code with lengths /I info show info about compiled pattern - hex pattern is coded in hexadecimal + hex unquoted characters are hexadecimal jit[=] use JIT jitfast use JIT fast path jitverify verify JIT use locale= use this locale + max_pattern_length= set the maximum pattern length memory show memory used newline= set newline type + null_context compile with a NULL context parens_nest_limit= set maximum parentheses depth posix use the POSIX API + posix_nosub use the POSIX API with REG_NOSUB push push compiled pattern onto the stack + pushcopy push a copy onto the stack stackguard= test the stackguard feature tables=[0|1|2] select internal tables + use_length do not zero-terminate the pattern + utf8_input treat input as UTF-8 .sp The effects of these modifiers are described in the following sections. . @@ -565,40 +654,148 @@ is requested. For each callout, either its number or string is given, followed by the item that follows it in the pattern. . . -.SS "Specifying a pattern in hex" +.SS "Passing a NULL context" .rs .sp -The \fBhex\fP modifier specifies that the characters of the pattern are to be -interpreted as pairs of hexadecimal digits. White space is permitted between -pairs. For example: +Normally, \fBpcre2test\fP passes a context block to \fBpcre2_compile()\fP. If +the \fBnull_context\fP modifier is set, however, NULL is passed. This is for +testing that \fBpcre2_compile()\fP behaves correctly in this case (it uses +default values). +. +. +.SS "Specifying the pattern's length" +.rs +.sp +By default, patterns are passed to the compiling functions as zero-terminated +strings. When using the POSIX wrapper API, there is no other option. However, +when using PCRE2's native API, patterns can be passed by length instead of +being zero-terminated. The \fBuse_length\fP modifier causes this to happen. 
+Using a length happens automatically (whether or not \fBuse_length\fP is set) +when \fBhex\fP is set, because patterns specified in hexadecimal may contain +binary zeros. +. +. +.SS "Specifying pattern characters in hexadecimal" +.rs +.sp +The \fBhex\fP modifier specifies that the characters of the pattern, except for +substrings enclosed in single or double quotes, are to be interpreted as pairs +of hexadecimal digits. This feature is provided as a way of creating patterns +that contain binary zeros and other non-printing characters. White space is +permitted between pairs of digits. For example, this pattern contains three +characters: .sp /ab 32 59/hex .sp -This feature is provided as a way of creating patterns that contain binary zero -and other non-printing characters. By default, \fBpcre2test\fP passes patterns -as zero-terminated strings to \fBpcre2_compile()\fP, giving the length as -PCRE2_ZERO_TERMINATED. However, for patterns specified in hexadecimal, the -actual length of the pattern is passed. +Parts of such a pattern are taken literally if quoted. This pattern contains +nine characters, only two of which are specified in hexadecimal: +.sp + /ab "literal" 32/hex +.sp +Either single or double quotes may be used. There is no way of including +the delimiter within a substring. The \fBhex\fP and \fBexpand\fP modifiers are +mutually exclusive. +.P +The POSIX API cannot be used with patterns specified in hexadecimal because +they may contain binary zeros, which conflicts with \fBregcomp()\fP's +requirement for a zero-terminated string. Such patterns are always passed to +\fBpcre2_compile()\fP as a string with a length, not as zero-terminated. +. +. +.SS "Specifying wide characters in 16-bit and 32-bit modes" +.rs +.sp +In 16-bit and 32-bit modes, all input is automatically treated as UTF-8 and +translated to UTF-16 or UTF-32 when the \fButf\fP modifier is set. 
For testing +the 16-bit and 32-bit libraries in non-UTF mode, the \fButf8_input\fP modifier +can be used. It is mutually exclusive with \fButf\fP. Input lines are +interpreted as UTF-8 as a means of specifying wide characters. More details are +given in +.\" HTML +.\" +"Input encoding" +.\" +above. +. +. +.SS "Generating long repetitive patterns" +.rs +.sp +Some tests use long patterns that are very repetitive. Instead of creating a +very long input line for such a pattern, you can use a special repetition +feature, similar to the one described for subject lines above. If the +\fBexpand\fP modifier is present on a pattern, parts of the pattern that have +the form +.sp + \e[]{} +.sp +are expanded before the pattern is passed to \fBpcre2_compile()\fP. For +example, \e[AB]{6000} is expanded to "ABAB..." 6000 times. This construction +cannot be nested. An initial "\e[" sequence is recognized only if "]{" followed +by decimal digits and "}" is found later in the pattern. If not, the characters +remain in the pattern unaltered. The \fBexpand\fP and \fBhex\fP modifiers are +mutually exclusive. +.P +If part of an expanded pattern looks like an expansion, but is really part of +the actual pattern, unwanted expansion can be avoided by giving two values in +the quantifier. For example, \e[AB]{6000,6000} is not recognized as an +expansion item. +.P +If the \fBinfo\fP modifier is set on an expanded pattern, the result of the +expansion is included in the information that is output. . . .SS "JIT compilation" .rs .sp -The \fB/jit\fP modifier may optionally be followed by an equals sign and a -number in the range 0 to 7: +Just-in-time (JIT) compiling is a heavyweight optimization that can greatly +speed up pattern matching. See the +.\" HREF +\fBpcre2jit\fP +.\" +documentation for details. JIT compiling happens, optionally, after a pattern +has been successfully compiled into an internal form. The JIT compiler converts +this to optimized machine code. 
It needs to know whether the match-time options +PCRE2_PARTIAL_HARD and PCRE2_PARTIAL_SOFT are going to be used, because +different code is generated for the different cases. See the \fBpartial\fP +modifier in "Subject Modifiers" +.\" HTML +.\" +below +.\" +for details of how these options are specified for each match attempt. +.P +JIT compilation is requested by the \fB/jit\fP pattern modifier, which may +optionally be followed by an equals sign and a number in the range 0 to 7. +The three bits that make up the number specify which of the three JIT operating +modes are to be compiled: +.sp + 1 compile JIT code for non-partial matching + 2 compile JIT code for soft partial matching + 4 compile JIT code for hard partial matching +.sp +The possible values for the \fBjit\fP modifier are therefore: .sp 0 disable JIT - 1 use JIT for normal match only - 2 use JIT for soft partial match only - 3 use JIT for normal match and soft partial match - 4 use JIT for hard partial match only - 6 use JIT for soft and hard partial match + 1 normal matching only + 2 soft partial matching only + 3 normal and soft partial matching + 4 hard partial matching only + 6 soft and hard partial matching only 7 all three modes .sp -If no number is given, 7 is assumed. If JIT compilation is successful, the -compiled JIT code will automatically be used when \fBpcre2_match()\fP is run -for the appropriate type of match, except when incompatible run-time options -are specified. For more details, see the +If no number is given, 7 is assumed. The phrase "partial matching" means a call +to \fBpcre2_match()\fP with either the PCRE2_PARTIAL_SOFT or the +PCRE2_PARTIAL_HARD option set. Note that such a call may return a complete +match; the options enable the possibility of a partial match, but do not +require it. 
Note also that if you request JIT compilation only for partial +matching (for example, /jit=2) but do not set the \fBpartial\fP modifier on a +subject line, that match will not use JIT code because none was compiled for +non-partial matching. +.P +If JIT compilation is successful, the compiled JIT code will automatically be +used when an appropriate type of match is run, except when incompatible +run-time options are specified. For more details, see the .\" HREF \fBpcre2jit\fP .\" @@ -622,14 +819,14 @@ code was actually used in the match. .SS "Setting a locale" .rs .sp -The \fB/locale\fP modifier must specify the name of a locale, for example: +The \fBlocale\fP modifier must specify the name of a locale, for example: .sp /pattern/locale=fr_FR .sp The given locale is set, \fBpcre2_maketables()\fP is called to build a set of character tables for the locale, and this is then passed to \fBpcre2_compile()\fP when compiling the regular expression. The same tables -are used when matching the following subject lines. The \fB/locale\fP modifier +are used when matching the following subject lines. The \fBlocale\fP modifier applies only to the pattern on which it appears, but can be given in a \fB#pattern\fP command if a default is needed. Setting a locale and alternate character tables are mutually exclusive. @@ -638,7 +835,7 @@ character tables are mutually exclusive. .SS "Showing pattern memory" .rs .sp -The \fB/memory\fP modifier causes the size in bytes of the memory used to hold +The \fBmemory\fP modifier causes the size in bytes of the memory used to hold the compiled pattern to be output. This does not include the size of the \fBpcre2_code\fP block; it is just the actual compiled data. If the pattern is subsequently passed to the JIT compiler, the size of the JIT compiled code is @@ -660,30 +857,54 @@ sets its own default of 220, which is required for running the standard test suite. . . 
+.SS "Limiting the pattern length" +.rs +.sp +The \fBmax_pattern_length\fP modifier sets a limit, in code units, to the +length of pattern that \fBpcre2_compile()\fP will accept. Breaching the limit +causes a compilation error. The default is the largest number a PCRE2_SIZE +variable can hold (essentially unlimited). +. +. .SS "Using the POSIX wrapper API" .rs .sp -The \fB/posix\fP modifier causes \fBpcre2test\fP to call PCRE2 via the POSIX -wrapper API rather than its native API. This supports only the 8-bit library. -When the POSIX API is being used, the following pattern modifiers set options -for the \fBregcomp()\fP function: +The \fB/posix\fP and \fBposix_nosub\fP modifiers cause \fBpcre2test\fP to call +PCRE2 via the POSIX wrapper API rather than its native API. When +\fBposix_nosub\fP is used, the POSIX option REG_NOSUB is passed to +\fBregcomp()\fP. The POSIX wrapper supports only the 8-bit library. Note that +it does not imply POSIX matching semantics; for more detail see the +.\" HREF +\fBpcre2posix\fP +.\" +documentation. The following pattern modifiers set options for the +\fBregcomp()\fP function: .sp caseless REG_ICASE multiline REG_NEWLINE - no_auto_capture REG_NOSUB dotall REG_DOTALL ) ungreedy REG_UNGREEDY ) These options are not part of ucp REG_UCP ) the POSIX standard utf REG_UTF8 ) .sp +The \fBregerror_buffsize\fP modifier specifies a size for the error buffer that +is passed to \fBregerror()\fP in the event of a compilation error. For example: +.sp + /abc/posix,regerror_buffsize=20 +.sp +This provides a means of testing the behaviour of \fBregerror()\fP when the +buffer is too small for the error message. If this modifier has not been set, a +large buffer is used. +.P The \fBaftertext\fP and \fBallaftertext\fP subject modifiers work as described -below. All other modifiers cause an error. +below. All other modifiers are either ignored, with a warning message, or cause +an error. . . 
.SS "Testing the stack guard feature" .rs .sp -The \fB/stackguard\fP modifier is used to test the use of +The \fBstackguard\fP modifier is used to test the use of \fBpcre2_set_compile_recursion_guard()\fP, a function that is provided to enable stack availability to be checked during compilation (see the .\" HREF @@ -700,7 +921,7 @@ be aborted. .SS "Using alternative character tables" .rs .sp -The value specified for the \fB/tables\fP modifier must be one of the digits 0, +The value specified for the \fBtables\fP modifier must be one of the digits 0, 1, or 2. It causes a specific set of built-in character tables to be passed to \fBpcre2_compile()\fP. This is used in the PCRE2 tests to check behaviour with different character tables. The digit specifies the tables as follows: @@ -720,17 +941,22 @@ are mutually exclusive. .sp The following modifiers are really subject modifiers, and are described below. However, they may be included in a pattern's modifier list, in which case they -are applied to every subject line that is processed with that pattern. They do -not affect the compilation process. +are applied to every subject line that is processed with that pattern. They may +not appear in \fB#pattern\fP commands. These modifiers do not affect the +compilation process. 
.sp - aftertext show text after match - allaftertext show text after captures - allcaptures show all captures - allusedtext show all consulted text - /g global global matching - mark show mark values - replace= specify a replacement string - startchar show starting character when relevant + aftertext show text after match + allaftertext show text after captures + allcaptures show all captures + allusedtext show all consulted text + /g global global matching + mark show mark values + replace= specify a replacement string + startchar show starting character when relevant + substitute_extended use PCRE2_SUBSTITUTE_EXTENDED + substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH + substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET + substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY .sp These modifiers may not appear in a \fB#pattern\fP command. If you want them as defaults, set them in a \fB#subject\fP command. @@ -746,15 +972,20 @@ facility is used when saving compiled patterns to a file, as described in the section entitled "Saving and restoring compiled patterns" .\" HTML .\" -below. +below. If \fBpushcopy\fP is used instead of \fBpush\fP, a copy of the compiled +pattern is stacked, leaving the original as current, ready to match the +following input lines. This provides a way of testing the +\fBpcre2_code_copy()\fP function. .\" -The \fBpush\fP modifier is incompatible with compilation modifiers such as -\fBglobal\fP that act at match time. Any that are specified are ignored, with a -warning message, except for \fBreplace\fP, which causes an error. Note that, -\fBjitverify\fP, which is allowed, does not carry through to any subsequent -matching that uses this pattern. +The \fBpush\fP and \fBpushcopy \fP modifiers are incompatible with compilation +modifiers such as \fBglobal\fP that act at match time. Any that are specified +are ignored (for the stacked copy), with a warning message, except for +\fBreplace\fP, which causes an error. 
Note that \fBjitverify\fP, which is +allowed, does not carry through to any subsequent matching that uses a stacked +pattern. . . +.\" HTML .SH "SUBJECT MODIFIERS" .rs .sp @@ -775,6 +1006,7 @@ for a description of their effects. anchored set PCRE2_ANCHORED dfa_restart set PCRE2_DFA_RESTART dfa_shortest set PCRE2_DFA_SHORTEST + no_jit set PCRE2_NO_JIT no_utf_check set PCRE2_NO_UTF_CHECK notbol set PCRE2_NOTBOL notempty set PCRE2_NOTEMPTY @@ -786,11 +1018,11 @@ for a description of their effects. The partial matching modifiers are provided with abbreviations because they appear frequently in tests. .P -If the \fB/posix\fP modifier was present on the pattern, causing the POSIX +If the \fBposix\fP modifier was present on the pattern, causing the POSIX wrapper API to be used, the only option-setting modifiers that have any effect are \fBnotbol\fP, \fBnotempty\fP, and \fBnoteol\fP, causing REG_NOTBOL, REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to \fBregexec()\fP. -Any other modifiers cause an error. +The other modifiers are ignored, with a warning message. . . .SS "Setting match controls" @@ -801,33 +1033,44 @@ information. Some of them may also be specified on a pattern line (see above), in which case they apply to every subject line that is matched against that pattern. 
.sp - aftertext show text after match - allaftertext show text after captures - allcaptures show all captures - allusedtext show all consulted text (non-JIT only) - altglobal alternative global matching - callout_capture show captures at callout time - callout_data= set a value to pass via callouts - callout_fail=[:] control callout failure - callout_none do not supply a callout function - copy= copy captured substring - dfa use \fBpcre2_dfa_match()\fP - find_limits find match and recursion limits - get= extract captured substring - getall extract all captured substrings - /g global global matching - jitstack= set size of JIT stack - mark show mark values - match_limit=>n> set a match limit - memory show memory usage - offset= set starting offset - ovector= set size of output vector - recursion_limit= set a recursion limit - replace= specify a replacement string - startchar show startchar when relevant - zero_terminate pass the subject as zero-terminated + aftertext show text after match + allaftertext show text after captures + allcaptures show all captures + allusedtext show all consulted text (non-JIT only) + altglobal alternative global matching + callout_capture show captures at callout time + callout_data= set a value to pass via callouts + callout_error=[:] control callout error + callout_fail=[:] control callout failure + callout_none do not supply a callout function + copy= copy captured substring + dfa use \fBpcre2_dfa_match()\fP + find_limits find match and recursion limits + get= extract captured substring + getall extract all captured substrings + /g global global matching + jitstack= set size of JIT stack + mark show mark values + match_limit= set a match limit + memory show memory usage + null_context match with a NULL context + offset= set starting offset + offset_limit= set offset limit + ovector= set size of output vector + recursion_limit= set a recursion limit + replace= specify a replacement string + startchar show startchar when relevant + 
startoffset= same as offset= + substitute_extended use PCRE2_SUBSTITUTE_EXTENDED + substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH + substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET + substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY + zero_terminate pass the subject as zero-terminated .sp -The effects of these modifiers are described in the following sections. +The effects of these modifiers are described in the following sections. When +matching via the POSIX wrapper API, the \fBaftertext\fP, \fBallaftertext\fP, +and \fBovector\fP subject modifiers work as described below. All other +modifiers are either ignored, with a warning message, or cause an error. . . .SS "Showing more text" @@ -882,7 +1125,8 @@ The \fBallcaptures\fP modifier requests that the values of all potential captured parentheses be output after a match. By default, only those up to the highest one actually used in the match are output (corresponding to the return code from \fBpcre2_match()\fP). Groups that did not take part in the match -are output as "". +are output as "". This modifier is not relevant for DFA matching (which +does no capturing); it is ignored, with a warning message, if present. . . .SS "Testing callouts" @@ -890,14 +1134,20 @@ are output as "". .sp A callout function is supplied when \fBpcre2test\fP calls the library matching functions, unless \fBcallout_none\fP is specified. If \fBcallout_capture\fP is -set, the current captured groups are output when a callout occurs. +set, the current captured groups are output when a callout occurs. The default +return from the callout function is zero, which allows matching to continue. .P The \fBcallout_fail\fP modifier can be given one or two numbers. If there is -only one number, 1 is returned instead of 0 when a callout of that number is -reached. If two numbers are given, 1 is returned when callout is reached -for the th time. Note that callouts with string arguments are always given -the number zero.
See "Callouts" below for a description of the output when a -callout it taken. +only one number, 1 is returned instead of 0 (causing matching to backtrack) +when a callout of that number is reached. If two numbers (<n>:<m>) are given, 1 +is returned when callout <n> is reached and there have been at least <m> +callouts. The \fBcallout_error\fP modifier is similar, except that +PCRE2_ERROR_CALLOUT is returned, causing the entire matching process to be +aborted. If both these modifiers are set for the same callout number, +\fBcallout_error\fP takes precedence. +.P +Note that callouts with string arguments are always given the number zero. See +"Callouts" below for a description of the output when a callout is taken. .P The \fBcallout_data\fP modifier can be given an unsigned or a negative number. This is set as the "user data" that is passed to the matching function, and @@ -909,7 +1159,7 @@ used as a return from \fBpcre2test\fP's callout function. .rs .sp Searching for all possible matches within a subject can be requested by the -\fBglobal\fP or \fB/altglobal\fP modifier. After finding a match, the matching +\fBglobal\fP or \fBaltglobal\fP modifier. After finding a match, the matching function is called again to search the remainder of the subject. The difference between \fBglobal\fP and \fBaltglobal\fP is that the former uses the \fIstart_offset\fP argument to \fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP @@ -957,18 +1207,30 @@ by name. .rs .sp If the \fBreplace\fP modifier is set, the \fBpcre2_substitute()\fP function is -called instead of one of the matching functions. Unlike subject strings, -\fBpcre2test\fP does not process replacement strings for escape sequences. In -UTF mode, a replacement string is checked to see if it is a valid UTF-8 string. -If so, it is correctly converted to a UTF string of the appropriate code unit -width. If it is not a valid UTF-8 string, the individual code units are copied -directly.
This provides a means of passing an invalid UTF-8 string for testing -purposes. +called instead of one of the matching functions. Note that replacement strings +cannot contain commas, because a comma signifies the end of a modifier. This is +not thought to be an issue in a test program. .P -If the \fBglobal\fP modifier is set, PCRE2_SUBSTITUTE_GLOBAL is passed to -\fBpcre2_substitute()\fP. After a successful substitution, the modified string -is output, preceded by the number of replacements. This may be zero if there -were no matches. Here is a simple example of a substitution test: +Unlike subject strings, \fBpcre2test\fP does not process replacement strings +for escape sequences. In UTF mode, a replacement string is checked to see if it +is a valid UTF-8 string. If so, it is correctly converted to a UTF string of +the appropriate code unit width. If it is not a valid UTF-8 string, the +individual code units are copied directly. This provides a means of passing an +invalid UTF-8 string for testing purposes. +.P +The following modifiers set options (in addition to the normal match options) +for \fBpcre2_substitute()\fP: +.sp + global PCRE2_SUBSTITUTE_GLOBAL + substitute_extended PCRE2_SUBSTITUTE_EXTENDED + substitute_overflow_length PCRE2_SUBSTITUTE_OVERFLOW_LENGTH + substitute_unknown_unset PCRE2_SUBSTITUTE_UNKNOWN_UNSET + substitute_unset_empty PCRE2_SUBSTITUTE_UNSET_EMPTY +.sp +.P +After a successful substitution, the modified string is output, preceded by the +number of replacements. This may be zero if there were no matches. Here is a +simple example of a substitution test: .sp /abc/replace=xxx =abc=abc= @@ -976,12 +1238,12 @@ were no matches. Here is a simple example of a substitution test: =abc=abc=\e=global 2: =xxx=xxx= .sp -Subject and replacement strings should be kept relatively short for -substitution tests, as fixed-size buffers are used.
To make it easy to test for -buffer overflow, if the replacement string starts with a number in square -brackets, that number is passed to \fBpcre2_substitute()\fP as the size of the -output buffer, with the replacement string starting at the next character. Here -is an example that tests the edge case: +Subject and replacement strings should be kept relatively short (fewer than 256 +characters) for substitution tests, as fixed-size buffers are used. To make it +easy to test for buffer overflow, if the replacement string starts with a +number in square brackets, that number is passed to \fBpcre2_substitute()\fP as +the size of the output buffer, with the replacement string starting at the next +character. Here is an example that tests the edge case: .sp /abc/ 123abc123\e=replace=[10]XYZ @@ -989,6 +1251,19 @@ is an example that tests the edge case: 123abc123\e=replace=[9]XYZ Failed: error -47: no more memory .sp +The default action of \fBpcre2_substitute()\fP is to return +PCRE2_ERROR_NOMEMORY when the output buffer is too small. However, if the +PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set (by using the +\fBsubstitute_overflow_length\fP modifier), \fBpcre2_substitute()\fP continues +to go through the motions of matching and substituting, in order to compute the +size of buffer that is required. When this happens, \fBpcre2test\fP shows the +required buffer length (which includes space for the trailing zero) as part of +the error message. For example: +.sp + /abc/substitute_overflow_length + 123abc123\e=replace=[9]XYZ + Failed: error -47: no more memory: 10 code units are needed +.sp A replacement string is ignored with POSIX and DFA matching. Specifying partial matching provokes an error return ("bad option value") from \fBpcre2_substitute()\fP. @@ -1059,6 +1334,16 @@ The \fBoffset\fP modifier sets an offset in the subject string at which matching starts. Its value is a number of code units, not characters. . . 
+.SS "Setting an offset limit" +.rs +.sp +The \fBoffset_limit\fP modifier sets a limit for unanchored matches. If a match +cannot be found starting at or before this offset in the subject, a "no match" +return is given. The data value is a number of code units, not characters. When +this modifier is used, the \fBuse_offset_limit\fP modifier must have been set +for the pattern; if not, an error is generated. +. +. .SS "Setting the size of the output vector" .rs .sp @@ -1089,6 +1374,17 @@ When testing \fBpcre2_substitute()\fP, this modifier also has the effect of passing the replacement string as zero-terminated. . . +.SS "Passing a NULL context" +.rs +.sp +Normally, \fBpcre2test\fP passes a context block to \fBpcre2_match()\fP, +\fBpcre2_dfa_match()\fP or \fBpcre2_jit_match()\fP. If the \fBnull_context\fP +modifier is set, however, NULL is passed. This is for testing that the matching +functions behave correctly in this case (they use default values). This +modifier cannot be used with the \fBfind_limits\fP modifier or when testing the +substitution function. +. +. .SH "THE ALTERNATIVE MATCHING FUNCTION" .rs .sp @@ -1156,7 +1452,7 @@ unset substring is shown as "", as for the second data line. If the strings contain any non-printing characters, they are output as \exhh escapes if the value is less than 256 and UTF mode is not set. Otherwise they are output as \ex{hh...} escapes. See below for the definition of non-printing -characters. If the \fB/aftertext\fP modifier is set, the output for substring +characters. If the \fBaftertext\fP modifier is set, the output for substring 0 is followed by the the rest of the subject string, identified by "0+" like this: .sp @@ -1286,7 +1582,9 @@ item to be tested. For example: This output indicates that callout number 0 occurred for a match attempt starting at the fourth character of the subject string, when the pointer was at the seventh character, and when the next pattern item was \ed. 
Just -one circumflex is output if the start and current positions are the same. +one circumflex is output if the start and current positions are the same, or if +the current position precedes the start position, which can happen if the +callout is in a lookbehind assertion. .P Callouts numbered 255 are assumed to be automatic callouts, inserted as a result of the \fB/auto_callout\fP pattern modifier. In this case, instead of @@ -1352,7 +1650,7 @@ therefore shown as hex escapes. .P When \fBpcre2test\fP is outputting text that is a matched part of a subject string, it behaves in the same way, unless a different locale has been set for -the pattern (using the \fB/locale\fP modifier). In this case, the +the pattern (using the \fBlocale\fP modifier). In this case, the \fBisprint()\fP function is used to distinguish printing and non-printing characters. . @@ -1382,11 +1680,15 @@ can be used to test these functions. .P When a pattern with \fBpush\fP modifier is successfully compiled, it is pushed onto a stack of compiled patterns, and \fBpcre2test\fP expects the next line to -contain a new pattern (or command) instead of a subject line. By this means, a -number of patterns can be compiled and retained. The \fBpush\fP modifier is -incompatible with \fBposix\fP, and control modifiers that act at match time are -ignored (with a message). The \fBjitverify\fP modifier applies only at compile -time. The command +contain a new pattern (or command) instead of a subject line. By contrast, +the \fBpushcopy\fP modifier causes a copy of the compiled pattern to be +stacked, leaving the original available for immediate matching. By using +\fBpush\fP and/or \fBpushcopy\fP, a number of patterns can be compiled and +retained. These modifiers are incompatible with \fBposix\fP, and control +modifiers that act at match time are ignored (with a message) for the stacked +patterns. The \fBjitverify\fP modifier applies only at compile time. 
+.P +The command .sp #save .sp @@ -1406,7 +1708,8 @@ modifier list containing only control modifiers .\" that act after a pattern has been compiled. In particular, \fBhex\fP, -\fBposix\fP, and \fBpush\fP are not allowed, nor are any +\fBposix\fP, \fBposix_nosub\fP, \fBpush\fP, and \fBpushcopy\fP are not allowed, +nor are any .\" HTML .\" option-setting modifiers. @@ -1426,6 +1729,10 @@ reloads two patterns. .sp If \fBjitverify\fP is used with #pop, it does not automatically imply \fBjit\fP, which is different behaviour from when it is used on a pattern. +.P +The #popcopy command is analogous to the \fBpushcopy\fP modifier in that it +makes current a copy of the topmost stack pattern, leaving the original still +on the stack. . . . @@ -1451,6 +1758,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 20 May 2015 -Copyright (c) 1997-2015 University of Cambridge. +Last updated: 28 December 2016 +Copyright (c) 1997-2016 University of Cambridge. .fi diff --git a/pcre2/doc/pcre2test.txt b/pcre2/doc/pcre2test.txt index c022a9ce5..52f0e18b5 100644 --- a/pcre2/doc/pcre2test.txt +++ b/pcre2/doc/pcre2test.txt @@ -26,7 +26,7 @@ SYNOPSIS As the original fairly simple PCRE library evolved, it acquired many different features, and as a result, the original pcretest program - ended up with a lot of options in a messy, arcane syntax, for testing + ended up with a lot of options in a messy, arcane syntax for testing all the features. The move to the new PCRE2 API provided an opportunity to re-implement the test program as pcre2test, with a cleaner modifier syntax. Nevertheless, there are still many obscure modifiers, some of @@ -45,7 +45,7 @@ PCRE2's 8-BIT, 16-BIT AND 32-BIT LIBRARIES installed. The pcre2test program can be used to test all the libraries. However, its own input and output are always in 8-bit format.
When testing the 16-bit or 32-bit libraries, patterns and subject strings - are converted to 16- or 32-bit format before being passed to the + are converted to 16-bit or 32-bit format before being passed to the library functions. Results are converted back to 8-bit code units for output. @@ -58,49 +58,80 @@ PCRE2's 8-BIT, 16-BIT AND 32-BIT LIBRARIES INPUT ENCODING Input to pcre2test is processed line by line, either by calling the C - library's fgets() function, or via the libreadline library (see below). + library's fgets() function, or via the libreadline library. In some + Windows environments character 26 (hex 1A) causes an immediate end of + file, and no further data is read, so this character should be avoided + unless you really want that action. + The input is processed using using C's string functions, so must not contain binary zeroes, even though in Unix-like environments, fgets() - treats any bytes other than newline as data characters. In some Windows - environments character 26 (hex 1A) causes an immediate end of file, and - no further data is read. + treats any bytes other than newline as data characters. An error is + generated if a binary zero is encountered. Subject lines are processed + for backslash escapes, which makes it possible to include any data + value in strings that are passed to the library for matching. For pat- + terns, there is a facility for specifying some or all of the 8-bit + input characters as hexadecimal pairs, which makes it possible to + include binary zeros. - For maximum portability, therefore, it is safest to avoid non-printing - characters in pcre2test input files. There is a facility for specifying - a pattern's characters as hexadecimal pairs, thus making it possible to - include binary zeroes in a pattern for testing purposes. Subject lines - are processed for backslash escapes, which makes it possible to include - any data value. 
+ Input for the 16-bit and 32-bit libraries + + When testing the 16-bit or 32-bit libraries, there is a need to be able + to generate character code points greater than 255 in the strings that + are passed to the library. For subject lines, backslash escapes can be + used. In addition, when the utf modifier (see "Setting compilation + options" below) is set, the pattern and any following subject lines are + interpreted as UTF-8 strings and translated to UTF-16 or UTF-32 as + appropriate. + + For non-UTF testing of wide characters, the utf8_input modifier can be + used. This is mutually exclusive with utf, and is allowed only in + 16-bit or 32-bit mode. It causes the pattern and following subject + lines to be treated as UTF-8 according to the original definition (RFC + 2279), which allows for character values up to 0x7fffffff. Each charac- + ter is placed in one 16-bit or 32-bit code unit (in the 16-bit case, + values greater than 0xffff cause an error to occur). + + UTF-8 is not capable of encoding values greater than 0x7fffffff, but + such values can be handled by the 32-bit library. When testing this + library in non-UTF mode with utf8_input set, if any character is pre- + ceded by the byte 0xff (which is an illegal byte in UTF-8) 0x80000000 + is added to the character's value. This is the only way of passing such + code points in a pattern string. For subject strings, using an escape + sequence is preferable. COMMAND LINE OPTIONS -8 If the 8-bit library has been built, this option causes it to - be used (this is the default). If the 8-bit library has not + be used (this is the default). If the 8-bit library has not been built, this option causes an error. - -16 If the 16-bit library has been built, this option causes it - to be used. If only the 16-bit library has been built, this - is the default. If the 16-bit library has not been built, + -16 If the 16-bit library has been built, this option causes it + to be used. 
If only the 16-bit library has been built, this + is the default. If the 16-bit library has not been built, this option causes an error. - -32 If the 32-bit library has been built, this option causes it - to be used. If only the 32-bit library has been built, this - is the default. If the 32-bit library has not been built, + -32 If the 32-bit library has been built, this option causes it + to be used. If only the 32-bit library has been built, this + is the default. If the 32-bit library has not been built, this option causes an error. - -b Behave as if each pattern has the /fullbincode modifier; the + -ac Behave as if each pattern has the auto_callout modifier, that + is, insert automatic callouts into every pattern that is com- + piled. + + -b Behave as if each pattern has the fullbincode modifier; the full internal binary form of the pattern is output after com- pilation. - -C Output the version number of the PCRE2 library, and all - available information about the optional features that are - included, and then exit with zero exit code. All other + -C Output the version number of the PCRE2 library, and all + available information about the optional features that are + included, and then exit with zero exit code. All other options are ignored. - -C option Output information about a specific build-time option, then - exit. This functionality is intended for use in scripts such - as RunTest. The following options output the value and set + -C option Output information about a specific build-time option, then + exit. This functionality is intended for use in scripts such + as RunTest. 
The following options output the value and set the exit code as indicated: ebcdic-nl the code for LF (= NL) in an EBCDIC environment: @@ -116,34 +147,42 @@ COMMAND LINE OPTIONS ANYCRLF or ANY exit code is always 0 - The following options output 1 for true or 0 for false, and + The following options output 1 for true or 0 for false, and set the exit code to the same value: - ebcdic compiled for an EBCDIC environment - jit just-in-time support is available - pcre2-16 the 16-bit library was built - pcre2-32 the 32-bit library was built - pcre2-8 the 8-bit library was built - unicode Unicode support is available + backslash-C \C is supported (not locked out) + ebcdic compiled for an EBCDIC environment + jit just-in-time support is available + pcre2-16 the 16-bit library was built + pcre2-32 the 32-bit library was built + pcre2-8 the 8-bit library was built + unicode Unicode support is available - If an unknown option is given, an error message is output; + If an unknown option is given, an error message is output; the exit code is 0. - -d Behave as if each pattern has the debug modifier; the inter- + -d Behave as if each pattern has the debug modifier; the inter- nal form and information about the compiled pattern is output after compilation; -d is equivalent to -b -i. -dfa Behave as if each subject line has the dfa modifier; matching - is done using the pcre2_dfa_match() function instead of the + is done using the pcre2_dfa_match() function instead of the default pcre2_match(). + -error number[,number,...] + Call pcre2_get_error_message() for each of the error numbers + in the comma-separated list, display the resulting messages + on the standard output, then exit with zero exit code. The + numbers may be positive or negative. This is a convenience + facility for PCRE2 maintainers. + -help Output a brief summary these options and then exit. 
- -i Behave as if each pattern has the /info modifier; information + -i Behave as if each pattern has the info modifier; information about the compiled pattern is given after compilation. - -jit Behave as if each pattern line has the jit modifier; after - successful compilation, each pattern is passed to the just- + -jit Behave as if each pattern line has the jit modifier; after + successful compilation, each pattern is passed to the just- in-time compiler, if available. -pattern modifier-list @@ -152,25 +191,25 @@ COMMAND LINE OPTIONS -q Do not output the version number of pcre2test at the start of execution. - -S size On Unix-like systems, set the size of the run-time stack to + -S size On Unix-like systems, set the size of the run-time stack to size megabytes. -subject modifier-list Behave as if each subject line contains the given modifiers. - -t Run each compile and match many times with a timer, and out- - put the resulting times per compile or match. When JIT is - used, separate times are given for the initial compile and - the JIT compile. You can control the number of iterations - that are used for timing by following -t with a number (as a - separate item on the command line). For example, "-t 1000" + -t Run each compile and match many times with a timer, and out- + put the resulting times per compile or match. When JIT is + used, separate times are given for the initial compile and + the JIT compile. You can control the number of iterations + that are used for timing by following -t with a number (as a + separate item on the command line). For example, "-t 1000" iterates 1000 times. The default is to iterate 500,000 times. -tm This is like -t except that it times only the matching phase, not the compile phase. 
- -T -TM These behave like -t and -tm, but in addition, at the end of - a run, the total times for all compiles and matches are out- + -T -TM These behave like -t and -tm, but in addition, at the end of + a run, the total times for all compiles and matches are out- put. -version Output the PCRE2 version number and then exit. @@ -178,38 +217,39 @@ COMMAND LINE OPTIONS DESCRIPTION - If pcre2test is given two filename arguments, it reads from the first + If pcre2test is given two filename arguments, it reads from the first and writes to the second. If the first name is "-", input is taken from - the standard input. If pcre2test is given only one argument, it reads + the standard input. If pcre2test is given only one argument, it reads from that file and writes to stdout. Otherwise, it reads from stdin and writes to stdout. - When pcre2test is built, a configuration option can specify that it - should be linked with the libreadline or libedit library. When this is - done, if the input is from a terminal, it is read using the readline() + When pcre2test is built, a configuration option can specify that it + should be linked with the libreadline or libedit library. When this is + done, if the input is from a terminal, it is read using the readline() function. This provides line-editing and history facilities. The output from the -help option states whether or not readline() will be used. - The program handles any number of tests, each of which consists of a - set of input lines. Each set starts with a regular expression pattern, + The program handles any number of tests, each of which consists of a + set of input lines. Each set starts with a regular expression pattern, followed by any number of subject lines to be matched against that pat- tern. In between sets of test data, command lines that begin with # may appear. 
This file format, with some restrictions, can also be processed - by the perltest.sh script that is distributed with PCRE2 as a means of + by the perltest.sh script that is distributed with PCRE2 as a means of checking that the behaviour of PCRE2 and Perl is the same. When the input is a terminal, pcre2test prompts for each line of input, - using "re>" to prompt for regular expression patterns, and "data>" to - prompt for subject lines. Command lines starting with # can be entered + using "re>" to prompt for regular expression patterns, and "data>" to + prompt for subject lines. Command lines starting with # can be entered only in response to the "re>" prompt. - Each subject line is matched separately and independently. If you want + Each subject line is matched separately and independently. If you want to do multi-line matches, you have to use the \n escape sequence (or \r - or \r\n, etc., depending on the newline setting) in a single line of - input to encode the newline sequences. There is no limit on the length - of subject lines; the input buffer is automatically extended if it is - too small. There is a replication feature that makes it possible to - generate long subject lines without having to supply them explicitly. + or \r\n, etc., depending on the newline setting) in a single line of + input to encode the newline sequences. There is no limit on the length + of subject lines; the input buffer is automatically extended if it is + too small. There are replication features that makes it possible to + generate long repetitive pattern or subject lines without having to + supply them explicitly. An empty line or the end of the file signals the end of the subject lines for a test, at which point a new pattern or command line is @@ -247,6 +287,36 @@ COMMAND LINES as described in the section entitled "Saving and restoring compiled patterns" below. + #newline_default [] + + When PCRE2 is built, a default newline convention can be specified. 
+ This determines which characters and/or character pairs are recognized + as indicating a newline in a pattern or subject string. The default can + be overridden when a pattern is compiled. The standard test files con- + tain tests of various newline conventions, but the majority of the + tests expect a single linefeed to be recognized as a newline by + default. Without special action the tests would fail when PCRE2 is com- + piled with either CR or CRLF as the default newline. + + The #newline_default command specifies a list of newline types that are + acceptable as the default. The types must be one of CR, LF, CRLF, ANY- + CRLF, or ANY (in upper or lower case), for example: + + #newline_default LF Any anyCRLF + + If the default newline is in the list, this command has no effect. Oth- + erwise, except when testing the POSIX API, a newline modifier that + specifies the first newline convention in the list (LF in the above + example) is added to any pattern that does not already have a newline + modifier. If the newline list is empty, the feature is turned off. This + command is present in a number of the standard test input files. + + When the POSIX API is being tested there is no way to override the + default newline convention, though it is possible to set the newline + convention from within the pattern. A warning is given if the posix + modifier is used when #newline_default would set a default for the non- + POSIX API. + #pattern This command sets a default modifier list that applies to all subse- @@ -264,10 +334,11 @@ COMMAND LINES wrong file. #pop [] + #popcopy [] - This command is used to manipulate the stack of compiled patterns, as - described in the section entitled "Saving and restoring compiled pat- - terns" below. + These commands are used to manipulate the stack of compiled patterns, + as described in the section entitled "Saving and restoring compiled + patterns" below. 
#save @@ -285,12 +356,14 @@ COMMAND LINES MODIFIER SYNTAX Modifier lists are used with both pattern and subject lines. Items in a - list are separated by commas and optional white space. Some modifiers - may be given for both patterns and subject lines, whereas others are - valid for one or the other only. Each modifier has a long name, for - example "anchored", and some of them must be followed by an equals sign - and a value, for example, "offset=12". Modifiers that do not take val- - ues may be preceded by a minus sign to turn off a previous setting. + list are separated by commas followed by optional white space. Trailing + whitespace in a modifier list is ignored. Some modifiers may be given + for both patterns and subject lines, whereas others are valid only for + one or the other. Each modifier has a long name, for example + "anchored", and some of them must be followed by an equals sign and a + value, for example, "offset=12". Values cannot contain comma charac- + ters, but may contain spaces. Modifiers that do not take values may be + preceded by a minus sign to turn off a previous setting. A few of the more common modifiers can also be specified as single let- ters, for example "i" for "caseless". In documentation, following the @@ -400,6 +473,13 @@ SUBJECT LINE SYNTAX abc\=notbol,notempty + If the subject string is empty and \= is followed by whitespace, the + line is treated as a comment line, and is not used for matching. For + example: + + \= This is a comment. + abc\= This is an invalid modifier list. + A backslash followed by any other non-alphanumeric character just escapes that character. A backslash followed by anything else causes an error. However, if the very last character in the line is a backslash @@ -410,20 +490,21 @@ SUBJECT LINE SYNTAX PATTERN MODIFIERS - There are three types of modifier that can appear in pattern lines, two - of which may also be used in a #pattern command. 
A pattern's modifier - list can add to or override default modifiers that were set by a previ- - ous #pattern command. + There are several types of modifier that can appear in pattern lines. + Except where noted below, they may also be used in #pattern commands. A + pattern's modifier list can add to or override default modifiers that + were set by a previous #pattern command. Setting compilation options The following modifiers set options for pcre2_compile(). The most com- - mon ones have single-letter abbreviations. See pcreapi for a descrip- + mon ones have single-letter abbreviations. See pcre2api for a descrip- tion of their effects. allow_empty_class set PCRE2_ALLOW_EMPTY_CLASS alt_bsux set PCRE2_ALT_BSUX alt_circumflex set PCRE2_ALT_CIRCUMFLEX + alt_verbnames set PCRE2_ALT_VERBNAMES anchored set PCRE2_ANCHORED auto_callout set PCRE2_AUTO_CALLOUT /i caseless set PCRE2_CASELESS @@ -444,12 +525,15 @@ PATTERN MODIFIERS no_utf_check set PCRE2_NO_UTF_CHECK ucp set PCRE2_UCP ungreedy set PCRE2_UNGREEDY + use_offset_limit set PCRE2_USE_OFFSET_LIMIT utf set PCRE2_UTF As well as turning on the PCRE2_UTF option, the utf modifier causes all non-printing characters in output strings to be printed using the \x{hh...} notation. Otherwise, those less than 0x100 are output in hex - without the curly brackets. + without the curly brackets. Setting utf in 16-bit or 32-bit mode also + causes pattern and subject strings to be translated to UTF-16 or + UTF-32, respectively, before being passed to library functions. 
Setting compilation controls @@ -462,18 +546,24 @@ PATTERN MODIFIERS debug same as info,fullbincode fullbincode show binary code with lengths /I info show info about compiled pattern - hex pattern is coded in hexadecimal + hex unquoted characters are hexadecimal jit[=] use JIT jitfast use JIT fast path jitverify verify JIT use locale= use this locale + max_pattern_length= set the maximum pattern length memory show memory used newline= set newline type + null_context compile with a NULL context parens_nest_limit= set maximum parentheses depth posix use the POSIX API + posix_nosub use the POSIX API with REG_NOSUB push push compiled pattern onto the stack + pushcopy push a copy onto the stack stackguard= test the stackguard feature tables=[0|1|2] select internal tables + use_length do not zero-terminate the pattern + utf8_input treat input as UTF-8 The effects of these modifiers are described in the following sections. @@ -539,39 +629,129 @@ PATTERN MODIFIERS mation that is requested. For each callout, either its number or string is given, followed by the item that follows it in the pattern. - Specifying a pattern in hex + Passing a NULL context - The hex modifier specifies that the characters of the pattern are to be - interpreted as pairs of hexadecimal digits. White space is permitted - between pairs. For example: + Normally, pcre2test passes a context block to pcre2_compile(). If the + null_context modifier is set, however, NULL is passed. This is for + testing that pcre2_compile() behaves correctly in this case (it uses + default values). + + Specifying the pattern's length + + By default, patterns are passed to the compiling functions as zero-ter- + minated strings. When using the POSIX wrapper API, there is no other + option. However, when using PCRE2's native API, patterns can be passed + by length instead of being zero-terminated. The use_length modifier + causes this to happen. 
Using a length happens automatically (whether + or not use_length is set) when hex is set, because patterns specified + in hexadecimal may contain binary zeros. + + Specifying pattern characters in hexadecimal + + The hex modifier specifies that the characters of the pattern, except + for substrings enclosed in single or double quotes, are to be inter- + preted as pairs of hexadecimal digits. This feature is provided as a + way of creating patterns that contain binary zeros and other non-print- + ing characters. White space is permitted between pairs of digits. For + example, this pattern contains three characters: /ab 32 59/hex - This feature is provided as a way of creating patterns that contain - binary zero and other non-printing characters. By default, pcre2test - passes patterns as zero-terminated strings to pcre2_compile(), giving - the length as PCRE2_ZERO_TERMINATED. However, for patterns specified in - hexadecimal, the actual length of the pattern is passed. + Parts of such a pattern are taken literally if quoted. This pattern + contains nine characters, only two of which are specified in hexadeci- + mal: + + /ab "literal" 32/hex + + Either single or double quotes may be used. There is no way of includ- + ing the delimiter within a substring. The hex and expand modifiers are + mutually exclusive. + + The POSIX API cannot be used with patterns specified in hexadecimal + because they may contain binary zeros, which conflicts with regcomp()'s + requirement for a zero-terminated string. Such patterns are always + passed to pcre2_compile() as a string with a length, not as zero-termi- + nated. + + Specifying wide characters in 16-bit and 32-bit modes + + In 16-bit and 32-bit modes, all input is automatically treated as UTF-8 + and translated to UTF-16 or UTF-32 when the utf modifier is set. For + testing the 16-bit and 32-bit libraries in non-UTF mode, the utf8_input + modifier can be used. It is mutually exclusive with utf. 
Input lines + are interpreted as UTF-8 as a means of specifying wide characters. More + details are given in "Input encoding" above. + + Generating long repetitive patterns + + Some tests use long patterns that are very repetitive. Instead of cre- + ating a very long input line for such a pattern, you can use a special + repetition feature, similar to the one described for subject lines + above. If the expand modifier is present on a pattern, parts of the + pattern that have the form + + \[]{} + + are expanded before the pattern is passed to pcre2_compile(). For exam- + ple, \[AB]{6000} is expanded to "ABAB..." 6000 times. This construction + cannot be nested. An initial "\[" sequence is recognized only if "]{" + followed by decimal digits and "}" is found later in the pattern. If + not, the characters remain in the pattern unaltered. The expand and hex + modifiers are mutually exclusive. + + If part of an expanded pattern looks like an expansion, but is really + part of the actual pattern, unwanted expansion can be avoided by giving + two values in the quantifier. For example, \[AB]{6000,6000} is not rec- + ognized as an expansion item. + + If the info modifier is set on an expanded pattern, the result of the + expansion is included in the information that is output. JIT compilation - The /jit modifier may optionally be followed by an equals sign and a - number in the range 0 to 7: + Just-in-time (JIT) compiling is a heavyweight optimization that can + greatly speed up pattern matching. See the pcre2jit documentation for + details. JIT compiling happens, optionally, after a pattern has been + successfully compiled into an internal form. The JIT compiler converts + this to optimized machine code. It needs to know whether the match-time + options PCRE2_PARTIAL_HARD and PCRE2_PARTIAL_SOFT are going to be used, + because different code is generated for the different cases. 
See the + partial modifier in "Subject Modifiers" below for details of how these + options are specified for each match attempt. + + JIT compilation is requested by the /jit pattern modifier, which may + optionally be followed by an equals sign and a number in the range 0 to + 7. The three bits that make up the number specify which of the three + JIT operating modes are to be compiled: + + 1 compile JIT code for non-partial matching + 2 compile JIT code for soft partial matching + 4 compile JIT code for hard partial matching + + The possible values for the jit modifier are therefore: 0 disable JIT - 1 use JIT for normal match only - 2 use JIT for soft partial match only - 3 use JIT for normal match and soft partial match - 4 use JIT for hard partial match only - 6 use JIT for soft and hard partial match + 1 normal matching only + 2 soft partial matching only + 3 normal and soft partial matching + 4 hard partial matching only + 6 soft and hard partial matching only 7 all three modes - If no number is given, 7 is assumed. If JIT compilation is successful, - the compiled JIT code will automatically be used when pcre2_match() is - run for the appropriate type of match, except when incompatible run- - time options are specified. For more details, see the pcre2jit documen- - tation. See also the jitstack modifier below for a way of setting the - size of the JIT stack. + If no number is given, 7 is assumed. The phrase "partial matching" + means a call to pcre2_match() with either the PCRE2_PARTIAL_SOFT or the + PCRE2_PARTIAL_HARD option set. Note that such a call may return a com- + plete match; the options enable the possibility of a partial match, but + do not require it. Note also that if you request JIT compilation only + for partial matching (for example, /jit=2) but do not set the partial + modifier on a subject line, that match will not use JIT code because + none was compiled for non-partial matching. 
+ + If JIT compilation is successful, the compiled JIT code will automati- + cally be used when an appropriate type of match is run, except when + incompatible run-time options are specified. For more details, see the + pcre2jit documentation. See also the jitstack modifier below for a way + of setting the size of the JIT stack. If the jitfast modifier is specified, matching is done using the JIT "fast path" interface, pcre2_jit_match(), which skips some of the san- @@ -588,24 +768,24 @@ PATTERN MODIFIERS Setting a locale - The /locale modifier must specify the name of a locale, for example: + The locale modifier must specify the name of a locale, for example: /pattern/locale=fr_FR The given locale is set, pcre2_maketables() is called to build a set of character tables for the locale, and this is then passed to pcre2_com- pile() when compiling the regular expression. The same tables are used - when matching the following subject lines. The /locale modifier applies + when matching the following subject lines. The locale modifier applies only to the pattern on which it appears, but can be given in a #pattern command if a default is needed. Setting a locale and alternate charac- ter tables are mutually exclusive. Showing pattern memory - The /memory modifier causes the size in bytes of the memory used to - hold the compiled pattern to be output. This does not include the size - of the pcre2_code block; it is just the actual compiled data. If the - pattern is subsequently passed to the JIT compiler, the size of the JIT + The memory modifier causes the size in bytes of the memory used to hold + the compiled pattern to be output. This does not include the size of + the pcre2_code block; it is just the actual compiled data. If the pat- + tern is subsequently passed to the JIT compiler, the size of the JIT compiled code is also output. 
Here is an example: re> /a(b)c/jit,memory @@ -621,39 +801,59 @@ PATTERN MODIFIERS pcre2test sets its own default of 220, which is required for running the standard test suite. + Limiting the pattern length + + The max_pattern_length modifier sets a limit, in code units, to the + length of pattern that pcre2_compile() will accept. Breaching the limit + causes a compilation error. The default is the largest number a + PCRE2_SIZE variable can hold (essentially unlimited). + Using the POSIX wrapper API - The /posix modifier causes pcre2test to call PCRE2 via the POSIX wrap- - per API rather than its native API. This supports only the 8-bit - library. When the POSIX API is being used, the following pattern modi- - fiers set options for the regcomp() function: + The /posix and posix_nosub modifiers cause pcre2test to call PCRE2 via + the POSIX wrapper API rather than its native API. When posix_nosub is + used, the POSIX option REG_NOSUB is passed to regcomp(). The POSIX + wrapper supports only the 8-bit library. Note that it does not imply + POSIX matching semantics; for more detail see the pcre2posix documenta- + tion. The following pattern modifiers set options for the regcomp() + function: caseless REG_ICASE multiline REG_NEWLINE - no_auto_capture REG_NOSUB dotall REG_DOTALL ) ungreedy REG_UNGREEDY ) These options are not part of ucp REG_UCP ) the POSIX standard utf REG_UTF8 ) + The regerror_buffsize modifier specifies a size for the error buffer + that is passed to regerror() in the event of a compilation error. For + example: + + /abc/posix,regerror_buffsize=20 + + This provides a means of testing the behaviour of regerror() when the + buffer is too small for the error message. If this modifier has not + been set, a large buffer is used. + The aftertext and allaftertext subject modifiers work as described - below. All other modifiers cause an error. + below. All other modifiers are either ignored, with a warning message, + or cause an error. 
Testing the stack guard feature - The /stackguard modifier is used to test the use of pcre2_set_com- - pile_recursion_guard(), a function that is provided to enable stack - availability to be checked during compilation (see the pcre2api docu- - mentation for details). If the number specified by the modifier is + The stackguard modifier is used to test the use of pcre2_set_com- + pile_recursion_guard(), a function that is provided to enable stack + availability to be checked during compilation (see the pcre2api docu- + mentation for details). If the number specified by the modifier is greater than zero, pcre2_set_compile_recursion_guard() is called to set - up callback from pcre2_compile() to a local function. The argument it - receives is the current nesting parenthesis depth; if this is greater + up callback from pcre2_compile() to a local function. The argument it + receives is the current nesting parenthesis depth; if this is greater than the value given by the modifier, non-zero is returned, causing the compilation to be aborted. Using alternative character tables - The value specified for the /tables modifier must be one of the digits + The value specified for the tables modifier must be one of the digits 0, 1, or 2. It causes a specific set of built-in character tables to be passed to pcre2_compile(). This is used in the PCRE2 tests to check be- haviour with different character tables. The digit specifies the tables @@ -664,25 +864,30 @@ PATTERN MODIFIERS pcre2_chartables.c.dist 2 a set of tables defining ISO 8859 characters - In table 2, some characters whose codes are greater than 128 are iden- - tified as letters, digits, spaces, etc. Setting alternate character + In table 2, some characters whose codes are greater than 128 are iden- + tified as letters, digits, spaces, etc. Setting alternate character tables and a locale are mutually exclusive. Setting certain match controls The following modifiers are really subject modifiers, and are described - below. 
However, they may be included in a pattern's modifier list, in - which case they are applied to every subject line that is processed - with that pattern. They do not affect the compilation process. + below. However, they may be included in a pattern's modifier list, in + which case they are applied to every subject line that is processed + with that pattern. They may not appear in #pattern commands. These mod- + ifiers do not affect the compilation process. - aftertext show text after match - allaftertext show text after captures - allcaptures show all captures - allusedtext show all consulted text - /g global global matching - mark show mark values - replace= specify a replacement string - startchar show starting character when relevant + aftertext show text after match + allaftertext show text after captures + allcaptures show all captures + allusedtext show all consulted text + /g global global matching + mark show mark values + replace= specify a replacement string + startchar show starting character when relevant + substitute_extended use PCRE2_SUBSTITUTE_EXTENDED + substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH + substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET + substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY These modifiers may not appear in a #pattern command. If you want them as defaults, set them in a #subject command. @@ -694,11 +899,15 @@ PATTERN MODIFIERS next line to contain a new pattern (or a command) instead of a subject line. This facility is used when saving compiled patterns to a file, as described in the section entitled "Saving and restoring compiled pat- - terns" below. The push modifier is incompatible with compilation modi- - fiers such as global that act at match time. Any that are specified are - ignored, with a warning message, except for replace, which causes an - error. Note that, jitverify, which is allowed, does not carry through - to any subsequent matching that uses this pattern. + terns" below. 
If pushcopy is used instead of push, a copy of the com- + piled pattern is stacked, leaving the original as current, ready to + match the following input lines. This provides a way of testing the + pcre2_code_copy() function. The push and pushcopy modifiers are + incompatible with compilation modifiers such as global that act at + match time. Any that are specified are ignored (for the stacked copy), + with a warning message, except for replace, which causes an error. Note + that jitverify, which is allowed, does not carry through to any subse- + quent matching that uses a stacked pattern. SUBJECT MODIFIERS @@ -714,6 +923,7 @@ SUBJECT MODIFIERS anchored set PCRE2_ANCHORED dfa_restart set PCRE2_DFA_RESTART dfa_shortest set PCRE2_DFA_SHORTEST + no_jit set PCRE2_NO_JIT no_utf_check set PCRE2_NO_UTF_CHECK notbol set PCRE2_NOTBOL notempty set PCRE2_NOTEMPTY @@ -725,11 +935,11 @@ SUBJECT MODIFIERS The partial matching modifiers are provided with abbreviations because they appear frequently in tests. - If the /posix modifier was present on the pattern, causing the POSIX + If the posix modifier was present on the pattern, causing the POSIX wrapper API to be used, the only option-setting modifiers that have any effect are notbol, notempty, and noteol, causing REG_NOTBOL, REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to regexec(). - Any other modifiers cause an error. + The other modifiers are ignored, with a warning message. Setting match controls @@ -738,53 +948,64 @@ SUBJECT MODIFIERS line (see above), in which case they apply to every subject line that is matched against that pattern. 
- aftertext show text after match - allaftertext show text after captures - allcaptures show all captures - allusedtext show all consulted text (non-JIT only) - altglobal alternative global matching - callout_capture show captures at callout time - callout_data= set a value to pass via callouts - callout_fail=[:] control callout failure - callout_none do not supply a callout function - copy= copy captured substring - dfa use pcre2_dfa_match() - find_limits find match and recursion limits - get= extract captured substring - getall extract all captured substrings - /g global global matching - jitstack= set size of JIT stack - mark show mark values - match_limit=>n> set a match limit - memory show memory usage - offset= set starting offset - ovector= set size of output vector - recursion_limit= set a recursion limit - replace= specify a replacement string - startchar show startchar when relevant - zero_terminate pass the subject as zero-terminated + aftertext show text after match + allaftertext show text after captures + allcaptures show all captures + allusedtext show all consulted text (non-JIT only) + altglobal alternative global matching + callout_capture show captures at callout time + callout_data= set a value to pass via callouts + callout_error=[:] control callout error + callout_fail=[:] control callout failure + callout_none do not supply a callout function + copy= copy captured substring + dfa use pcre2_dfa_match() + find_limits find match and recursion limits + get= extract captured substring + getall extract all captured substrings + /g global global matching + jitstack= set size of JIT stack + mark show mark values + match_limit= set a match limit + memory show memory usage + null_context match with a NULL context + offset= set starting offset + offset_limit= set offset limit + ovector= set size of output vector + recursion_limit= set a recursion limit + replace= specify a replacement string + startchar show startchar when relevant + startoffset= same 
as offset= + substitute_extended use PCRE2_SUBSTITUTE_EXTENDED + substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH + substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET + substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY + zero_terminate pass the subject as zero-terminated The effects of these modifiers are described in the following sections. + When matching via the POSIX wrapper API, the aftertext, allaftertext, + and ovector subject modifiers work as described below. All other modi- + fiers are either ignored, with a warning message, or cause an error. Showing more text - The aftertext modifier requests that as well as outputting the part of + The aftertext modifier requests that as well as outputting the part of the subject string that matched the entire pattern, pcre2test should in addition output the remainder of the subject string. This is useful for tests where the subject contains multiple copies of the same substring. - The allaftertext modifier requests the same action for captured sub- + The allaftertext modifier requests the same action for captured sub- strings as well as the main matched substring. In each case the remain- der is output on the following line with a plus character following the capture number. - The allusedtext modifier requests that all the text that was consulted - during a successful pattern match by the interpreter should be shown. - This feature is not supported for JIT matching, and if requested with - JIT it is ignored (with a warning message). Setting this modifier + The allusedtext modifier requests that all the text that was consulted + during a successful pattern match by the interpreter should be shown. + This feature is not supported for JIT matching, and if requested with + JIT it is ignored (with a warning message). Setting this modifier affects the output if there is a lookbehind at the start of a match, or - a lookahead at the end, or if \K is used in the pattern.
Characters - that precede or follow the start and end of the actual match are indi- - cated in the output by '<' or '>' characters underneath them. Here is + a lookahead at the end, or if \K is used in the pattern. Characters + that precede or follow the start and end of the actual match are indi- + cated in the output by '<' or '>' characters underneath them. Here is an example: re> /(?<=pqr)abc(?=xyz)/ @@ -792,16 +1013,16 @@ SUBJECT MODIFIERS 0: pqrabcxyz <<< >>> - This shows that the matched string is "abc", with the preceding and - following strings "pqr" and "xyz" having been consulted during the + This shows that the matched string is "abc", with the preceding and + following strings "pqr" and "xyz" having been consulted during the match (when processing the assertions). - The startchar modifier requests that the starting character for the - match be indicated, if it is different to the start of the matched + The startchar modifier requests that the starting character for the + match be indicated, if it is different to the start of the matched string. The only time when this occurs is when \K has been processed as part of the match. In this situation, the output for the matched string - is displayed from the starting character instead of from the match - point, with circumflex characters under the earlier characters. For + is displayed from the starting character instead of from the match + point, with circumflex characters under the earlier characters. For example: re> /abc\Kxyz/ @@ -809,7 +1030,7 @@ SUBJECT MODIFIERS 0: abcxyz ^^^ - Unlike allusedtext, the startchar modifier can be used with JIT. How- + Unlike allusedtext, the startchar modifier can be used with JIT. How- ever, these two modifiers are mutually exclusive. Showing the value of all capture groups @@ -817,89 +1038,110 @@ SUBJECT MODIFIERS The allcaptures modifier requests that the values of all potential cap- tured parentheses be output after a match. 
By default, only those up to the highest one actually used in the match are output (corresponding to - the return code from pcre2_match()). Groups that did not take part in - the match are output as "<unset>". + the return code from pcre2_match()). Groups that did not take part in + the match are output as "<unset>". This modifier is not relevant for + DFA matching (which does no capturing); it is ignored, with a warning + message, if present. Testing callouts - A callout function is supplied when pcre2test calls the library match- - ing functions, unless callout_none is specified. If callout_capture is - set, the current captured groups are output when a callout occurs. + A callout function is supplied when pcre2test calls the library match- + ing functions, unless callout_none is specified. If callout_capture is + set, the current captured groups are output when a callout occurs. The + default return from the callout function is zero, which allows matching + to continue. - The callout_fail modifier can be given one or two numbers. If there is - only one number, 1 is returned instead of 0 when a callout of that num- - ber is reached. If two numbers are given, 1 is returned when callout <n> - is reached for the <m>th time. Note that callouts with string argu- - ments are always given the number zero. See "Callouts" below for a - description of the output when a callout it taken. + The callout_fail modifier can be given one or two numbers. If there is + only one number, 1 is returned instead of 0 (causing matching to back- + track) when a callout of that number is reached. If two numbers + (<n>:<m>) are given, 1 is returned when callout <n> is reached and + there have been at least <m> callouts. The callout_error modifier is + similar, except that PCRE2_ERROR_CALLOUT is returned, causing the + entire matching process to be aborted. If both these modifiers are set + for the same callout number, callout_error takes precedence.
- The callout_data modifier can be given an unsigned or a negative num- - ber. This is set as the "user data" that is passed to the matching - function, and passed back when the callout function is invoked. Any - value other than zero is used as a return from pcre2test's callout + Note that callouts with string arguments are always given the number + zero. See "Callouts" below for a description of the output when a call- + out is taken. + + The callout_data modifier can be given an unsigned or a negative num- + ber. This is set as the "user data" that is passed to the matching + function, and passed back when the callout function is invoked. Any + value other than zero is used as a return from pcre2test's callout function. Finding all matches in a string Searching for all possible matches within a subject can be requested by - the global or /altglobal modifier. After finding a match, the matching - function is called again to search the remainder of the subject. The - difference between global and altglobal is that the former uses the - start_offset argument to pcre2_match() or pcre2_dfa_match() to start - searching at a new point within the entire string (which is what Perl + the global or altglobal modifier. After finding a match, the matching + function is called again to search the remainder of the subject. The + difference between global and altglobal is that the former uses the + start_offset argument to pcre2_match() or pcre2_dfa_match() to start + searching at a new point within the entire string (which is what Perl does), whereas the latter passes over a shortened subject. This makes a difference to the matching process if the pattern begins with a lookbe- hind assertion (including \b or \B). - If an empty string is matched, the next match is done with the + If an empty string is matched, the next match is done with the PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set, in order to search for another, non-empty, match at the same point in the subject.
If this - match fails, the start offset is advanced, and the normal match is - retried. This imitates the way Perl handles such cases when using the - /g modifier or the split() function. Normally, the start offset is - advanced by one character, but if the newline convention recognizes - CRLF as a newline, and the current character is CR followed by LF, an + match fails, the start offset is advanced, and the normal match is + retried. This imitates the way Perl handles such cases when using the + /g modifier or the split() function. Normally, the start offset is + advanced by one character, but if the newline convention recognizes + CRLF as a newline, and the current character is CR followed by LF, an advance of two characters occurs. Testing substring extraction functions - The copy and get modifiers can be used to test the pcre2_sub- + The copy and get modifiers can be used to test the pcre2_sub- string_copy_xxx() and pcre2_substring_get_xxx() functions. They can be - given more than once, and each can specify a group name or number, for + given more than once, and each can specify a group name or number, for example: abcd\=copy=1,copy=3,get=G1 - If the #subject command is used to set default copy and/or get lists, - these can be unset by specifying a negative number to cancel all num- + If the #subject command is used to set default copy and/or get lists, + these can be unset by specifying a negative number to cancel all num- bered groups and an empty name to cancel all named groups. - The getall modifier tests pcre2_substring_list_get(), which extracts + The getall modifier tests pcre2_substring_list_get(), which extracts all captured substrings. - If the subject line is successfully matched, the substrings extracted - by the convenience functions are output with C, G, or L after the - string number instead of a colon. This is in addition to the normal - full list. 
The string length (that is, the return from the extraction + If the subject line is successfully matched, the substrings extracted + by the convenience functions are output with C, G, or L after the + string number instead of a colon. This is in addition to the normal + full list. The string length (that is, the return from the extraction function) is given in parentheses after each substring, followed by the name when the extraction was by name. Testing the substitution function - If the replace modifier is set, the pcre2_substitute() function is - called instead of one of the matching functions. Unlike subject - strings, pcre2test does not process replacement strings for escape - sequences. In UTF mode, a replacement string is checked to see if it is - a valid UTF-8 string. If so, it is correctly converted to a UTF string - of the appropriate code unit width. If it is not a valid UTF-8 string, - the individual code units are copied directly. This provides a means of - passing an invalid UTF-8 string for testing purposes. + If the replace modifier is set, the pcre2_substitute() function is + called instead of one of the matching functions. Note that replacement + strings cannot contain commas, because a comma signifies the end of a + modifier. This is not thought to be an issue in a test program. - If the global modifier is set, PCRE2_SUBSTITUTE_GLOBAL is passed to - pcre2_substitute(). After a successful substitution, the modified - string is output, preceded by the number of replacements. This may be - zero if there were no matches. Here is a simple example of a substitu- - tion test: + Unlike subject strings, pcre2test does not process replacement strings + for escape sequences. In UTF mode, a replacement string is checked to + see if it is a valid UTF-8 string. If so, it is correctly converted to + a UTF string of the appropriate code unit width. If it is not a valid + UTF-8 string, the individual code units are copied directly. 
This pro- + vides a means of passing an invalid UTF-8 string for testing purposes. + + The following modifiers set options (in addition to the normal match + options) for pcre2_substitute(): + + global PCRE2_SUBSTITUTE_GLOBAL + substitute_extended PCRE2_SUBSTITUTE_EXTENDED + substitute_overflow_length PCRE2_SUBSTITUTE_OVERFLOW_LENGTH + substitute_unknown_unset PCRE2_SUBSTITUTE_UNKNOWN_UNSET + substitute_unset_empty PCRE2_SUBSTITUTE_UNSET_EMPTY + + + After a successful substitution, the modified string is output, pre- + ceded by the number of replacements. This may be zero if there were no + matches. Here is a simple example of a substitution test: /abc/replace=xxx =abc=abc= @@ -907,12 +1149,13 @@ SUBJECT MODIFIERS =abc=abc=\=global 2: =xxx=xxx= - Subject and replacement strings should be kept relatively short for - substitution tests, as fixed-size buffers are used. To make it easy to - test for buffer overflow, if the replacement string starts with a num- - ber in square brackets, that number is passed to pcre2_substitute() as - the size of the output buffer, with the replacement string starting at - the next character. Here is an example that tests the edge case: + Subject and replacement strings should be kept relatively short (fewer + than 256 characters) for substitution tests, as fixed-size buffers are + used. To make it easy to test for buffer overflow, if the replacement + string starts with a number in square brackets, that number is passed + to pcre2_substitute() as the size of the output buffer, with the + replacement string starting at the next character. Here is an example + that tests the edge case: /abc/ 123abc123\=replace=[10]XYZ @@ -920,91 +1163,121 @@ SUBJECT MODIFIERS 123abc123\=replace=[9]XYZ Failed: error -47: no more memory + The default action of pcre2_substitute() is to return + PCRE2_ERROR_NOMEMORY when the output buffer is too small.
However, if + the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set (by using the sub- + stitute_overflow_length modifier), pcre2_substitute() continues to go + through the motions of matching and substituting, in order to compute + the size of buffer that is required. When this happens, pcre2test shows + the required buffer length (which includes space for the trailing zero) + as part of the error message. For example: + + /abc/substitute_overflow_length + 123abc123\=replace=[9]XYZ + Failed: error -47: no more memory: 10 code units are needed + A replacement string is ignored with POSIX and DFA matching. Specifying - partial matching provokes an error return ("bad option value") from + partial matching provokes an error return ("bad option value") from pcre2_substitute(). Setting the JIT stack size - The jitstack modifier provides a way of setting the maximum stack size - that is used by the just-in-time optimization code. It is ignored if + The jitstack modifier provides a way of setting the maximum stack size + that is used by the just-in-time optimization code. It is ignored if JIT optimization is not being used. The value is a number of kilobytes. Providing a stack that is larger than the default 32K is necessary only for very complicated patterns. Setting match and recursion limits - The match_limit and recursion_limit modifiers set the appropriate lim- + The match_limit and recursion_limit modifiers set the appropriate lim- its in the match context. These values are ignored when the find_limits modifier is specified. 
Finding minimum limits - If the find_limits modifier is present, pcre2test calls pcre2_match() - several times, setting different values in the match context via - pcre2_set_match_limit() and pcre2_set_recursion_limit() until it finds - the minimum values for each parameter that allow pcre2_match() to com- + If the find_limits modifier is present, pcre2test calls pcre2_match() + several times, setting different values in the match context via + pcre2_set_match_limit() and pcre2_set_recursion_limit() until it finds + the minimum values for each parameter that allow pcre2_match() to com- plete without error. If JIT is being used, only the match limit is relevant. If DFA matching - is being used, neither limit is relevant, and this modifier is ignored + is being used, neither limit is relevant, and this modifier is ignored (with a warning message). - The match_limit number is a measure of the amount of backtracking that - takes place, and learning the minimum value can be instructive. For - most simple matches, the number is quite small, but for patterns with - very large numbers of matching possibilities, it can become large very - quickly with increasing length of subject string. The - match_limit_recursion number is a measure of how much stack (or, if - PCRE2 is compiled with NO_RECURSE, how much heap) memory is needed to + The match_limit number is a measure of the amount of backtracking that + takes place, and learning the minimum value can be instructive. For + most simple matches, the number is quite small, but for patterns with + very large numbers of matching possibilities, it can become large very + quickly with increasing length of subject string. The + match_limit_recursion number is a measure of how much stack (or, if + PCRE2 is compiled with NO_RECURSE, how much heap) memory is needed to complete the match attempt. 
Showing MARK names The mark modifier causes the names from backtracking control verbs that - are returned from calls to pcre2_match() to be displayed. If a mark is - returned for a match, non-match, or partial match, pcre2test shows it. - For a match, it is on a line by itself, tagged with "MK:". Otherwise, + are returned from calls to pcre2_match() to be displayed. If a mark is + returned for a match, non-match, or partial match, pcre2test shows it. + For a match, it is on a line by itself, tagged with "MK:". Otherwise, it is added to the non-match message. Showing memory usage - The memory modifier causes pcre2test to log all memory allocation and + The memory modifier causes pcre2test to log all memory allocation and freeing calls that occur during a match operation. Setting a starting offset - The offset modifier sets an offset in the subject string at which + The offset modifier sets an offset in the subject string at which matching starts. Its value is a number of code units, not characters. + Setting an offset limit + + The offset_limit modifier sets a limit for unanchored matches. If a + match cannot be found starting at or before this offset in the subject, + a "no match" return is given. The data value is a number of code units, + not characters. When this modifier is used, the use_offset_limit modi- + fier must have been set for the pattern; if not, an error is generated. + Setting the size of the output vector - The ovector modifier applies only to the subject line in which it - appears, though of course it can also be used to set a default in a - #subject command. It specifies the number of pairs of offsets that are + The ovector modifier applies only to the subject line in which it + appears, though of course it can also be used to set a default in a + #subject command. It specifies the number of pairs of offsets that are available for storing matching information. The default is 15. 
- A value of zero is useful when testing the POSIX API because it causes + A value of zero is useful when testing the POSIX API because it causes regexec() to be called with a NULL capture vector. When not testing the - POSIX API, a value of zero is used to cause pcre2_match_data_cre- - ate_from_pattern() to be called, in order to create a match block of + POSIX API, a value of zero is used to cause pcre2_match_data_cre- + ate_from_pattern() to be called, in order to create a match block of exactly the right size for the pattern. (It is not possible to create a - match block with a zero-length ovector; there is always at least one + match block with a zero-length ovector; there is always at least one pair of offsets.) Passing the subject as zero-terminated By default, the subject string is passed to a native API matching func- tion with its correct length. In order to test the facility for passing - a zero-terminated string, the zero_terminate modifier is provided. It + a zero-terminated string, the zero_terminate modifier is provided. It causes the length to be passed as PCRE2_ZERO_TERMINATED. (When matching - via the POSIX interface, this modifier has no effect, as there is no + via the POSIX interface, this modifier has no effect, as there is no facility for passing a length.) - When testing pcre2_substitute(), this modifier also has the effect of + When testing pcre2_substitute(), this modifier also has the effect of passing the replacement string as zero-terminated. + Passing a NULL context + + Normally, pcre2test passes a context block to pcre2_match(), + pcre2_dfa_match() or pcre2_jit_match(). If the null_context modifier is + set, however, NULL is passed. This is for testing that the matching + functions behave correctly in this case (they use default values). This + modifier cannot be used with the find_limits modifier or when testing + the substitution function. 
+ THE ALTERNATIVE MATCHING FUNCTION @@ -1069,7 +1342,7 @@ DEFAULT OUTPUT FROM pcre2test If the strings contain any non-printing characters, they are output as \xhh escapes if the value is less than 256 and UTF mode is not set. Otherwise they are output as \x{hh...} escapes. See below for the defi- - nition of non-printing characters. If the /aftertext modifier is set, + nition of non-printing characters. If the aftertext modifier is set, the output for substring 0 is followed by the the rest of the subject string, identified by "0+" like this: @@ -1188,10 +1461,11 @@ CALLOUTS attempt starting at the fourth character of the subject string, when the pointer was at the seventh character, and when the next pattern item was \d. Just one circumflex is output if the start and current - positions are the same. + positions are the same, or if the current position precedes the start + position, which can happen if the callout is in a lookbehind assertion. Callouts numbered 255 are assumed to be automatic callouts, inserted as - a result of the /auto_callout pattern modifier. In this case, instead + a result of the /auto_callout pattern modifier. In this case, instead of showing the callout number, the offset in the pattern, preceded by a plus, is output. For example: @@ -1205,7 +1479,7 @@ CALLOUTS 0: E* If a pattern contains (*MARK) items, an additional line is output when- - ever a change of latest mark is passed to the callout function. For + ever a change of latest mark is passed to the callout function. For example: re> /a(*MARK:X)bc/auto_callout @@ -1219,17 +1493,17 @@ CALLOUTS +12 ^ ^ 0: abc - The mark changes between matching "a" and "b", but stays the same for - the rest of the match, so nothing more is output. If, as a result of - backtracking, the mark reverts to being unset, the text "" is + The mark changes between matching "a" and "b", but stays the same for + the rest of the match, so nothing more is output. 
If, as a result of + backtracking, the mark reverts to being unset, the text "<unset>" is + output. Callouts with string arguments The output for a callout with a string argument is similar, except that - instead of outputting a callout number before the position indicators, - the callout string and its offset in the pattern string are output - before the reflection of the subject string, and the subject string is + instead of outputting a callout number before the position indicators, + the callout string and its offset in the pattern string are output + before the reflection of the subject string, and the subject string is reflected for each callout. For example: re> /^ab(?C'first')cd(?C"second")ef/ @@ -1246,41 +1520,46 @@ CALLOUTS NON-PRINTING CHARACTERS When pcre2test is outputting text in the compiled version of a pattern, - bytes other than 32-126 are always treated as non-printing characters + bytes other than 32-126 are always treated as non-printing characters and are therefore shown as hex escapes. - When pcre2test is outputting text that is a matched part of a subject - string, it behaves in the same way, unless a different locale has been - set for the pattern (using the /locale modifier). In this case, the - isprint() function is used to distinguish printing and non-printing + When pcre2test is outputting text that is a matched part of a subject + string, it behaves in the same way, unless a different locale has been + set for the pattern (using the locale modifier). In this case, the + isprint() function is used to distinguish printing and non-printing characters. SAVING AND RESTORING COMPILED PATTERNS - It is possible to save compiled patterns on disc or elsewhere, and + It is possible to save compiled patterns on disc or elsewhere, and reload them later, subject to a number of restrictions. JIT data cannot - be saved.
The host on which the patterns are reloaded must be running the same version of PCRE2, with the same code unit width, and must also - have the same endianness, pointer width and PCRE2_SIZE type. Before - compiled patterns can be saved they must be serialized, that is, con- - verted to a stream of bytes. A single byte stream may contain any num- - ber of compiled patterns, but they must all use the same character + have the same endianness, pointer width and PCRE2_SIZE type. Before + compiled patterns can be saved they must be serialized, that is, con- + verted to a stream of bytes. A single byte stream may contain any num- + ber of compiled patterns, but they must all use the same character tables. A single copy of the tables is included in the byte stream (its size is 1088 bytes). - The functions whose names begin with pcre2_serialize_ are used for - serializing and de-serializing. They are described in the pcre2serial- + The functions whose names begin with pcre2_serialize_ are used for + serializing and de-serializing. They are described in the pcre2serial- ize documentation. In this section we describe the features of pcre2test that can be used to test these functions. - When a pattern with push modifier is successfully compiled, it is - pushed onto a stack of compiled patterns, and pcre2test expects the - next line to contain a new pattern (or command) instead of a subject - line. By this means, a number of patterns can be compiled and retained. - The push modifier is incompatible with posix, and control modifiers - that act at match time are ignored (with a message). The jitverify mod- - ifier applies only at compile time. The command + When a pattern with push modifier is successfully compiled, it is + pushed onto a stack of compiled patterns, and pcre2test expects the + next line to contain a new pattern (or command) instead of a subject + line. 
By contrast, the pushcopy modifier causes a copy of the compiled + pattern to be stacked, leaving the original available for immediate + matching. By using push and/or pushcopy, a number of patterns can be + compiled and retained. These modifiers are incompatible with posix, and + control modifiers that act at match time are ignored (with a message) + for the stacked patterns. The jitverify modifier applies only at com- + pile time. + + The command #save @@ -1297,9 +1576,10 @@ SAVING AND RESTORING COMPILED PATTERNS matched with the pattern, terminated as usual by an empty line or end of file. This command may be followed by a modifier list containing only control modifiers that act after a pattern has been compiled. In - particular, hex, posix, and push are not allowed, nor are any option- - setting modifiers. The JIT modifiers are, however permitted. Here is - an example that saves and reloads two patterns. + particular, hex, posix, posix_nosub, push, and pushcopy are not + allowed, nor are any option-setting modifiers. The JIT modifiers are, + however, permitted. Here is an example that saves and reloads two pat- + terns. /abc/push /xyz/push @@ -1311,9 +1591,13 @@ SAVING AND RESTORING COMPILED PATTERNS #pop jit,bincode abc - If jitverify is used with #pop, it does not automatically imply jit, + If jitverify is used with #pop, it does not automatically imply jit, which is different behaviour from when it is used on a pattern. + The #popcopy command is analogous to the pushcopy modifier in that it + makes current a copy of the topmost stack pattern, leaving the original + still on the stack. + SEE ALSO @@ -1330,5 +1614,5 @@ AUTHOR REVISION - Last updated: 20 May 2015 - Copyright (c) 1997-2015 University of Cambridge. + Last updated: 28 December 2016 + Copyright (c) 1997-2016 University of Cambridge.
diff --git a/pcre2/doc/pcre2unicode.3 b/pcre2/doc/pcre2unicode.3 index 6c32bc046..253d4b64d 100644 --- a/pcre2/doc/pcre2unicode.3 +++ b/pcre2/doc/pcre2unicode.3 @@ -1,4 +1,4 @@ -.TH PCRE2UNICODE 3 "23 November 2014" "PCRE2 10.00" +.TH PCRE2UNICODE 3 "03 July 2016" "PCRE2 10.22" .SH NAME PCRE - Perl-compatible regular expressions (revised API) .SH "UNICODE AND UTF SUPPORT" @@ -57,17 +57,21 @@ individual code units. In UTF modes, the dot metacharacter matches one UTF character instead of a single code unit. .P -The escape sequence \eC can be used to match a single code unit, in a UTF mode, +The escape sequence \eC can be used to match a single code unit in a UTF mode, but its use can lead to some strange effects because it breaks up multi-unit characters (see the description of \eC in the .\" HREF \fBpcre2pattern\fP .\" -documentation). The use of \eC is not supported in the alternative matching -function \fBpcre2_dfa_match()\fP, nor is it supported in UTF mode by the JIT -optimization. If JIT optimization is requested for a UTF pattern that contains -\eC, it will not succeed, and so the matching will be carried out by the normal -interpretive function. +documentation). +.P +The use of \eC is not supported by the alternative matching function +\fBpcre2_dfa_match()\fP when in UTF-8 or UTF-16 mode, that is, when a character +may consist of more than one code unit. The use of \eC in these modes provokes +a match-time error. Also, the JIT optimization does not support \eC in these +modes. If JIT optimization is requested for a UTF-8 or UTF-16 pattern that +contains \eC, it will not succeed, and so when \fBpcre2_match()\fP is called, +the matching will be carried out by the normal interpretive function. 
.P The character escapes \eb, \eB, \ed, \eD, \es, \eS, \ew, and \eW correctly test characters of any code value, but, by default, the characters that PCRE2 @@ -117,11 +121,21 @@ UTF-16 and UTF-32 strings can indicate their endianness by special code knows as a byte-order mark (BOM). The PCRE2 functions do not handle this, expecting strings to be in host byte order. .P -The entire string is checked before any other processing takes place. In -addition to checking the format of the string, there is a check to ensure that -all code points lie in the range U+0 to U+10FFFF, excluding the surrogate area. -The so-called "non-character" code points are not excluded because Unicode -corrigendum #9 makes it clear that they should not be. +A UTF string is checked before any other processing takes place. In the case of +\fBpcre2_match()\fP and \fBpcre2_dfa_match()\fP calls with a non-zero starting +offset, the check is applied only to that part of the subject that could be +inspected during matching, and there is a check that the starting offset points +to the first code unit of a character or to the end of the subject. If there +are no lookbehind assertions in the pattern, the check starts at the starting +offset. Otherwise, it starts at the length of the longest lookbehind before the +starting offset, or at the start of the subject if there are not that many +characters before the starting offset. Note that the sequences \eb and \eB are +one-character lookbehinds. +.P +In addition to checking the format of the string, there is a check to ensure +that all code points lie in the range U+0 to U+10FFFF, excluding the surrogate +area. The so-called "non-character" code points are not excluded because +Unicode corrigendum #9 makes it clear that they should not be. .P Characters in the "Surrogate Area" of Unicode are reserved for use by UTF-16, where they are used in pairs to encode code points with values greater than @@ -221,9 +235,9 @@ never occur in a valid UTF-8 string. 
.sp The following negative error codes are given for invalid UTF-16 strings: .sp - PCRE_UTF16_ERR1 Missing low surrogate at end of string - PCRE_UTF16_ERR2 Invalid low surrogate follows high surrogate - PCRE_UTF16_ERR3 Isolated low surrogate + PCRE2_ERROR_UTF16_ERR1 Missing low surrogate at end of string + PCRE2_ERROR_UTF16_ERR2 Invalid low surrogate follows high surrogate + PCRE2_ERROR_UTF16_ERR3 Isolated low surrogate .sp . . @@ -233,8 +247,8 @@ The following negative error codes are given for invalid UTF-16 strings: .sp The following negative error codes are given for invalid UTF-32 strings: .sp - PCRE_UTF32_ERR1 Surrogate character (range from 0xd800 to 0xdfff) - PCRE_UTF32_ERR2 Code point is greater than 0x10ffff + PCRE2_ERROR_UTF32_ERR1 Surrogate character (0xd800 to 0xdfff) + PCRE2_ERROR_UTF32_ERR2 Code point is greater than 0x10ffff .sp . . @@ -252,6 +266,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 23 November 2014 -Copyright (c) 1997-2014 University of Cambridge. +Last updated: 03 July 2016 +Copyright (c) 1997-2016 University of Cambridge. .fi diff --git a/pcre2/pcre2-config.in b/pcre2/pcre2-config.in index 932160ef5..74271c088 100644 --- a/pcre2/pcre2-config.in +++ b/pcre2/pcre2-config.in @@ -86,7 +86,7 @@ while test $# -gt 0; do ;; --libs-posix) if test @enable_pcre2_8@ = yes ; then - echo $libS$libR -lpcre2posix -lpcre2-8 + echo $libS$libR -lpcre2-posix -lpcre2-8 else echo "${usage}" 1>&2 fi diff --git a/pcre2/perltest.sh b/pcre2/perltest.sh index f011ccc99..50c70e5c2 100755 --- a/pcre2/perltest.sh +++ b/pcre2/perltest.sh @@ -1,14 +1,17 @@ #! /bin/sh # Script for testing regular expressions with perl to check that PCRE2 handles -# them the same. The Perl code has to have "use utf8" and "require Encode" at -# the start when running UTF-8 tests, but *not* for non-utf8 tests. (The -# "require" would actually be OK for non-utf8-tests, but is not always -# installed, so this way the script will always run for these tests.) +# them the same. 
If the first argument to this script is "-w", Perl is also +# called with "-w", which turns on its warning mode. +# +# The Perl code has to have "use utf8" and "require Encode" at the start when +# running UTF-8 tests, but *not* for non-utf8 tests. (The "require" would +# actually be OK for non-utf8-tests, but is not always installed, so this way +# the script will always run for these tests.) # # The desired effect is achieved by making this a shell script that passes the -# Perl script to Perl through a pipe. If the first argument is "-utf8", a -# suitable prefix is set up. +# Perl script to Perl through a pipe. If the first argument (possibly after +# removing "-w") is "-utf8", a suitable prefix is set up. # # The remaining arguments, if any, are passed to Perl. They are an input file # and an output file. If there is one argument, the output is written to @@ -17,7 +20,14 @@ # of the contorted piping input.) perl=perl +perlarg='' prefix='' + +if [ $# -gt 0 -a "$1" = "-w" ] ; then + perlarg="-w" + shift +fi + if [ $# -gt 0 -a "$1" = "-utf8" ] ; then prefix="use utf8; require Encode;" shift @@ -204,12 +214,14 @@ for (;;) printf "data> " if $interact; last NEXT_RE if ! ($_ = <$infile>); chomp; - printf $outfile "$_\n" if ! $interact; + printf $outfile "%s", "$_\n" if ! $interact; s/\s+$//; # Remove trailing space s/^\s+//; # Remove leading space last if ($_ eq ""); + next if $_ =~ /^\\=(?:\s|$)/; # Comment line + $x = eval "\"$_\""; # To get escapes processed # Empty array for holding results, ensure $REGERROR and $REGMARK are @@ -290,6 +302,6 @@ for (;;) # printf $outfile "\n"; PERLEND -) | $perl - $@ +) | $perl $perlarg - $@ # End diff --git a/pcre2/src/config.h.generic b/pcre2/src/config.h.generic index 0f9da50ce..3315b7770 100644 --- a/pcre2/src/config.h.generic +++ b/pcre2/src/config.h.generic @@ -78,6 +78,9 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to 1 if you have the header file. 
*/ /* #undef HAVE_MEMORY_H */ +/* Define to 1 if you have the `mkostemp' function. */ +/* #undef HAVE_MKOSTEMP */ + /* Define if you have POSIX threads libraries and header files. */ /* #undef HAVE_PTHREAD */ @@ -90,6 +93,9 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to 1 if you have the header file. */ /* #undef HAVE_READLINE_READLINE_H */ +/* Define to 1 if you have the `secure_getenv' function. */ +/* #undef HAVE_SECURE_GETENV */ + /* Define to 1 if you have the header file. */ /* #undef HAVE_STDINT_H */ @@ -111,6 +117,9 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to 1 if you have the header file. */ /* #undef HAVE_SYS_TYPES_H */ +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_WAIT_H */ + /* Define to 1 if you have the header file. */ /* #undef HAVE_UNISTD_H */ @@ -182,6 +191,9 @@ sure both macros are undefined; an emulation function will then be used. */ #define MAX_NAME_SIZE 32 #endif +/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */ +/* #undef NEVER_BACKSLASH_C */ + /* The value of NEWLINE_DEFAULT determines the default newline character sequence. PCRE2 client programs can override this by selecting other values at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), and 5 @@ -200,7 +212,7 @@ sure both macros are undefined; an emulation function will then be used. */ #define PACKAGE_NAME "PCRE2" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "PCRE2 10.20" +#define PACKAGE_STRING "PCRE2 10.23" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "pcre2" @@ -209,7 +221,7 @@ sure both macros are undefined; an emulation function will then be used. */ #define PACKAGE_URL "" /* Define to the version of this package. 
*/ -#define PACKAGE_VERSION "10.20" +#define PACKAGE_VERSION "10.23" /* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested parentheses (of any kind) in a pattern. This limits the amount of system @@ -218,15 +230,24 @@ sure both macros are undefined; an emulation function will then be used. */ #define PARENS_NEST_LIMIT 250 #endif -/* The value of PCRE2GREP_BUFSIZE determines the size of buffer used by - pcre2grep to hold parts of the file it is searching. This is also the - minimum value. The actual amount of memory used by pcre2grep is three times - this number, because it allows for the buffering of "before" and "after" - lines. */ +/* The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by + pcre2grep to hold parts of the file it is searching. The buffer will be + expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing + very long lines. The actual amount of memory used by pcre2grep is three + times this number, because it allows for the buffering of "before" and + "after" lines. */ #ifndef PCRE2GREP_BUFSIZE #define PCRE2GREP_BUFSIZE 20480 #endif +/* The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer + used by pcre2grep to hold parts of the file it is searching. The actual + amount of memory used by pcre2grep is three times this number, because it + allows for the buffering of "before" and "after" lines. */ +#ifndef PCRE2GREP_MAX_BUFSIZE +#define PCRE2GREP_MAX_BUFSIZE 1048576 +#endif + /* Define to any value to include debugging code. */ /* #undef PCRE2_DEBUG */ @@ -268,7 +289,11 @@ sure both macros are undefined; an emulation function will then be used. */ is able to handle .gz files. */ /* #undef SUPPORT_LIBZ */ -/* Define to any value to enable JIT support in pcre2grep. */ +/* Define to any value to enable callout script support in pcre2grep. */ +/* #undef SUPPORT_PCRE2GREP_CALLOUT */ + +/* Define to any value to enable JIT support in pcre2grep. 
Note that this will + have no effect unless SUPPORT_JIT is also defined. */ /* #undef SUPPORT_PCRE2GREP_JIT */ /* Define to any value to enable the 16 bit PCRE2 library. */ @@ -289,8 +314,39 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to any value for valgrind support to find invalid memory reads. */ /* #undef SUPPORT_VALGRIND */ +/* Enable extensions on AIX 3, Interix. */ +#ifndef _ALL_SOURCE +# define _ALL_SOURCE 1 +#endif +/* Enable GNU extensions on systems that have them. */ +#ifndef _GNU_SOURCE +# define _GNU_SOURCE 1 +#endif +/* Enable threading extensions on Solaris. */ +#ifndef _POSIX_PTHREAD_SEMANTICS +# define _POSIX_PTHREAD_SEMANTICS 1 +#endif +/* Enable extensions on HP NonStop. */ +#ifndef _TANDEM_SOURCE +# define _TANDEM_SOURCE 1 +#endif +/* Enable general extensions on Solaris. */ +#ifndef __EXTENSIONS__ +# define __EXTENSIONS__ 1 +#endif + /* Version number of package */ -#define VERSION "10.20" +#define VERSION "10.23" + +/* Define to 1 if on MINIX. */ +/* #undef _MINIX */ + +/* Define to 2 if the system does not provide POSIX.1 features except with + this defined. */ +/* #undef _POSIX_1_SOURCE */ + +/* Define to 1 if you need to in order for `stat' and other things to work. */ +/* #undef _POSIX_SOURCE */ /* Define to empty if `const' does not conform to ANSI C. */ /* #undef const */ diff --git a/pcre2/src/config.h.in b/pcre2/src/config.h.in index e3ef2fddc..e04b209bf 100644 --- a/pcre2/src/config.h.in +++ b/pcre2/src/config.h.in @@ -78,6 +78,9 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to 1 if you have the header file. */ #undef HAVE_MEMORY_H +/* Define to 1 if you have the `mkostemp' function. */ +#undef HAVE_MKOSTEMP + /* Define if you have POSIX threads libraries and header files. */ #undef HAVE_PTHREAD @@ -90,6 +93,9 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to 1 if you have the header file. 
*/ #undef HAVE_READLINE_READLINE_H +/* Define to 1 if you have the `secure_getenv' function. */ +#undef HAVE_SECURE_GETENV + /* Define to 1 if you have the header file. */ #undef HAVE_STDINT_H @@ -111,6 +117,9 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to 1 if you have the header file. */ #undef HAVE_SYS_TYPES_H +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_WAIT_H + /* Define to 1 if you have the header file. */ #undef HAVE_UNISTD_H @@ -169,6 +178,9 @@ sure both macros are undefined; an emulation function will then be used. */ overflow caused by enormously large patterns. */ #undef MAX_NAME_SIZE +/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */ +#undef NEVER_BACKSLASH_C + /* The value of NEWLINE_DEFAULT determines the default newline character sequence. PCRE2 client programs can override this by selecting other values at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), and 5 @@ -201,13 +213,20 @@ sure both macros are undefined; an emulation function will then be used. */ stack that is used while compiling a pattern. */ #undef PARENS_NEST_LIMIT -/* The value of PCRE2GREP_BUFSIZE determines the size of buffer used by - pcre2grep to hold parts of the file it is searching. This is also the - minimum value. The actual amount of memory used by pcre2grep is three times - this number, because it allows for the buffering of "before" and "after" - lines. */ +/* The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by + pcre2grep to hold parts of the file it is searching. The buffer will be + expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing + very long lines. The actual amount of memory used by pcre2grep is three + times this number, because it allows for the buffering of "before" and + "after" lines. 
*/ #undef PCRE2GREP_BUFSIZE +/* The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer + used by pcre2grep to hold parts of the file it is searching. The actual + amount of memory used by pcre2grep is three times this number, because it + allows for the buffering of "before" and "after" lines. */ +#undef PCRE2GREP_MAX_BUFSIZE + /* to make a symbol visible */ #undef PCRE2POSIX_EXP_DECL @@ -259,7 +278,11 @@ sure both macros are undefined; an emulation function will then be used. */ is able to handle .gz files. */ #undef SUPPORT_LIBZ -/* Define to any value to enable JIT support in pcre2grep. */ +/* Define to any value to enable callout script support in pcre2grep. */ +#undef SUPPORT_PCRE2GREP_CALLOUT + +/* Define to any value to enable JIT support in pcre2grep. Note that this will + have no effect unless SUPPORT_JIT is also defined. */ #undef SUPPORT_PCRE2GREP_JIT /* Define to any value to enable the 16 bit PCRE2 library. */ @@ -280,9 +303,41 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to any value for valgrind support to find invalid memory reads. */ #undef SUPPORT_VALGRIND +/* Enable extensions on AIX 3, Interix. */ +#ifndef _ALL_SOURCE +# undef _ALL_SOURCE +#endif +/* Enable GNU extensions on systems that have them. */ +#ifndef _GNU_SOURCE +# undef _GNU_SOURCE +#endif +/* Enable threading extensions on Solaris. */ +#ifndef _POSIX_PTHREAD_SEMANTICS +# undef _POSIX_PTHREAD_SEMANTICS +#endif +/* Enable extensions on HP NonStop. */ +#ifndef _TANDEM_SOURCE +# undef _TANDEM_SOURCE +#endif +/* Enable general extensions on Solaris. */ +#ifndef __EXTENSIONS__ +# undef __EXTENSIONS__ +#endif + + /* Version number of package */ #undef VERSION +/* Define to 1 if on MINIX. */ +#undef _MINIX + +/* Define to 2 if the system does not provide POSIX.1 features except with + this defined. */ +#undef _POSIX_1_SOURCE + +/* Define to 1 if you need to in order for `stat' and other things to work. 
*/ +#undef _POSIX_SOURCE + /* Define to empty if `const' does not conform to ANSI C. */ #undef const diff --git a/pcre2/src/dftables.c b/pcre2/src/dftables.c index b6417cc2e..dfb90b594 100644 --- a/pcre2/src/dftables.c +++ b/pcre2/src/dftables.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2014 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -102,7 +102,7 @@ fprintf(f, "/* This file was automatically written by the dftables auxiliary\n" "program. It contains character tables that are used when no external\n" "tables are passed to PCRE2 by the application that calls it. The tables\n" - "are used only for characters whose code values are less than 256.\n\n"); + "are used only for characters whose code values are less than 256. */\n\n"); /* Force config.h in z/OS */ @@ -115,7 +115,7 @@ fprintf(f, #endif fprintf(f, - "The following #includes are present because without them gcc 4.x may remove\n" + "/* The following #includes are present because without them gcc 4.x may remove\n" "the array definition from the final binary if PCRE2 is built into a static\n" "library and dead code stripping is activated. This leads to link errors.\n" "Pulling in the header ensures that the array gets flagged as \"someone\n" diff --git a/pcre2/src/pcre2.h.generic b/pcre2/src/pcre2.h.generic index 3e97fb8bf..86503208e 100644 --- a/pcre2/src/pcre2.h.generic +++ b/pcre2/src/pcre2.h.generic @@ -5,7 +5,7 @@ /* This is the public header file for the PCRE library, second API, to be #included by applications that call PCRE2 functions. 
- Copyright (c) 2015 University of Cambridge + Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -36,15 +36,15 @@ POSSIBILITY OF SUCH DAMAGE. ----------------------------------------------------------------------------- */ -#ifndef _PCRE2_H -#define _PCRE2_H +#ifndef PCRE2_H_IDEMPOTENT_GUARD +#define PCRE2_H_IDEMPOTENT_GUARD /* The current PCRE version information. */ #define PCRE2_MAJOR 10 -#define PCRE2_MINOR 20 +#define PCRE2_MINOR 23 #define PCRE2_PRERELEASE -#define PCRE2_DATE 2015-06-30 +#define PCRE2_DATE 2017-02-14 /* When an application links to a PCRE DLL in Windows, the symbols that are imported have to be identified as such. When building PCRE2, the appropriate @@ -67,6 +67,20 @@ don't change existing definitions of PCRE2_EXP_DECL. */ # endif #endif +/* When compiling with the MSVC compiler, it is sometimes necessary to include +a "calling convention" before exported function names. (This is secondhand +information; I know nothing about MSVC myself). For example, something like + + void __cdecl function(....) + +might be needed. In order so make this easy, all the exported functions have +PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not +set, we ensure here that it has no effect. */ + +#ifndef PCRE2_CALL_CONVENTION +#define PCRE2_CALL_CONVENTION +#endif + /* Have to include limits.h, stdlib.h and stdint.h to ensure that size_t and uint8_t, UCHAR_MAX, etc are defined. */ @@ -120,6 +134,8 @@ D is inspected during pcre2_dfa_match() execution #define PCRE2_UTF 0x00080000u /* C J M D */ #define PCRE2_NEVER_BACKSLASH_C 0x00100000u /* C */ #define PCRE2_ALT_CIRCUMFLEX 0x00200000u /* J M D */ +#define PCRE2_ALT_VERBNAMES 0x00400000u /* C */ +#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */ /* These are for pcre2_jit_compile(). */ @@ -144,9 +160,19 @@ sanity checks). 
*/ #define PCRE2_DFA_RESTART 0x00000040u #define PCRE2_DFA_SHORTEST 0x00000080u -/* This is an additional option for pcre2_substitute(). */ +/* These are additional options for pcre2_substitute(), which passes any others +through to pcre2_match(). */ -#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u +#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u +#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u +#define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u +#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u +#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u + +/* A further option for pcre2_match(), not allowed for pcre2_dfa_match(), +ignored for pcre2_jit_match(). */ + +#define PCRE2_NO_JIT 0x00002000u /* Newline and \R settings, for use in compile contexts. The newline values must be kept in step with values set in config.h and both sets must all be @@ -233,6 +259,13 @@ numbers must not be changed. */ #define PCRE2_ERROR_RECURSIONLIMIT (-53) #define PCRE2_ERROR_UNAVAILABLE (-54) #define PCRE2_ERROR_UNSET (-55) +#define PCRE2_ERROR_BADOFFSETLIMIT (-56) +#define PCRE2_ERROR_BADREPESCAPE (-57) +#define PCRE2_ERROR_REPMISSINGBRACE (-58) +#define PCRE2_ERROR_BADSUBSTITUTION (-59) +#define PCRE2_ERROR_BADSUBSPATTERN (-60) +#define PCRE2_ERROR_TOOMANYREPLACE (-61) +#define PCRE2_ERROR_BADSERIALIZEDDATA (-62) /* Request types for pcre2_pattern_info() */ @@ -259,6 +292,7 @@ numbers must not be changed. */ #define PCRE2_INFO_NEWLINE 20 #define PCRE2_INFO_RECURSIONLIMIT 21 #define PCRE2_INFO_SIZE 22 +#define PCRE2_INFO_HASBACKSLASHC 23 /* Request types for pcre2_config(). */ @@ -291,6 +325,7 @@ define special values to indicate zero-terminated strings and unset offsets in the offset vector (ovector). */ #define PCRE2_SIZE size_t +#define PCRE2_SIZE_MAX SIZE_MAX #define PCRE2_ZERO_TERMINATED (~(PCRE2_SIZE)0) #define PCRE2_UNSET (~(PCRE2_SIZE)0) @@ -365,164 +400,192 @@ expanded for each width below. Start with functions that give general information. 
*/ #define PCRE2_GENERAL_INFO_FUNCTIONS \ -PCRE2_EXP_DECL int pcre2_config(uint32_t, void *); +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_config(uint32_t, void *); /* Functions for manipulating contexts. */ #define PCRE2_GENERAL_CONTEXT_FUNCTIONS \ -PCRE2_EXP_DECL \ - pcre2_general_context *pcre2_general_context_copy(pcre2_general_context *); \ -PCRE2_EXP_DECL \ - pcre2_general_context *pcre2_general_context_create( \ - void *(*)(PCRE2_SIZE, void *), \ - void (*)(void *, void *), void *); \ -PCRE2_EXP_DECL void pcre2_general_context_free(pcre2_general_context *); +PCRE2_EXP_DECL pcre2_general_context PCRE2_CALL_CONVENTION \ + *pcre2_general_context_copy(pcre2_general_context *); \ +PCRE2_EXP_DECL pcre2_general_context PCRE2_CALL_CONVENTION \ + *pcre2_general_context_create(void *(*)(PCRE2_SIZE, void *), \ + void (*)(void *, void *), void *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_general_context_free(pcre2_general_context *); #define PCRE2_COMPILE_CONTEXT_FUNCTIONS \ -PCRE2_EXP_DECL \ - pcre2_compile_context *pcre2_compile_context_copy(pcre2_compile_context *); \ -PCRE2_EXP_DECL \ - pcre2_compile_context *pcre2_compile_context_create(pcre2_general_context *);\ -PCRE2_EXP_DECL void pcre2_compile_context_free(pcre2_compile_context *); \ -PCRE2_EXP_DECL int pcre2_set_bsr(pcre2_compile_context *, uint32_t); \ -PCRE2_EXP_DECL int pcre2_set_character_tables(pcre2_compile_context *, \ - const unsigned char *); \ -PCRE2_EXP_DECL int pcre2_set_newline(pcre2_compile_context *, uint32_t); \ -PCRE2_EXP_DECL int pcre2_set_parens_nest_limit(pcre2_compile_context *, \ - uint32_t); \ -PCRE2_EXP_DECL int pcre2_set_compile_recursion_guard(\ - pcre2_compile_context *, int (*)(uint32_t, void *), \ - void *); +PCRE2_EXP_DECL pcre2_compile_context PCRE2_CALL_CONVENTION \ + *pcre2_compile_context_copy(pcre2_compile_context *); \ +PCRE2_EXP_DECL pcre2_compile_context PCRE2_CALL_CONVENTION \ + *pcre2_compile_context_create(pcre2_general_context *);\ +PCRE2_EXP_DECL void 
PCRE2_CALL_CONVENTION \ + pcre2_compile_context_free(pcre2_compile_context *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_bsr(pcre2_compile_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_character_tables(pcre2_compile_context *, const unsigned char *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_newline(pcre2_compile_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_parens_nest_limit(pcre2_compile_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_compile_recursion_guard(pcre2_compile_context *, \ + int (*)(uint32_t, void *), void *); #define PCRE2_MATCH_CONTEXT_FUNCTIONS \ -PCRE2_EXP_DECL \ - pcre2_match_context *pcre2_match_context_copy(pcre2_match_context *); \ -PCRE2_EXP_DECL \ - pcre2_match_context *pcre2_match_context_create(pcre2_general_context *); \ -PCRE2_EXP_DECL void pcre2_match_context_free(pcre2_match_context *); \ -PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \ - int (*)(pcre2_callout_block *, void *), void *); \ -PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \ - uint32_t); \ -PCRE2_EXP_DECL int pcre2_set_recursion_limit(pcre2_match_context *, \ - uint32_t); \ -PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \ - pcre2_match_context *, void *(*)(PCRE2_SIZE, void *), \ - void (*)(void *, void *), void *); +PCRE2_EXP_DECL pcre2_match_context PCRE2_CALL_CONVENTION \ + *pcre2_match_context_copy(pcre2_match_context *); \ +PCRE2_EXP_DECL pcre2_match_context PCRE2_CALL_CONVENTION \ + *pcre2_match_context_create(pcre2_general_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_match_context_free(pcre2_match_context *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_callout(pcre2_match_context *, \ + int (*)(pcre2_callout_block *, void *), void *); \ 
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_match_limit(pcre2_match_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_offset_limit(pcre2_match_context *, PCRE2_SIZE); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_recursion_limit(pcre2_match_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_recursion_memory_management(pcre2_match_context *, \ + void *(*)(PCRE2_SIZE, void *), void (*)(void *, void *), void *); /* Functions concerned with compiling a pattern to PCRE internal code. */ #define PCRE2_COMPILE_FUNCTIONS \ -PCRE2_EXP_DECL \ - pcre2_code *pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, \ - int *, PCRE2_SIZE *, pcre2_compile_context *); \ -PCRE2_EXP_DECL void pcre2_code_free(pcre2_code *); +PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \ + *pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, int *, PCRE2_SIZE *, \ + pcre2_compile_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_code_free(pcre2_code *); \ +PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \ + *pcre2_code_copy(const pcre2_code *); \ +PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \ + *pcre2_code_copy_with_tables(const pcre2_code *); /* Functions that give information about a compiled pattern. */ #define PCRE2_PATTERN_INFO_FUNCTIONS \ -PCRE2_EXP_DECL int pcre2_pattern_info(const pcre2_code *, uint32_t, \ - void *); \ -PCRE2_EXP_DECL int pcre2_callout_enumerate(const pcre2_code *, \ - int (*)(pcre2_callout_enumerate_block *, void *), \ - void *); +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_pattern_info(const pcre2_code *, uint32_t, void *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_callout_enumerate(const pcre2_code *, \ + int (*)(pcre2_callout_enumerate_block *, void *), void *); /* Functions for running a match and inspecting the result. 
*/ #define PCRE2_MATCH_FUNCTIONS \ -PCRE2_EXP_DECL \ - pcre2_match_data *pcre2_match_data_create(uint32_t, \ - pcre2_general_context *); \ -PCRE2_EXP_DECL \ - pcre2_match_data *pcre2_match_data_create_from_pattern(\ - const pcre2_code *, \ - pcre2_general_context *); \ -PCRE2_EXP_DECL int pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, \ - PCRE2_SIZE, PCRE2_SIZE, uint32_t, \ - pcre2_match_data *, pcre2_match_context *, int *, \ - PCRE2_SIZE); \ -PCRE2_EXP_DECL int pcre2_match(const pcre2_code *, \ - PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, \ - pcre2_match_data *, pcre2_match_context *); \ -PCRE2_EXP_DECL void pcre2_match_data_free(pcre2_match_data *); \ -PCRE2_EXP_DECL PCRE2_SPTR pcre2_get_mark(pcre2_match_data *); \ -PCRE2_EXP_DECL uint32_t pcre2_get_ovector_count(pcre2_match_data *); \ -PCRE2_EXP_DECL PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *); \ -PCRE2_EXP_DECL PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *); +PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \ + *pcre2_match_data_create(uint32_t, pcre2_general_context *); \ +PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \ + *pcre2_match_data_create_from_pattern(const pcre2_code *, \ + pcre2_general_context *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \ + uint32_t, pcre2_match_data *, pcre2_match_context *, int *, PCRE2_SIZE); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \ + uint32_t, pcre2_match_data *, pcre2_match_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_match_data_free(pcre2_match_data *); \ +PCRE2_EXP_DECL PCRE2_SPTR PCRE2_CALL_CONVENTION \ + pcre2_get_mark(pcre2_match_data *); \ +PCRE2_EXP_DECL uint32_t PCRE2_CALL_CONVENTION \ + pcre2_get_ovector_count(pcre2_match_data *); \ +PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \ + *pcre2_get_ovector_pointer(pcre2_match_data *); \ +PCRE2_EXP_DECL 
PCRE2_SIZE PCRE2_CALL_CONVENTION \ + pcre2_get_startchar(pcre2_match_data *); /* Convenience functions for handling matched substrings. */ #define PCRE2_SUBSTRING_FUNCTIONS \ -PCRE2_EXP_DECL int pcre2_substring_copy_byname(pcre2_match_data *, \ - PCRE2_SPTR, PCRE2_UCHAR *, PCRE2_SIZE *); \ -PCRE2_EXP_DECL int pcre2_substring_copy_bynumber(pcre2_match_data *, \ - uint32_t, PCRE2_UCHAR *, PCRE2_SIZE *); \ -PCRE2_EXP_DECL void pcre2_substring_free(PCRE2_UCHAR *); \ -PCRE2_EXP_DECL int pcre2_substring_get_byname(pcre2_match_data *, \ - PCRE2_SPTR, PCRE2_UCHAR **, PCRE2_SIZE *); \ -PCRE2_EXP_DECL int pcre2_substring_get_bynumber(pcre2_match_data *, \ - uint32_t, PCRE2_UCHAR **, PCRE2_SIZE *); \ -PCRE2_EXP_DECL int pcre2_substring_length_byname(pcre2_match_data *, \ - PCRE2_SPTR, PCRE2_SIZE *); \ -PCRE2_EXP_DECL int pcre2_substring_length_bynumber(pcre2_match_data *, \ - uint32_t, PCRE2_SIZE *); \ -PCRE2_EXP_DECL int pcre2_substring_nametable_scan(const pcre2_code *, \ - PCRE2_SPTR, PCRE2_SPTR *, PCRE2_SPTR *); \ -PCRE2_EXP_DECL int pcre2_substring_number_from_name(\ - const pcre2_code *, PCRE2_SPTR); \ -PCRE2_EXP_DECL void pcre2_substring_list_free(PCRE2_SPTR *); \ -PCRE2_EXP_DECL int pcre2_substring_list_get(pcre2_match_data *, \ - PCRE2_UCHAR ***, PCRE2_SIZE **); +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_copy_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR *, \ + PCRE2_SIZE *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_copy_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR *, \ + PCRE2_SIZE *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_substring_free(PCRE2_UCHAR *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_get_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR **, \ + PCRE2_SIZE *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_get_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR **, \ + PCRE2_SIZE *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + 
pcre2_substring_length_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_SIZE *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_length_bynumber(pcre2_match_data *, uint32_t, PCRE2_SIZE *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_nametable_scan(const pcre2_code *, PCRE2_SPTR, PCRE2_SPTR *, \ + PCRE2_SPTR *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_number_from_name(const pcre2_code *, PCRE2_SPTR); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_substring_list_free(PCRE2_SPTR *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_list_get(pcre2_match_data *, PCRE2_UCHAR ***, PCRE2_SIZE **); /* Functions for serializing / deserializing compiled patterns. */ #define PCRE2_SERIALIZE_FUNCTIONS \ -PCRE2_EXP_DECL int32_t pcre2_serialize_encode(const pcre2_code **, \ - int32_t, uint8_t **, PCRE2_SIZE *, \ - pcre2_general_context *); \ -PCRE2_EXP_DECL int32_t pcre2_serialize_decode(pcre2_code **, int32_t, \ - const uint8_t *, pcre2_general_context *); \ -PCRE2_EXP_DECL int32_t pcre2_serialize_get_number_of_codes(const uint8_t *); \ -PCRE2_EXP_DECL void pcre2_serialize_free(uint8_t *); +PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \ + pcre2_serialize_encode(const pcre2_code **, int32_t, uint8_t **, \ + PCRE2_SIZE *, pcre2_general_context *); \ +PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \ + pcre2_serialize_decode(pcre2_code **, int32_t, const uint8_t *, \ + pcre2_general_context *); \ +PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \ + pcre2_serialize_get_number_of_codes(const uint8_t *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_serialize_free(uint8_t *); /* Convenience function for match + substitute. 
*/ #define PCRE2_SUBSTITUTE_FUNCTION \ -PCRE2_EXP_DECL int pcre2_substitute(const pcre2_code *, \ - PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, \ - pcre2_match_data *, pcre2_match_context *, \ - PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *, \ - PCRE2_SIZE *); +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substitute(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \ + uint32_t, pcre2_match_data *, pcre2_match_context *, PCRE2_SPTR, \ + PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE *); /* Functions for JIT processing */ #define PCRE2_JIT_FUNCTIONS \ -PCRE2_EXP_DECL int pcre2_jit_compile(pcre2_code *, uint32_t); \ -PCRE2_EXP_DECL int pcre2_jit_match(const pcre2_code *, \ - PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, \ - pcre2_match_data *, pcre2_match_context *); \ -PCRE2_EXP_DECL void pcre2_jit_free_unused_memory(pcre2_general_context *); \ -PCRE2_EXP_DECL \ - pcre2_jit_stack *pcre2_jit_stack_create(PCRE2_SIZE, PCRE2_SIZE, \ - pcre2_general_context *); \ -PCRE2_EXP_DECL void pcre2_jit_stack_assign(pcre2_match_context *, \ - pcre2_jit_callback, void *); \ -PCRE2_EXP_DECL void pcre2_jit_stack_free(pcre2_jit_stack *); +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_jit_compile(pcre2_code *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_jit_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \ + uint32_t, pcre2_match_data *, pcre2_match_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_jit_free_unused_memory(pcre2_general_context *); \ +PCRE2_EXP_DECL pcre2_jit_stack PCRE2_CALL_CONVENTION \ + *pcre2_jit_stack_create(PCRE2_SIZE, PCRE2_SIZE, pcre2_general_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_jit_stack_assign(pcre2_match_context *, pcre2_jit_callback, void *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_jit_stack_free(pcre2_jit_stack *); /* Other miscellaneous functions. 
*/ #define PCRE2_OTHER_FUNCTIONS \ -PCRE2_EXP_DECL int pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \ -PCRE2_EXP_DECL \ - const uint8_t *pcre2_maketables(pcre2_general_context *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \ +PCRE2_EXP_DECL const uint8_t PCRE2_CALL_CONVENTION \ + *pcre2_maketables(pcre2_general_context *); \ /* Define macros that generate width-specific names from generic versions. The @@ -567,6 +630,8 @@ pcre2_compile are called by application code. */ /* Functions: the complete list in alphabetical order */ #define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_) +#define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_) +#define pcre2_code_copy_with_tables PCRE2_SUFFIX(pcre2_code_copy_with_tables_) #define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_) #define pcre2_compile PCRE2_SUFFIX(pcre2_compile_) #define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_) @@ -606,8 +671,10 @@ pcre2_compile are called by application code. */ #define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_) #define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_) #define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_) +#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_) #define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_) #define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_) +#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_) #define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_) #define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_) #define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_) @@ -699,4 +766,6 @@ PCRE2_SUFFIX a no-op. Otherwise, generate an error. 
*/ } /* extern "C" */ #endif -#endif /* End of pcre2.h */ +#endif /* PCRE2_H_IDEMPOTENT_GUARD */ + +/* End of pcre2.h */ diff --git a/pcre2/src/pcre2.h.in b/pcre2/src/pcre2.h.in index 94fbdd5b3..96c29ffd8 100644 --- a/pcre2/src/pcre2.h.in +++ b/pcre2/src/pcre2.h.in @@ -5,7 +5,7 @@ /* This is the public header file for the PCRE library, second API, to be #included by applications that call PCRE2 functions. - Copyright (c) 2015 University of Cambridge + Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -36,8 +36,8 @@ POSSIBILITY OF SUCH DAMAGE. ----------------------------------------------------------------------------- */ -#ifndef _PCRE2_H -#define _PCRE2_H +#ifndef PCRE2_H_IDEMPOTENT_GUARD +#define PCRE2_H_IDEMPOTENT_GUARD /* The current PCRE version information. */ @@ -67,6 +67,20 @@ don't change existing definitions of PCRE2_EXP_DECL. */ # endif #endif +/* When compiling with the MSVC compiler, it is sometimes necessary to include +a "calling convention" before exported function names. (This is secondhand +information; I know nothing about MSVC myself). For example, something like + + void __cdecl function(....) + +might be needed. In order so make this easy, all the exported functions have +PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not +set, we ensure here that it has no effect. */ + +#ifndef PCRE2_CALL_CONVENTION +#define PCRE2_CALL_CONVENTION +#endif + /* Have to include limits.h, stdlib.h and stdint.h to ensure that size_t and uint8_t, UCHAR_MAX, etc are defined. 
*/ @@ -120,6 +134,8 @@ D is inspected during pcre2_dfa_match() execution #define PCRE2_UTF 0x00080000u /* C J M D */ #define PCRE2_NEVER_BACKSLASH_C 0x00100000u /* C */ #define PCRE2_ALT_CIRCUMFLEX 0x00200000u /* J M D */ +#define PCRE2_ALT_VERBNAMES 0x00400000u /* C */ +#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */ /* These are for pcre2_jit_compile(). */ @@ -144,9 +160,19 @@ sanity checks). */ #define PCRE2_DFA_RESTART 0x00000040u #define PCRE2_DFA_SHORTEST 0x00000080u -/* This is an additional option for pcre2_substitute(). */ +/* These are additional options for pcre2_substitute(), which passes any others +through to pcre2_match(). */ -#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u +#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u +#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u +#define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u +#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u +#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u + +/* A further option for pcre2_match(), not allowed for pcre2_dfa_match(), +ignored for pcre2_jit_match(). */ + +#define PCRE2_NO_JIT 0x00002000u /* Newline and \R settings, for use in compile contexts. The newline values must be kept in step with values set in config.h and both sets must all be @@ -233,6 +259,13 @@ numbers must not be changed. */ #define PCRE2_ERROR_RECURSIONLIMIT (-53) #define PCRE2_ERROR_UNAVAILABLE (-54) #define PCRE2_ERROR_UNSET (-55) +#define PCRE2_ERROR_BADOFFSETLIMIT (-56) +#define PCRE2_ERROR_BADREPESCAPE (-57) +#define PCRE2_ERROR_REPMISSINGBRACE (-58) +#define PCRE2_ERROR_BADSUBSTITUTION (-59) +#define PCRE2_ERROR_BADSUBSPATTERN (-60) +#define PCRE2_ERROR_TOOMANYREPLACE (-61) +#define PCRE2_ERROR_BADSERIALIZEDDATA (-62) /* Request types for pcre2_pattern_info() */ @@ -259,6 +292,7 @@ numbers must not be changed. */ #define PCRE2_INFO_NEWLINE 20 #define PCRE2_INFO_RECURSIONLIMIT 21 #define PCRE2_INFO_SIZE 22 +#define PCRE2_INFO_HASBACKSLASHC 23 /* Request types for pcre2_config(). 
*/ @@ -291,6 +325,7 @@ define special values to indicate zero-terminated strings and unset offsets in the offset vector (ovector). */ #define PCRE2_SIZE size_t +#define PCRE2_SIZE_MAX SIZE_MAX #define PCRE2_ZERO_TERMINATED (~(PCRE2_SIZE)0) #define PCRE2_UNSET (~(PCRE2_SIZE)0) @@ -365,164 +400,192 @@ expanded for each width below. Start with functions that give general information. */ #define PCRE2_GENERAL_INFO_FUNCTIONS \ -PCRE2_EXP_DECL int pcre2_config(uint32_t, void *); +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_config(uint32_t, void *); /* Functions for manipulating contexts. */ #define PCRE2_GENERAL_CONTEXT_FUNCTIONS \ -PCRE2_EXP_DECL \ - pcre2_general_context *pcre2_general_context_copy(pcre2_general_context *); \ -PCRE2_EXP_DECL \ - pcre2_general_context *pcre2_general_context_create( \ - void *(*)(PCRE2_SIZE, void *), \ - void (*)(void *, void *), void *); \ -PCRE2_EXP_DECL void pcre2_general_context_free(pcre2_general_context *); +PCRE2_EXP_DECL pcre2_general_context PCRE2_CALL_CONVENTION \ + *pcre2_general_context_copy(pcre2_general_context *); \ +PCRE2_EXP_DECL pcre2_general_context PCRE2_CALL_CONVENTION \ + *pcre2_general_context_create(void *(*)(PCRE2_SIZE, void *), \ + void (*)(void *, void *), void *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_general_context_free(pcre2_general_context *); #define PCRE2_COMPILE_CONTEXT_FUNCTIONS \ -PCRE2_EXP_DECL \ - pcre2_compile_context *pcre2_compile_context_copy(pcre2_compile_context *); \ -PCRE2_EXP_DECL \ - pcre2_compile_context *pcre2_compile_context_create(pcre2_general_context *);\ -PCRE2_EXP_DECL void pcre2_compile_context_free(pcre2_compile_context *); \ -PCRE2_EXP_DECL int pcre2_set_bsr(pcre2_compile_context *, uint32_t); \ -PCRE2_EXP_DECL int pcre2_set_character_tables(pcre2_compile_context *, \ - const unsigned char *); \ -PCRE2_EXP_DECL int pcre2_set_newline(pcre2_compile_context *, uint32_t); \ -PCRE2_EXP_DECL int pcre2_set_parens_nest_limit(pcre2_compile_context *, \ - uint32_t); \ 
-PCRE2_EXP_DECL int pcre2_set_compile_recursion_guard(\ - pcre2_compile_context *, int (*)(uint32_t, void *), \ - void *); +PCRE2_EXP_DECL pcre2_compile_context PCRE2_CALL_CONVENTION \ + *pcre2_compile_context_copy(pcre2_compile_context *); \ +PCRE2_EXP_DECL pcre2_compile_context PCRE2_CALL_CONVENTION \ + *pcre2_compile_context_create(pcre2_general_context *);\ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_compile_context_free(pcre2_compile_context *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_bsr(pcre2_compile_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_character_tables(pcre2_compile_context *, const unsigned char *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_newline(pcre2_compile_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_parens_nest_limit(pcre2_compile_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_compile_recursion_guard(pcre2_compile_context *, \ + int (*)(uint32_t, void *), void *); #define PCRE2_MATCH_CONTEXT_FUNCTIONS \ -PCRE2_EXP_DECL \ - pcre2_match_context *pcre2_match_context_copy(pcre2_match_context *); \ -PCRE2_EXP_DECL \ - pcre2_match_context *pcre2_match_context_create(pcre2_general_context *); \ -PCRE2_EXP_DECL void pcre2_match_context_free(pcre2_match_context *); \ -PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \ - int (*)(pcre2_callout_block *, void *), void *); \ -PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \ - uint32_t); \ -PCRE2_EXP_DECL int pcre2_set_recursion_limit(pcre2_match_context *, \ - uint32_t); \ -PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \ - pcre2_match_context *, void *(*)(PCRE2_SIZE, void *), \ - void (*)(void *, void *), void *); +PCRE2_EXP_DECL pcre2_match_context PCRE2_CALL_CONVENTION \ + 
*pcre2_match_context_copy(pcre2_match_context *); \ +PCRE2_EXP_DECL pcre2_match_context PCRE2_CALL_CONVENTION \ + *pcre2_match_context_create(pcre2_general_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_match_context_free(pcre2_match_context *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_callout(pcre2_match_context *, \ + int (*)(pcre2_callout_block *, void *), void *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_match_limit(pcre2_match_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_offset_limit(pcre2_match_context *, PCRE2_SIZE); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_recursion_limit(pcre2_match_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_recursion_memory_management(pcre2_match_context *, \ + void *(*)(PCRE2_SIZE, void *), void (*)(void *, void *), void *); /* Functions concerned with compiling a pattern to PCRE internal code. */ #define PCRE2_COMPILE_FUNCTIONS \ -PCRE2_EXP_DECL \ - pcre2_code *pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, \ - int *, PCRE2_SIZE *, pcre2_compile_context *); \ -PCRE2_EXP_DECL void pcre2_code_free(pcre2_code *); +PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \ + *pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, int *, PCRE2_SIZE *, \ + pcre2_compile_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_code_free(pcre2_code *); \ +PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \ + *pcre2_code_copy(const pcre2_code *); \ +PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \ + *pcre2_code_copy_with_tables(const pcre2_code *); /* Functions that give information about a compiled pattern. 
*/ #define PCRE2_PATTERN_INFO_FUNCTIONS \ -PCRE2_EXP_DECL int pcre2_pattern_info(const pcre2_code *, uint32_t, \ - void *); \ -PCRE2_EXP_DECL int pcre2_callout_enumerate(const pcre2_code *, \ - int (*)(pcre2_callout_enumerate_block *, void *), \ - void *); +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_pattern_info(const pcre2_code *, uint32_t, void *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_callout_enumerate(const pcre2_code *, \ + int (*)(pcre2_callout_enumerate_block *, void *), void *); /* Functions for running a match and inspecting the result. */ #define PCRE2_MATCH_FUNCTIONS \ -PCRE2_EXP_DECL \ - pcre2_match_data *pcre2_match_data_create(uint32_t, \ - pcre2_general_context *); \ -PCRE2_EXP_DECL \ - pcre2_match_data *pcre2_match_data_create_from_pattern(\ - const pcre2_code *, \ - pcre2_general_context *); \ -PCRE2_EXP_DECL int pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, \ - PCRE2_SIZE, PCRE2_SIZE, uint32_t, \ - pcre2_match_data *, pcre2_match_context *, int *, \ - PCRE2_SIZE); \ -PCRE2_EXP_DECL int pcre2_match(const pcre2_code *, \ - PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, \ - pcre2_match_data *, pcre2_match_context *); \ -PCRE2_EXP_DECL void pcre2_match_data_free(pcre2_match_data *); \ -PCRE2_EXP_DECL PCRE2_SPTR pcre2_get_mark(pcre2_match_data *); \ -PCRE2_EXP_DECL uint32_t pcre2_get_ovector_count(pcre2_match_data *); \ -PCRE2_EXP_DECL PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *); \ -PCRE2_EXP_DECL PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *); +PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \ + *pcre2_match_data_create(uint32_t, pcre2_general_context *); \ +PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \ + *pcre2_match_data_create_from_pattern(const pcre2_code *, \ + pcre2_general_context *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \ + uint32_t, pcre2_match_data *, pcre2_match_context *, int *, PCRE2_SIZE); \ 
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \ + uint32_t, pcre2_match_data *, pcre2_match_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_match_data_free(pcre2_match_data *); \ +PCRE2_EXP_DECL PCRE2_SPTR PCRE2_CALL_CONVENTION \ + pcre2_get_mark(pcre2_match_data *); \ +PCRE2_EXP_DECL uint32_t PCRE2_CALL_CONVENTION \ + pcre2_get_ovector_count(pcre2_match_data *); \ +PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \ + *pcre2_get_ovector_pointer(pcre2_match_data *); \ +PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \ + pcre2_get_startchar(pcre2_match_data *); /* Convenience functions for handling matched substrings. */ #define PCRE2_SUBSTRING_FUNCTIONS \ -PCRE2_EXP_DECL int pcre2_substring_copy_byname(pcre2_match_data *, \ - PCRE2_SPTR, PCRE2_UCHAR *, PCRE2_SIZE *); \ -PCRE2_EXP_DECL int pcre2_substring_copy_bynumber(pcre2_match_data *, \ - uint32_t, PCRE2_UCHAR *, PCRE2_SIZE *); \ -PCRE2_EXP_DECL void pcre2_substring_free(PCRE2_UCHAR *); \ -PCRE2_EXP_DECL int pcre2_substring_get_byname(pcre2_match_data *, \ - PCRE2_SPTR, PCRE2_UCHAR **, PCRE2_SIZE *); \ -PCRE2_EXP_DECL int pcre2_substring_get_bynumber(pcre2_match_data *, \ - uint32_t, PCRE2_UCHAR **, PCRE2_SIZE *); \ -PCRE2_EXP_DECL int pcre2_substring_length_byname(pcre2_match_data *, \ - PCRE2_SPTR, PCRE2_SIZE *); \ -PCRE2_EXP_DECL int pcre2_substring_length_bynumber(pcre2_match_data *, \ - uint32_t, PCRE2_SIZE *); \ -PCRE2_EXP_DECL int pcre2_substring_nametable_scan(const pcre2_code *, \ - PCRE2_SPTR, PCRE2_SPTR *, PCRE2_SPTR *); \ -PCRE2_EXP_DECL int pcre2_substring_number_from_name(\ - const pcre2_code *, PCRE2_SPTR); \ -PCRE2_EXP_DECL void pcre2_substring_list_free(PCRE2_SPTR *); \ -PCRE2_EXP_DECL int pcre2_substring_list_get(pcre2_match_data *, \ - PCRE2_UCHAR ***, PCRE2_SIZE **); +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_copy_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR *, \ + PCRE2_SIZE *); \ 
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_copy_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR *, \ + PCRE2_SIZE *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_substring_free(PCRE2_UCHAR *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_get_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR **, \ + PCRE2_SIZE *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_get_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR **, \ + PCRE2_SIZE *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_length_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_SIZE *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_length_bynumber(pcre2_match_data *, uint32_t, PCRE2_SIZE *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_nametable_scan(const pcre2_code *, PCRE2_SPTR, PCRE2_SPTR *, \ + PCRE2_SPTR *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_number_from_name(const pcre2_code *, PCRE2_SPTR); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_substring_list_free(PCRE2_SPTR *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_list_get(pcre2_match_data *, PCRE2_UCHAR ***, PCRE2_SIZE **); /* Functions for serializing / deserializing compiled patterns. 
*/ #define PCRE2_SERIALIZE_FUNCTIONS \ -PCRE2_EXP_DECL int32_t pcre2_serialize_encode(const pcre2_code **, \ - int32_t, uint8_t **, PCRE2_SIZE *, \ - pcre2_general_context *); \ -PCRE2_EXP_DECL int32_t pcre2_serialize_decode(pcre2_code **, int32_t, \ - const uint8_t *, pcre2_general_context *); \ -PCRE2_EXP_DECL int32_t pcre2_serialize_get_number_of_codes(const uint8_t *); \ -PCRE2_EXP_DECL void pcre2_serialize_free(uint8_t *); +PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \ + pcre2_serialize_encode(const pcre2_code **, int32_t, uint8_t **, \ + PCRE2_SIZE *, pcre2_general_context *); \ +PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \ + pcre2_serialize_decode(pcre2_code **, int32_t, const uint8_t *, \ + pcre2_general_context *); \ +PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \ + pcre2_serialize_get_number_of_codes(const uint8_t *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_serialize_free(uint8_t *); /* Convenience function for match + substitute. */ #define PCRE2_SUBSTITUTE_FUNCTION \ -PCRE2_EXP_DECL int pcre2_substitute(const pcre2_code *, \ - PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, \ - pcre2_match_data *, pcre2_match_context *, \ - PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *, \ - PCRE2_SIZE *); +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substitute(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \ + uint32_t, pcre2_match_data *, pcre2_match_context *, PCRE2_SPTR, \ + PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE *); /* Functions for JIT processing */ #define PCRE2_JIT_FUNCTIONS \ -PCRE2_EXP_DECL int pcre2_jit_compile(pcre2_code *, uint32_t); \ -PCRE2_EXP_DECL int pcre2_jit_match(const pcre2_code *, \ - PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, \ - pcre2_match_data *, pcre2_match_context *); \ -PCRE2_EXP_DECL void pcre2_jit_free_unused_memory(pcre2_general_context *); \ -PCRE2_EXP_DECL \ - pcre2_jit_stack *pcre2_jit_stack_create(PCRE2_SIZE, PCRE2_SIZE, \ - pcre2_general_context *); \ -PCRE2_EXP_DECL void 
pcre2_jit_stack_assign(pcre2_match_context *, \ - pcre2_jit_callback, void *); \ -PCRE2_EXP_DECL void pcre2_jit_stack_free(pcre2_jit_stack *); +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_jit_compile(pcre2_code *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_jit_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \ + uint32_t, pcre2_match_data *, pcre2_match_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_jit_free_unused_memory(pcre2_general_context *); \ +PCRE2_EXP_DECL pcre2_jit_stack PCRE2_CALL_CONVENTION \ + *pcre2_jit_stack_create(PCRE2_SIZE, PCRE2_SIZE, pcre2_general_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_jit_stack_assign(pcre2_match_context *, pcre2_jit_callback, void *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_jit_stack_free(pcre2_jit_stack *); /* Other miscellaneous functions. */ #define PCRE2_OTHER_FUNCTIONS \ -PCRE2_EXP_DECL int pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \ -PCRE2_EXP_DECL \ - const uint8_t *pcre2_maketables(pcre2_general_context *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \ +PCRE2_EXP_DECL const uint8_t PCRE2_CALL_CONVENTION \ + *pcre2_maketables(pcre2_general_context *); \ /* Define macros that generate width-specific names from generic versions. The @@ -567,6 +630,8 @@ pcre2_compile are called by application code. */ /* Functions: the complete list in alphabetical order */ #define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_) +#define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_) +#define pcre2_code_copy_with_tables PCRE2_SUFFIX(pcre2_code_copy_with_tables_) #define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_) #define pcre2_compile PCRE2_SUFFIX(pcre2_compile_) #define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_) @@ -606,8 +671,10 @@ pcre2_compile are called by application code. 
*/ #define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_) #define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_) #define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_) +#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_) #define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_) #define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_) +#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_) #define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_) #define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_) #define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_) @@ -699,4 +766,6 @@ PCRE2_SUFFIX a no-op. Otherwise, generate an error. */ } /* extern "C" */ #endif -#endif /* End of pcre2.h */ +#endif /* PCRE2_H_IDEMPOTENT_GUARD */ + +/* End of pcre2.h */ diff --git a/pcre2/src/pcre2_auto_possess.c b/pcre2/src/pcre2_auto_possess.c index e99a2c44f..64ec6dfbb 100644 --- a/pcre2/src/pcre2_auto_possess.c +++ b/pcre2/src/pcre2_auto_possess.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2014 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -91,6 +91,7 @@ static const uint8_t autoposstab[APTROWS][APTCOLS] = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 } /* \X */ }; +#ifdef SUPPORT_UNICODE /* This table is used to check whether auto-possessification is possible between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP). 
The left-hand (repeated) opcode is used to select the row, and the right-hand @@ -170,64 +171,7 @@ static const uint8_t posspropstab[3][4] = { { ucp_Z, ucp_Z, ucp_C, ucp_Cc }, /* SPACE and PXSPACE, 2nd value redundant */ { ucp_L, ucp_N, ucp_P, ucp_Po } /* WORD */ }; - -/* This table is used when converting repeating opcodes into possessified -versions as a result of an explicit possessive quantifier such as ++. A zero -value means there is no possessified version - in those cases the item in -question must be wrapped in ONCE brackets. The table is truncated at OP_CALLOUT -because all relevant opcodes are less than that. */ - -static const uint8_t opcode_possessify[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 15 */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16 - 31 */ - - 0, /* NOTI */ - OP_POSSTAR, 0, /* STAR, MINSTAR */ - OP_POSPLUS, 0, /* PLUS, MINPLUS */ - OP_POSQUERY, 0, /* QUERY, MINQUERY */ - OP_POSUPTO, 0, /* UPTO, MINUPTO */ - 0, /* EXACT */ - 0, 0, 0, 0, /* POS{STAR,PLUS,QUERY,UPTO} */ - - OP_POSSTARI, 0, /* STARI, MINSTARI */ - OP_POSPLUSI, 0, /* PLUSI, MINPLUSI */ - OP_POSQUERYI, 0, /* QUERYI, MINQUERYI */ - OP_POSUPTOI, 0, /* UPTOI, MINUPTOI */ - 0, /* EXACTI */ - 0, 0, 0, 0, /* POS{STARI,PLUSI,QUERYI,UPTOI} */ - - OP_NOTPOSSTAR, 0, /* NOTSTAR, NOTMINSTAR */ - OP_NOTPOSPLUS, 0, /* NOTPLUS, NOTMINPLUS */ - OP_NOTPOSQUERY, 0, /* NOTQUERY, NOTMINQUERY */ - OP_NOTPOSUPTO, 0, /* NOTUPTO, NOTMINUPTO */ - 0, /* NOTEXACT */ - 0, 0, 0, 0, /* NOTPOS{STAR,PLUS,QUERY,UPTO} */ - - OP_NOTPOSSTARI, 0, /* NOTSTARI, NOTMINSTARI */ - OP_NOTPOSPLUSI, 0, /* NOTPLUSI, NOTMINPLUSI */ - OP_NOTPOSQUERYI, 0, /* NOTQUERYI, NOTMINQUERYI */ - OP_NOTPOSUPTOI, 0, /* NOTUPTOI, NOTMINUPTOI */ - 0, /* NOTEXACTI */ - 0, 0, 0, 0, /* NOTPOS{STARI,PLUSI,QUERYI,UPTOI} */ - - OP_TYPEPOSSTAR, 0, /* TYPESTAR, TYPEMINSTAR */ - OP_TYPEPOSPLUS, 0, /* TYPEPLUS, TYPEMINPLUS */ - OP_TYPEPOSQUERY, 0, /* TYPEQUERY, TYPEMINQUERY */ - OP_TYPEPOSUPTO, 0, /* TYPEUPTO, 
TYPEMINUPTO */ - 0, /* TYPEEXACT */ - 0, 0, 0, 0, /* TYPEPOS{STAR,PLUS,QUERY,UPTO} */ - - OP_CRPOSSTAR, 0, /* CRSTAR, CRMINSTAR */ - OP_CRPOSPLUS, 0, /* CRPLUS, CRMINPLUS */ - OP_CRPOSQUERY, 0, /* CRQUERY, CRMINQUERY */ - OP_CRPOSRANGE, 0, /* CRRANGE, CRMINRANGE */ - 0, 0, 0, 0, /* CRPOS{STAR,PLUS,QUERY,RANGE} */ - - 0, 0, 0, /* CLASS, NCLASS, XCLASS */ - 0, 0, /* REF, REFI */ - 0, 0, /* DNREF, DNREFI */ - 0, 0 /* RECURSE, CALLOUT */ -}; +#endif /* SUPPORT_UNICODE */ @@ -645,6 +589,7 @@ for(;;) case OP_ASSERTBACK_NOT: case OP_ONCE: case OP_ONCE_NC: + /* Atomic sub-patterns and assertions can always auto-possessify their last iterator. However, if the group was entered as a result of checking a previous iterator, this is not possible. */ @@ -662,6 +607,9 @@ for(;;) next_code = code + GET(code, 1); code += PRIV(OP_lengths)[c]; + /* Check each branch. We have to recurse a level for all but the last + branch. */ + while (*next_code == OP_ALT) { if (!compare_opcodes(code, utf, cb, base_list, base_end, rec_limit)) @@ -1102,8 +1050,10 @@ but some compilers complain about an unreachable statement. */ /* Replaces single character iterations with their possessive alternatives if appropriate. This function modifies the compiled opcode! Hitting a -non-existant opcode may indicate a bug in PCRE2, but it can also be caused if a -bad UTF string was compiled with PCRE2_NO_UTF_CHECK. +non-existent opcode may indicate a bug in PCRE2, but it can also be caused if a +bad UTF string was compiled with PCRE2_NO_UTF_CHECK. The rec_limit catches +overly complicated or large patterns. In these cases, the check just stops, +leaving the remainder of the pattern unpossessified. 
Arguments: code points to start of the byte code @@ -1117,11 +1067,11 @@ Returns: 0 for success int PRIV(auto_possessify)(PCRE2_UCHAR *code, BOOL utf, const compile_block *cb) { -register PCRE2_UCHAR c; +PCRE2_UCHAR c; PCRE2_SPTR end; PCRE2_UCHAR *repeat_opcode; uint32_t list[8]; -int rec_limit; +int rec_limit = 1000; /* Was 10,000 but clang+ASAN uses a lot of stack. */ for (;;) { @@ -1136,7 +1086,6 @@ for (;;) get_chr_property_list(code, utf, cb->fcc, list) : NULL; list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO; - rec_limit = 1000; if (end != NULL && compare_opcodes(end, utf, cb, list, end, &rec_limit)) { switch(c) @@ -1193,7 +1142,6 @@ for (;;) list[1] = (c & 1) == 0; - rec_limit = 1000; if (compare_opcodes(end, utf, cb, list, end, &rec_limit)) { switch (c) diff --git a/pcre2/src/pcre2_compile.c b/pcre2/src/pcre2_compile.c index 4a9e42e2c..6d98a68ca 100644 --- a/pcre2/src/pcre2_compile.c +++ b/pcre2/src/pcre2_compile.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2015 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -58,9 +58,14 @@ POSSIBILITY OF SUCH DAMAGE. #define PRINTABLE(c) ((c) >= 32 && (c) < 127) #endif #include "pcre2_printint.c" -#define CALL_PRINTINT +#define DEBUG_CALL_PRINTINT #endif +/* Other debugging code can be enabled by these defines. */ + +// #define DEBUG_SHOW_CAPTURES +// #define DEBUG_SHOW_PARSED + /* There are a few things that vary with different code unit sizes. Handle them by defining macros in order to minimize #if usage. */ @@ -79,16 +84,56 @@ by defining macros in order to minimize #if usage. 
*/ #endif #endif +/* Macros to store and retrieve a PCRE2_SIZE value in the parsed pattern, which +consists of uint32_t elements. Assume that if uint32_t can't hold it, two of +them will be able to (i.e. assume a 64-bit world). */ + +#if PCRE2_SIZE_MAX <= UINT32_MAX +#define PUTOFFSET(s,p) *p++ = s +#define GETOFFSET(s,p) s = *p++ +#define GETPLUSOFFSET(s,p) s = *(++p) +#define READPLUSOFFSET(s,p) s = p[1] +#define SKIPOFFSET(p) p++ +#define SIZEOFFSET 1 +#else +#define PUTOFFSET(s,p) \ + { *p++ = (uint32_t)(s >> 32); *p++ = (uint32_t)(s & 0xffffffff); } +#define GETOFFSET(s,p) \ + { s = ((PCRE2_SIZE)p[0] << 32) | (PCRE2_SIZE)p[1]; p += 2; } +#define GETPLUSOFFSET(s,p) \ + { s = ((PCRE2_SIZE)p[1] << 32) | (PCRE2_SIZE)p[2]; p += 2; } +#define READPLUSOFFSET(s,p) \ + { s = ((PCRE2_SIZE)p[1] << 32) | (PCRE2_SIZE)p[2]; } +#define SKIPOFFSET(p) p += 2 +#define SIZEOFFSET 2 +#endif + +/* Macros for manipulating elements of the parsed pattern vector. */ + +#define META_CODE(x) (x & 0xffff0000u) +#define META_DATA(x) (x & 0x0000ffffu) +#define META_DIFF(x,y) ((x-y)>>16) + /* Function definitions to allow mutual recursion */ +#ifdef SUPPORT_UNICODE +static unsigned int + add_list_to_class_internal(uint8_t *, PCRE2_UCHAR **, uint32_t, + compile_block *, const uint32_t *, unsigned int); +#endif + static int - add_list_to_class(uint8_t *, PCRE2_UCHAR **, uint32_t, compile_block *, - const uint32_t *, unsigned int); + compile_regex(uint32_t, PCRE2_UCHAR **, uint32_t **, int *, uint32_t, + uint32_t *, int32_t *, uint32_t *, int32_t *, branch_chain *, + compile_block *, PCRE2_SIZE *); + +static int + get_branchlength(uint32_t **, int *, int *, parsed_recurse_check *, + compile_block *); static BOOL - compile_regex(uint32_t, PCRE2_UCHAR **, PCRE2_SPTR *, int *, BOOL, BOOL, - uint32_t, int, uint32_t *, int32_t *, uint32_t *, int32_t *, - branch_chain *, compile_block *, size_t *); + set_lookbehind_lengths(uint32_t **, int *, int *, parsed_recurse_check *, + compile_block *); @@ 
-96,26 +141,35 @@ static BOOL * Code parameters and static tables * *************************************************/ -/* This value specifies the size of stack workspace, which is used during the -pre-compile phase when determining how much memory is required. The regex is -partly compiled into this space, but the compiled parts are discarded as soon -as they can be, so that hopefully there will never be an overrun. The code -does, however, check for an overrun. The largest amount I've seen used is 218, -so this number is very generous. +#define MAX_GROUP_NUMBER 65535u +#define MAX_REPEAT_COUNT 65535u +#define REPEAT_UNLIMITED (MAX_REPEAT_COUNT+1) -The same workspace is used during the second, actual compile phase for -remembering forward references to groups so that they can be filled in at the -end. Each entry in this list occupies LINK_SIZE bytes, so even when LINK_SIZE -is 4 there is plenty of room for most patterns. However, the memory can get -filled up by repetitions of forward references, for example patterns like -/(?1){0,1999}(b)/, and one user did hit the limit. The code has been changed so -that the workspace is expanded in this situation. The value below is therefore -a minimum, and we put a maximum on it for safety. The minimum is now also -defined in terms of LINK_SIZE so that the size increase kicks in at the same -number of forward references in all cases. */ +/* COMPILE_WORK_SIZE specifies the size of stack workspace, which is used in +different ways in the different pattern scans. The parsing and group- +identifying pre-scan uses it to handle nesting, and needs it to be 16-bit +aligned for this. Having defined the size in code units, we set up +C16_WORK_SIZE as the number of elements in the 16-bit vector. 
-#define COMPILE_WORK_SIZE (2048*LINK_SIZE) -#define COMPILE_WORK_SIZE_MAX (100*COMPILE_WORK_SIZE) +During the first compiling phase, when determining how much memory is required, +the regex is partly compiled into this space, but the compiled parts are +discarded as soon as they can be, so that hopefully there will never be an +overrun. The code does, however, check for an overrun, which can occur for +pathological patterns. The size of the workspace depends on LINK_SIZE because +the length of compiled items varies with this. + +In the real compile phase, this workspace is not currently used. */ + +#define COMPILE_WORK_SIZE (2048*LINK_SIZE) /* Size in code units */ + +#define C16_WORK_SIZE \ + ((COMPILE_WORK_SIZE * sizeof(PCRE2_UCHAR))/sizeof(uint16_t)) + +/* A uint32_t vector is used for caching information about the size of +capturing groups, to improve performance. A default is created on the stack of +this size. */ + +#define GROUPINFO_DEFAULT_SIZE 256 /* The overrun tests check for a slightly smaller size so that they detect the overrun before it actually does run off the end of the data block. */ @@ -129,28 +183,186 @@ value is the number of slots in the list. */ #define NAMED_GROUP_LIST_SIZE 20 -/* The original PCRE required patterns to be zero-terminated, and it simplifies -the compiling code if it is guaranteed that there is a zero code unit at the -end of the pattern, because this means that tests for coding sequences such as -(*SKIP) or even just (?<= can check a sequence of code units without having to -keep checking for the end of the pattern. The new PCRE2 API allows zero code -units within patterns if a positive length is given, but in order to keep most -of the compiling code as it was, we copy such patterns and add a zero on the -end. This value determines the size of space on the stack that is used if the -pattern fits; if not, heap memory is used. */ +/* The pre-compiling pass over the pattern creates a parsed pattern in a vector +of uint32_t. 
For short patterns this lives on the stack, with this size. Heap +memory is used for longer patterns. */ -#define COPIED_PATTERN_SIZE 1024 +#define PARSED_PATTERN_DEFAULT_SIZE 1024 /* Maximum length value to check against when making sure that the variable that holds the compiled pattern length does not overflow. We make it a bit less -than INT_MAX to allow for adding in group terminating bytes, so that we don't -have to check them every time. */ +than INT_MAX to allow for adding in group terminating code units, so that we +don't have to check them every time. */ #define OFLOW_MAX (INT_MAX - 20) -/* Macro for setting individual bits in class bitmaps. */ +/* Code values for parsed patterns, which are stored in a vector of 32-bit +unsigned ints. Values less than META_END are literal data values. The coding +for identifying the item is in the top 16-bits, leaving 16 bits for the +additional data that some of them need. The META_CODE, META_DATA, and META_DIFF +macros are used to manipulate parsed pattern elements. -#define SETBIT(a,b) a[(b)/8] |= (1 << ((b)&7)) +NOTE: When these definitions are changed, the table of extra lengths for each +code (meta_extra_lengths, just below) must be updated to remain in step. 
*/ + +#define META_END 0x80000000u /* End of pattern */ + +#define META_ALT 0x80010000u /* alternation */ +#define META_ATOMIC 0x80020000u /* atomic group */ +#define META_BACKREF 0x80030000u /* Back ref */ +#define META_BACKREF_BYNAME 0x80040000u /* \k'name' */ +#define META_BIGVALUE 0x80050000u /* Next is a literal > META_END */ +#define META_CALLOUT_NUMBER 0x80060000u /* (?C with numerical argument */ +#define META_CALLOUT_STRING 0x80070000u /* (?C with string argument */ +#define META_CAPTURE 0x80080000u /* Capturing parenthesis */ +#define META_CIRCUMFLEX 0x80090000u /* ^ metacharacter */ +#define META_CLASS 0x800a0000u /* start non-empty class */ +#define META_CLASS_EMPTY 0x800b0000u /* empty class */ +#define META_CLASS_EMPTY_NOT 0x800c0000u /* negative empty class */ +#define META_CLASS_END 0x800d0000u /* end of non-empty class */ +#define META_CLASS_NOT 0x800e0000u /* start non-empty negative class */ +#define META_COND_ASSERT 0x800f0000u /* (?(?assertion)... */ +#define META_COND_DEFINE 0x80100000u /* (?(DEFINE)... */ +#define META_COND_NAME 0x80110000u /* (?()... */ +#define META_COND_NUMBER 0x80120000u /* (?(digits)... */ +#define META_COND_RNAME 0x80130000u /* (?(R&name)... */ +#define META_COND_RNUMBER 0x80140000u /* (?(Rdigits)... */ +#define META_COND_VERSION 0x80150000u /* (?(VERSIONx.y)... */ +#define META_DOLLAR 0x80160000u /* $ metacharacter */ +#define META_DOT 0x80170000u /* . 
metacharacter */ +#define META_ESCAPE 0x80180000u /* \d and friends */ +#define META_KET 0x80190000u /* closing parenthesis */ +#define META_NOCAPTURE 0x801a0000u /* no capture parens */ +#define META_OPTIONS 0x801b0000u /* (?i) and friends */ +#define META_POSIX 0x801c0000u /* POSIX class item */ +#define META_POSIX_NEG 0x801d0000u /* negative POSIX class item */ +#define META_RANGE_ESCAPED 0x801e0000u /* range with at least one escape */ +#define META_RANGE_LITERAL 0x801f0000u /* range defined literally */ +#define META_RECURSE 0x80200000u /* Recursion */ +#define META_RECURSE_BYNAME 0x80210000u /* (?&name) */ + +/* These must be kept together to make it easy to check that an assertion +is present where expected in a conditional group. */ + +#define META_LOOKAHEAD 0x80220000u /* (?= */ +#define META_LOOKAHEADNOT 0x80230000u /* (?! */ +#define META_LOOKBEHIND 0x80240000u /* (?<= */ +#define META_LOOKBEHINDNOT 0x80250000u /* (?= 10 */ + 1+SIZEOFFSET, /* META_BACKREF_BYNAME */ + 1, /* META_BIGVALUE */ + 3, /* META_CALLOUT_NUMBER */ + 3+SIZEOFFSET, /* META_CALLOUT_STRING */ + 0, /* META_CAPTURE */ + 0, /* META_CIRCUMFLEX */ + 0, /* META_CLASS */ + 0, /* META_CLASS_EMPTY */ + 0, /* META_CLASS_EMPTY_NOT */ + 0, /* META_CLASS_END */ + 0, /* META_CLASS_NOT */ + 0, /* META_COND_ASSERT */ + SIZEOFFSET, /* META_COND_DEFINE */ + 1+SIZEOFFSET, /* META_COND_NAME */ + 1+SIZEOFFSET, /* META_COND_NUMBER */ + 1+SIZEOFFSET, /* META_COND_RNAME */ + 1+SIZEOFFSET, /* META_COND_RNUMBER */ + 3, /* META_COND_VERSION */ + 0, /* META_DOLLAR */ + 0, /* META_DOT */ + 0, /* META_ESCAPE - more for ESC_P, ESC_p, ESC_g, ESC_k */ + 0, /* META_KET */ + 0, /* META_NOCAPTURE */ + 1, /* META_OPTIONS */ + 1, /* META_POSIX */ + 1, /* META_POSIX_NEG */ + 0, /* META_RANGE_ESCAPED */ + 0, /* META_RANGE_LITERAL */ + SIZEOFFSET, /* META_RECURSE */ + 1+SIZEOFFSET, /* META_RECURSE_BYNAME */ + 0, /* META_LOOKAHEAD */ + 0, /* META_LOOKAHEADNOT */ + SIZEOFFSET, /* META_LOOKBEHIND */ + SIZEOFFSET, /* 
META_LOOKBEHINDNOT */ + 1, /* META_MARK - plus the string length */ + 0, /* META_ACCEPT */ + 0, /* META_COMMIT */ + 0, /* META_FAIL */ + 0, /* META_PRUNE */ + 1, /* META_PRUNE_ARG - plus the string length */ + 0, /* META_SKIP */ + 1, /* META_SKIP_ARG - plus the string length */ + 0, /* META_THEN */ + 1, /* META_THEN_ARG - plus the string length */ + 0, /* META_ASTERISK */ + 0, /* META_ASTERISK_PLUS */ + 0, /* META_ASTERISK_QUERY */ + 0, /* META_PLUS */ + 0, /* META_PLUS_PLUS */ + 0, /* META_PLUS_QUERY */ + 0, /* META_QUERY */ + 0, /* META_QUERY_PLUS */ + 0, /* META_QUERY_QUERY */ + 2, /* META_MINMAX */ + 2, /* META_MINMAX_PLUS */ + 2 /* META_MINMAX_QUERY */ +}; + +/* Types for skipping parts of a parsed pattern. */ + +enum { PSKIP_ALT, PSKIP_CLASS, PSKIP_KET }; + +/* Macro for setting individual bits in class bitmaps. It took some +experimenting to figure out how to stop gcc 5.3.0 from warning with +-Wconversion. This version gets a warning: + + #define SETBIT(a,b) a[(b)/8] |= (uint8_t)(1 << ((b)&7)) + +Let's hope the apparently less efficient version isn't actually so bad if the +compiler is clever with identical subexpressions. */ + +#define SETBIT(a,b) a[(b)/8] = (uint8_t)(a[(b)/8] | (1 << ((b)&7))) /* Private flags added to firstcu and reqcu. */ @@ -160,10 +372,11 @@ have to check them every time. */ #define REQ_UNSET (-2) /* Not yet found anything */ #define REQ_NONE (-1) /* Found not fixed char */ -/* This bit (which is greater than any UTF value) is used to indicate that a -variable contains a number of code units instead of an actual code point. */ +/* These flags are used in the groupinfo vector. */ -#define UTF_LENGTH 0x10000000l +#define GI_SET_FIXED_LENGTH 0x80000000u +#define GI_NOT_FIXED_LENGTH 0x40000000u +#define GI_FIXED_LENGTH_MASK 0x0000ffffu /* This simple test for a decimal digit works for both ASCII/Unicode and EBCDIC and is fast (a good compiler can turn it into a subtraction and unsigned @@ -175,8 +388,8 @@ comparison). 
*/ locale, and may mark arbitrary characters as digits. We want to recognize only 0-9, a-z, and A-Z as hex digits, which is why we have a private table here. It costs 256 bytes, but it is a lot faster than doing character value tests (at -least in some simple cases I timed), and in some applications one wants PCRE to -compile efficiently as well as match efficiently. The value in the table is +least in some simple cases I timed), and in some applications one wants PCRE2 +to compile efficiently as well as match efficiently. The value in the table is the binary hex digit value, or 0xff for non-hex digits. */ /* This is the "normal" case, for ASCII systems, and EBCDIC systems running in @@ -270,7 +483,7 @@ in UTF-8 mode. It runs from '0' to 'z'. */ #ifndef EBCDIC #define ESCAPES_FIRST CHAR_0 #define ESCAPES_LAST CHAR_z -#define ESCAPES_UPPER_CASE (-32) /* Add this to upper case a letter */ +#define UPPER_CASE(c) (c-32) static const short int escapes[] = { 0, 0, @@ -323,11 +536,11 @@ because it is defined as 'a', which of course picks up the ASCII value. */ #if 'a' == 0x81 /* Check for a real EBCDIC environment */ #define ESCAPES_FIRST CHAR_a #define ESCAPES_LAST CHAR_9 -#define ESCAPES_UPPER_CASE (+64) /* Add this to upper case a letter */ +#define UPPER_CASE(c) (c+64) #else /* Testing in an ASCII environment */ #define ESCAPES_FIRST ((unsigned char)'\x81') /* EBCDIC 'a' */ #define ESCAPES_LAST ((unsigned char)'\xf9') /* EBCDIC '9' */ -#define ESCAPES_UPPER_CASE (-32) /* Add this to upper case a letter */ +#define UPPER_CASE(c) (c-32) #endif static const short int escapes[] = { @@ -364,9 +577,9 @@ string is built from string macros so that it works in UTF-8 mode on EBCDIC platforms. 
*/ typedef struct verbitem { - int len; /* Length of verb name */ - int op; /* Op when no arg, or -1 if arg mandatory */ - int op_arg; /* Op when arg present, or -1 if not allowed */ + unsigned int len; /* Length of verb name */ + uint32_t meta; /* Base META_ code */ + int has_arg; /* Argument requirement */ } verbitem; static const char verbnames[] = @@ -381,32 +594,30 @@ static const char verbnames[] = STRING_THEN; static const verbitem verbs[] = { - { 0, -1, OP_MARK }, - { 4, -1, OP_MARK }, - { 6, OP_ACCEPT, -1 }, - { 6, OP_COMMIT, -1 }, - { 1, OP_FAIL, -1 }, - { 4, OP_FAIL, -1 }, - { 5, OP_PRUNE, OP_PRUNE_ARG }, - { 4, OP_SKIP, OP_SKIP_ARG }, - { 4, OP_THEN, OP_THEN_ARG } + { 0, META_MARK, +1 }, /* > 0 => must have an argument */ + { 4, META_MARK, +1 }, + { 6, META_ACCEPT, -1 }, /* < 0 => must not have an argument */ + { 6, META_COMMIT, -1 }, + { 1, META_FAIL, -1 }, + { 4, META_FAIL, -1 }, + { 5, META_PRUNE, 0 }, /* Argument is optional; bump META code if found */ + { 4, META_SKIP, 0 }, + { 4, META_THEN, 0 } }; static const int verbcount = sizeof(verbs)/sizeof(verbitem); +/* Verb opcodes, indexed by their META code offset from META_MARK. */ -/* Substitutes for [[:<:]] and [[:>:]], which mean start and end of word in -another regex library. */ +static const uint32_t verbops[] = { + OP_MARK, OP_ACCEPT, OP_COMMIT, OP_FAIL, OP_PRUNE, OP_PRUNE_ARG, OP_SKIP, + OP_SKIP_ARG, OP_THEN, OP_THEN_ARG }; -static const PCRE2_UCHAR sub_start_of_word[] = { - CHAR_BACKSLASH, CHAR_b, CHAR_LEFT_PARENTHESIS, CHAR_QUESTION_MARK, - CHAR_EQUALS_SIGN, CHAR_BACKSLASH, CHAR_w, CHAR_RIGHT_PARENTHESIS, '\0' }; - -static const PCRE2_UCHAR sub_end_of_word[] = { - CHAR_BACKSLASH, CHAR_b, CHAR_LEFT_PARENTHESIS, CHAR_QUESTION_MARK, - CHAR_LESS_THAN_SIGN, CHAR_EQUALS_SIGN, CHAR_BACKSLASH, CHAR_w, - CHAR_RIGHT_PARENTHESIS, '\0' }; +/* Offsets from OP_STAR for case-independent and negative repeat opcodes. 
*/ +static uint32_t chartypeoffset[] = { + OP_STAR - OP_STAR, OP_STARI - OP_STAR, + OP_NOTSTAR - OP_STAR, OP_NOTSTARI - OP_STAR }; /* Tables of names of POSIX character classes and their lengths. The names are now all in a single string, to reduce the number of relocations when a shared @@ -428,7 +639,6 @@ static const uint8_t posix_name_lengths[] = { #define PC_PRINT 9 #define PC_PUNCT 10 - /* Table of class bit maps for each POSIX class. Each class is formed from a base map, with an optional addition or removal of another map. Then, for some classes, there is some additional tweaking: for [:blank:] the vertical space @@ -456,134 +666,46 @@ static const int posix_class_maps[] = { cbit_xdigit,-1, 0 /* xdigit */ }; -/* Table of substitutes for \d etc when PCRE2_UCP is set. They are replaced by -Unicode property escapes. */ - #ifdef SUPPORT_UNICODE -static const PCRE2_UCHAR string_PNd[] = { - CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, - CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const PCRE2_UCHAR string_pNd[] = { - CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, - CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const PCRE2_UCHAR string_PXsp[] = { - CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, - CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const PCRE2_UCHAR string_pXsp[] = { - CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, - CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const PCRE2_UCHAR string_PXwd[] = { - CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, - CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const PCRE2_UCHAR string_pXwd[] = { - CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, - CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static PCRE2_SPTR substitutes[] = { - string_PNd, /* \D */ - string_pNd, /* \d */ - string_PXsp, /* \S */ /* Xsp is Perl space, but from 8.34, Perl */ - string_pXsp, /* \s */ /* space and POSIX space are the same. 
*/ - string_PXwd, /* \W */ - string_pXwd /* \w */ +/* The POSIX class Unicode property substitutes that are used in UCP mode must +be in the order of the POSIX class names, defined above. */ + +static int posix_substitutes[] = { + PT_GC, ucp_L, /* alpha */ + PT_PC, ucp_Ll, /* lower */ + PT_PC, ucp_Lu, /* upper */ + PT_ALNUM, 0, /* alnum */ + -1, 0, /* ascii, treat as non-UCP */ + -1, 1, /* blank, treat as \h */ + PT_PC, ucp_Cc, /* cntrl */ + PT_PC, ucp_Nd, /* digit */ + PT_PXGRAPH, 0, /* graph */ + PT_PXPRINT, 0, /* print */ + PT_PXPUNCT, 0, /* punct */ + PT_PXSPACE, 0, /* space */ /* Xps is POSIX space, but from 8.34 */ + PT_WORD, 0, /* word */ /* Perl and POSIX space are the same */ + -1, 0 /* xdigit, treat as non-UCP */ }; - -/* The POSIX class substitutes must be in the order of the POSIX class names, -defined above, and there are both positive and negative cases. NULL means no -general substitute of a Unicode property escape (\p or \P). However, for some -POSIX classes (e.g. graph, print, punct) a special property code is compiled -directly. 
*/ - -static const PCRE2_UCHAR string_pCc[] = { - CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, - CHAR_C, CHAR_c, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const PCRE2_UCHAR string_pL[] = { - CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, - CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const PCRE2_UCHAR string_pLl[] = { - CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, - CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const PCRE2_UCHAR string_pLu[] = { - CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, - CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const PCRE2_UCHAR string_pXan[] = { - CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, - CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const PCRE2_UCHAR string_h[] = { - CHAR_BACKSLASH, CHAR_h, '\0' }; -static const PCRE2_UCHAR string_pXps[] = { - CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, - CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const PCRE2_UCHAR string_PCc[] = { - CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, - CHAR_C, CHAR_c, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const PCRE2_UCHAR string_PL[] = { - CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, - CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const PCRE2_UCHAR string_PLl[] = { - CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, - CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const PCRE2_UCHAR string_PLu[] = { - CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, - CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const PCRE2_UCHAR string_PXan[] = { - CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, - CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' }; -static const PCRE2_UCHAR string_H[] = { - CHAR_BACKSLASH, CHAR_H, '\0' }; -static const PCRE2_UCHAR string_PXps[] = { - CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, - CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' }; - -static PCRE2_SPTR posix_substitutes[] = { - string_pL, /* alpha */ - 
string_pLl, /* lower */ - string_pLu, /* upper */ - string_pXan, /* alnum */ - NULL, /* ascii */ - string_h, /* blank */ - string_pCc, /* cntrl */ - string_pNd, /* digit */ - NULL, /* graph */ - NULL, /* print */ - NULL, /* punct */ - string_pXps, /* space */ /* Xps is POSIX space, but from 8.34 */ - string_pXwd, /* word */ /* Perl and POSIX space are the same */ - NULL, /* xdigit */ - /* Negated cases */ - string_PL, /* ^alpha */ - string_PLl, /* ^lower */ - string_PLu, /* ^upper */ - string_PXan, /* ^alnum */ - NULL, /* ^ascii */ - string_H, /* ^blank */ - string_PCc, /* ^cntrl */ - string_PNd, /* ^digit */ - NULL, /* ^graph */ - NULL, /* ^print */ - NULL, /* ^punct */ - string_PXps, /* ^space */ /* Xps is POSIX space, but from 8.34 */ - string_PXwd, /* ^word */ /* Perl and POSIX space are the same */ - NULL /* ^xdigit */ -}; -#define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(PCRE2_UCHAR *)) +#define POSIX_SUBSIZE (sizeof(posix_substitutes) / (2*sizeof(uint32_t))) #endif /* SUPPORT_UNICODE */ /* Masks for checking option settings. 
*/ #define PUBLIC_COMPILE_OPTIONS \ (PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \ - PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY|PCRE2_DOTALL| \ - PCRE2_DUPNAMES|PCRE2_EXTENDED|PCRE2_FIRSTLINE|PCRE2_MATCH_UNSET_BACKREF| \ - PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C|PCRE2_NEVER_UCP| \ - PCRE2_NEVER_UTF|PCRE2_NO_AUTO_CAPTURE|PCRE2_NO_AUTO_POSSESS| \ - PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE|PCRE2_NO_UTF_CHECK| \ - PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_UTF) + PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \ + PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_EXTENDED|PCRE2_FIRSTLINE| \ + PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \ + PCRE2_NEVER_UCP|PCRE2_NEVER_UTF|PCRE2_NO_AUTO_CAPTURE| \ + PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \ + PCRE2_NO_UTF_CHECK|PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_USE_OFFSET_LIMIT| \ + PCRE2_UTF) /* Compile time error code numbers. They are given names so that they can more easily be tracked. When a new number is added, the tables called eint1 and -eint2 in pcre2posix.c must be updated, and a new error text must be added to -compile_error_texts in pcre2_error.c. */ +eint2 in pcre2posix.c may need to be updated, and a new error text must be +added to compile_error_texts in pcre2_error.c. */ enum { ERR0 = COMPILE_ERROR_BASE, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR10, @@ -594,7 +716,7 @@ enum { ERR0 = COMPILE_ERROR_BASE, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80, - ERR81, ERR82, ERR83, ERR84 }; + ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90 }; /* This is a table of start-of-pattern options such as (*UTF) and settings such as (*LIMIT_MATCH=nnnn) and (*CRLF). 
For completeness and backward @@ -697,6 +819,337 @@ static const uint8_t opcode_possessify[] = { }; +#ifdef DEBUG_SHOW_PARSED +/************************************************* +* Show the parsed pattern for debugging * +*************************************************/ + +/* For debugging the pre-scan, this code, which outputs the parsed data vector, +can be enabled. */ + +static void show_parsed(compile_block *cb) +{ +uint32_t *pptr = cb->parsed_pattern; + +for (;;) + { + int max, min; + PCRE2_SIZE offset; + uint32_t i; + uint32_t length; + uint32_t meta_arg = META_DATA(*pptr); + + fprintf(stderr, "+++ %02d %.8x ", (int)(pptr - cb->parsed_pattern), *pptr); + + if (*pptr < META_END) + { + if (*pptr > 32 && *pptr < 128) fprintf(stderr, "%c", *pptr); + pptr++; + } + + else switch (META_CODE(*pptr++)) + { + default: + fprintf(stderr, "**** OOPS - unknown META value - giving up ****\n"); + return; + + case META_END: + fprintf(stderr, "META_END\n"); + return; + + case META_CAPTURE: + fprintf(stderr, "META_CAPTURE %d", meta_arg); + break; + + case META_RECURSE: + GETOFFSET(offset, pptr); + fprintf(stderr, "META_RECURSE %d %zd", meta_arg, offset); + break; + + case META_BACKREF: + if (meta_arg < 10) + offset = cb->small_ref_offset[meta_arg]; + else + GETOFFSET(offset, pptr); + fprintf(stderr, "META_BACKREF %d %zd", meta_arg, offset); + break; + + case META_ESCAPE: + if (meta_arg == ESC_P || meta_arg == ESC_p) + { + uint32_t ptype = *pptr >> 16; + uint32_t pvalue = *pptr++ & 0xffff; + fprintf(stderr, "META \\%c %d %d", (meta_arg == ESC_P)? 'P':'p', + ptype, pvalue); + } + else + { + uint32_t cc; + /* There's just one escape we might have here that isn't negated in the + escapes table. 
*/ + if (meta_arg == ESC_g) cc = CHAR_g; + else for (cc = ESCAPES_FIRST; cc <= ESCAPES_LAST; cc++) + { + if (meta_arg == (uint32_t)(-escapes[cc - ESCAPES_FIRST])) break; + } + if (cc > ESCAPES_LAST) cc = CHAR_QUESTION_MARK; + fprintf(stderr, "META \\%c", cc); + } + break; + + case META_MINMAX: + min = *pptr++; + max = *pptr++; + if (max != REPEAT_UNLIMITED) + fprintf(stderr, "META {%d,%d}", min, max); + else + fprintf(stderr, "META {%d,}", min); + break; + + case META_MINMAX_QUERY: + min = *pptr++; + max = *pptr++; + if (max != REPEAT_UNLIMITED) + fprintf(stderr, "META {%d,%d}?", min, max); + else + fprintf(stderr, "META {%d,}?", min); + break; + + case META_MINMAX_PLUS: + min = *pptr++; + max = *pptr++; + if (max != REPEAT_UNLIMITED) + fprintf(stderr, "META {%d,%d}+", min, max); + else + fprintf(stderr, "META {%d,}+", min); + break; + + case META_BIGVALUE: fprintf(stderr, "META_BIGVALUE %.8x", *pptr++); break; + case META_CIRCUMFLEX: fprintf(stderr, "META_CIRCUMFLEX"); break; + case META_COND_ASSERT: fprintf(stderr, "META_COND_ASSERT"); break; + case META_DOLLAR: fprintf(stderr, "META_DOLLAR"); break; + case META_DOT: fprintf(stderr, "META_DOT"); break; + case META_ASTERISK: fprintf(stderr, "META *"); break; + case META_ASTERISK_QUERY: fprintf(stderr, "META *?"); break; + case META_ASTERISK_PLUS: fprintf(stderr, "META *+"); break; + case META_PLUS: fprintf(stderr, "META +"); break; + case META_PLUS_QUERY: fprintf(stderr, "META +?"); break; + case META_PLUS_PLUS: fprintf(stderr, "META ++"); break; + case META_QUERY: fprintf(stderr, "META ?"); break; + case META_QUERY_QUERY: fprintf(stderr, "META ??"); break; + case META_QUERY_PLUS: fprintf(stderr, "META ?+"); break; + + case META_ATOMIC: fprintf(stderr, "META (?>"); break; + case META_NOCAPTURE: fprintf(stderr, "META (?:"); break; + case META_LOOKAHEAD: fprintf(stderr, "META (?="); break; + case META_LOOKAHEADNOT: fprintf(stderr, "META (?!"); break; + case META_KET: fprintf(stderr, "META )"); break; + case 
META_ALT: fprintf(stderr, "META | %d", meta_arg); break; + + case META_CLASS: fprintf(stderr, "META ["); break; + case META_CLASS_NOT: fprintf(stderr, "META [^"); break; + case META_CLASS_END: fprintf(stderr, "META ]"); break; + case META_CLASS_EMPTY: fprintf(stderr, "META []"); break; + case META_CLASS_EMPTY_NOT: fprintf(stderr, "META [^]"); break; + + case META_RANGE_LITERAL: fprintf(stderr, "META - (literal)"); break; + case META_RANGE_ESCAPED: fprintf(stderr, "META - (escaped)"); break; + + case META_POSIX: fprintf(stderr, "META_POSIX %d", *pptr++); break; + case META_POSIX_NEG: fprintf(stderr, "META_POSIX_NEG %d", *pptr++); break; + + case META_ACCEPT: fprintf(stderr, "META (*ACCEPT)"); break; + case META_COMMIT: fprintf(stderr, "META (*COMMIT)"); break; + case META_FAIL: fprintf(stderr, "META (*FAIL)"); break; + case META_PRUNE: fprintf(stderr, "META (*PRUNE)"); break; + case META_SKIP: fprintf(stderr, "META (*SKIP)"); break; + case META_THEN: fprintf(stderr, "META (*THEN)"); break; + + case META_OPTIONS: fprintf(stderr, "META_OPTIONS 0x%02x", *pptr++); break; + + case META_LOOKBEHIND: + fprintf(stderr, "META (?<= %d offset=", meta_arg); + GETOFFSET(offset, pptr); + fprintf(stderr, "%zd", offset); + break; + + case META_LOOKBEHINDNOT: + fprintf(stderr, "META (?="); + fprintf(stderr, "%d.", *pptr++); + fprintf(stderr, "%d)", *pptr++); + break; + + case META_COND_NAME: + fprintf(stderr, "META (?() length=%d offset=", *pptr++); + GETOFFSET(offset, pptr); + fprintf(stderr, "%zd", offset); + break; + + case META_COND_RNAME: + fprintf(stderr, "META (?(R&name) length=%d offset=", *pptr++); + GETOFFSET(offset, pptr); + fprintf(stderr, "%zd", offset); + break; + + /* This is kept as a name, because it might be. 
*/ + + case META_COND_RNUMBER: + fprintf(stderr, "META (?(Rnumber) length=%d offset=", *pptr++); + GETOFFSET(offset, pptr); + fprintf(stderr, "%zd", offset); + break; + + case META_MARK: + fprintf(stderr, "META (*MARK:"); + goto SHOWARG; + + case META_PRUNE_ARG: + fprintf(stderr, "META (*PRUNE:"); + goto SHOWARG; + + case META_SKIP_ARG: + fprintf(stderr, "META (*SKIP:"); + goto SHOWARG; + + case META_THEN_ARG: + fprintf(stderr, "META (*THEN:"); + SHOWARG: + length = *pptr++; + for (i = 0; i < length; i++) + { + uint32_t cc = *pptr++; + if (cc > 32 && cc < 128) fprintf(stderr, "%c", cc); + else fprintf(stderr, "\\x{%x}", cc); + } + fprintf(stderr, ") length=%u", length); + break; + } + fprintf(stderr, "\n"); + } +return; +} +#endif /* DEBUG_SHOW_PARSED */ + + + +/************************************************* +* Copy compiled code * +*************************************************/ + +/* Compiled JIT code cannot be copied, so the new compiled block has no +associated JIT data. */ + +PCRE2_EXP_DEFN pcre2_code * PCRE2_CALL_CONVENTION +pcre2_code_copy(const pcre2_code *code) +{ +PCRE2_SIZE* ref_count; +pcre2_code *newcode; + +if (code == NULL) return NULL; +newcode = code->memctl.malloc(code->blocksize, code->memctl.memory_data); +if (newcode == NULL) return NULL; +memcpy(newcode, code, code->blocksize); +newcode->executable_jit = NULL; + +/* If the code is one that has been deserialized, increment the reference count +in the decoded tables. */ + +if ((code->flags & PCRE2_DEREF_TABLES) != 0) + { + ref_count = (PCRE2_SIZE *)(code->tables + tables_length); + (*ref_count)++; + } + +return newcode; +} + + + +/************************************************* +* Copy compiled code and character tables * +*************************************************/ + +/* Compiled JIT code cannot be copied, so the new compiled block has no +associated JIT data. This version of code_copy also makes a separate copy of +the character tables. 
*/ + +PCRE2_EXP_DEFN pcre2_code * PCRE2_CALL_CONVENTION +pcre2_code_copy_with_tables(const pcre2_code *code) +{ +PCRE2_SIZE* ref_count; +pcre2_code *newcode; +uint8_t *newtables; + +if (code == NULL) return NULL; +newcode = code->memctl.malloc(code->blocksize, code->memctl.memory_data); +if (newcode == NULL) return NULL; +memcpy(newcode, code, code->blocksize); +newcode->executable_jit = NULL; + +newtables = code->memctl.malloc(tables_length + sizeof(PCRE2_SIZE), + code->memctl.memory_data); +if (newtables == NULL) + { + code->memctl.free((void *)newcode, code->memctl.memory_data); + return NULL; + } +memcpy(newtables, code->tables, tables_length); +ref_count = (PCRE2_SIZE *)(newtables + tables_length); +*ref_count = 1; + +newcode->tables = newtables; +newcode->flags |= PCRE2_DEREF_TABLES; +return newcode; +} + + /************************************************* * Free compiled code * @@ -734,418 +1187,2928 @@ if (code != NULL) /************************************************* -* Insert an automatic callout point * +* Read a number, possibly signed * *************************************************/ -/* This function is called when the PCRE2_AUTO_CALLOUT option is set, to insert -callout points before each pattern item. +/* This function is used to read numbers in the pattern. The initial pointer +must be the sign or first digit of the number. When relative values (introduced +by + or -) are allowed, they are relative group numbers, and the result must be +greater than zero. 
Arguments: - code current code pointer - ptr current pattern pointer - cb general compile-time data + ptrptr points to the character pointer variable + ptrend points to the end of the input string + allow_sign if < 0, sign not allowed; if >= 0, sign is relative to this + max_value the largest number allowed + max_error the error to give for an over-large number + intptr where to put the result + errcodeptr where to put an error code -Returns: new code pointer +Returns: TRUE - a number was read + FALSE - errorcode == 0 => no number was found + errorcode != 0 => an error occurred */ -static PCRE2_UCHAR * -auto_callout(PCRE2_UCHAR *code, PCRE2_SPTR ptr, compile_block *cb) +static BOOL +read_number(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, int32_t allow_sign, + uint32_t max_value, uint32_t max_error, int *intptr, int *errorcodeptr) { -code[0] = OP_CALLOUT; -PUT(code, 1, ptr - cb->start_pattern); /* Pattern offset */ -PUT(code, 1 + LINK_SIZE, 0); /* Default length */ -code[1 + 2*LINK_SIZE] = 255; -return code + PRIV(OP_lengths)[OP_CALLOUT]; +int sign = 0; +uint32_t n = 0; +PCRE2_SPTR ptr = *ptrptr; +BOOL yield = FALSE; + +*errorcodeptr = 0; + +if (allow_sign >= 0 && ptr < ptrend) + { + if (*ptr == CHAR_PLUS) + { + sign = +1; + max_value -= allow_sign; + ptr++; + } + else if (*ptr == CHAR_MINUS) + { + sign = -1; + ptr++; + } + } + +if (ptr >= ptrend || !IS_DIGIT(*ptr)) return FALSE; +while (ptr < ptrend && IS_DIGIT(*ptr)) + { + n = n * 10 + *ptr++ - CHAR_0; + if (n > max_value) + { + *errorcodeptr = max_error; + goto EXIT; + } + } + +if (allow_sign >= 0 && sign != 0) + { + if (n == 0) + { + *errorcodeptr = ERR26; /* +0 and -0 are not allowed */ + goto EXIT; + } + + if (sign > 0) n += allow_sign; + else if ((int)n > allow_sign) + { + *errorcodeptr = ERR15; /* Non-existent subpattern */ + goto EXIT; + } + else n = allow_sign + 1 - n; + } + +yield = TRUE; + +EXIT: +*intptr = n; +*ptrptr = ptr; +return yield; } /************************************************* -* Complete a 
callout item * +* Read repeat counts * *************************************************/ -/* A callout item contains the length of the next item in the pattern, which -we can't fill in till after we have reached the relevant point. This is used -for both automatic and manual callouts. +/* Read an item of the form {n,m} and return the values if non-NULL pointers +are supplied. Repeat counts must be less than 65536 (MAX_REPEAT_COUNT); a +larger value is used for "unlimited". We have to use signed arguments for +read_number() because it is capable of returning a signed value. Arguments: - previous_callout points to previous callout item - ptr current pattern pointer - cb general compile-time data + ptrptr points to pointer to character after'{' + ptrend pointer to end of input + minp if not NULL, pointer to int for min + maxp if not NULL, pointer to int for max (-1 if no max) + returned as -1 if no max + errorcodeptr points to error code variable -Returns: nothing +Returns: FALSE if not a repeat quantifier, errorcode set zero + FALSE on error, with errorcode set non-zero + TRUE on success, with pointer updated to point after '}' */ -static void -complete_callout(PCRE2_UCHAR *previous_callout, PCRE2_SPTR ptr, - compile_block *cb) +static BOOL +read_repeat_counts(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *minp, + uint32_t *maxp, int *errorcodeptr) { -size_t length = ptr - cb->start_pattern - GET(previous_callout, 1); -PUT(previous_callout, 1 + LINK_SIZE, length); +PCRE2_SPTR p = *ptrptr; +BOOL yield = FALSE; +int32_t min = 0; +int32_t max = REPEAT_UNLIMITED; /* This value is larger than MAX_REPEAT_COUNT */ + +/* NB read_number() initializes the error code to zero. The only error is for a +number that is too big. 
*/ + +if (!read_number(&p, ptrend, -1, MAX_REPEAT_COUNT, ERR5, &min, errorcodeptr)) + goto EXIT; + +if (p >= ptrend) goto EXIT; + +if (*p == CHAR_RIGHT_CURLY_BRACKET) + { + p++; + max = min; + } + +else + { + if (*p++ != CHAR_COMMA || p >= ptrend) goto EXIT; + if (*p != CHAR_RIGHT_CURLY_BRACKET) + { + if (!read_number(&p, ptrend, -1, MAX_REPEAT_COUNT, ERR5, &max, + errorcodeptr) || p >= ptrend || *p != CHAR_RIGHT_CURLY_BRACKET) + goto EXIT; + if (max < min) + { + *errorcodeptr = ERR4; + goto EXIT; + } + } + p++; + } + +yield = TRUE; +if (minp != NULL) *minp = (uint32_t)min; +if (maxp != NULL) *maxp = (uint32_t)max; + +/* Update the pattern pointer on success, or after an error, but not when +the result is "not a repeat quantifier". */ + +EXIT: +if (yield || *errorcodeptr != 0) *ptrptr = p; +return yield; + + + } /************************************************* -* Find the fixed length of a branch * +* Handle escapes * *************************************************/ -/* Scan a branch and compute the fixed length of subject that will match it, if -the length is fixed. This is needed for dealing with backward assertions. In -UTF mode, the result is in code units rather than bytes. The branch is -temporarily terminated with OP_END when this function is called. +/* This function is called when a \ has been encountered. It either returns a +positive value for a simple escape such as \d, or 0 for a data character, which +is placed in chptr. A backreference to group n is returned as negative n. On +entry, ptr is pointing at the character after \. On exit, it points after the +final code unit of the escape sequence. -This function is called when a backward assertion is encountered, so that if it -fails, the error message can point to the correct place in the pattern. -However, we cannot do this when the assertion contains subroutine calls, -because they can be forward references. 
We solve this by remembering this case -and doing the check at the end; a flag specifies which mode we are running in. +This function is also called from pcre2_substitute() to handle escape sequences +in replacement strings. In this case, the cb argument is NULL, and in the case +of escapes that have further processing, only sequences that define a data +character are recognised. The isclass argument is not relevant; the options +argument is the final value of the compiled pattern's options. Arguments: - code points to the start of the pattern (the bracket) - utf TRUE in UTF mode - atend TRUE if called when the pattern is complete - cb the "compile data" structure - recurses chain of recurse_check to catch mutual recursion + ptrptr points to the input position pointer + ptrend points to the end of the input + chptr points to a returned data character + errorcodeptr points to the errorcode variable (containing zero) + options the current options bits + isclass TRUE if inside a character class + cb compile data block -Returns: the fixed length, - or -1 if there is no fixed length, - or -2 if \C was encountered (in UTF-8 mode only) - or -3 if an OP_RECURSE item was encountered and atend is FALSE - or -4 if an unknown opcode was encountered (internal error) +Returns: zero => a data character + positive => a special escape sequence + negative => a numerical back reference + on error, errorcodeptr is set non-zero +*/ + +int +PRIV(check_escape)(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *chptr, + int *errorcodeptr, uint32_t options, BOOL isclass, compile_block *cb) +{ +BOOL utf = (options & PCRE2_UTF) != 0; +PCRE2_SPTR ptr = *ptrptr; +uint32_t c, cc; +int escape = 0; +int i; + +/* If backslash is at the end of the string, it's an error. 
*/ + +if (ptr >= ptrend) + { + *errorcodeptr = ERR1; + return 0; + } + +GETCHARINCTEST(c, ptr); /* Get character value, increment pointer */ +*errorcodeptr = 0; /* Be optimistic */ + +/* Non-alphanumerics are literals, so we just leave the value in c. An initial +value test saves a memory lookup for code points outside the alphanumeric +range. Otherwise, do a table lookup. A non-zero result is something that can be +returned immediately. Otherwise further processing is required. */ + +if (c < ESCAPES_FIRST || c > ESCAPES_LAST) {} /* Definitely literal */ + +else if ((i = escapes[c - ESCAPES_FIRST]) != 0) + { + if (i > 0) c = (uint32_t)i; else /* Positive is a data character */ + { + escape = -i; /* Else return a special escape */ + if (cb != NULL && (escape == ESC_P || escape == ESC_p || escape == ESC_X)) + cb->external_flags |= PCRE2_HASBKPORX; /* Note \P, \p, or \X */ + } + } + +/* Escapes that need further processing, including those that are unknown. +When called from pcre2_substitute(), only \c, \o, and \x are recognized (and \u +when BSUX is set). */ + +else + { + PCRE2_SPTR oldptr; + BOOL overflow; + int s; + + /* Filter calls from pcre2_substitute(). */ + + if (cb == NULL && c != CHAR_c && c != CHAR_o && c != CHAR_x && + (c != CHAR_u || (options & PCRE2_ALT_BSUX) != 0)) + { + *errorcodeptr = ERR3; + return 0; + } + + switch (c) + { + /* A number of Perl escapes are not handled by PCRE. We give an explicit + error. */ + + case CHAR_l: + case CHAR_L: + *errorcodeptr = ERR37; + break; + + /* \u is unrecognized when PCRE2_ALT_BSUX is not set. When it is treated + specially, \u must be followed by four hex digits. Otherwise it is a + lowercase u letter. 
*/ + + case CHAR_u: + if ((options & PCRE2_ALT_BSUX) == 0) *errorcodeptr = ERR37; else + { + uint32_t xc; + if (ptrend - ptr < 4) break; /* Less than 4 chars */ + if ((cc = XDIGIT(ptr[0])) == 0xff) break; /* Not a hex digit */ + if ((xc = XDIGIT(ptr[1])) == 0xff) break; /* Not a hex digit */ + cc = (cc << 4) | xc; + if ((xc = XDIGIT(ptr[2])) == 0xff) break; /* Not a hex digit */ + cc = (cc << 4) | xc; + if ((xc = XDIGIT(ptr[3])) == 0xff) break; /* Not a hex digit */ + c = (cc << 4) | xc; + ptr += 4; + if (utf) + { + if (c > 0x10ffffU) *errorcodeptr = ERR77; + else if (c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73; + } + else if (c > MAX_NON_UTF_CHAR) *errorcodeptr = ERR77; + } + break; + + /* \U is unrecognized unless PCRE2_ALT_BSUX is set, in which case it is an + upper case letter. */ + + case CHAR_U: + if ((options & PCRE2_ALT_BSUX) == 0) *errorcodeptr = ERR37; + break; + + /* In a character class, \g is just a literal "g". Outside a character + class, \g must be followed by one of a number of specific things: + + (1) A number, either plain or braced. If positive, it is an absolute + backreference. If negative, it is a relative backreference. This is a Perl + 5.10 feature. + + (2) Perl 5.10 also supports \g{name} as a reference to a named group. This + is part of Perl's movement towards a unified syntax for back references. As + this is synonymous with \k{name}, we fudge it up by pretending it really + was \k{name}. + + (3) For Oniguruma compatibility we also support \g followed by a name or a + number either in angle brackets or in single quotes. However, these are + (possibly recursive) subroutine calls, _not_ backreferences. We return + the ESC_g code. + + Summary: Return a negative number for a numerical back reference, ESC_k for + a named back reference, and ESC_g for a named or numbered subroutine call. 
+ */ + + case CHAR_g: + if (isclass) break; + + if (ptr >= ptrend) + { + *errorcodeptr = ERR57; + break; + } + + if (*ptr == CHAR_LESS_THAN_SIGN || *ptr == CHAR_APOSTROPHE) + { + escape = ESC_g; + break; + } + + /* If there is a brace delimiter, try to read a numerical reference. If + there isn't one, assume we have a name and treat it as \k. */ + + if (*ptr == CHAR_LEFT_CURLY_BRACKET) + { + PCRE2_SPTR p = ptr + 1; + if (!read_number(&p, ptrend, cb->bracount, MAX_GROUP_NUMBER, ERR61, &s, + errorcodeptr)) + { + if (*errorcodeptr == 0) escape = ESC_k; /* No number found */ + break; + } + if (p >= ptrend || *p != CHAR_RIGHT_CURLY_BRACKET) + { + *errorcodeptr = ERR57; + break; + } + ptr = p + 1; + } + + /* Read an undelimited number */ + + else + { + if (!read_number(&ptr, ptrend, cb->bracount, MAX_GROUP_NUMBER, ERR61, &s, + errorcodeptr)) + { + if (*errorcodeptr == 0) *errorcodeptr = ERR57; /* No number found */ + break; + } + } + + if (s <= 0) + { + *errorcodeptr = ERR15; + break; + } + + escape = -s; + break; + + /* The handling of escape sequences consisting of a string of digits + starting with one that is not zero is not straightforward. Perl has changed + over the years. Nowadays \g{} for backreferences and \o{} for octal are + recommended to avoid the ambiguities in the old syntax. + + Outside a character class, the digits are read as a decimal number. If the + number is less than 10, or if there are that many previous extracting left + brackets, it is a back reference. Otherwise, up to three octal digits are + read to form an escaped character code. Thus \123 is likely to be octal 123 + (cf \0123, which is octal 012 followed by the literal 3). + + Inside a character class, \ followed by a digit is always either a literal + 8 or 9 or an octal number. 
*/ + + case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5: + case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9: + + if (!isclass) + { + oldptr = ptr; + ptr--; /* Back to the digit */ + if (!read_number(&ptr, ptrend, -1, INT_MAX/10 - 1, ERR61, &s, + errorcodeptr)) + break; + + /* \1 to \9 are always back references. \8x and \9x are too; \1x to \7x + are octal escapes if there are not that many previous captures. */ + + if (s < 10 || oldptr[-1] >= CHAR_8 || s <= (int)cb->bracount) + { + if (s > (int)MAX_GROUP_NUMBER) *errorcodeptr = ERR61; + else escape = -s; /* Indicates a back reference */ + break; + } + ptr = oldptr; /* Put the pointer back and fall through */ + } + + /* Handle a digit following \ when the number is not a back reference, or + we are within a character class. If the first digit is 8 or 9, Perl used to + generate a binary zero and then treat the digit as a following literal. At + least by Perl 5.18 this changed so as not to insert the binary zero. */ + + if (c >= CHAR_8) break; + + /* Fall through with a digit less than 8 */ + + /* \0 always starts an octal number, but we may drop through to here with a + larger first octal digit. The original code used just to take the least + significant 8 bits of octal numbers (I think this is what early Perls used + to do). Nowadays we allow for larger numbers in UTF-8 mode and 16-bit mode, + but no more than 3 octal digits. */ + + case CHAR_0: + c -= CHAR_0; + while(i++ < 2 && ptr < ptrend && *ptr >= CHAR_0 && *ptr <= CHAR_7) + c = c * 8 + *ptr++ - CHAR_0; +#if PCRE2_CODE_UNIT_WIDTH == 8 + if (!utf && c > 0xff) *errorcodeptr = ERR51; +#endif + break; + + /* \o is a relatively new Perl feature, supporting a more general way of + specifying character codes in octal. The only supported form is \o{ddd}. 
*/ + + case CHAR_o: + if (ptr >= ptrend || *ptr++ != CHAR_LEFT_CURLY_BRACKET) + { + ptr--; + *errorcodeptr = ERR55; + } + else if (ptr >= ptrend || *ptr == CHAR_RIGHT_CURLY_BRACKET) + *errorcodeptr = ERR78; + else + { + c = 0; + overflow = FALSE; + while (ptr < ptrend && *ptr >= CHAR_0 && *ptr <= CHAR_7) + { + cc = *ptr++; + if (c == 0 && cc == CHAR_0) continue; /* Leading zeroes */ +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (c >= 0x20000000l) { overflow = TRUE; break; } +#endif + c = (c << 3) + (cc - CHAR_0); +#if PCRE2_CODE_UNIT_WIDTH == 8 + if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; } +#elif PCRE2_CODE_UNIT_WIDTH == 16 + if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; } +#elif PCRE2_CODE_UNIT_WIDTH == 32 + if (utf && c > 0x10ffffU) { overflow = TRUE; break; } +#endif + } + if (overflow) + { + while (ptr < ptrend && *ptr >= CHAR_0 && *ptr <= CHAR_7) ptr++; + *errorcodeptr = ERR34; + } + else if (ptr < ptrend && *ptr++ == CHAR_RIGHT_CURLY_BRACKET) + { + if (utf && c >= 0xd800 && c <= 0xdfff) + { + ptr--; + *errorcodeptr = ERR73; + } + } + else + { + ptr--; + *errorcodeptr = ERR64; + } + } + break; + + /* \x is complicated. When PCRE2_ALT_BSUX is set, \x must be followed by + two hexadecimal digits. Otherwise it is a lowercase x letter. */ + + case CHAR_x: + if ((options & PCRE2_ALT_BSUX) != 0) + { + uint32_t xc; + if (ptrend - ptr < 2) break; /* Less than 2 characters */ + if ((cc = XDIGIT(ptr[0])) == 0xff) break; /* Not a hex digit */ + if ((xc = XDIGIT(ptr[1])) == 0xff) break; /* Not a hex digit */ + c = (cc << 4) | xc; + ptr += 2; + } /* End PCRE2_ALT_BSUX handling */ + + /* Handle \x in Perl's style. \x{ddd} is a character number which can be + greater than 0xff in UTF-8 or non-8bit mode, but only if the ddd are hex + digits. If not, { used to be treated as a data character. However, Perl + seems to read hex digits up to the first non-such, and ignore the rest, so + that, for example \x{zz} matches a binary zero. 
This seems crazy, so PCRE + now gives an error. */ + + else + { + if (ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET) + { + if (++ptr >= ptrend || *ptr == CHAR_RIGHT_CURLY_BRACKET) + { + *errorcodeptr = ERR78; + break; + } + c = 0; + overflow = FALSE; + + while (ptr < ptrend && (cc = XDIGIT(*ptr)) != 0xff) + { + ptr++; + if (c == 0 && cc == 0) continue; /* Leading zeroes */ +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (c >= 0x10000000l) { overflow = TRUE; break; } +#endif + c = (c << 4) | cc; + if ((utf && c > 0x10ffffU) || (!utf && c > MAX_NON_UTF_CHAR)) + { + overflow = TRUE; + break; + } + } + + if (overflow) + { + while (ptr < ptrend && XDIGIT(*ptr) != 0xff) ptr++; + *errorcodeptr = ERR34; + } + else if (ptr < ptrend && *ptr++ == CHAR_RIGHT_CURLY_BRACKET) + { + if (utf && c >= 0xd800 && c <= 0xdfff) + { + ptr--; + *errorcodeptr = ERR73; + } + } + + /* If the sequence of hex digits does not end with '}', give an error. + We used just to recognize this construct and fall through to the normal + \x handling, but nowadays Perl gives an error, which seems much more + sensible, so we do too. */ + + else + { + ptr--; + *errorcodeptr = ERR67; + } + } /* End of \x{} processing */ + + /* Read a up to two hex digits after \x */ + + else + { + c = 0; + if (ptr >= ptrend || (cc = XDIGIT(*ptr)) == 0xff) break; /* Not a hex digit */ + ptr++; + c = cc; + if (ptr >= ptrend || (cc = XDIGIT(*ptr)) == 0xff) break; /* Not a hex digit */ + ptr++; + c = (c << 4) | cc; + } /* End of \xdd handling */ + } /* End of Perl-style \x handling */ + break; + + /* The handling of \c is different in ASCII and EBCDIC environments. In an + ASCII (or Unicode) environment, an error is given if the character + following \c is not a printable ASCII character. Otherwise, the following + character is upper-cased if it is a letter, and after that the 0x40 bit is + flipped. The result is the value of the escape. 
+ + In an EBCDIC environment the handling of \c is compatible with the + specification in the perlebcdic document. The following character must be + a letter or one of small number of special characters. These provide a + means of defining the character values 0-31. + + For testing the EBCDIC handling of \c in an ASCII environment, recognize + the EBCDIC value of 'c' explicitly. */ + +#if defined EBCDIC && 'a' != 0x81 + case 0x83: +#else + case CHAR_c: +#endif + if (ptr >= ptrend) + { + *errorcodeptr = ERR2; + break; + } + c = *ptr; + if (c >= CHAR_a && c <= CHAR_z) c = UPPER_CASE(c); + + /* Handle \c in an ASCII/Unicode environment. */ + +#ifndef EBCDIC /* ASCII/UTF-8 coding */ + if (c < 32 || c > 126) /* Excludes all non-printable ASCII */ + { + *errorcodeptr = ERR68; + break; + } + c ^= 0x40; + + /* Handle \c in an EBCDIC environment. The special case \c? is converted to + 255 (0xff) or 95 (0x5f) if other character suggest we are using th POSIX-BC + encoding. (This is the way Perl indicates that it handles \c?.) The other + valid sequences correspond to a list of specific characters. */ + +#else + if (c == CHAR_QUESTION_MARK) + c = ('\\' == 188 && '`' == 74)? 0x5f : 0xff; + else + { + for (i = 0; i < 32; i++) + { + if (c == ebcdic_escape_c[i]) break; + } + if (i < 32) c = i; else *errorcodeptr = ERR68; + } +#endif /* EBCDIC */ + + ptr++; + break; + + /* Any other alphanumeric following \ is an error. Perl gives an error only + if in warning mode, but PCRE doesn't have a warning mode. */ + + default: + *errorcodeptr = ERR3; + *ptrptr = ptr - 1; /* Point to the character at fault */ + return 0; + } + } + +/* Perl supports \N{name} for character names, as well as plain \N for "not +newline". PCRE does not support \N{name}. However, it does support +quantification such as \N{2,3}. 
*/ + +if (escape == ESC_N && ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET && + ptrend - ptr > 2) + { + PCRE2_SPTR p = ptr + 1; + if (!read_repeat_counts(&p, ptrend, NULL, NULL, errorcodeptr) && + *errorcodeptr == 0) + *errorcodeptr = ERR37; + } + +/* Set the pointer to the next character before returning. */ + +*ptrptr = ptr; +*chptr = c; +return escape; +} + + + +#ifdef SUPPORT_UNICODE +/************************************************* +* Handle \P and \p * +*************************************************/ + +/* This function is called after \P or \p has been encountered, provided that +PCRE2 is compiled with support for UTF and Unicode properties. On entry, the +contents of ptrptr are pointing after the P or p. On exit, it is left pointing +after the final code unit of the escape sequence. + +Arguments: + ptrptr the pattern position pointer + negptr a boolean that is set TRUE for negation else FALSE + ptypeptr an unsigned int that is set to the type value + pdataptr an unsigned int that is set to the detailed property value + errorcodeptr the error code variable + cb the compile data + +Returns: TRUE if the type value was found, or FALSE for an invalid type +*/ + +static BOOL +get_ucp(PCRE2_SPTR *ptrptr, BOOL *negptr, uint16_t *ptypeptr, + uint16_t *pdataptr, int *errorcodeptr, compile_block *cb) +{ +PCRE2_UCHAR c; +PCRE2_SIZE i, bot, top; +PCRE2_SPTR ptr = *ptrptr; +PCRE2_UCHAR name[32]; + +if (ptr >= cb->end_pattern) goto ERROR_RETURN; +c = *ptr++; +*negptr = FALSE; + +/* \P or \p can be followed by a name in {}, optionally preceded by ^ for +negation. 
*/ + +if (c == CHAR_LEFT_CURLY_BRACKET) + { + if (ptr >= cb->end_pattern) goto ERROR_RETURN; + if (*ptr == CHAR_CIRCUMFLEX_ACCENT) + { + *negptr = TRUE; + ptr++; + } + for (i = 0; i < (int)(sizeof(name) / sizeof(PCRE2_UCHAR)) - 1; i++) + { + if (ptr >= cb->end_pattern) goto ERROR_RETURN; + c = *ptr++; + if (c == CHAR_NULL) goto ERROR_RETURN; + if (c == CHAR_RIGHT_CURLY_BRACKET) break; + name[i] = c; + } + if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN; + name[i] = 0; + } + +/* Otherwise there is just one following character, which must be an ASCII +letter. */ + +else if (MAX_255(c) && (cb->ctypes[c] & ctype_letter) != 0) + { + name[0] = c; + name[1] = 0; + } +else goto ERROR_RETURN; + +*ptrptr = ptr; + +/* Search for a recognized property name using binary chop. */ + +bot = 0; +top = PRIV(utt_size); + +while (bot < top) + { + int r; + i = (bot + top) >> 1; + r = PRIV(strcmp_c8)(name, PRIV(utt_names) + PRIV(utt)[i].name_offset); + if (r == 0) + { + *ptypeptr = PRIV(utt)[i].type; + *pdataptr = PRIV(utt)[i].value; + return TRUE; + } + if (r > 0) bot = i + 1; else top = i; + } +*errorcodeptr = ERR47; /* Unrecognized name */ +return FALSE; + +ERROR_RETURN: /* Malformed \P or \p */ +*errorcodeptr = ERR46; +*ptrptr = ptr; +return FALSE; +} +#endif + + + +/************************************************* +* Check for POSIX class syntax * +*************************************************/ + +/* This function is called when the sequence "[:" or "[." or "[=" is +encountered in a character class. It checks whether this is followed by a +sequence of characters terminated by a matching ":]" or ".]" or "=]". If we +reach an unescaped ']' without the special preceding character, return FALSE. + +Originally, this function only recognized a sequence of letters between the +terminators, but it seems that Perl recognizes any sequence of characters, +though of course unknown POSIX names are subsequently rejected. 
Perl gives an +"Unknown POSIX class" error for [:f\oo:] for example, where previously PCRE +didn't consider this to be a POSIX class. Likewise for [:1234:]. + +The problem in trying to be exactly like Perl is in the handling of escapes. We +have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX +class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code +below handles the special cases \\ and \], but does not try to do any other +escape processing. This makes it different from Perl for cases such as +[:l\ower:] where Perl recognizes it as the POSIX class "lower" but PCRE does +not recognize "l\ower". This is a lesser evil than not diagnosing bad classes +when Perl does, I think. + +A user pointed out that PCRE was rejecting [:a[:digit:]] whereas Perl was not. +It seems that the appearance of a nested POSIX class supersedes an apparent +external class. For example, [:a[:digit:]b:] matches "a", "b", ":", or +a digit. This is handled by returning FALSE if the start of a new group with +the same terminator is encountered, since the next closing sequence must close +the nested group, not the outer one. + +In Perl, unescaped square brackets may also appear as part of class names. For +example, [:a[:abc]b:] gives unknown POSIX class "[:abc]b:]". However, for +[:a[:abc]b][b:] it gives unknown POSIX class "[:abc]b][b:]", which does not +seem right at all. PCRE does not allow closing square brackets in POSIX class +names. + +Arguments: + ptr pointer to the character after the initial [ (colon, dot, equals) + ptrend pointer to the end of the pattern + endptr where to return a pointer to the terminating ':', '.', or '=' + +Returns: TRUE or FALSE +*/ + +static BOOL +check_posix_syntax(PCRE2_SPTR ptr, PCRE2_SPTR ptrend, PCRE2_SPTR *endptr) +{ +PCRE2_UCHAR terminator; /* Don't combine these lines; the Solaris cc */ +terminator = *ptr++; /* compiler warns about "non-constant" initializer. 
*/ + +for (; ptrend - ptr >= 2; ptr++) + { + if (*ptr == CHAR_BACKSLASH && + (ptr[1] == CHAR_RIGHT_SQUARE_BRACKET || ptr[1] == CHAR_BACKSLASH)) + ptr++; + + else if ((*ptr == CHAR_LEFT_SQUARE_BRACKET && ptr[1] == terminator) || + *ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE; + + else if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) + { + *endptr = ptr; + return TRUE; + } + } + +return FALSE; +} + + + +/************************************************* +* Check POSIX class name * +*************************************************/ + +/* This function is called to check the name given in a POSIX-style class entry +such as [:alnum:]. + +Arguments: + ptr points to the first letter + len the length of the name + +Returns: a value representing the name, or -1 if unknown */ static int -find_fixedlength(PCRE2_UCHAR *code, BOOL utf, BOOL atend, compile_block *cb, - recurse_check *recurses) +check_posix_name(PCRE2_SPTR ptr, int len) { -int length = -1; -recurse_check this_recurse; -register int branchlength = 0; -register PCRE2_UCHAR *cc = code + 1 + LINK_SIZE; - -/* Scan along the opcodes for this branch. If we get to the end of the -branch, check the length against that of the other branches. */ - -for (;;) +const char *pn = posix_names; +int yield = 0; +while (posix_name_lengths[yield] != 0) { - int d; - PCRE2_UCHAR *ce, *cs; - register PCRE2_UCHAR op = *cc; + if (len == posix_name_lengths[yield] && + PRIV(strncmp_c8)(ptr, pn, (unsigned int)len) == 0) return yield; + pn += posix_name_lengths[yield] + 1; + yield++; + } +return -1; +} - switch (op) + + +/************************************************* +* Read a subpattern or VERB name * +*************************************************/ + +/* This function is called from parse_regex() below whenever it needs to read +the name of a subpattern or a (*VERB). The initial pointer must be to the +character before the name. If that character is '*' we are reading a verb name. 
+The pointer is updated to point after the name, for a VERB, or after tha name's +terminator for a subpattern name. Returning both the offset and the name +pointer is redundant information, but some callers use one and some the other, +so it is simplest just to return both. + +Arguments: + ptrptr points to the character pointer variable + ptrend points to the end of the input string + terminator the terminator of a subpattern name must be this + offsetptr where to put the offset from the start of the pattern + nameptr where to put a pointer to the name in the input + namelenptr where to put the length of the name + errcodeptr where to put an error code + cb pointer to the compile data block + +Returns: TRUE if a name was read + FALSE otherwise, with error code set +*/ + +static BOOL +read_name(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t terminator, + PCRE2_SIZE *offsetptr, PCRE2_SPTR *nameptr, uint32_t *namelenptr, + int *errorcodeptr, compile_block *cb) +{ +PCRE2_SPTR ptr = *ptrptr; +BOOL is_verb = (*ptr == CHAR_ASTERISK); +uint32_t namelen = 0; +uint32_t ctype = is_verb? ctype_letter : ctype_word; + +if (++ptr >= ptrend) + { + *errorcodeptr = is_verb? ERR60: /* Verb not recognized or malformed */ + ERR62; /* Subpattern name expected */ + goto FAILED; + } + +*nameptr = ptr; +*offsetptr = (PCRE2_SIZE)(ptr - cb->start_pattern); + +if (IS_DIGIT(*ptr)) + { + *errorcodeptr = ERR44; /* Group name must not start with digit */ + goto FAILED; + } + +while (ptr < ptrend && MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype) != 0) + { + ptr++; + namelen++; + if (namelen > MAX_NAME_SIZE) { - /* We only need to continue for OP_CBRA (normal capturing bracket) and - OP_BRA (normal non-capturing bracket) because the other variants of these - opcodes are all concerned with unlimited repeated groups, which of course - are not of fixed length. 
*/ + *errorcodeptr = ERR48; + goto FAILED; + } + } - case OP_CBRA: - case OP_BRA: - case OP_ONCE: - case OP_ONCE_NC: - case OP_COND: - d = find_fixedlength(cc + ((op == OP_CBRA)? IMM2_SIZE : 0), utf, atend, cb, - recurses); - if (d < 0) return d; - branchlength += d; - do cc += GET(cc, 1); while (*cc == OP_ALT); - cc += 1 + LINK_SIZE; - break; +/* Subpattern names must not be empty, and their terminator is checked here. +(What follows a verb name is checked separately.) */ - /* Reached end of a branch; if it's a ket it is the end of a nested call. - If it's ALT it is an alternation in a nested call. An ACCEPT is effectively - an ALT. If it is END it's the end of the outer call. All can be handled by - the same code. Note that we must not include the OP_KETRxxx opcodes here, - because they all imply an unlimited repeat. */ +if (!is_verb) + { + if (namelen == 0) + { + *errorcodeptr = ERR62; /* Subpattern name expected */ + goto FAILED; + } + if (ptr >= ptrend || *ptr != (PCRE2_UCHAR)terminator) + { + *errorcodeptr = ERR42; + goto FAILED; + } + ptr++; + } - case OP_ALT: - case OP_KET: - case OP_END: - case OP_ACCEPT: - case OP_ASSERT_ACCEPT: - if (length < 0) length = branchlength; - else if (length != branchlength) return -1; - if (*cc != OP_ALT) return length; - cc += 1 + LINK_SIZE; - branchlength = 0; - break; +*namelenptr = namelen; +*ptrptr = ptr; +return TRUE; - /* A true recursion implies not fixed length, but a subroutine call may - be OK. If the subroutine is a forward reference, we can't deal with - it until the end of the pattern, so return -3. 
*/ +FAILED: +*ptrptr = ptr; +return FALSE; +} - case OP_RECURSE: - if (!atend) return -3; - cs = ce = (PCRE2_UCHAR *)cb->start_code + GET(cc, 1); /* Start subpattern */ - do ce += GET(ce, 1); while (*ce == OP_ALT); /* End subpattern */ - if (cc > cs && cc < ce) return -1; /* Recursion */ - else /* Check for mutual recursion */ - { - recurse_check *r = recurses; - for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break; - if (r != NULL) return -1; /* Mutual recursion */ - } - this_recurse.prev = recurses; - this_recurse.group = cs; - d = find_fixedlength(cs + IMM2_SIZE, utf, atend, cb, &this_recurse); - if (d < 0) return d; - branchlength += d; - cc += 1 + LINK_SIZE; - break; - /* Skip over assertive subpatterns. Note that we must increment cc by - 1 + LINK_SIZE at the end, not by OP_length[*cc] because in a recursive - situation this assertion may be the one that is ultimately being checked - for having a fixed length, in which case its terminating OP_KET will have - been temporarily replaced by OP_END. */ - case OP_ASSERT: - case OP_ASSERT_NOT: - case OP_ASSERTBACK: - case OP_ASSERTBACK_NOT: - do cc += GET(cc, 1); while (*cc == OP_ALT); - cc += 1 + LINK_SIZE; - break; +/************************************************* +* Manage callouts at start of cycle * +*************************************************/ - /* Skip over things that don't match chars */ +/* At the start of a new item in parse_regex() we are able to record the +details of the previous item in a prior callout, and also to set up an +automatic callout if enabled. Avoid having two adjacent automatic callouts, +which would otherwise happen for items such as \Q that contribute nothing to +the parsed pattern. 
- case OP_MARK: - case OP_PRUNE_ARG: - case OP_SKIP_ARG: - case OP_THEN_ARG: - cc += cc[1] + PRIV(OP_lengths)[*cc]; - break; +Arguments: + ptr current pattern pointer + pcalloutptr points to a pointer to previous callout, or NULL + options the compiling options + parsed_pattern the parsed pattern pointer + cb compile block - case OP_CALLOUT: - case OP_CIRC: - case OP_CIRCM: - case OP_CLOSE: - case OP_COMMIT: - case OP_CREF: - case OP_FALSE: - case OP_TRUE: - case OP_DNCREF: - case OP_DNRREF: - case OP_DOLL: - case OP_DOLLM: - case OP_EOD: - case OP_EODN: - case OP_FAIL: - case OP_NOT_WORD_BOUNDARY: - case OP_PRUNE: - case OP_REVERSE: - case OP_RREF: - case OP_SET_SOM: - case OP_SKIP: - case OP_SOD: - case OP_SOM: - case OP_THEN: - case OP_WORD_BOUNDARY: - cc += PRIV(OP_lengths)[*cc]; - break; +Returns: possibly updated parsed_pattern pointer. +*/ - case OP_CALLOUT_STR: - cc += GET(cc, 1 + 2*LINK_SIZE); - break; +static uint32_t * +manage_callouts(PCRE2_SPTR ptr, uint32_t **pcalloutptr, uint32_t options, + uint32_t *parsed_pattern, compile_block *cb) +{ +uint32_t *previous_callout = *pcalloutptr; - /* Handle literal characters */ +if (previous_callout != NULL) previous_callout[2] = ptr - cb->start_pattern - + (PCRE2_SIZE)previous_callout[1]; - case OP_CHAR: - case OP_CHARI: - case OP_NOT: - case OP_NOTI: - branchlength++; - cc += 2; -#ifdef SUPPORT_UNICODE - if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); -#endif - break; +if ((options & PCRE2_AUTO_CALLOUT) == 0) previous_callout = NULL; else + { + if (previous_callout == NULL || + previous_callout != parsed_pattern - 4 || + previous_callout[3] != 255) + { + previous_callout = parsed_pattern; /* Set up new automatic callout */ + parsed_pattern += 4; + previous_callout[0] = META_CALLOUT_NUMBER; + previous_callout[2] = 0; + previous_callout[3] = 255; + } + previous_callout[1] = (uint32_t)(ptr - cb->start_pattern); + } - /* Handle exact repetitions. 
The count is already in characters, but we - need to skip over a multibyte character in UTF8 mode. */ +*pcalloutptr = previous_callout; +return parsed_pattern; +} - case OP_EXACT: - case OP_EXACTI: - case OP_NOTEXACT: - case OP_NOTEXACTI: - branchlength += (int)GET2(cc,1); - cc += 2 + IMM2_SIZE; -#ifdef SUPPORT_UNICODE - if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); -#endif - break; - case OP_TYPEEXACT: - branchlength += GET2(cc,1); - if (cc[1 + IMM2_SIZE] == OP_PROP || cc[1 + IMM2_SIZE] == OP_NOTPROP) - cc += 2; - cc += 1 + IMM2_SIZE + 1; - break; - /* Handle single-char matchers */ +/************************************************* +* Parse regex and identify named groups * +*************************************************/ - case OP_PROP: - case OP_NOTPROP: - cc += 2; - /* Fall through */ +/* This function is called first of all. It scans the pattern and does two +things: (1) It identifies capturing groups and makes a table of named capturing +groups so that information about them is fully available to both the compiling +scans. (2) It writes a parsed version of the pattern with comments omitted and +escapes processed into the parsed_pattern vector. - case OP_HSPACE: - case OP_VSPACE: - case OP_NOT_HSPACE: - case OP_NOT_VSPACE: - case OP_NOT_DIGIT: - case OP_DIGIT: - case OP_NOT_WHITESPACE: - case OP_WHITESPACE: - case OP_NOT_WORDCHAR: - case OP_WORDCHAR: - case OP_ANY: - case OP_ALLANY: - branchlength++; - cc++; - break; +Arguments: + ptr points to the start of the pattern + options compiling dynamic options (may change during the scan) + has_lookbehind points to a boolean, set TRUE if a lookbehind is found + cb pointer to the compile data block - /* The single-byte matcher isn't allowed. This only happens in UTF-8 mode; - otherwise \C is coded as OP_ALLANY. 
*/ +Returns: zero on success or a non-zero error code, with the + error offset placed in the cb field +*/ - case OP_ANYBYTE: - return -2; +/* A structure and some flags for dealing with nested groups. */ - /* Check a class for variable quantification */ +typedef struct nest_save { + uint16_t nest_depth; + uint16_t reset_group; + uint16_t max_group; + uint16_t flags; +} nest_save; - case OP_CLASS: - case OP_NCLASS: -#ifdef SUPPORT_WIDE_CHARS - case OP_XCLASS: - /* The original code caused an unsigned overflow in 64 bit systems, - so now we use a conditional statement. */ - if (op == OP_XCLASS) - cc += GET(cc, 1); - else - cc += PRIV(OP_lengths)[OP_CLASS]; +#define NSF_RESET 0x0001u +#define NSF_EXTENDED 0x0002u +#define NSF_DUPNAMES 0x0004u +#define NSF_CONDASSERT 0x0008u + +/* States used for analyzing ranges in character classes. The two OK values +must be last. */ + +enum { RANGE_NO, RANGE_STARTED, RANGE_OK_ESCAPED, RANGE_OK_LITERAL }; + +/* Only in 32-bit mode can there be literals > META_END. A macros encapsulates +the storing of literal values in the parsed pattern. */ + +#if PCRE2_CODE_UNIT_WIDTH == 32 +#define PARSED_LITERAL(c, p) \ + { \ + if (c >= META_END) *p++ = META_BIGVALUE; \ + *p++ = c; \ + okquantifier = TRUE; \ + } #else - cc += PRIV(OP_lengths)[OP_CLASS]; +#define PARSED_LITERAL(c, p) *p++ = c; okquantifier = TRUE; #endif - switch (*cc) - { - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRPLUS: - case OP_CRMINPLUS: - case OP_CRQUERY: - case OP_CRMINQUERY: - case OP_CRPOSSTAR: - case OP_CRPOSPLUS: - case OP_CRPOSQUERY: - return -1; +/* Here's the actual function. 
*/ - case OP_CRRANGE: - case OP_CRMINRANGE: - case OP_CRPOSRANGE: - if (GET2(cc,1) != GET2(cc,1+IMM2_SIZE)) return -1; - branchlength += (int)GET2(cc,1); - cc += 1 + 2 * IMM2_SIZE; +static int parse_regex(PCRE2_SPTR ptr, uint32_t options, BOOL *has_lookbehind, + compile_block *cb) +{ +uint32_t c; +uint32_t delimiter; +uint32_t namelen; +uint32_t class_range_state; +uint32_t *verblengthptr = NULL; /* Value avoids compiler warning */ +uint32_t *previous_callout = NULL; +uint32_t *parsed_pattern = cb->parsed_pattern; +uint32_t *parsed_pattern_end = cb->parsed_pattern_end; +uint32_t meta_quantifier = 0; +uint16_t nest_depth = 0; +int after_manual_callout = 0; +int expect_cond_assert = 0; +int errorcode = 0; +int escape; +int i; +BOOL inescq = FALSE; +BOOL inverbname = FALSE; +BOOL utf = (options & PCRE2_UTF) != 0; +BOOL isdupname; +BOOL negate_class; +BOOL okquantifier = FALSE; +PCRE2_SPTR name; +PCRE2_SPTR ptrend = cb->end_pattern; +PCRE2_SPTR verbnamestart = NULL; /* Value avoids compiler warning */ +named_group *ng; +nest_save *top_nest = NULL; +nest_save *end_nests = (nest_save *)(cb->start_workspace + cb->workspace_size); + +/* The size of the nest_save structure might not be a factor of the size of the +workspace. Therefore we must round down end_nests so as to correctly avoid +creating a nest_save that spans the end of the workspace. 
*/ + +end_nests = (nest_save *)((char *)end_nests - + ((cb->workspace_size * sizeof(PCRE2_UCHAR)) % sizeof(nest_save))); + +/* Now scan the pattern */ + +*has_lookbehind = FALSE; + +while (ptr < ptrend) + { + int prev_expect_cond_assert; + uint32_t min_repeat, max_repeat; + uint32_t set, unset, *optset; + uint32_t terminator; + uint32_t prev_meta_quantifier; + BOOL prev_okquantifier; + PCRE2_SPTR tempptr; + PCRE2_SPTR thisptr; + PCRE2_SIZE offset; + + if (parsed_pattern >= parsed_pattern_end) + { + errorcode = ERR63; /* Internal error (parsed pattern overflow) */ + goto FAILED; + } + + if (nest_depth > cb->cx->parens_nest_limit) + { + errorcode = ERR19; + goto FAILED; + } + + /* Get next input character, save its position for callout handling. */ + + thisptr = ptr; + GETCHARINCTEST(c, ptr); + + /* Copy quoted literals until \E, allowing for the possibility of automatic + callouts, except when processing a (*VERB) "name". */ + + if (inescq) + { + if (c == CHAR_BACKSLASH && ptr < ptrend && *ptr == CHAR_E) + { + inescq = FALSE; + ptr++; /* Skip E */ + } + else + { + if (expect_cond_assert > 0) /* A literal is not allowed if we are */ + { /* expecting a conditional assertion, */ + ptr--; /* but an empty \Q\E sequence is OK. */ + errorcode = ERR28; + goto FAILED; + } + if (!inverbname && after_manual_callout-- <= 0) + parsed_pattern = manage_callouts(thisptr, &previous_callout, options, + parsed_pattern, cb); + PARSED_LITERAL(c, parsed_pattern); + meta_quantifier = 0; + } + continue; /* Next character */ + } + + /* If we are processing the "name" part of a (*VERB:NAME) item, all + characters up to the closing parenthesis are literals except when + PCRE2_ALT_VERBNAMES is set. That causes backslash interpretation, but only \Q + and \E and escaped characters are allowed (no character types such as \d). If + PCRE2_EXTENDED is also set, we must ignore white space and # comments. Do + this by not entering the special (*VERB:NAME) processing - they are then + picked up below. 
Note that c is a character, not a code unit, so we must not + use MAX_255 to test its size because MAX_255 tests code units and is assumed + TRUE in 8-bit mode. */ + + if (inverbname && + ( + /* EITHER: not both options set */ + ((options & (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) != + (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) || + /* OR: character > 255 */ + c > 255 || + /* OR: not a # comment or white space */ + (c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0) + )) + { + PCRE2_SIZE verbnamelength; + + switch(c) + { + default: + PARSED_LITERAL(c, parsed_pattern); + break; + + case CHAR_RIGHT_PARENTHESIS: + inverbname = FALSE; + okquantifier = FALSE; /* Was probably set by literals */ + /* This is the length in characters */ + verbnamelength = (PCRE2_SIZE)(parsed_pattern - verblengthptr - 1); + /* But the limit on the length is in code units */ + if (ptr - verbnamestart - 1 > (int)MAX_MARK) + { + ptr--; + errorcode = ERR76; + goto FAILED; + } + *verblengthptr = (uint32_t)verbnamelength; + break; + + case CHAR_BACKSLASH: + if ((options & PCRE2_ALT_VERBNAMES) != 0) + { + escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, options, + FALSE, cb); + if (errorcode != 0) goto FAILED; + } + else escape = 0; /* Treat all as literal */ + + switch(escape) + { + case 0: + PARSED_LITERAL(c, parsed_pattern); + break; + + case ESC_Q: + inescq = TRUE; + break; + + case ESC_E: /* Ignore */ + break; + + default: + errorcode = ERR40; /* Invalid in verb name */ + goto FAILED; + } + } + continue; /* Next character in pattern */ + } + + /* Not a verb name character. At this point we must process everything that + must not change the quantification state. This is mainly comments, but we + handle \Q and \E here as well, so that an item such as A\Q\E+ is treated as + A+, as in Perl. An isolated \E is ignored. 
*/ + + if (c == CHAR_BACKSLASH && ptr < ptrend) + { + if (*ptr == CHAR_Q || *ptr == CHAR_E) + { + inescq = *ptr == CHAR_Q; + ptr++; + continue; + } + } + + /* Skip over whitespace and # comments in extended mode. Note that c is a + character, not a code unit, so we must not use MAX_255 to test its size + because MAX_255 tests code units and is assumed TRUE in 8-bit mode. */ + + if ((options & PCRE2_EXTENDED) != 0) + { + if (c < 256 && (cb->ctypes[c] & ctype_space) != 0) continue; + if (c == CHAR_NUMBER_SIGN) + { + while (ptr < ptrend) + { + if (IS_NEWLINE(ptr)) /* For non-fixed-length newline cases, */ + { /* IS_NEWLINE sets cb->nllen. */ + ptr += cb->nllen; + break; + } + ptr++; +#ifdef SUPPORT_UNICODE + if (utf) FORWARDCHARTEST(ptr, ptrend); +#endif + } + continue; /* Next character in pattern */ + } + } + + /* Skip over bracketed comments */ + + if (c == CHAR_LEFT_PARENTHESIS && ptrend - ptr >= 2 && + ptr[0] == CHAR_QUESTION_MARK && ptr[1] == CHAR_NUMBER_SIGN) + { + while (++ptr < ptrend && *ptr != CHAR_RIGHT_PARENTHESIS); + if (ptr >= ptrend) + { + errorcode = ERR18; /* A special error for missing ) in a comment */ + goto FAILED; /* to make it easier to debug. */ + } + ptr++; + continue; /* Next character in pattern */ + } + + /* If the next item is not a quantifier, fill in length of any previous + callout and create an auto callout if required. */ + + if (c != CHAR_ASTERISK && c != CHAR_PLUS && c != CHAR_QUESTION_MARK && + (c != CHAR_LEFT_CURLY_BRACKET || + (tempptr = ptr, + !read_repeat_counts(&tempptr, ptrend, NULL, NULL, &errorcode)))) + { + if (after_manual_callout-- <= 0) + parsed_pattern = manage_callouts(thisptr, &previous_callout, options, + parsed_pattern, cb); + } + + /* If expect_cond_assert is 2, we have just passed (?( and are expecting an + assertion, possibly preceded by a callout. If the value is 1, we have just + had the callout and expect an assertion. There must be at least 3 more + characters in all cases. 
When expect_cond_assert is 2, we know that the + current character is an opening parenthesis, as otherwise we wouldn't be + here. However, when it is 1, we need to check, and it's easiest just to check + always. Note that expect_cond_assert may be negative, since all callouts just + decrement it. */ + + if (expect_cond_assert > 0) + { + BOOL ok = c == CHAR_LEFT_PARENTHESIS && ptrend - ptr >= 3 && + ptr[0] == CHAR_QUESTION_MARK; + if (ok) switch(ptr[1]) + { + case CHAR_C: + ok = expect_cond_assert == 2; + break; + + case CHAR_EQUALS_SIGN: + case CHAR_EXCLAMATION_MARK: + break; + + case CHAR_LESS_THAN_SIGN: + ok = ptr[2] == CHAR_EQUALS_SIGN || ptr[2] == CHAR_EXCLAMATION_MARK; break; default: - branchlength++; + ok = FALSE; + } + + if (!ok) + { + ptr--; /* Adjust error offset */ + errorcode = ERR28; + goto FAILED; + } + } + + /* Remember whether we are expecting a conditional assertion, and set the + default for this item. */ + + prev_expect_cond_assert = expect_cond_assert; + expect_cond_assert = 0; + + /* Remember quantification status for the previous significant item, then set + default for this item. */ + + prev_okquantifier = okquantifier; + prev_meta_quantifier = meta_quantifier; + okquantifier = FALSE; + meta_quantifier = 0; + + /* If the previous significant item was a quantifier, adjust the parsed code + if there is a following modifier. The base meta value is always followed by + the PLUS and QUERY values, in that order. We do this here rather than after + reading a quantifier so that intervening comments and /x whitespace can be + ignored without having to replicate code. */ + + if (prev_meta_quantifier != 0 && (c == CHAR_QUESTION_MARK || c == CHAR_PLUS)) + { + parsed_pattern[(prev_meta_quantifier == META_MINMAX)? -3 : -1] = + prev_meta_quantifier + ((c == CHAR_QUESTION_MARK)? + 0x00020000u : 0x00010000u); + continue; /* Next character in pattern */ + } + + + /* Process the next item in the main part of a pattern. 
*/ + + switch(c) + { + default: /* Non-special character */ + PARSED_LITERAL(c, parsed_pattern); + break; + + + /* ---- Escape sequence ---- */ + + case CHAR_BACKSLASH: + escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, options, + FALSE, cb); + if (errorcode != 0) goto FAILED; + + /* The escape was a data character. */ + + if (escape == 0) + { + PARSED_LITERAL(c, parsed_pattern); + } + + /* The escape was a back (or forward) reference. We keep the offset in + order to give a more useful diagnostic for a bad forward reference. For + references to groups numbered less than 10 we can't use more than two items + in parsed_pattern because they may be just two characters in the input (and + in a 64-bit world an offset may need two elements). So for them, the offset + of the first occurrent is held in a special vector. */ + + else if (escape < 0) + { + offset = (PCRE2_SIZE)(ptr - cb->start_pattern - 1); + escape = -escape; + *parsed_pattern++ = META_BACKREF | (uint32_t)escape; + if (escape < 10) + { + if (cb->small_ref_offset[escape] == PCRE2_UNSET) + cb->small_ref_offset[escape] = offset; + } + else + { + PUTOFFSET(offset, parsed_pattern); + } + okquantifier = TRUE; + } + + /* The escape was a character class such as \d etc. or other special + escape indicator such as \A or \X. Most of them generate just a single + parsed item, but \P and \p are followed by a 16-bit type and a 16-bit + value. They are supported only when Unicode is available. The type and + value are packed into a single 32-bit value so that the whole sequences + uses only two elements in the parsed_vector. This is because the same + coding is used if \d (for example) is turned into \p{Nd} when PCRE2_UCP is + set. + + There are also some cases where the escape sequence is followed by a name: + \k{name}, \k, and \k'name' are backreferences by name, and \g + and \g'name' are subroutine calls by name; \g{name} is a synonym for + \k{name}. 
Note that \g and \g'number' are handled by check_escape() + and returned as a negative value (handled above). A name is coded as an + offset into the pattern and a length. */ + + else switch (escape) + { + case ESC_C: +#ifdef NEVER_BACKSLASH_C + errorcode = ERR85; + goto FAILED; +#else + if ((options & PCRE2_NEVER_BACKSLASH_C) != 0) + { + errorcode = ERR83; + goto FAILED; + } +#endif + okquantifier = TRUE; + *parsed_pattern++ = META_ESCAPE + escape; + break; + + case ESC_X: +#ifndef SUPPORT_UNICODE + errorcode = ERR45; /* Supported only with Unicode support */ + goto FAILED; +#endif + case ESC_H: + case ESC_h: + case ESC_N: + case ESC_R: + case ESC_V: + case ESC_v: + okquantifier = TRUE; + *parsed_pattern++ = META_ESCAPE + escape; + break; + + default: /* \A, \B, \b, \G, \K, \Z, \z cannot be quantified. */ + *parsed_pattern++ = META_ESCAPE + escape; + break; + + /* Escapes that change in UCP mode. Note that PCRE2_UCP will never be set + without Unicode support because it is checked when pcre2_compile() is + called. */ + + case ESC_d: + case ESC_D: + case ESC_s: + case ESC_S: + case ESC_w: + case ESC_W: + okquantifier = TRUE; + if ((options & PCRE2_UCP) == 0) + { + *parsed_pattern++ = META_ESCAPE + escape; + } + else + { + *parsed_pattern++ = META_ESCAPE + + ((escape == ESC_d || escape == ESC_s || escape == ESC_w)? + ESC_p : ESC_P); + switch(escape) + { + case ESC_d: + case ESC_D: + *parsed_pattern++ = (PT_PC << 16) | ucp_Nd; + break; + + case ESC_s: + case ESC_S: + *parsed_pattern++ = PT_SPACE << 16; + break; + + case ESC_w: + case ESC_W: + *parsed_pattern++ = PT_WORD << 16; + break; + } + } + break; + + /* Unicode property matching */ + + case ESC_P: + case ESC_p: +#ifdef SUPPORT_UNICODE + { + BOOL negated; + uint16_t ptype = 0, pdata = 0; + if (!get_ucp(&ptr, &negated, &ptype, &pdata, &errorcode, cb)) + goto FAILED; + if (negated) escape = (escape == ESC_P)? 
ESC_p : ESC_P; + *parsed_pattern++ = META_ESCAPE + escape; + *parsed_pattern++ = (ptype << 16) | pdata; + okquantifier = TRUE; + } +#else + errorcode = ERR45; + goto FAILED; +#endif + break; /* End \P and \p */ + + /* When \g is used with quotes or angle brackets as delimiters, it is a + numerical or named subroutine call, and control comes here. When used + with brace delimiters it is a numberical back reference and does not come + here because check_escape() returns it directly as a reference. \k is + always a named back reference. */ + + case ESC_g: + case ESC_k: + if (ptr >= ptrend || (*ptr != CHAR_LEFT_CURLY_BRACKET && + *ptr != CHAR_LESS_THAN_SIGN && *ptr != CHAR_APOSTROPHE)) + { + errorcode = (escape == ESC_g)? ERR57 : ERR69; + goto FAILED; + } + terminator = (*ptr == CHAR_LESS_THAN_SIGN)? + CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)? + CHAR_APOSTROPHE : CHAR_RIGHT_CURLY_BRACKET; + + /* For a non-braced \g, check for a numerical recursion. */ + + if (escape == ESC_g && terminator != CHAR_RIGHT_CURLY_BRACKET) + { + PCRE2_SPTR p = ptr + 1; + + if (read_number(&p, ptrend, cb->bracount, MAX_GROUP_NUMBER, ERR61, &i, + &errorcode)) + { + if (p >= ptrend || *p != terminator) + { + errorcode = ERR57; + goto FAILED; + } + ptr = p; + goto SET_RECURSION; + } + if (errorcode != 0) goto FAILED; + } + + /* Not a numerical recursion */ + + if (!read_name(&ptr, ptrend, terminator, &offset, &name, &namelen, + &errorcode, cb)) goto FAILED; + + /* \k and \g when used with braces are back references, whereas \g used + with quotes or angle brackets is a recursion */ + + *parsed_pattern++ = + (escape == ESC_k || terminator == CHAR_RIGHT_CURLY_BRACKET)? 
+ META_BACKREF_BYNAME : META_RECURSE_BYNAME; + *parsed_pattern++ = namelen; + + PUTOFFSET(offset, parsed_pattern); + okquantifier = TRUE; + break; + } + break; /* End escape sequence processing */ + + + /* ---- Single-character special items ---- */ + + case CHAR_CIRCUMFLEX_ACCENT: + *parsed_pattern++ = META_CIRCUMFLEX; + break; + + case CHAR_DOLLAR_SIGN: + *parsed_pattern++ = META_DOLLAR; + break; + + case CHAR_DOT: + *parsed_pattern++ = META_DOT; + okquantifier = TRUE; + break; + + + /* ---- Single-character quantifiers ---- */ + + case CHAR_ASTERISK: + meta_quantifier = META_ASTERISK; + goto CHECK_QUANTIFIER; + + case CHAR_PLUS: + meta_quantifier = META_PLUS; + goto CHECK_QUANTIFIER; + + case CHAR_QUESTION_MARK: + meta_quantifier = META_QUERY; + goto CHECK_QUANTIFIER; + + + /* ---- Potential {n,m} quantifier ---- */ + + case CHAR_LEFT_CURLY_BRACKET: + if (!read_repeat_counts(&ptr, ptrend, &min_repeat, &max_repeat, + &errorcode)) + { + if (errorcode != 0) goto FAILED; /* Error in quantifier. */ + PARSED_LITERAL(c, parsed_pattern); /* Not a quantifier */ + break; /* No more quantifier processing */ + } + meta_quantifier = META_MINMAX; + /* Fall through */ + + + /* ---- Quantifier post-processing ---- */ + + /* Check that a quantifier is allowed after the previous item. */ + + CHECK_QUANTIFIER: + if (!prev_okquantifier) + { + errorcode = ERR9; + goto FAILED_BACK; + } + + /* Now we can put the quantifier into the parsed pattern vector. At this + stage, we have only the basic quantifier. The check for a following + or ? + modifier happens at the top of the loop, after any intervening comments + have been removed. 
*/ + + *parsed_pattern++ = meta_quantifier; + if (c == CHAR_LEFT_CURLY_BRACKET) + { + *parsed_pattern++ = min_repeat; + *parsed_pattern++ = max_repeat; } break; - /* Anything else is variable length */ - case OP_ANYNL: - case OP_BRAMINZERO: - case OP_BRAPOS: - case OP_BRAPOSZERO: - case OP_BRAZERO: - case OP_CBRAPOS: - case OP_EXTUNI: - case OP_KETRMAX: - case OP_KETRMIN: - case OP_KETRPOS: - case OP_MINPLUS: - case OP_MINPLUSI: - case OP_MINQUERY: - case OP_MINQUERYI: - case OP_MINSTAR: - case OP_MINSTARI: - case OP_MINUPTO: - case OP_MINUPTOI: - case OP_NOTMINPLUS: - case OP_NOTMINPLUSI: - case OP_NOTMINQUERY: - case OP_NOTMINQUERYI: - case OP_NOTMINSTAR: - case OP_NOTMINSTARI: - case OP_NOTMINUPTO: - case OP_NOTMINUPTOI: - case OP_NOTPLUS: - case OP_NOTPLUSI: - case OP_NOTPOSPLUS: - case OP_NOTPOSPLUSI: - case OP_NOTPOSQUERY: - case OP_NOTPOSQUERYI: - case OP_NOTPOSSTAR: - case OP_NOTPOSSTARI: - case OP_NOTPOSUPTO: - case OP_NOTPOSUPTOI: - case OP_NOTQUERY: - case OP_NOTQUERYI: - case OP_NOTSTAR: - case OP_NOTSTARI: - case OP_NOTUPTO: - case OP_NOTUPTOI: - case OP_PLUS: - case OP_PLUSI: - case OP_POSPLUS: - case OP_POSPLUSI: - case OP_POSQUERY: - case OP_POSQUERYI: - case OP_POSSTAR: - case OP_POSSTARI: - case OP_POSUPTO: - case OP_POSUPTOI: - case OP_QUERY: - case OP_QUERYI: - case OP_REF: - case OP_REFI: - case OP_DNREF: - case OP_DNREFI: - case OP_SBRA: - case OP_SBRAPOS: - case OP_SCBRA: - case OP_SCBRAPOS: - case OP_SCOND: - case OP_SKIPZERO: - case OP_STAR: - case OP_STARI: - case OP_TYPEMINPLUS: - case OP_TYPEMINQUERY: - case OP_TYPEMINSTAR: - case OP_TYPEMINUPTO: - case OP_TYPEPLUS: - case OP_TYPEPOSPLUS: - case OP_TYPEPOSQUERY: - case OP_TYPEPOSSTAR: - case OP_TYPEPOSUPTO: - case OP_TYPEQUERY: - case OP_TYPESTAR: - case OP_TYPEUPTO: - case OP_UPTO: - case OP_UPTOI: - return -1; + /* ---- Character class ---- */ - /* Catch unrecognized opcodes so that when new ones are added they - are not forgotten, as has happened in the past. 
*/ + case CHAR_LEFT_SQUARE_BRACKET: + okquantifier = TRUE; - default: - return -4; - } + /* In another (POSIX) regex library, the ugly syntax [[:<:]] and [[:>:]] is + used for "start of word" and "end of word". As these are otherwise illegal + sequences, we don't break anything by recognizing them. They are replaced + by \b(?=\w) and \b(?<=\w) respectively. Sequences like [a[:<:]] are + erroneous and are handled by the normal code below. */ + + if (ptrend - ptr >= 6 && + (PRIV(strncmp_c8)(ptr, STRING_WEIRD_STARTWORD, 6) == 0 || + PRIV(strncmp_c8)(ptr, STRING_WEIRD_ENDWORD, 6) == 0)) + { + *parsed_pattern++ = META_ESCAPE + ESC_b; + + if (ptr[2] == CHAR_LESS_THAN_SIGN) + { + *parsed_pattern++ = META_LOOKAHEAD; + } + else + { + *parsed_pattern++ = META_LOOKBEHIND; + *has_lookbehind = TRUE; + + /* The offset is used only for the "non-fixed length" error; this won't + occur here, so just store zero. */ + + PUTOFFSET((PCRE2_SIZE)0, parsed_pattern); + } + + if ((options & PCRE2_UCP) == 0) + *parsed_pattern++ = META_ESCAPE + ESC_w; + else + { + *parsed_pattern++ = META_ESCAPE + ESC_p; + *parsed_pattern++ = PT_WORD << 16; + } + *parsed_pattern++ = META_KET; + ptr += 6; + break; + } + + /* PCRE supports POSIX class stuff inside a class. Perl gives an error if + they are encountered at the top level, so we'll do that too. */ + + if (ptr < ptrend && (*ptr == CHAR_COLON || *ptr == CHAR_DOT || + *ptr == CHAR_EQUALS_SIGN) && + check_posix_syntax(ptr, ptrend, &tempptr)) + { + errorcode = (*ptr-- == CHAR_COLON)? ERR12 : ERR13; + goto FAILED; + } + + /* Process a regular character class. If the first character is '^', set + the negation flag. If the first few characters (either before or after ^) + are \Q\E or \E we skip them too. This makes for compatibility with Perl. 
*/ + + negate_class = FALSE; + while (ptr < ptrend) + { + GETCHARINCTEST(c, ptr); + if (c == CHAR_BACKSLASH) + { + if (ptr < ptrend && *ptr == CHAR_E) ptr++; + else if (ptrend - ptr >= 3 && + PRIV(strncmp_c8)(ptr, STR_Q STR_BACKSLASH STR_E, 3) == 0) + ptr += 3; + else + break; + } + else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT) + negate_class = TRUE; + else break; + } + + /* Now the real contents of the class; c has the first "real" character. + Empty classes are permitted only if the option is set. */ + + if (c == CHAR_RIGHT_SQUARE_BRACKET && + (cb->external_options & PCRE2_ALLOW_EMPTY_CLASS) != 0) + { + *parsed_pattern++ = negate_class? META_CLASS_EMPTY_NOT : META_CLASS_EMPTY; + break; /* End of class processing */ + } + + /* Process a non-empty class. */ + + *parsed_pattern++ = negate_class? META_CLASS_NOT : META_CLASS; + class_range_state = RANGE_NO; + + /* In an EBCDIC environment, Perl treats alphabetic ranges specially + because there are holes in the encoding, and simply using the range A-Z + (for example) would include the characters in the holes. This applies only + to ranges where both values are literal; [\xC1-\xE9] is different to [A-Z] + in this respect. In order to accommodate this, we keep track of whether + character values are literal or not, and a state variable for handling + ranges. */ + + /* Loop for the contents of the class */ + + for (;;) + { + BOOL char_is_literal = TRUE; + + /* Inside \Q...\E everything is literal except \E */ + + if (inescq) + { + if (c == CHAR_BACKSLASH && ptr < ptrend && *ptr == CHAR_E) + { + inescq = FALSE; /* Reset literal state */ + ptr++; /* Skip the 'E' */ + goto CLASS_CONTINUE; + } + goto CLASS_LITERAL; + } + + /* Handle POSIX class names. Perl allows a negation extension of the + form [:^name:]. A square bracket that doesn't match the syntax is + treated as a literal. We also recognize the POSIX constructions + [.ch.] and [=ch=] ("collating elements") and fault them, as Perl + 5.6 and 5.8 do. 
*/ + + if (c == CHAR_LEFT_SQUARE_BRACKET && + ptrend - ptr >= 3 && + (*ptr == CHAR_COLON || *ptr == CHAR_DOT || + *ptr == CHAR_EQUALS_SIGN) && + check_posix_syntax(ptr, ptrend, &tempptr)) + { + BOOL posix_negate = FALSE; + int posix_class; + + /* Perl treats a hyphen before a POSIX class as a literal, not the + start of a range. However, it gives a warning in its warning mode. PCRE + does not have a warning mode, so we give an error, because this is + likely an error on the user's part. */ + + if (class_range_state == RANGE_STARTED) + { + errorcode = ERR50; + goto FAILED; + } + + if (*ptr != CHAR_COLON) + { + errorcode = ERR13; + goto FAILED_BACK; + } + + if (*(++ptr) == CHAR_CIRCUMFLEX_ACCENT) + { + posix_negate = TRUE; + ptr++; + } + + posix_class = check_posix_name(ptr, (int)(tempptr - ptr)); + if (posix_class < 0) + { + errorcode = ERR30; + goto FAILED; + } + ptr = tempptr + 2; + + /* Perl treats a hyphen after a POSIX class as a literal, not the + start of a range. However, it gives a warning in its warning mode. PCRE + does not have a warning mode, so we give an error, because this is + likely an error on the user's part. */ + + if (ptr < ptrend && *ptr == CHAR_MINUS) + { + errorcode = ERR50; + goto FAILED; + } + + /* Set "a hyphen is not the start of a range" just in case the POSIX + class is followed by \E or \Q\E (possibly repeated - fuzzers do that + kind of thing) and *then* a hyphen. This causes that hyphen to be + treated as a literal. I don't think it's worth setting up special + apparatus to do otherwise. */ + + class_range_state = RANGE_NO; + + /* When PCRE2_UCP is set, some of the POSIX classes are converted to + use Unicode properties \p or \P or, in one case, \h or \H. The + substitutes table has two values per class, containing the type and + value of a \p or \P item. The special cases are specified with a + negative type: a non-zero value causes \h or \H to be used, and a zero + value falls through to behave like a non-UCP POSIX class. 
*/ + +#ifdef SUPPORT_UNICODE + if ((options & PCRE2_UCP) != 0) + { + int ptype = posix_substitutes[2*posix_class]; + int pvalue = posix_substitutes[2*posix_class + 1]; + if (ptype >= 0) + { + *parsed_pattern++ = META_ESCAPE + (posix_negate? ESC_P : ESC_p); + *parsed_pattern++ = (ptype << 16) | pvalue; + goto CLASS_CONTINUE; + } + + if (pvalue != 0) + { + *parsed_pattern++ = META_ESCAPE + (posix_negate? ESC_H : ESC_h); + goto CLASS_CONTINUE; + } + + /* Fall through */ + } +#endif /* SUPPORT_UNICODE */ + + /* Non-UCP POSIX class */ + + *parsed_pattern++ = posix_negate? META_POSIX_NEG : META_POSIX; + *parsed_pattern++ = posix_class; + } + + /* Handle potential start of range */ + + else if (c == CHAR_MINUS && class_range_state >= RANGE_OK_ESCAPED) + { + *parsed_pattern++ = (class_range_state == RANGE_OK_LITERAL)? + META_RANGE_LITERAL : META_RANGE_ESCAPED; + class_range_state = RANGE_STARTED; + } + + /* Handle a literal character */ + + else if (c != CHAR_BACKSLASH) + { + CLASS_LITERAL: + if (class_range_state == RANGE_STARTED) + { + if (c == parsed_pattern[-2]) /* Optimize one-char range */ + parsed_pattern--; + else if (parsed_pattern[-2] > c) /* Check range is in order */ + { + errorcode = ERR8; + goto FAILED_BACK; + } + else + { + if (!char_is_literal && parsed_pattern[-1] == META_RANGE_LITERAL) + parsed_pattern[-1] = META_RANGE_ESCAPED; + PARSED_LITERAL(c, parsed_pattern); + } + class_range_state = RANGE_NO; + } + else /* Potential start of range */ + { + class_range_state = char_is_literal? + RANGE_OK_LITERAL : RANGE_OK_ESCAPED; + PARSED_LITERAL(c, parsed_pattern); + } + } + + /* Handle escapes in a class */ + + else + { + escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, + options, TRUE, cb); + + if (errorcode != 0) goto FAILED; + if (escape == 0) /* Escaped character code point is in c */ + { + char_is_literal = FALSE; + goto CLASS_LITERAL; + } + + /* These three escapes do not alter the class range state. 
*/ + + if (escape == ESC_b) + { + c = CHAR_BS; /* \b is backspace in a class */ + char_is_literal = FALSE; + goto CLASS_LITERAL; + } + + else if (escape == ESC_Q) + { + inescq = TRUE; /* Enter literal mode */ + goto CLASS_CONTINUE; + } + + else if (escape == ESC_E) /* Ignore orphan \E */ + goto CLASS_CONTINUE; + + /* The second part of a range can be a single-character escape + sequence (detected above), but not any of the other escapes. Perl + treats a hyphen as a literal in such circumstances. However, in Perl's + warning mode, a warning is given, so PCRE now faults it, as it is + almost certainly a mistake on the user's part. */ + + if (class_range_state == RANGE_STARTED) + { + errorcode = ERR50; + goto FAILED; + } + + /* Of the remaining escapes, only those that define characters are + allowed in a class. None may start a range. */ + + class_range_state = RANGE_NO; + switch(escape) + { + case ESC_N: + errorcode = ERR71; /* Not supported in a class */ + goto FAILED; + + case ESC_H: + case ESC_h: + case ESC_V: + case ESC_v: + *parsed_pattern++ = META_ESCAPE + escape; + break; + + /* These escapes are converted to Unicode property tests when + PCRE2_UCP is set. */ + + case ESC_d: + case ESC_D: + case ESC_s: + case ESC_S: + case ESC_w: + case ESC_W: + if ((options & PCRE2_UCP) == 0) + { + *parsed_pattern++ = META_ESCAPE + escape; + } + else + { + *parsed_pattern++ = META_ESCAPE + + ((escape == ESC_d || escape == ESC_s || escape == ESC_w)? 
+ ESC_p : ESC_P); + switch(escape) + { + case ESC_d: + case ESC_D: + *parsed_pattern++ = (PT_PC << 16) | ucp_Nd; + break; + + case ESC_s: + case ESC_S: + *parsed_pattern++ = PT_SPACE << 16; + break; + + case ESC_w: + case ESC_W: + *parsed_pattern++ = PT_WORD << 16; + break; + } + } + break; + + /* Explicit Unicode property matching */ + + case ESC_P: + case ESC_p: +#ifdef SUPPORT_UNICODE + { + BOOL negated; + uint16_t ptype = 0, pdata = 0; + if (!get_ucp(&ptr, &negated, &ptype, &pdata, &errorcode, cb)) + goto FAILED; + if (negated) escape = (escape == ESC_P)? ESC_p : ESC_P; + *parsed_pattern++ = META_ESCAPE + escape; + *parsed_pattern++ = (ptype << 16) | pdata; + } +#else + errorcode = ERR45; + goto FAILED; +#endif + break; /* End \P and \p */ + + default: /* All others are not allowed in a class */ + errorcode = ERR7; + goto FAILED_BACK; + } + } + + /* Proceed to next thing in the class. */ + + CLASS_CONTINUE: + if (ptr >= ptrend) + { + errorcode = ERR6; /* Missing terminating ']' */ + goto FAILED; + } + GETCHARINCTEST(c, ptr); + if (c == CHAR_RIGHT_SQUARE_BRACKET && !inescq) break; + } /* End of class-processing loop */ + + if (class_range_state == RANGE_STARTED) + { + parsed_pattern[-1] = CHAR_MINUS; + class_range_state = RANGE_NO; + } + + *parsed_pattern++ = META_CLASS_END; + break; /* End of character class */ + + + /* ---- Opening parenthesis ---- */ + + case CHAR_LEFT_PARENTHESIS: + if (ptr >= ptrend) goto UNCLOSED_PARENTHESIS; + + /* If ( is not followed by ? it is either a capture or a special verb. */ + + if (*ptr != CHAR_QUESTION_MARK) + { + const char *vn; + + /* Handle capturing brackets (or non-capturing if auto-capture is turned + off). 
*/ + + if (*ptr != CHAR_ASTERISK) + { + nest_depth++; + if ((options & PCRE2_NO_AUTO_CAPTURE) == 0) + { + cb->bracount++; + *parsed_pattern++ = META_CAPTURE | cb->bracount; + } + else *parsed_pattern++ = META_NOCAPTURE; + } + + + /* ---- Handle (*VERB) and (*VERB:NAME) ---- */ + + /* Do nothing for (*) so it gives a "bad quantifier" error rather than + "(*MARK) must have an argument". */ + + else if (ptrend - ptr > 1 && ptr[1] != CHAR_RIGHT_PARENTHESIS) + { + vn = verbnames; + if (!read_name(&ptr, ptrend, 0, &offset, &name, &namelen, &errorcode, + cb)) goto FAILED; + if (ptr >= ptrend || (*ptr != CHAR_COLON && + *ptr != CHAR_RIGHT_PARENTHESIS)) + { + errorcode = ERR60; /* Malformed */ + goto FAILED; + } + + /* Scan the table of verb names */ + + for (i = 0; i < verbcount; i++) + { + if (namelen == verbs[i].len && + PRIV(strncmp_c8)(name, vn, namelen) == 0) + break; + vn += verbs[i].len + 1; + } + + if (i >= verbcount) + { + errorcode = ERR60; /* Verb not recognized */ + goto FAILED; + } + + /* An empty argument is treated as no argument. */ + + if (*ptr == CHAR_COLON && ptr + 1 < ptrend && + ptr[1] == CHAR_RIGHT_PARENTHESIS) + ptr++; /* Advance to the closing parens */ + + /* Check for mandatory non-empty argument; this is (*MARK) */ + + if (verbs[i].has_arg > 0 && *ptr != CHAR_COLON) + { + errorcode = ERR66; + goto FAILED; + } + + /* It appears that Perl allows any characters whatsoever, other than a + closing parenthesis, to appear in arguments ("names"), so we no longer + insist on letters, digits, and underscores. Perl does not, however, do + any interpretation within arguments, and has no means of including a + closing parenthesis. PCRE supports escape processing but only when it + is requested by an option. We set inverbname TRUE here, and let the + main loop take care of this so that escape and \x processing is done by + the main code above. 
*/ + + if (*ptr++ == CHAR_COLON) /* Skip past : or ) */ + { + if (verbs[i].has_arg < 0) /* Argument is forbidden */ + { + errorcode = ERR59; + goto FAILED; + } + *parsed_pattern++ = verbs[i].meta + + ((verbs[i].meta != META_MARK)? 0x00010000u:0); + verblengthptr = parsed_pattern++; + verbnamestart = ptr; + inverbname = TRUE; + } + else /* No verb "name" argument */ + { + *parsed_pattern++ = verbs[i].meta; + } + } /* End of (*VERB) handling */ + break; /* Done with this parenthesis */ + } /* End of groups that don't start with (? */ + + + /* ---- Items starting (? ---- */ + + /* The type of item is determined by what follows (?. Handle (?| and option + changes under "default" because both need a new block on the nest stack. + Comments starting with (?# are handled above. Note that there is some + ambiguity about the sequence (?- because if a digit follows it's a relative + recursion or subroutine call whereas otherwise it's an option unsetting. */ + + if (++ptr >= ptrend) goto UNCLOSED_PARENTHESIS; + + switch(*ptr) + { + default: + if (*ptr == CHAR_MINUS && ptrend - ptr > 1 && IS_DIGIT(ptr[1])) + goto RECURSION_BYNUMBER; /* The + case is handled by CHAR_PLUS */ + + /* We now have either (?| or a (possibly empty) option setting, + optionally followed by a non-capturing group. */ + + nest_depth++; + if (top_nest == NULL) top_nest = (nest_save *)(cb->start_workspace); + else if (++top_nest >= end_nests) + { + errorcode = ERR84; + goto FAILED; + } + top_nest->nest_depth = nest_depth; + top_nest->flags = 0; + if ((options & PCRE2_EXTENDED) != 0) top_nest->flags |= NSF_EXTENDED; + if ((options & PCRE2_DUPNAMES) != 0) top_nest->flags |= NSF_DUPNAMES; + + /* Start of non-capturing group that resets the capture count for each + branch. 
*/ + + if (*ptr == CHAR_VERTICAL_LINE) + { + top_nest->reset_group = (uint16_t)cb->bracount; + top_nest->max_group = (uint16_t)cb->bracount; + top_nest->flags |= NSF_RESET; + cb->external_flags |= PCRE2_DUPCAPUSED; + *parsed_pattern++ = META_NOCAPTURE; + ptr++; + } + + /* Scan for options imsxJU. We need to keep track of (?x) and (?J) for + use while scanning. The other options are used during the compiling + phases. */ + + else + { + top_nest->reset_group = 0; + top_nest->max_group = 0; + set = unset = 0; + optset = &set; + + while (ptr < ptrend && *ptr != CHAR_RIGHT_PARENTHESIS && + *ptr != CHAR_COLON) + { + switch (*ptr++) + { + case CHAR_MINUS: optset = &unset; break; + + case CHAR_J: /* Record that it changed in the external options */ + *optset |= PCRE2_DUPNAMES; + cb->external_flags |= PCRE2_JCHANGED; + break; + + case CHAR_i: *optset |= PCRE2_CASELESS; break; + case CHAR_m: *optset |= PCRE2_MULTILINE; break; + case CHAR_s: *optset |= PCRE2_DOTALL; break; + case CHAR_x: *optset |= PCRE2_EXTENDED; break; + case CHAR_U: *optset |= PCRE2_UNGREEDY; break; + + default: + errorcode = ERR11; + ptr--; /* Correct the offset */ + goto FAILED; + } + } + options = (options | set) & (~unset); + + /* If the options ended with ')' this is not the start of a nested + group with option changes, so the options change at this level. + In this case, if the previous level set up a nest block, discard the + one we have just created. Otherwise adjust it for the previous level. + If the options ended with ':' we are starting a non-capturing group, + possibly with an options setting. */ + + if (ptr >= ptrend) goto UNCLOSED_PARENTHESIS; + if (*ptr++ == CHAR_RIGHT_PARENTHESIS) + { + nest_depth--; /* This is not a nested group after all. */ + if (top_nest > (nest_save *)(cb->start_workspace) && + (top_nest-1)->nest_depth == nest_depth) top_nest--; + else top_nest->nest_depth = nest_depth; + } + else *parsed_pattern++ = META_NOCAPTURE; + + /* If nothing changed, no need to record. 
*/ + + if (set != 0 || unset != 0) + { + *parsed_pattern++ = META_OPTIONS; + *parsed_pattern++ = options; + } + } /* End options processing */ + break; /* End default case after (? */ + + + /* ---- Python syntax support ---- */ + + case CHAR_P: + if (++ptr >= ptrend) goto UNCLOSED_PARENTHESIS; + + /* (?P is the same as (?, which defines a named group. */ + + if (*ptr == CHAR_LESS_THAN_SIGN) + { + terminator = CHAR_GREATER_THAN_SIGN; + goto DEFINE_NAME; + } + + /* (?P>name) is the same as (?&name), which is a recursion or subroutine + call. */ + + if (*ptr == CHAR_GREATER_THAN_SIGN) goto RECURSE_BY_NAME; + + /* (?P=name) is the same as \k, a back reference by name. Anything + else after (?P is an error. */ + + if (*ptr != CHAR_EQUALS_SIGN) + { + errorcode = ERR41; + goto FAILED; + } + if (!read_name(&ptr, ptrend, CHAR_RIGHT_PARENTHESIS, &offset, &name, + &namelen, &errorcode, cb)) goto FAILED; + *parsed_pattern++ = META_BACKREF_BYNAME; + *parsed_pattern++ = namelen; + PUTOFFSET(offset, parsed_pattern); + okquantifier = TRUE; + break; /* End of (?P processing */ + + + /* ---- Recursion/subroutine calls by number ---- */ + + case CHAR_R: + i = 0; /* (?R) == (?R0) */ + ptr++; + if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS) + { + errorcode = ERR58; + goto FAILED; + } + goto SET_RECURSION; + + /* An item starting (?- followed by a digit comes here via the "default" + case because (?- followed by a non-digit is an options setting. */ + + case CHAR_PLUS: + if (ptrend - ptr < 2 || !IS_DIGIT(ptr[1])) + { + errorcode = ERR29; /* Missing number */ + goto FAILED; + } + /* Fall through */ + + case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: + case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9: + RECURSION_BYNUMBER: + if (!read_number(&ptr, ptrend, + (IS_DIGIT(*ptr))? 
-1:(int)(cb->bracount), /* + and - are relative */ + MAX_GROUP_NUMBER, ERR61, + &i, &errorcode)) goto FAILED; + if (i < 0) /* NB (?0) is permitted */ + { + errorcode = ERR15; /* Unknown group */ + goto FAILED_BACK; + } + if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS) + goto UNCLOSED_PARENTHESIS; + + SET_RECURSION: + *parsed_pattern++ = META_RECURSE | (uint32_t)i; + offset = (PCRE2_SIZE)(ptr - cb->start_pattern); + ptr++; + PUTOFFSET(offset, parsed_pattern); + okquantifier = TRUE; + break; /* End of recursive call by number handling */ + + + /* ---- Recursion/subroutine calls by name ---- */ + + case CHAR_AMPERSAND: + RECURSE_BY_NAME: + if (!read_name(&ptr, ptrend, CHAR_RIGHT_PARENTHESIS, &offset, &name, + &namelen, &errorcode, cb)) goto FAILED; + *parsed_pattern++ = META_RECURSE_BYNAME; + *parsed_pattern++ = namelen; + PUTOFFSET(offset, parsed_pattern); + okquantifier = TRUE; + break; + + /* ---- Callout with numerical or string argument ---- */ + + case CHAR_C: + if (++ptr >= ptrend) goto UNCLOSED_PARENTHESIS; + + /* If the previous item was a condition starting (?(? an assertion, + optionally preceded by a callout, is expected. This is checked later on, + during actual compilation. However we need to identify this kind of + assertion in this pass because it must not be qualified. The value of + expect_cond_assert is set to 2 after (?(? is processed. We decrement it + for a callout - still leaving a positive value that identifies the + assertion. Multiple callouts or any other items will make it zero or + less, which doesn't matter because they will cause an error later. */ + + expect_cond_assert = prev_expect_cond_assert - 1; + + /* If previous_callout is not NULL, it means this follows a previous + callout. If it was a manual callout, do nothing; this means its "length + of next pattern item" field will remain zero. If it was an automatic + callout, abolish it. 
*/ + + if (previous_callout != NULL && (options & PCRE2_AUTO_CALLOUT) != 0 && + previous_callout == parsed_pattern - 4 && + parsed_pattern[-1] == 255) + parsed_pattern = previous_callout; + + /* Save for updating next pattern item length, and skip one item before + completing. */ + + previous_callout = parsed_pattern; + after_manual_callout = 1; + + /* Handle a string argument; specific delimiter is required. */ + + if (*ptr != CHAR_RIGHT_PARENTHESIS && !IS_DIGIT(*ptr)) + { + PCRE2_SIZE calloutlength; + PCRE2_SPTR startptr = ptr; + + delimiter = 0; + for (i = 0; PRIV(callout_start_delims)[i] != 0; i++) + { + if (*ptr == PRIV(callout_start_delims)[i]) + { + delimiter = PRIV(callout_end_delims)[i]; + break; + } + } + if (delimiter == 0) + { + errorcode = ERR82; + goto FAILED; + } + + *parsed_pattern = META_CALLOUT_STRING; + parsed_pattern += 3; /* Skip pattern info */ + + for (;;) + { + if (++ptr >= ptrend) + { + errorcode = ERR81; + ptr = startptr; /* To give a more useful message */ + goto FAILED; + } + if (*ptr == delimiter && (++ptr >= ptrend || *ptr != delimiter)) + break; + } + + calloutlength = (PCRE2_SIZE)(ptr - startptr); + if (calloutlength > UINT32_MAX) + { + errorcode = ERR72; + goto FAILED; + } + *parsed_pattern++ = (uint32_t)calloutlength; + offset = (PCRE2_SIZE)(startptr - cb->start_pattern); + PUTOFFSET(offset, parsed_pattern); + } + + /* Handle a callout with an optional numerical argument, which must be + less than or equal to 255. A missing argument gives 0. 
*/ + + else + { + int n = 0; + *parsed_pattern = META_CALLOUT_NUMBER; /* Numerical callout */ + parsed_pattern += 3; /* Skip pattern info */ + while (ptr < ptrend && IS_DIGIT(*ptr)) + { + n = n * 10 + *ptr++ - CHAR_0; + if (n > 255) + { + errorcode = ERR38; + goto FAILED; + } + } + *parsed_pattern++ = n; + } + + /* Both formats must have a closing parenthesis */ + + if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS) + { + errorcode = ERR39; + goto FAILED; + } + ptr++; + + /* Remember the offset to the next item in the pattern, and set a default + length. This should get updated after the next item is read. */ + + previous_callout[1] = ptr - cb->start_pattern; + previous_callout[2] = 0; + break; /* End callout */ + + + /* ---- Conditional group ---- */ + + /* A condition can be an assertion, a number (referring to a numbered + group's having been set), a name (referring to a named group), or 'R', + referring to overall recursion. R and R&name are also permitted + for recursion state tests. Numbers may be preceded by + or - to specify a + relative group number. + + There are several syntaxes for testing a named group: (?(name)) is used + by Python; Perl 5.10 onwards uses (?() or (?('name')). + + There are two unfortunate ambiguities. 'R' can be the recursive thing or + the name 'R' (and similarly for 'R' followed by digits). 'DEFINE' can be + the Perl DEFINE feature or the Python named test. We look for a name + first; if not found, we try the other case. + + For compatibility with auto-callouts, we allow a callout to be specified + before a condition that is an assertion. */ + + case CHAR_LEFT_PARENTHESIS: + if (++ptr >= ptrend) goto UNCLOSED_PARENTHESIS; + nest_depth++; + + /* If the next character is ? there must be an assertion next (optionally + preceded by a callout). We do not check this here, but instead we set + expect_cond_assert to 2. 
If this is still greater than zero (callouts + decrement it) when the next assertion is read, it will be marked as a + condition that must not be repeated. A value greater than zero also + causes checking that an assertion (possibly with callout) follows. */ + + if (*ptr == CHAR_QUESTION_MARK) + { + *parsed_pattern++ = META_COND_ASSERT; + ptr--; /* Pull pointer back to the opening parenthesis. */ + expect_cond_assert = 2; + break; /* End of conditional */ + } + + /* Handle (?([+-]number)... */ + + if (read_number(&ptr, ptrend, cb->bracount, MAX_GROUP_NUMBER, ERR61, &i, + &errorcode)) + { + if (i <= 0) + { + errorcode = ERR15; + goto FAILED; + } + *parsed_pattern++ = META_COND_NUMBER; + offset = (PCRE2_SIZE)(ptr - cb->start_pattern - 2); + PUTOFFSET(offset, parsed_pattern); + *parsed_pattern++ = i; + } + else if (errorcode != 0) goto FAILED; /* Number too big */ + + /* No number found. Handle the special case (?(VERSION[>]=n.m)... */ + + else if (ptrend - ptr >= 10 && + PRIV(strncmp_c8)(ptr, STRING_VERSION, 7) == 0 && + ptr[7] != CHAR_RIGHT_PARENTHESIS) + { + uint32_t ge = 0; + int major = 0; + int minor = 0; + + ptr += 7; + if (*ptr == CHAR_GREATER_THAN_SIGN) + { + ge = 1; + ptr++; + } + + /* NOTE: cannot write IS_DIGIT(*(++ptr)) here because IS_DIGIT + references its argument twice. 
*/ + + if (*ptr != CHAR_EQUALS_SIGN || (ptr++, !IS_DIGIT(*ptr))) + goto BAD_VERSION_CONDITION; + + if (!read_number(&ptr, ptrend, -1, 1000, ERR79, &major, &errorcode)) + goto FAILED; + + if (ptr >= ptrend) goto BAD_VERSION_CONDITION; + if (*ptr == CHAR_DOT) + { + if (++ptr >= ptrend || !IS_DIGIT(*ptr)) goto BAD_VERSION_CONDITION; + if (!read_number(&ptr, ptrend, -1, 99 , ERR79, &minor, &errorcode)) + goto FAILED; + if (minor < 10) minor *= 10; + if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS) + goto BAD_VERSION_CONDITION; + } + + *parsed_pattern++ = META_COND_VERSION; + *parsed_pattern++ = ge; + *parsed_pattern++ = major; + *parsed_pattern++ = minor; + } + + /* All the remaining cases now require us to read a name. We cannot at + this stage distinguish ambiguous cases such as (?(R12) which might be a + recursion test by number or a name, because the named groups have not yet + all been identified. Those cases are treated as names, but given a + different META code. */ + + else + { + BOOL was_r_ampersand = FALSE; + + if (*ptr == CHAR_R && ptrend - ptr > 1 && ptr[1] == CHAR_AMPERSAND) + { + terminator = CHAR_RIGHT_PARENTHESIS; + was_r_ampersand = TRUE; + ptr++; + } + else if (*ptr == CHAR_LESS_THAN_SIGN) + terminator = CHAR_GREATER_THAN_SIGN; + else if (*ptr == CHAR_APOSTROPHE) + terminator = CHAR_APOSTROPHE; + else + { + terminator = CHAR_RIGHT_PARENTHESIS; + ptr--; /* Point to char before name */ + } + if (!read_name(&ptr, ptrend, terminator, &offset, &name, &namelen, + &errorcode, cb)) goto FAILED; + + /* Handle (?(R&name) */ + + if (was_r_ampersand) + { + *parsed_pattern = META_COND_RNAME; + ptr--; /* Back to closing parens */ + } + + /* Handle (?(name). If the name is "DEFINE" we identify it with a + special code. Likewise if the name consists of R followed only by + digits. Otherwise, handle it like a quoted name. 
*/ + + else if (terminator == CHAR_RIGHT_PARENTHESIS) + { + if (namelen == 6 && PRIV(strncmp_c8)(name, STRING_DEFINE, 6) == 0) + *parsed_pattern = META_COND_DEFINE; + else + { + for (i = 1; i < (int)namelen; i++) + if (!IS_DIGIT(name[i])) break; + *parsed_pattern = (*name == CHAR_R && i >= (int)namelen)? + META_COND_RNUMBER : META_COND_NAME; + } + ptr--; /* Back to closing parens */ + } + + /* Handle (?('name') or (?() */ + + else *parsed_pattern = META_COND_NAME; + + /* All these cases except DEFINE end with the name length and offset; + DEFINE just has an offset (for the "too many branches" error). */ + + if (*parsed_pattern++ != META_COND_DEFINE) *parsed_pattern++ = namelen; + PUTOFFSET(offset, parsed_pattern); + } /* End cases that read a name */ + + /* Check the closing parenthesis of the condition */ + + if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS) + { + errorcode = ERR24; + goto FAILED; + } + ptr++; + break; /* End of condition processing */ + + + /* ---- Atomic group ---- */ + + case CHAR_GREATER_THAN_SIGN: + *parsed_pattern++ = META_ATOMIC; + nest_depth++; + ptr++; + break; + + + /* ---- Lookahead assertions ---- */ + + case CHAR_EQUALS_SIGN: + *parsed_pattern++ = META_LOOKAHEAD; + ptr++; + goto POST_ASSERTION; + + case CHAR_EXCLAMATION_MARK: + *parsed_pattern++ = META_LOOKAHEADNOT; + ptr++; + goto POST_ASSERTION; + + + /* ---- Lookbehind assertions ---- */ + + /* (?< followed by = or ! is a lookbehind assertion. Otherwise (?< is the + start of the name of a capturing group. */ + + case CHAR_LESS_THAN_SIGN: + if (ptrend - ptr <= 1 || + (ptr[1] != CHAR_EQUALS_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK)) + { + terminator = CHAR_GREATER_THAN_SIGN; + goto DEFINE_NAME; + } + *parsed_pattern++ = (ptr[1] == CHAR_EQUALS_SIGN)? 
+ META_LOOKBEHIND : META_LOOKBEHINDNOT; + *has_lookbehind = TRUE; + offset = (PCRE2_SIZE)(ptr - cb->start_pattern - 2); + PUTOFFSET(offset, parsed_pattern); + ptr += 2; + /* Fall through */ + + /* If the previous item was a condition starting (?(? an assertion, + optionally preceded by a callout, is expected. This is checked later on, + during actual compilation. However we need to identify this kind of + assertion in this pass because it must not be qualified. The value of + expect_cond_assert is set to 2 after (?(? is processed. We decrement it + for a callout - still leaving a positive value that identifies the + assertion. Multiple callouts or any other items will make it zero or + less, which doesn't matter because they will cause an error later. */ + + POST_ASSERTION: + nest_depth++; + if (prev_expect_cond_assert > 0) + { + if (top_nest == NULL) top_nest = (nest_save *)(cb->start_workspace); + else if (++top_nest >= end_nests) + { + errorcode = ERR84; + goto FAILED; + } + top_nest->nest_depth = nest_depth; + top_nest->flags = NSF_CONDASSERT; + if ((options & PCRE2_EXTENDED) != 0) top_nest->flags |= NSF_EXTENDED; + if ((options & PCRE2_DUPNAMES) != 0) top_nest->flags |= NSF_DUPNAMES; + } + break; + + + /* ---- Define a named group ---- */ + + /* A named group may be defined as (?'name') or (?). In the latter + case we jump to DEFINE_NAME from the disambiguation of (?< above with the + terminator set to '>'. */ + + case CHAR_APOSTROPHE: + terminator = CHAR_APOSTROPHE; /* Terminator */ + + DEFINE_NAME: + if (!read_name(&ptr, ptrend, terminator, &offset, &name, &namelen, + &errorcode, cb)) goto FAILED; + + /* We have a name for this capturing group. It is also assigned a number, + which is its primary means of identification. 
*/ + + cb->bracount++; + *parsed_pattern++ = META_CAPTURE | cb->bracount; + nest_depth++; + + /* Check not too many names */ + + if (cb->names_found >= MAX_NAME_COUNT) + { + errorcode = ERR49; + goto FAILED; + } + + /* Adjust the entry size to accommodate the longest name found. */ + + if (namelen + IMM2_SIZE + 1 > cb->name_entry_size) + cb->name_entry_size = (uint16_t)(namelen + IMM2_SIZE + 1); + + /* Scan the list to check for duplicates. For duplicate names, if the + number is the same, break the loop, which causes the name to be + discarded; otherwise, if DUPNAMES is not set, give an error. + If it is set, allow the name with a different number, but continue + scanning in case this is a duplicate with the same number. For + non-duplicate names, give an error if the number is duplicated. */ + + isdupname = FALSE; + ng = cb->named_groups; + for (i = 0; i < cb->names_found; i++, ng++) + { + if (namelen == ng->length && + PRIV(strncmp)(name, ng->name, (PCRE2_SIZE)namelen) == 0) + { + if (ng->number == cb->bracount) break; + if ((options & PCRE2_DUPNAMES) == 0) + { + errorcode = ERR43; + goto FAILED; + } + isdupname = ng->isdup = TRUE; /* Mark as a duplicate */ + cb->dupnames = TRUE; /* Duplicate names exist */ + } + else if (ng->number == cb->bracount) + { + errorcode = ERR65; + goto FAILED; + } + } + + if (i < cb->names_found) break; /* Ignore duplicate with same number */ + + /* Increase the list size if necessary */ + + if (cb->names_found >= cb->named_group_list_size) + { + uint32_t newsize = cb->named_group_list_size * 2; + named_group *newspace = + cb->cx->memctl.malloc(newsize * sizeof(named_group), + cb->cx->memctl.memory_data); + if (newspace == NULL) + { + errorcode = ERR21; + goto FAILED; + } + + memcpy(newspace, cb->named_groups, + cb->named_group_list_size * sizeof(named_group)); + if (cb->named_group_list_size > NAMED_GROUP_LIST_SIZE) + cb->cx->memctl.free((void *)cb->named_groups, + cb->cx->memctl.memory_data); + cb->named_groups = newspace; + 
cb->named_group_list_size = newsize; + } + + /* Add this name to the list */ + + cb->named_groups[cb->names_found].name = name; + cb->named_groups[cb->names_found].length = (uint16_t)namelen; + cb->named_groups[cb->names_found].number = cb->bracount; + cb->named_groups[cb->names_found].isdup = (uint16_t)isdupname; + cb->names_found++; + break; + } /* End of (? switch */ + break; /* End of ( handling */ + + + /* ---- Branch terminators ---- */ + + /* Alternation: reset the capture count if we are in a (?| group. */ + + case CHAR_VERTICAL_LINE: + if (top_nest != NULL && top_nest->nest_depth == nest_depth && + (top_nest->flags & NSF_RESET) != 0) + { + if (cb->bracount > top_nest->max_group) + top_nest->max_group = (uint16_t)cb->bracount; + cb->bracount = top_nest->reset_group; + } + *parsed_pattern++ = META_ALT; + break; + + /* End of group; reset the capture count to the maximum if we are in a (?| + group and/or reset the extended and dupnames options. Disallow quantifier + for a condition that is an assertion. */ + + case CHAR_RIGHT_PARENTHESIS: + okquantifier = TRUE; + if (top_nest != NULL && top_nest->nest_depth == nest_depth) + { + if ((top_nest->flags & NSF_RESET) != 0 && + top_nest->max_group > cb->bracount) + cb->bracount = top_nest->max_group; + if ((top_nest->flags & NSF_EXTENDED) != 0) options |= PCRE2_EXTENDED; + else options &= ~PCRE2_EXTENDED; + if ((top_nest->flags & NSF_DUPNAMES) != 0) options |= PCRE2_DUPNAMES; + else options &= ~PCRE2_DUPNAMES; + if ((top_nest->flags & NSF_CONDASSERT) != 0) + okquantifier = FALSE; + if (top_nest == (nest_save *)(cb->start_workspace)) top_nest = NULL; + else top_nest--; + } + if (nest_depth == 0) /* Unmatched closing parenthesis */ + { + errorcode = ERR22; + goto FAILED_BACK; + } + nest_depth--; + *parsed_pattern++ = META_KET; + break; + } /* End of switch on pattern character */ + } /* End of main character scan loop */ + +/* End of pattern reached. Check for missing ) at the end of a verb name. 
*/ + +if (inverbname && ptr >= ptrend) + { + errorcode = ERR60; + goto FAILED; } -/* Control never gets here */ + +/* Manage callout for the final item */ + +parsed_pattern = manage_callouts(ptr, &previous_callout, options, + parsed_pattern, cb); + +/* Terminate the parsed pattern, then return success if all groups are closed. +Otherwise we have unclosed parentheses. */ + +if (parsed_pattern >= parsed_pattern_end) + { + errorcode = ERR63; /* Internal error (parsed pattern overflow) */ + goto FAILED; + } +*parsed_pattern = META_END; +if (nest_depth == 0) return 0; + +UNCLOSED_PARENTHESIS: +errorcode = ERR14; + +/* Come here for all failures. */ + +FAILED: +cb->erroroffset = (PCRE2_SIZE)(ptr - cb->start_pattern); +return errorcode; + +/* Some errors need to indicate the previous character. */ + +FAILED_BACK: +ptr--; +goto FAILED; + +/* This failure happens several times. */ + +BAD_VERSION_CONDITION: +errorcode = ERR79; +goto FAILED; } @@ -1210,1294 +4173,3565 @@ for (;;) +#ifdef SUPPORT_UNICODE /************************************************* -* Scan compiled branch for non-emptiness * +* Get othercase range * *************************************************/ -/* This function scans through a branch of a compiled pattern to see whether it -can match the empty string. It is called from could_be_empty() below and from -compile_branch() when checking for an unlimited repeat of a group that can -match nothing. Note that first_significant_code() skips over backward and -negative forward assertions when its final argument is TRUE. If we hit an -unclosed bracket, we return "empty" - this means we've struck an inner bracket -whose current branch will already have been scanned. +/* This function is passed the start and end of a class range in UCP mode. It +searches up the characters, looking for ranges of characters in the "other" +case. Each call returns the next one, updating the start address. 
A character +with multiple other cases is returned on its own with a special return value. Arguments: - code points to start of search - endcode points to where to stop - utf TRUE if in UTF mode - cb compile data - recurses chain of recurse_check to catch mutual recursion - -Returns: TRUE if what is matched could be empty -*/ - -static BOOL -could_be_empty_branch(PCRE2_SPTR code, PCRE2_SPTR endcode, BOOL utf, - compile_block *cb, recurse_check *recurses) -{ -register PCRE2_UCHAR c; -recurse_check this_recurse; - -for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); - code < endcode; - code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE)) - { - PCRE2_SPTR ccode; - - c = *code; - - /* Skip over forward assertions; the other assertions are skipped by - first_significant_code() with a TRUE final argument. */ - - if (c == OP_ASSERT) - { - do code += GET(code, 1); while (*code == OP_ALT); - c = *code; - continue; - } - - /* For a recursion/subroutine call, if its end has been reached, which - implies a backward reference subroutine call, we can scan it. If it's a - forward reference subroutine call, we can't. To detect forward reference - we have to scan up the list that is kept in the workspace. This function is - called only when doing the real compile, not during the pre-compile that - measures the size of the compiled pattern. */ - - if (c == OP_RECURSE) - { - PCRE2_SPTR scode = cb->start_code + GET(code, 1); - PCRE2_SPTR endgroup = scode; - BOOL empty_branch; - - /* Test for forward reference or uncompleted reference. This is disabled - when called to scan a completed pattern by setting cb->start_workspace to - NULL. 
*/ - - if (cb->start_workspace != NULL) - { - PCRE2_SPTR tcode; - for (tcode = cb->start_workspace; tcode < cb->hwm; tcode += LINK_SIZE) - if ((int)GET(tcode, 0) == (int)(code + 1 - cb->start_code)) return TRUE; - if (GET(scode, 1) == 0) return TRUE; /* Unclosed */ - } - - /* If the reference is to a completed group, we need to detect whether this - is a recursive call, as otherwise there will be an infinite loop. If it is - a recursion, just skip over it. Simple recursions are easily detected. For - mutual recursions we keep a chain on the stack. */ - - do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT); - if (code >= scode && code <= endgroup) continue; /* Simple recursion */ - else - { - recurse_check *r = recurses; - for (r = recurses; r != NULL; r = r->prev) - if (r->group == scode) break; - if (r != NULL) continue; /* Mutual recursion */ - } - - /* Completed reference; scan the referenced group, remembering it on the - stack chain to detect mutual recursions. */ - - empty_branch = FALSE; - this_recurse.prev = recurses; - this_recurse.group = scode; - - do - { - if (could_be_empty_branch(scode, endcode, utf, cb, &this_recurse)) - { - empty_branch = TRUE; - break; - } - scode += GET(scode, 1); - } - while (*scode == OP_ALT); - - if (!empty_branch) return FALSE; /* All branches are non-empty */ - continue; - } - - /* Groups with zero repeats can of course be empty; skip them. */ - - if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO || - c == OP_BRAPOSZERO) - { - code += PRIV(OP_lengths)[c]; - do code += GET(code, 1); while (*code == OP_ALT); - c = *code; - continue; - } - - /* A nested group that is already marked as "could be empty" can just be - skipped. */ - - if (c == OP_SBRA || c == OP_SBRAPOS || - c == OP_SCBRA || c == OP_SCBRAPOS) - { - do code += GET(code, 1); while (*code == OP_ALT); - c = *code; - continue; - } - - /* For other groups, scan the branches. 
*/ - - if (c == OP_BRA || c == OP_BRAPOS || - c == OP_CBRA || c == OP_CBRAPOS || - c == OP_ONCE || c == OP_ONCE_NC || - c == OP_COND || c == OP_SCOND) - { - BOOL empty_branch; - if (GET(code, 1) == 0) return TRUE; /* Hit unclosed bracket */ - - /* If a conditional group has only one branch, there is a second, implied, - empty branch, so just skip over the conditional, because it could be empty. - Otherwise, scan the individual branches of the group. */ - - if (c == OP_COND && code[GET(code, 1)] != OP_ALT) - code += GET(code, 1); - else - { - empty_branch = FALSE; - do - { - if (!empty_branch && could_be_empty_branch(code, endcode, utf, cb, - recurses)) empty_branch = TRUE; - code += GET(code, 1); - } - while (*code == OP_ALT); - if (!empty_branch) return FALSE; /* All branches are non-empty */ - } - - c = *code; - continue; - } - - /* Handle the other opcodes */ - - switch (c) - { - /* Check for quantifiers after a class. XCLASS is used for classes that - cannot be represented just by a bit map. This includes negated single - high-valued characters. The length in PRIV(OP_lengths)[] is zero; the - actual length is stored in the compiled code, so we must update "code" - here. 
*/ - -#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 - case OP_XCLASS: - ccode = code += GET(code, 1); - goto CHECK_CLASS_REPEAT; -#endif - - case OP_CLASS: - case OP_NCLASS: - ccode = code + PRIV(OP_lengths)[OP_CLASS]; - -#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 - CHECK_CLASS_REPEAT: -#endif - - switch (*ccode) - { - case OP_CRSTAR: /* These could be empty; continue */ - case OP_CRMINSTAR: - case OP_CRQUERY: - case OP_CRMINQUERY: - case OP_CRPOSSTAR: - case OP_CRPOSQUERY: - break; - - default: /* Non-repeat => class must match */ - case OP_CRPLUS: /* These repeats aren't empty */ - case OP_CRMINPLUS: - case OP_CRPOSPLUS: - return FALSE; - - case OP_CRRANGE: - case OP_CRMINRANGE: - case OP_CRPOSRANGE: - if (GET2(ccode, 1) > 0) return FALSE; /* Minimum > 0 */ - break; - } - break; - - /* Opcodes that must match a character */ - - case OP_ANY: - case OP_ALLANY: - case OP_ANYBYTE: - - case OP_PROP: - case OP_NOTPROP: - case OP_ANYNL: - - case OP_NOT_HSPACE: - case OP_HSPACE: - case OP_NOT_VSPACE: - case OP_VSPACE: - case OP_EXTUNI: - - case OP_NOT_DIGIT: - case OP_DIGIT: - case OP_NOT_WHITESPACE: - case OP_WHITESPACE: - case OP_NOT_WORDCHAR: - case OP_WORDCHAR: - - case OP_CHAR: - case OP_CHARI: - case OP_NOT: - case OP_NOTI: - - case OP_PLUS: - case OP_PLUSI: - case OP_MINPLUS: - case OP_MINPLUSI: - - case OP_NOTPLUS: - case OP_NOTPLUSI: - case OP_NOTMINPLUS: - case OP_NOTMINPLUSI: - - case OP_POSPLUS: - case OP_POSPLUSI: - case OP_NOTPOSPLUS: - case OP_NOTPOSPLUSI: - - case OP_EXACT: - case OP_EXACTI: - case OP_NOTEXACT: - case OP_NOTEXACTI: - - case OP_TYPEPLUS: - case OP_TYPEMINPLUS: - case OP_TYPEPOSPLUS: - case OP_TYPEEXACT: - - return FALSE; - - /* These are going to continue, as they may be empty, but we have to - fudge the length for the \p and \P cases. 
*/ - - case OP_TYPESTAR: - case OP_TYPEMINSTAR: - case OP_TYPEPOSSTAR: - case OP_TYPEQUERY: - case OP_TYPEMINQUERY: - case OP_TYPEPOSQUERY: - if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; - break; - - /* Same for these */ - - case OP_TYPEUPTO: - case OP_TYPEMINUPTO: - case OP_TYPEPOSUPTO: - if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) - code += 2; - break; - - /* End of branch */ - - case OP_KET: - case OP_KETRMAX: - case OP_KETRMIN: - case OP_KETRPOS: - case OP_ALT: - return TRUE; - - /* In UTF-8 or UTF-16 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, - POSQUERY, UPTO, MINUPTO, and POSUPTO and their caseless and negative - versions may be followed by a multibyte character. */ - -#ifdef MAYBE_UTF_MULTI - case OP_STAR: - case OP_STARI: - case OP_NOTSTAR: - case OP_NOTSTARI: - - case OP_MINSTAR: - case OP_MINSTARI: - case OP_NOTMINSTAR: - case OP_NOTMINSTARI: - - case OP_POSSTAR: - case OP_POSSTARI: - case OP_NOTPOSSTAR: - case OP_NOTPOSSTARI: - - case OP_QUERY: - case OP_QUERYI: - case OP_NOTQUERY: - case OP_NOTQUERYI: - - case OP_MINQUERY: - case OP_MINQUERYI: - case OP_NOTMINQUERY: - case OP_NOTMINQUERYI: - - case OP_POSQUERY: - case OP_POSQUERYI: - case OP_NOTPOSQUERY: - case OP_NOTPOSQUERYI: - if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]); - break; - - case OP_UPTO: - case OP_UPTOI: - case OP_NOTUPTO: - case OP_NOTUPTOI: - - case OP_MINUPTO: - case OP_MINUPTOI: - case OP_NOTMINUPTO: - case OP_NOTMINUPTOI: - - case OP_POSUPTO: - case OP_POSUPTOI: - case OP_NOTPOSUPTO: - case OP_NOTPOSUPTOI: - if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]); - break; -#endif /* MAYBE_UTF_MULTI */ - - /* MARK, and PRUNE/SKIP/THEN with an argument must skip over the argument - string. */ - - case OP_MARK: - case OP_PRUNE_ARG: - case OP_SKIP_ARG: - case OP_THEN_ARG: - code += code[1]; - break; - - /* None of the remaining opcodes are required to match a character. 
*/ - - default: - break; - } - } - -return TRUE; -} - - - -/************************************************* -* Scan compiled regex for non-emptiness * -*************************************************/ - -/* This function is called to check for left recursive calls. We want to check -the current branch of the current pattern to see if it could match the empty -string. If it could, we must look outwards for branches at other levels, -stopping when we pass beyond the bracket which is the subject of the recursion. -This function is called only during the real compile, not during the -pre-compile. - -Arguments: - code points to start of the recursion - endcode points to where to stop (current RECURSE item) - bcptr points to the chain of current (unclosed) branch starts - utf TRUE if in UTF mode - cb compile data - -Returns: TRUE if what is matched could be empty -*/ - -static BOOL -could_be_empty(PCRE2_SPTR code, PCRE2_SPTR endcode, branch_chain *bcptr, - BOOL utf, compile_block *cb) -{ -while (bcptr != NULL && bcptr->current_branch >= code) - { - if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cb, NULL)) - return FALSE; - bcptr = bcptr->outer; - } -return TRUE; -} - - - -/************************************************* -* Expand the workspace * -*************************************************/ - -/* This function is called during the second compiling phase, if the number of -forward references fills the existing workspace, which is originally a block on -the stack. A larger block is obtained from the heap unless the ultimate limit -has been reached or the increase will be rather small. 
- -Argument: pointer to the compile data block -Returns: 0 if all went well, else an error number + cptr points to starting character value; updated + d end value + ocptr where to put start of othercase range + odptr where to put end of othercase range + +Yield: -1 when no more + 0 when a range is returned + >0 the CASESET offset for char with multiple other cases + in this case, ocptr contains the original */ static int -expand_workspace(compile_block *cb) +get_othercase_range(uint32_t *cptr, uint32_t d, uint32_t *ocptr, + uint32_t *odptr) { -PCRE2_UCHAR *newspace; -int newsize = cb->workspace_size * 2; -if (newsize > COMPILE_WORK_SIZE_MAX) newsize = COMPILE_WORK_SIZE_MAX; -if (cb->workspace_size >= COMPILE_WORK_SIZE_MAX || - newsize - cb->workspace_size < WORK_SIZE_SAFETY_MARGIN) - return ERR72; -newspace = cb->cx->memctl.malloc(CU2BYTES(newsize), cb->cx->memctl.memory_data); -if (newspace == NULL) return ERR21; -memcpy(newspace, cb->start_workspace, cb->workspace_size * sizeof(PCRE2_UCHAR)); -cb->hwm = (PCRE2_UCHAR *)newspace + (cb->hwm - cb->start_workspace); -if (cb->workspace_size > COMPILE_WORK_SIZE) - cb->cx->memctl.free((void *)cb->start_workspace, cb->cx->memctl.memory_data); -cb->start_workspace = newspace; -cb->workspace_size = newsize; +uint32_t c, othercase, next; +unsigned int co; + +/* Find the first character that has an other case. If it has multiple other +cases, return its case offset value. */ + +for (c = *cptr; c <= d; c++) + { + if ((co = UCD_CASESET(c)) != 0) + { + *ocptr = c++; /* Character that has the set */ + *cptr = c; /* Rest of input range */ + return (int)co; + } + if ((othercase = UCD_OTHERCASE(c)) != c) break; + } + +if (c > d) return -1; /* Reached end of range */ + +/* Found a character that has a single other case. Search for the end of the +range, which is either the end of the input range, or a character that has zero +or more than one other cases. 
*/ + +*ocptr = othercase; +next = othercase + 1; + +for (++c; c <= d; c++) + { + if ((co = UCD_CASESET(c)) != 0 || UCD_OTHERCASE(c) != next) break; + next++; + } + +*odptr = next - 1; /* End of othercase range */ +*cptr = c; /* Rest of input range */ return 0; } +#endif /* SUPPORT_UNICODE */ /************************************************* -* Check for counted repeat * +* Add a character or range to a class (internal) * *************************************************/ -/* This function is called when a '{' is encountered in a place where it might -start a quantifier. It looks ahead to see if it really is a quantifier, that -is, one of the forms {ddd} {ddd,} or {ddd,ddd} where the ddds are digits. - -Argument: pointer to the first char after '{' -Returns: TRUE or FALSE -*/ - -static BOOL -is_counted_repeat(PCRE2_SPTR p) -{ -if (!IS_DIGIT(*p)) return FALSE; -p++; -while (IS_DIGIT(*p)) p++; -if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE; - -if (*p++ != CHAR_COMMA) return FALSE; -if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE; - -if (!IS_DIGIT(*p)) return FALSE; -p++; -while (IS_DIGIT(*p)) p++; - -return (*p == CHAR_RIGHT_CURLY_BRACKET); -} - - - -/************************************************* -* Handle escapes * -*************************************************/ - -/* This function is called when a \ has been encountered. It either returns a -positive value for a simple escape such as \d, or 0 for a data character, which -is placed in chptr. A backreference to group n is returned as negative n. On -entry, ptr is pointing at the \. On exit, it points the final code unit of the -escape sequence. +/* This function packages up the logic of adding a character or range of +characters to a class. The character values in the arguments will be within the +valid values for the current mode (8-bit, 16-bit, UTF, etc). This function is +called only from within the "add to class" group of functions, some of which +are recursive and mutually recursive. 
The external entry point is +add_to_class(). Arguments: - ptrptr points to the pattern position pointer - chptr points to a returned data character - errorcodeptr points to the errorcode variable (containing zero) - options the current options bits - isclass TRUE if inside a character class - cb compile data block + classbits the bit map for characters < 256 + uchardptr points to the pointer for extra data + options the options word + cb compile data + start start of range character + end end of range character -Returns: zero => a data character - positive => a special escape sequence - negative => a back reference - on error, errorcodeptr is set non-zero +Returns: the number of < 256 characters added + the pointer to extra data is updated */ -static int -check_escape(PCRE2_SPTR *ptrptr, uint32_t *chptr, int *errorcodeptr, - uint32_t options, BOOL isclass, compile_block *cb) +static unsigned int +add_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr, + uint32_t options, compile_block *cb, uint32_t start, uint32_t end) { -BOOL utf = (options & PCRE2_UTF) != 0; -PCRE2_SPTR ptr = *ptrptr + 1; -register uint32_t c, cc; -int escape = 0; -int i; +uint32_t c; +uint32_t classbits_end = (end <= 0xff ? end : 0xff); +unsigned int n8 = 0; -GETCHARINCTEST(c, ptr); /* Get character value, increment pointer */ -ptr--; /* Set pointer back to the last code unit */ +/* If caseless matching is required, scan the range and process alternate +cases. In Unicode, there are 8-bit characters that have alternate cases that +are greater than 255 and vice-versa. Sometimes we can just extend the original +range. */ -/* If backslash is at the end of the pattern, it's an error. */ - -if (c == CHAR_NULL && ptr >= cb->end_pattern) *errorcodeptr = ERR1; - -/* Non-alphanumerics are literals, so we just leave the value in c. An initial -value test saves a memory lookup for code points outside the alphanumeric -range. Otherwise, do a table lookup. 
A non-zero result is something that can be -returned immediately. Otherwise further processing is required. */ - -else if (c < ESCAPES_FIRST || c > ESCAPES_LAST) {} /* Definitely literal */ - -else if ((i = escapes[c - ESCAPES_FIRST]) != 0) +if ((options & PCRE2_CASELESS) != 0) { - if (i > 0) c = (uint32_t)i; else /* Positive is a data character */ +#ifdef SUPPORT_UNICODE + if ((options & PCRE2_UTF) != 0) { - escape = -i; /* Else return a special escape */ - if (escape == ESC_P || escape == ESC_p || escape == ESC_X) - cb->external_flags |= PCRE2_HASBKPORX; /* Note \P, \p, or \X */ + int rc; + uint32_t oc, od; + + options &= ~PCRE2_CASELESS; /* Remove for recursive calls */ + c = start; + + while ((rc = get_othercase_range(&c, end, &oc, &od)) >= 0) + { + /* Handle a single character that has more than one other case. */ + + if (rc > 0) n8 += add_list_to_class_internal(classbits, uchardptr, options, cb, + PRIV(ucd_caseless_sets) + rc, oc); + + /* Do nothing if the other case range is within the original range. */ + + else if (oc >= cb->class_range_start && od <= cb->class_range_end) continue; + + /* Extend the original range if there is overlap, noting that if oc < c, we + can't have od > end because a subrange is always shorter than the basic + range. Otherwise, use a recursive call to add the additional range. */ + + else if (oc < start && od >= start - 1) start = oc; /* Extend downwards */ + else if (od > end && oc <= end + 1) + { + end = od; /* Extend upwards */ + if (end > classbits_end) classbits_end = (end <= 0xff ? end : 0xff); + } + else n8 += add_to_class_internal(classbits, uchardptr, options, cb, oc, od); + } + } + else +#endif /* SUPPORT_UNICODE */ + + /* Not UTF mode */ + + for (c = start; c <= classbits_end; c++) + { + SETBIT(classbits, cb->fcc[c]); + n8++; } } -/* Escapes that need further processing, including those that are unknown. */ +/* Now handle the originally supplied range. 
Adjust the final value according +to the bit length - this means that the same lists of (e.g.) horizontal spaces +can be used in all cases. */ -else +if ((options & PCRE2_UTF) == 0 && end > MAX_NON_UTF_CHAR) + end = MAX_NON_UTF_CHAR; + +if (start > cb->class_range_start && end < cb->class_range_end) return n8; + +/* Use the bitmap for characters < 256. Otherwise use extra data.*/ + +for (c = start; c <= classbits_end; c++) { - PCRE2_SPTR oldptr; - BOOL braced, negated, overflow; - unsigned int s; - - switch (c) - { - /* A number of Perl escapes are not handled by PCRE. We give an explicit - error. */ - - case CHAR_l: - case CHAR_L: - *errorcodeptr = ERR37; - break; - - /* \u is unrecognized when PCRE2_ALT_BSUX is not set. When it is treated - specially, \u must be followed by four hex digits. Otherwise it is a - lowercase u letter. */ - - case CHAR_u: - if ((options & PCRE2_ALT_BSUX) == 0) *errorcodeptr = ERR37; else - { - uint32_t xc; - if ((cc = XDIGIT(ptr[1])) == 0xff) break; /* Not a hex digit */ - if ((xc = XDIGIT(ptr[2])) == 0xff) break; /* Not a hex digit */ - cc = (cc << 4) | xc; - if ((xc = XDIGIT(ptr[3])) == 0xff) break; /* Not a hex digit */ - cc = (cc << 4) | xc; - if ((xc = XDIGIT(ptr[4])) == 0xff) break; /* Not a hex digit */ - c = (cc << 4) | xc; - ptr += 4; - if (utf) - { - if (c > 0x10ffffU) *errorcodeptr = ERR77; - else if (c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73; - } - else if (c > MAX_NON_UTF_CHAR) *errorcodeptr = ERR77; - } - break; - - case CHAR_U: - /* \U is unrecognized unless PCRE2_ALT_BSUX is set, in which case it is an - upper case letter. */ - if ((options & PCRE2_ALT_BSUX) == 0) *errorcodeptr = ERR37; - break; - - /* In a character class, \g is just a literal "g". Outside a character - class, \g must be followed by one of a number of specific things: - - (1) A number, either plain or braced. If positive, it is an absolute - backreference. If negative, it is a relative backreference. This is a Perl - 5.10 feature. 
- - (2) Perl 5.10 also supports \g{name} as a reference to a named group. This - is part of Perl's movement towards a unified syntax for back references. As - this is synonymous with \k{name}, we fudge it up by pretending it really - was \k. - - (3) For Oniguruma compatibility we also support \g followed by a name or a - number either in angle brackets or in single quotes. However, these are - (possibly recursive) subroutine calls, _not_ backreferences. Just return - the ESC_g code (cf \k). */ - - case CHAR_g: - if (isclass) break; - if (ptr[1] == CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_APOSTROPHE) - { - escape = ESC_g; - break; - } - - /* Handle the Perl-compatible cases */ - - if (ptr[1] == CHAR_LEFT_CURLY_BRACKET) - { - PCRE2_SPTR p; - for (p = ptr+2; *p != CHAR_NULL && *p != CHAR_RIGHT_CURLY_BRACKET; p++) - if (*p != CHAR_MINUS && !IS_DIGIT(*p)) break; - if (*p != CHAR_NULL && *p != CHAR_RIGHT_CURLY_BRACKET) - { - escape = ESC_k; - break; - } - braced = TRUE; - ptr++; - } - else braced = FALSE; - - if (ptr[1] == CHAR_MINUS) - { - negated = TRUE; - ptr++; - } - else negated = FALSE; - - /* The integer range is limited by the machine's int representation. */ - s = 0; - overflow = FALSE; - while (IS_DIGIT(ptr[1])) - { - if (s > INT_MAX / 10 - 1) /* Integer overflow */ - { - overflow = TRUE; - break; - } - s = s * 10 + (int)(*(++ptr) - CHAR_0); - } - if (overflow) /* Integer overflow */ - { - while (IS_DIGIT(ptr[1])) ptr++; - *errorcodeptr = ERR61; - break; - } - - if (braced && *(++ptr) != CHAR_RIGHT_CURLY_BRACKET) - { - *errorcodeptr = ERR57; - break; - } - - if (s == 0) - { - *errorcodeptr = ERR58; - break; - } - - if (negated) - { - if (s > cb->bracount) - { - *errorcodeptr = ERR15; - break; - } - s = cb->bracount - (s - 1); - } - - escape = -s; - break; - - /* The handling of escape sequences consisting of a string of digits - starting with one that is not zero is not straightforward. Perl has changed - over the years. 
Nowadays \g{} for backreferences and \o{} for octal are - recommended to avoid the ambiguities in the old syntax. - - Outside a character class, the digits are read as a decimal number. If the - number is less than 10, or if there are that many previous extracting left - brackets, it is a back reference. Otherwise, up to three octal digits are - read to form an escaped character code. Thus \123 is likely to be octal 123 - (cf \0123, which is octal 012 followed by the literal 3). If the octal - value is greater than 377, the least significant 8 bits are taken. - - Inside a character class, \ followed by a digit is always either a literal - 8 or 9 or an octal number. */ - - case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5: - case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9: - - if (!isclass) - { - oldptr = ptr; - /* The integer range is limited by the machine's int representation. */ - s = (int)(c - CHAR_0); - overflow = FALSE; - while (IS_DIGIT(ptr[1])) - { - if (s > INT_MAX / 10 - 1) /* Integer overflow */ - { - overflow = TRUE; - break; - } - s = s * 10 + (int)(*(++ptr) - CHAR_0); - } - if (overflow) /* Integer overflow */ - { - while (IS_DIGIT(ptr[1])) ptr++; - *errorcodeptr = ERR61; - break; - } - - /* \1 to \9 are always back references. \8x and \9x are too, unless there - are an awful lot of previous captures; \1x to \7x are octal escapes if - there are not that many previous captures. */ - - if (s < 10 || *oldptr >= CHAR_8 || s <= cb->bracount) - { - escape = -s; /* Indicates a back reference */ - break; - } - ptr = oldptr; /* Put the pointer back and fall through */ - } - - /* Handle a digit following \ when the number is not a back reference, or - we are within a character class. If the first digit is 8 or 9, Perl used to - generate a binary zero byte and then treat the digit as a following - literal. At least by Perl 5.18 this changed so as not to insert the binary - zero. 
*/ - - if ((c = *ptr) >= CHAR_8) break; - - /* Fall through with a digit less than 8 */ - - /* \0 always starts an octal number, but we may drop through to here with a - larger first octal digit. The original code used just to take the least - significant 8 bits of octal numbers (I think this is what early Perls used - to do). Nowadays we allow for larger numbers in UTF-8 mode and 16-bit mode, - but no more than 3 octal digits. */ - - case CHAR_0: - c -= CHAR_0; - while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7) - c = c * 8 + *(++ptr) - CHAR_0; -#if PCRE2_CODE_UNIT_WIDTH == 8 - if (!utf && c > 0xff) *errorcodeptr = ERR51; -#endif - break; - - /* \o is a relatively new Perl feature, supporting a more general way of - specifying character codes in octal. The only supported form is \o{ddd}. */ - - case CHAR_o: - if (ptr[1] != CHAR_LEFT_CURLY_BRACKET) *errorcodeptr = ERR55; else - if (ptr[2] == CHAR_RIGHT_CURLY_BRACKET) *errorcodeptr = ERR78; else - { - ptr += 2; - c = 0; - overflow = FALSE; - while (*ptr >= CHAR_0 && *ptr <= CHAR_7) - { - cc = *ptr++; - if (c == 0 && cc == CHAR_0) continue; /* Leading zeroes */ -#if PCRE2_CODE_UNIT_WIDTH == 32 - if (c >= 0x20000000l) { overflow = TRUE; break; } -#endif - c = (c << 3) + cc - CHAR_0 ; -#if PCRE2_CODE_UNIT_WIDTH == 8 - if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; } -#elif PCRE2_CODE_UNIT_WIDTH == 16 - if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; } -#elif PCRE2_CODE_UNIT_WIDTH == 32 - if (utf && c > 0x10ffffU) { overflow = TRUE; break; } -#endif - } - if (overflow) - { - while (*ptr >= CHAR_0 && *ptr <= CHAR_7) ptr++; - *errorcodeptr = ERR34; - } - else if (*ptr == CHAR_RIGHT_CURLY_BRACKET) - { - if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73; - } - else *errorcodeptr = ERR64; - } - break; - - /* \x is complicated. When PCRE2_ALT_BSUX is set, \x must be followed by - two hexadecimal digits. Otherwise it is a lowercase x letter. 
*/ - - case CHAR_x: - if ((options & PCRE2_ALT_BSUX) != 0) - { - uint32_t xc; - if ((cc = XDIGIT(ptr[1])) == 0xff) break; /* Not a hex digit */ - if ((xc = XDIGIT(ptr[2])) == 0xff) break; /* Not a hex digit */ - c = (cc << 4) | xc; - ptr += 2; - } /* End PCRE2_ALT_BSUX handling */ - - /* Handle \x in Perl's style. \x{ddd} is a character number which can be - greater than 0xff in UTF-8 or non-8bit mode, but only if the ddd are hex - digits. If not, { used to be treated as a data character. However, Perl - seems to read hex digits up to the first non-such, and ignore the rest, so - that, for example \x{zz} matches a binary zero. This seems crazy, so PCRE - now gives an error. */ - - else - { - if (ptr[1] == CHAR_LEFT_CURLY_BRACKET) - { - ptr += 2; - if (*ptr == CHAR_RIGHT_CURLY_BRACKET) - { - *errorcodeptr = ERR78; - break; - } - c = 0; - overflow = FALSE; - - while ((cc = XDIGIT(*ptr)) != 0xff) - { - ptr++; - if (c == 0 && cc == 0) continue; /* Leading zeroes */ -#if PCRE2_CODE_UNIT_WIDTH == 32 - if (c >= 0x10000000l) { overflow = TRUE; break; } -#endif - c = (c << 4) | cc; - if ((utf && c > 0x10ffffU) || (!utf && c > MAX_NON_UTF_CHAR)) - { - overflow = TRUE; - break; - } - } - - if (overflow) - { - while (XDIGIT(*ptr) != 0xff) ptr++; - *errorcodeptr = ERR34; - } - else if (*ptr == CHAR_RIGHT_CURLY_BRACKET) - { - if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73; - } - - /* If the sequence of hex digits does not end with '}', give an error. - We used just to recognize this construct and fall through to the normal - \x handling, but nowadays Perl gives an error, which seems much more - sensible, so we do too. 
*/ - - else *errorcodeptr = ERR67; - } /* End of \x{} processing */ - - /* Read a single-byte hex-defined char (up to two hex digits after \x) */ - - else - { - c = 0; - if ((cc = XDIGIT(ptr[1])) == 0xff) break; /* Not a hex digit */ - ptr++; - c = cc; - if ((cc = XDIGIT(ptr[1])) == 0xff) break; /* Not a hex digit */ - ptr++; - c = (c << 4) | cc; - } /* End of \xdd handling */ - } /* End of Perl-style \x handling */ - break; - - /* The handling of \c is different in ASCII and EBCDIC environments. In an - ASCII (or Unicode) environment, an error is given if the character - following \c is not a printable ASCII character. Otherwise, the following - character is upper-cased if it is a letter, and after that the 0x40 bit is - flipped. The result is the value of the escape. - - In an EBCDIC environment the handling of \c is compatible with the - specification in the perlebcdic document. The following character must be - a letter or one of small number of special characters. These provide a - means of defining the character values 0-31. - - For testing the EBCDIC handling of \c in an ASCII environment, recognize - the EBCDIC value of 'c' explicitly. */ - -#if defined EBCDIC && 'a' != 0x81 - case 0x83: -#else - case CHAR_c: -#endif - - c = *(++ptr); - if (c >= CHAR_a && c <= CHAR_z) c += ESCAPES_UPPER_CASE; - if (c == CHAR_NULL && ptr >= cb->end_pattern) - { - *errorcodeptr = ERR2; - break; - } - - /* Handle \c in an ASCII/Unicode environment. */ - -#ifndef EBCDIC /* ASCII/UTF-8 coding */ - if (c < 32 || c > 126) /* Excludes all non-printable ASCII */ - { - *errorcodeptr = ERR68; - break; - } - c ^= 0x40; - - /* Handle \c in an EBCDIC environment. The special case \c? is converted to - 255 (0xff) or 95 (0x5f) if other character suggest we are using th POSIX-BC - encoding. (This is the way Perl indicates that it handles \c?.) The other - valid sequences correspond to a list of specific characters. */ - -#else - if (c == CHAR_QUESTION_MARK) - c = ('\\' == 188 && '`' == 74)? 
0x5f : 0xff; - else - { - for (i = 0; i < 32; i++) - { - if (c == ebcdic_escape_c[i]) break; - } - if (i < 32) c = i; else *errorcodeptr = ERR68; - } -#endif /* EBCDIC */ - - break; - - /* Any other alphanumeric following \ is an error. Perl gives an error only - if in warning mode, but PCRE doesn't have a warning mode. */ - - default: - *errorcodeptr = ERR3; - break; - } + /* Regardless of start, c will always be <= 255. */ + SETBIT(classbits, c); + n8++; } -/* Perl supports \N{name} for character names, as well as plain \N for "not -newline". PCRE does not support \N{name}. However, it does support -quantification such as \N{2,3}. */ +#ifdef SUPPORT_WIDE_CHARS +if (start <= 0xff) start = 0xff + 1; -if (escape == ESC_N && ptr[1] == CHAR_LEFT_CURLY_BRACKET && - !is_counted_repeat(ptr+2)) - *errorcodeptr = ERR37; +if (end >= start) + { + PCRE2_UCHAR *uchardata = *uchardptr; -/* If PCRE2_UCP is set, we change the values for \d etc. */ +#ifdef SUPPORT_UNICODE + if ((options & PCRE2_UTF) != 0) + { + if (start < end) + { + *uchardata++ = XCL_RANGE; + uchardata += PRIV(ord2utf)(start, uchardata); + uchardata += PRIV(ord2utf)(end, uchardata); + } + else if (start == end) + { + *uchardata++ = XCL_SINGLE; + uchardata += PRIV(ord2utf)(start, uchardata); + } + } + else +#endif /* SUPPORT_UNICODE */ -if ((options & PCRE2_UCP) != 0 && escape >= ESC_D && escape <= ESC_w) - escape += (ESC_DU - ESC_D); + /* Without UTF support, character values are constrained by the bit length, + and can only be > 256 for 16-bit and 32-bit libraries. */ -/* Set the pointer to the final character before returning. 
*/ +#if PCRE2_CODE_UNIT_WIDTH == 8 + {} +#else + if (start < end) + { + *uchardata++ = XCL_RANGE; + *uchardata++ = start; + *uchardata++ = end; + } + else if (start == end) + { + *uchardata++ = XCL_SINGLE; + *uchardata++ = start; + } +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + *uchardptr = uchardata; /* Updata extra data pointer */ + } +#else /* SUPPORT_WIDE_CHARS */ + (void)uchardptr; /* Avoid compiler warning */ +#endif /* SUPPORT_WIDE_CHARS */ -*ptrptr = ptr; -*chptr = c; -return escape; +return n8; /* Number of 8-bit characters */ } #ifdef SUPPORT_UNICODE /************************************************* -* Handle \P and \p * +* Add a list of characters to a class (internal) * *************************************************/ -/* This function is called after \P or \p has been encountered, provided that -PCRE2 is compiled with support for UTF and Unicode properties. On entry, the -contents of ptrptr are pointing at the P or p. On exit, it is left pointing at -the final code unit of the escape sequence. +/* This function is used for adding a list of case-equivalent characters to a +class when in UTF mode. This function is called only from within +add_to_class_internal(), with which it is mutually recursive. Arguments: - ptrptr the pattern position pointer - negptr a boolean that is set TRUE for negation else FALSE - ptypeptr an unsigned int that is set to the type value - pdataptr an unsigned int that is set to the detailed property value - errorcodeptr the error code variable - cb the compile data + classbits the bit map for characters < 256 + uchardptr points to the pointer for extra data + options the options word + cb contains pointers to tables etc. 
+ p points to row of 32-bit values, terminated by NOTACHAR + except character to omit; this is used when adding lists of + case-equivalent characters to avoid including the one we + already know about -Returns: TRUE if the type value was found, or FALSE for an invalid type +Returns: the number of < 256 characters added + the pointer to extra data is updated */ -static BOOL -get_ucp(PCRE2_SPTR *ptrptr, BOOL *negptr, unsigned int *ptypeptr, - unsigned int *pdataptr, int *errorcodeptr, compile_block *cb) +static unsigned int +add_list_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr, + uint32_t options, compile_block *cb, const uint32_t *p, unsigned int except) { -register PCRE2_UCHAR c; -int i, bot, top; -PCRE2_SPTR ptr = *ptrptr; -PCRE2_UCHAR name[32]; - -*negptr = FALSE; -c = *(++ptr); - -/* \P or \p can be followed by a name in {}, optionally preceded by ^ for -negation. */ - -if (c == CHAR_LEFT_CURLY_BRACKET) +unsigned int n8 = 0; +while (p[0] < NOTACHAR) { - if (ptr[1] == CHAR_CIRCUMFLEX_ACCENT) + unsigned int n = 0; + if (p[0] != except) { - *negptr = TRUE; - ptr++; + while(p[n+1] == p[0] + n + 1) n++; + n8 += add_to_class_internal(classbits, uchardptr, options, cb, p[0], p[n]); } - for (i = 0; i < (int)(sizeof(name) / sizeof(PCRE2_UCHAR)) - 1; i++) - { - c = *(++ptr); - if (c == CHAR_NULL) goto ERROR_RETURN; - if (c == CHAR_RIGHT_CURLY_BRACKET) break; - name[i] = c; - } - if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN; - name[i] = 0; + p += n + 1; } - -/* Otherwise there is just one following character, which must be an ASCII -letter. */ - -else if (MAX_255(c) && (cb->ctypes[c] & ctype_letter) != 0) - { - name[0] = c; - name[1] = 0; - } -else goto ERROR_RETURN; - -*ptrptr = ptr; - -/* Search for a recognized property name using binary chop. 
*/ - -bot = 0; -top = PRIV(utt_size); - -while (bot < top) - { - int r; - i = (bot + top) >> 1; - r = PRIV(strcmp_c8)(name, PRIV(utt_names) + PRIV(utt)[i].name_offset); - if (r == 0) - { - *ptypeptr = PRIV(utt)[i].type; - *pdataptr = PRIV(utt)[i].value; - return TRUE; - } - if (r > 0) bot = i + 1; else top = i; - } -*errorcodeptr = ERR47; /* Unrecognized name */ -return FALSE; - -ERROR_RETURN: /* Malformed \P or \p */ -*errorcodeptr = ERR46; -*ptrptr = ptr; -return FALSE; +return n8; } #endif /************************************************* -* Read repeat counts * +* External entry point for add range to class * *************************************************/ -/* Read an item of the form {n,m} and return the values. This is called only -after is_counted_repeat() has confirmed that a repeat-count quantifier exists, -so the syntax is guaranteed to be correct, but we need to check the values. +/* This function sets the overall range so that the internal functions can try +to avoid duplication when handling case-independence. 
Arguments: - p pointer to first char after '{' - minp pointer to int for min - maxp pointer to int for max - returned as -1 if no max - errorcodeptr points to error code variable + classbits the bit map for characters < 256 + uchardptr points to the pointer for extra data + options the options word + cb compile data + start start of range character + end end of range character -Returns: pointer to '}' on success; - current ptr on error, with errorcodeptr set non-zero +Returns: the number of < 256 characters added + the pointer to extra data is updated */ -static PCRE2_SPTR -read_repeat_counts(PCRE2_SPTR p, int *minp, int *maxp, int *errorcodeptr) +static unsigned int +add_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options, + compile_block *cb, uint32_t start, uint32_t end) { -int min = 0; -int max = -1; +cb->class_range_start = start; +cb->class_range_end = end; +return add_to_class_internal(classbits, uchardptr, options, cb, start, end); +} -while (IS_DIGIT(*p)) + +/************************************************* +* External entry point for add list to class * +*************************************************/ + +/* This function is used for adding a list of horizontal or vertical whitespace +characters to a class. The list must be in order so that ranges of characters +can be detected and handled appropriately. This function sets the overall range +so that the internal functions can try to avoid duplication when handling +case-independence. + +Arguments: + classbits the bit map for characters < 256 + uchardptr points to the pointer for extra data + options the options word + cb contains pointers to tables etc. 
+ p points to row of 32-bit values, terminated by NOTACHAR + except character to omit; this is used when adding lists of + case-equivalent characters to avoid including the one we + already know about + +Returns: the number of < 256 characters added + the pointer to extra data is updated +*/ + +static unsigned int +add_list_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options, + compile_block *cb, const uint32_t *p, unsigned int except) +{ +unsigned int n8 = 0; +while (p[0] < NOTACHAR) { - min = min * 10 + (int)(*p++ - CHAR_0); - if (min > 65535) + unsigned int n = 0; + if (p[0] != except) { - *errorcodeptr = ERR5; - return p; + while(p[n+1] == p[0] + n + 1) n++; + cb->class_range_start = p[0]; + cb->class_range_end = p[n]; + n8 += add_to_class_internal(classbits, uchardptr, options, cb, p[0], p[n]); } + p += n + 1; } - -if (*p == CHAR_RIGHT_CURLY_BRACKET) max = min; else - { - if (*(++p) != CHAR_RIGHT_CURLY_BRACKET) - { - max = 0; - while(IS_DIGIT(*p)) - { - max = max * 10 + (int)(*p++ - CHAR_0); - if (max > 65535) - { - *errorcodeptr = ERR5; - return p; - } - } - if (max < min) - { - *errorcodeptr = ERR4; - return p; - } - } - } - -*minp = min; -*maxp = max; -return p; +return n8; } /************************************************* -* Scan compiled regex for specific bracket * +* Add characters not in a list to a class * *************************************************/ -/* This function scans through a compiled pattern until it finds a -capturing bracket with the given number, or, if the number is negative, an -instance of OP_REVERSE for a lookbehind. The function is global in the C sense -so that it can be called from pcre2_study() when finding the minimum matching -length. +/* This function is used for adding the complement of a list of horizontal or +vertical whitespace to a class. The list must be in order. 
Arguments: - code points to start of expression - utf TRUE in UTF mode - number the required bracket number or negative to find a lookbehind + classbits the bit map for characters < 256 + uchardptr points to the pointer for extra data + options the options word + cb contains pointers to tables etc. + p points to row of 32-bit values, terminated by NOTACHAR -Returns: pointer to the opcode for the bracket, or NULL if not found +Returns: the number of < 256 characters added + the pointer to extra data is updated */ -PCRE2_SPTR -PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number) +static unsigned int +add_not_list_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, + uint32_t options, compile_block *cb, const uint32_t *p) { +BOOL utf = (options & PCRE2_UTF) != 0; +unsigned int n8 = 0; +if (p[0] > 0) + n8 += add_to_class(classbits, uchardptr, options, cb, 0, p[0] - 1); +while (p[0] < NOTACHAR) + { + while (p[1] == p[0] + 1) p++; + n8 += add_to_class(classbits, uchardptr, options, cb, p[0] + 1, + (p[1] == NOTACHAR) ? (utf ? 0x10ffffu : 0xffffffffu) : p[1] - 1); + p++; + } +return n8; +} + + + +/************************************************* +* Find details of duplicate group names * +*************************************************/ + +/* This is called from compile_branch() when it needs to know the index and +count of duplicates in the names table when processing named backreferences, +either directly, or as conditions. 
+ +Arguments: + name points to the name + length the length of the name + indexptr where to put the index + countptr where to put the count of duplicates + errorcodeptr where to put an error code + cb the compile block + +Returns: TRUE if OK, FALSE if not, error code set +*/ + +static BOOL +find_dupname_details(PCRE2_SPTR name, uint32_t length, int *indexptr, + int *countptr, int *errorcodeptr, compile_block *cb) +{ +uint32_t i, groupnumber; +int count; +PCRE2_UCHAR *slot = cb->name_table; + +/* Find the first entry in the table */ + +for (i = 0; i < cb->names_found; i++) + { + if (PRIV(strncmp)(name, slot+IMM2_SIZE, length) == 0 && + slot[IMM2_SIZE+length] == 0) break; + slot += cb->name_entry_size; + } + +/* This should not occur, because this function is called only when we know we +have duplicate names. Give an internal error. */ + +if (i >= cb->names_found) + { + *errorcodeptr = ERR53; + cb->erroroffset = name - cb->start_pattern; + return FALSE; + } + +/* Record the index and then see how many duplicates there are, updating the +backref map and maximum back reference as we do. */ + +*indexptr = i; +count = 0; + for (;;) { - register PCRE2_UCHAR c = *code; + count++; + groupnumber = GET2(slot,0); + cb->backref_map |= (groupnumber < 32)? (1u << groupnumber) : 1; + if (groupnumber > cb->top_backref) cb->top_backref = groupnumber; + if (++i >= cb->names_found) break; + slot += cb->name_entry_size; + if (PRIV(strncmp)(name, slot+IMM2_SIZE, length) != 0 || + (slot+IMM2_SIZE)[length] != 0) break; + } - if (c == OP_END) return NULL; +*countptr = count; +return TRUE; +} - /* XCLASS is used for classes that cannot be represented just by a bit map. - This includes negated single high-valued characters. CALLOUT_STR is used for - callouts with string arguments. In both cases the length in the table is - zero; the actual length is stored in the compiled code. 
*/ - if (c == OP_XCLASS) code += GET(code, 1); - else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE); - /* Handle recursion */ +/************************************************* +* Compile one branch * +*************************************************/ - else if (c == OP_REVERSE) +/* Scan the parsed pattern, compiling it into the a vector of PCRE2_UCHAR. If +the options are changed during the branch, the pointer is used to change the +external options bits. This function is used during the pre-compile phase when +we are trying to find out the amount of memory needed, as well as during the +real compile phase. The value of lengthptr distinguishes the two phases. + +Arguments: + optionsptr pointer to the option bits + codeptr points to the pointer to the current code point + pptrptr points to the current parsed pattern pointer + errorcodeptr points to error code variable + firstcuptr place to put the first required code unit + firstcuflagsptr place to put the first code unit flags, or a negative number + reqcuptr place to put the last required code unit + reqcuflagsptr place to put the last required code unit flags, or a negative number + bcptr points to current branch chain + cb contains pointers to tables etc. 
+ lengthptr NULL during the real compile phase + points to length accumulator during pre-compile phase + +Returns: 0 There's been an error, *errorcodeptr is non-zero + +1 Success, this branch must match at least one character + -1 Success, this branch may match an empty string +*/ + +static int +compile_branch(uint32_t *optionsptr, PCRE2_UCHAR **codeptr, uint32_t **pptrptr, + int *errorcodeptr, uint32_t *firstcuptr, int32_t *firstcuflagsptr, + uint32_t *reqcuptr, int32_t *reqcuflagsptr, branch_chain *bcptr, + compile_block *cb, PCRE2_SIZE *lengthptr) +{ +int bravalue = 0; +int okreturn = -1; +int group_return = 0; +uint32_t repeat_min = 0, repeat_max = 0; /* To please picky compilers */ +uint32_t greedy_default, greedy_non_default; +uint32_t repeat_type, op_type; +uint32_t options = *optionsptr; /* May change dynamically */ +uint32_t firstcu, reqcu; +uint32_t zeroreqcu, zerofirstcu; +uint32_t escape; +uint32_t *pptr = *pptrptr; +uint32_t meta, meta_arg; +int32_t firstcuflags, reqcuflags; +int32_t zeroreqcuflags, zerofirstcuflags; +int32_t req_caseopt, reqvary, tempreqvary; +PCRE2_SIZE offset = 0; +PCRE2_SIZE length_prevgroup = 0; +PCRE2_UCHAR *code = *codeptr; +PCRE2_UCHAR *last_code = code; +PCRE2_UCHAR *orig_code = code; +PCRE2_UCHAR *tempcode; +PCRE2_UCHAR *previous = NULL; +PCRE2_UCHAR op_previous; +BOOL groupsetfirstcu = FALSE; +BOOL matched_char = FALSE; +BOOL previous_matched_char = FALSE; +const uint8_t *cbits = cb->cbits; +uint8_t classbits[32]; + +/* We can fish out the UTF setting once and for all into a BOOL, but we must +not do this for other options (e.g. PCRE2_EXTENDED) because they may change +dynamically as we process the pattern. */ + +#ifdef SUPPORT_UNICODE +BOOL utf = (options & PCRE2_UTF) != 0; +#else /* No UTF support */ +BOOL utf = FALSE; +#endif + +/* Helper variables for OP_XCLASS opcode (for characters > 255). 
We define +class_uchardata always so that it can be passed to add_to_class() always, +though it will not be used in non-UTF 8-bit cases. This avoids having to supply +alternative calls for the different cases. */ + +PCRE2_UCHAR *class_uchardata; +#ifdef SUPPORT_WIDE_CHARS +BOOL xclass; +PCRE2_UCHAR *class_uchardata_base; +#endif + +/* Set up the default and non-default settings for greediness */ + +greedy_default = ((options & PCRE2_UNGREEDY) != 0); +greedy_non_default = greedy_default ^ 1; + +/* Initialize no first unit, no required unit. REQ_UNSET means "no char +matching encountered yet". It gets changed to REQ_NONE if we hit something that +matches a non-fixed first unit; reqcu just remains unset if we never find one. + +When we hit a repeat whose minimum is zero, we may have to adjust these values +to take the zero repeat into account. This is implemented by setting them to +zerofirstcu and zeroreqcu when such a repeat is encountered. The individual +item types that can be repeated set these backoff variables appropriately. */ + +firstcu = reqcu = zerofirstcu = zeroreqcu = 0; +firstcuflags = reqcuflags = zerofirstcuflags = zeroreqcuflags = REQ_UNSET; + +/* The variable req_caseopt contains either the REQ_CASELESS value or zero, +according to the current setting of the caseless flag. The REQ_CASELESS value +leaves the lower 28 bit empty. It is added into the firstcu or reqcu variables +to record the case status of the value. This is used only for ASCII characters. +*/ + +req_caseopt = ((options & PCRE2_CASELESS) != 0)? 
REQ_CASELESS:0; + +/* Switch on next META item until the end of the branch */ + +for (;; pptr++) + { +#ifdef SUPPORT_WIDE_CHARS + BOOL xclass_has_prop; +#endif + BOOL negate_class; + BOOL should_flip_negation; + BOOL match_all_or_no_wide_chars; + BOOL possessive_quantifier; + BOOL note_group_empty; + int class_has_8bitchar; + int i; + uint32_t mclength; + uint32_t templastcapture; + uint32_t skipunits; + uint32_t subreqcu, subfirstcu; + uint32_t groupnumber; + uint32_t verbarglen, verbculen; + int32_t subreqcuflags, subfirstcuflags; /* Must be signed */ + open_capitem *oc; + PCRE2_UCHAR mcbuffer[8]; + + /* Get next META item in the pattern and its potential argument. */ + + meta = META_CODE(*pptr); + meta_arg = META_DATA(*pptr); + + /* If we are in the pre-compile phase, accumulate the length used for the + previous cycle of this loop, unless the next item is a quantifier. */ + + if (lengthptr != NULL) { - if (number < 0) return (PCRE2_UCHAR *)code; - code += PRIV(OP_lengths)[c]; - } - - /* Handle capturing bracket */ - - else if (c == OP_CBRA || c == OP_SCBRA || - c == OP_CBRAPOS || c == OP_SCBRAPOS) - { - int n = (int)GET2(code, 1+LINK_SIZE); - if (n == number) return (PCRE2_UCHAR *)code; - code += PRIV(OP_lengths)[c]; - } - - /* Otherwise, we can get the item's length from the table, except that for - repeated character types, we have to test for \p and \P, which have an extra - two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we - must add in its length. */ - - else - { - switch(c) + if (code > cb->start_workspace + cb->workspace_size - + WORK_SIZE_SAFETY_MARGIN) /* Check for overrun */ { - case OP_TYPESTAR: - case OP_TYPEMINSTAR: - case OP_TYPEPLUS: - case OP_TYPEMINPLUS: - case OP_TYPEQUERY: - case OP_TYPEMINQUERY: - case OP_TYPEPOSSTAR: - case OP_TYPEPOSPLUS: - case OP_TYPEPOSQUERY: - if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; + *errorcodeptr = (code >= cb->start_workspace + cb->workspace_size)? 
+ ERR52 : ERR86; + return 0; + } + + /* There is at least one situation where code goes backwards: this is the + case of a zero quantifier after a class (e.g. [ab]{0}). When the quantifier + is processed, the whole class is eliminated. However, it is created first, + so we have to allow memory for it. Therefore, don't ever reduce the length + at this point. */ + + if (code < last_code) code = last_code; + + /* If the next thing is not a quantifier, we add the length of the previous + item into the total, and reset the code pointer to the start of the + workspace. Otherwise leave the previous item available to be quantified. */ + + if (meta < META_ASTERISK || meta > META_MINMAX_QUERY) + { + if (OFLOW_MAX - *lengthptr < (PCRE2_SIZE)(code - orig_code)) + { + *errorcodeptr = ERR20; /* Integer overflow */ + return 0; + } + *lengthptr += (PCRE2_SIZE)(code - orig_code); + if (*lengthptr > MAX_PATTERN_SIZE) + { + *errorcodeptr = ERR20; /* Pattern is too large */ + return 0; + } + code = orig_code; + } + + /* Remember where this code item starts so we can catch the "backwards" + case above next time round. */ + + last_code = code; + } + + /* Process the next parsed pattern item. If it is not a quantifier, remember + where it starts so that it can be quantified when a quantifier follows. + Checking for the legality of quantifiers happens in parse_regex(), except for + a quantifier after an assertion that is a condition. 
*/ + + if (meta < META_ASTERISK || meta > META_MINMAX_QUERY) + { + previous = code; + if (matched_char) okreturn = 1; + } + + previous_matched_char = matched_char; + matched_char = FALSE; + note_group_empty = FALSE; + skipunits = 0; /* Default value for most subgroups */ + + switch(meta) + { + /* ===================================================================*/ + /* The branch terminates at pattern end or | or ) */ + + case META_END: + case META_ALT: + case META_KET: + *firstcuptr = firstcu; + *firstcuflagsptr = firstcuflags; + *reqcuptr = reqcu; + *reqcuflagsptr = reqcuflags; + *codeptr = code; + *pptrptr = pptr; + return okreturn; + + + /* ===================================================================*/ + /* Handle single-character metacharacters. In multiline mode, ^ disables + the setting of any following char as a first character. */ + + case META_CIRCUMFLEX: + if ((options & PCRE2_MULTILINE) != 0) + { + if (firstcuflags == REQ_UNSET) + zerofirstcuflags = firstcuflags = REQ_NONE; + *code++ = OP_CIRCM; + } + else *code++ = OP_CIRC; + break; + + case META_DOLLAR: + *code++ = ((options & PCRE2_MULTILINE) != 0)? OP_DOLLM : OP_DOLL; + break; + + /* There can never be a first char if '.' is first, whatever happens about + repeats. The value of reqcu doesn't change either. */ + + case META_DOT: + matched_char = TRUE; + if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; + zerofirstcu = firstcu; + zerofirstcuflags = firstcuflags; + zeroreqcu = reqcu; + zeroreqcuflags = reqcuflags; + *code++ = ((options & PCRE2_DOTALL) != 0)? OP_ALLANY: OP_ANY; + break; + + + /* ===================================================================*/ + /* Empty character classes are allowed if PCRE2_ALLOW_EMPTY_CLASS is set. + Otherwise, an initial ']' is taken as a data character. When empty classes + are allowed, [] must always fail, so generate OP_FAIL, whereas [^] must + match any character, so generate OP_ALLANY. 
*/ + + case META_CLASS_EMPTY: + case META_CLASS_EMPTY_NOT: + matched_char = TRUE; + *code++ = (meta == META_CLASS_EMPTY_NOT)? OP_ALLANY : OP_FAIL; + if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; + zerofirstcu = firstcu; + zerofirstcuflags = firstcuflags; + break; + + + /* ===================================================================*/ + /* Non-empty character class. If the included characters are all < 256, we + build a 32-byte bitmap of the permitted characters, except in the special + case where there is only one such character. For negated classes, we build + the map as usual, then invert it at the end. However, we use a different + opcode so that data characters > 255 can be handled correctly. + + If the class contains characters outside the 0-255 range, a different + opcode is compiled. It may optionally have a bit map for characters < 256, + but those above are are explicitly listed afterwards. A flag code unit + tells whether the bitmap is present, and whether this is a negated class or + not. */ + + case META_CLASS_NOT: + case META_CLASS: + matched_char = TRUE; + negate_class = meta == META_CLASS_NOT; + + /* We can optimize the case of a single character in a class by generating + OP_CHAR or OP_CHARI if it's positive, or OP_NOT or OP_NOTI if it's + negative. In the negative case there can be no first char if this item is + first, whatever repeat count may follow. In the case of reqcu, save the + previous value for reinstating. */ + + /* NOTE: at present this optimization is not effective if the only + character in a class in 32-bit, non-UCP mode has its top bit set. */ + + if (pptr[1] < META_END && pptr[2] == META_CLASS_END) + { +#ifdef SUPPORT_UNICODE + uint32_t d; +#endif + uint32_t c = pptr[1]; + + pptr += 2; /* Move on to class end */ + if (meta == META_CLASS) /* A positive one-char class can be */ + { /* handled as a normal literal character. 
*/ + meta = c; /* Set up the character */ + goto NORMAL_CHAR_SET; + } + + /* Handle a negative one-character class */ + + zeroreqcu = reqcu; + zeroreqcuflags = reqcuflags; + if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; + zerofirstcu = firstcu; + zerofirstcuflags = firstcuflags; + + /* For caseless UTF mode, check whether this character has more than + one other case. If so, generate a special OP_NOTPROP item instead of + OP_NOTI. */ + +#ifdef SUPPORT_UNICODE + if (utf && (options & PCRE2_CASELESS) != 0 && + (d = UCD_CASESET(c)) != 0) + { + *code++ = OP_NOTPROP; + *code++ = PT_CLIST; + *code++ = d; + break; /* We are finished with this class */ + } +#endif + /* Char has only one other case, or UCP not available */ + + *code++ = ((options & PCRE2_CASELESS) != 0)? OP_NOTI: OP_NOT; + code += PUTCHAR(c, code); + break; /* We are finished with this class */ + } /* End of 1-char optimization */ + + /* Handle character classes that contain more than just one literal + character. */ + + /* If a non-extended class contains a negative special such as \S, we need + to flip the negation flag at the end, so that support for characters > 255 + works correctly (they are all included in the class). An extended class may + need to insert specific matching or non-matching code for wide characters. + */ + + should_flip_negation = match_all_or_no_wide_chars = FALSE; + + /* Extended class (xclass) will be used when characters > 255 + might match. */ + +#ifdef SUPPORT_WIDE_CHARS + xclass = FALSE; + class_uchardata = code + LINK_SIZE + 2; /* For XCLASS items */ + class_uchardata_base = class_uchardata; /* Save the start */ +#endif + + /* For optimization purposes, we track some properties of the class: + class_has_8bitchar will be non-zero if the class contains at least one + character with a code point less than 256; xclass_has_prop will be TRUE if + Unicode property checks are present in the class. 
*/ + + class_has_8bitchar = 0; +#ifdef SUPPORT_WIDE_CHARS + xclass_has_prop = FALSE; +#endif + + /* Initialize the 256-bit (32-byte) bit map to all zeros. We build the map + in a temporary bit of memory, in case the class contains fewer than two + 8-bit characters because in that case the compiled code doesn't use the bit + map. */ + + memset(classbits, 0, 32 * sizeof(uint8_t)); + + /* Process items until META_CLASS_END is reached. */ + + while ((meta = *(++pptr)) != META_CLASS_END) + { + /* Handle POSIX classes such as [:alpha:] etc. */ + + if (meta == META_POSIX || meta == META_POSIX_NEG) + { + BOOL local_negate = (meta == META_POSIX_NEG); + int posix_class = *(++pptr); + int taboffset, tabopt; + uint8_t pbits[32]; + + should_flip_negation = local_negate; /* Note negative special */ + + /* If matching is caseless, upper and lower are converted to alpha. + This relies on the fact that the class table starts with alpha, + lower, upper as the first 3 entries. */ + + if ((options & PCRE2_CASELESS) != 0 && posix_class <= 2) + posix_class = 0; + + /* When PCRE2_UCP is set, some of the POSIX classes are converted to + different escape sequences that use Unicode properties \p or \P. + Others that are not available via \p or \P have to generate + XCL_PROP/XCL_NOTPROP directly, which is done here. */ + +#ifdef SUPPORT_UNICODE + if ((options & PCRE2_UCP) != 0) switch(posix_class) + { + case PC_GRAPH: + case PC_PRINT: + case PC_PUNCT: + *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP; + *class_uchardata++ = (PCRE2_UCHAR) + ((posix_class == PC_GRAPH)? PT_PXGRAPH : + (posix_class == PC_PRINT)? PT_PXPRINT : PT_PXPUNCT); + *class_uchardata++ = 0; + xclass_has_prop = TRUE; + goto CONTINUE_CLASS; + + /* For the other POSIX classes (ascii, xdigit) we are going to + fall through to the non-UCP case and build a bit map for + characters with code points less than 256. 
However, if we are in + a negated POSIX class, characters with code points greater than + 255 must either all match or all not match, depending on whether + the whole class is not or is negated. For example, for + [[:^ascii:]... they must all match, whereas for [^[:^xdigit:]... + they must not. + + In the special case where there are no xclass items, this is + automatically handled by the use of OP_CLASS or OP_NCLASS, but an + explicit range is needed for OP_XCLASS. Setting a flag here + causes the range to be generated later when it is known that + OP_XCLASS is required. In the 8-bit library this is relevant only in + utf mode, since no wide characters can exist otherwise. */ + + default: +#if PCRE2_CODE_UNIT_WIDTH == 8 + if (utf) +#endif + match_all_or_no_wide_chars |= local_negate; + break; + } +#endif /* SUPPORT_UNICODE */ + + /* In the non-UCP case, or when UCP makes no difference, we build the + bit map for the POSIX class in a chunk of local store because we may + be adding and subtracting from it, and we don't want to subtract bits + that may be in the main map already. At the end we or the result into + the bit map that is being built. */ + + posix_class *= 3; + + /* Copy in the first table (always present) */ + + memcpy(pbits, cbits + posix_class_maps[posix_class], + 32 * sizeof(uint8_t)); + + /* If there is a second table, add or remove it as required. */ + + taboffset = posix_class_maps[posix_class + 1]; + tabopt = posix_class_maps[posix_class + 2]; + + if (taboffset >= 0) + { + if (tabopt >= 0) + for (i = 0; i < 32; i++) pbits[i] |= cbits[(int)i + taboffset]; + else + for (i = 0; i < 32; i++) pbits[i] &= ~cbits[(int)i + taboffset]; + } + + /* Now see if we need to remove any special characters. An option + value of 1 removes vertical space and 2 removes underscore. 
*/ + + if (tabopt < 0) tabopt = -tabopt; + if (tabopt == 1) pbits[1] &= ~0x3c; + else if (tabopt == 2) pbits[11] &= 0x7f; + + /* Add the POSIX table or its complement into the main table that is + being built and we are done. */ + + if (local_negate) + for (i = 0; i < 32; i++) classbits[i] |= ~pbits[i]; + else + for (i = 0; i < 32; i++) classbits[i] |= pbits[i]; + + /* Every class contains at least one < 256 character. */ + + class_has_8bitchar = 1; + goto CONTINUE_CLASS; /* End of POSIX handling */ + } + + /* Other than POSIX classes, the only items we should encounter are + \d-type escapes and literal characters (possibly as ranges). */ + + if (meta == META_BIGVALUE) + { + meta = *(++pptr); + goto CLASS_LITERAL; + } + + /* Any other non-literal must be an escape */ + + if (meta >= META_END) + { + if (META_CODE(meta) != META_ESCAPE) + { +#ifdef DEBUG_SHOW_PARSED + fprintf(stderr, "** Unrecognized parsed pattern item 0x%.8x " + "in character class\n", meta); +#endif + *errorcodeptr = ERR89; /* Internal error - unrecognized. */ + return 0; + } + escape = META_DATA(meta); + + /* Every class contains at least one < 256 character. */ + + class_has_8bitchar++; + + switch(escape) + { + case ESC_d: + for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_digit]; + break; + + case ESC_D: + should_flip_negation = TRUE; + for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_digit]; + break; + + case ESC_w: + for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_word]; + break; + + case ESC_W: + should_flip_negation = TRUE; + for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_word]; + break; + + /* Perl 5.004 onwards omitted VT from \s, but restored it at Perl + 5.18. Before PCRE 8.34, we had to preserve the VT bit if it was + previously set by something earlier in the character class. + Luckily, the value of CHAR_VT is 0x0b in both ASCII and EBCDIC, so + we could just adjust the appropriate bit. From PCRE 8.34 we no + longer treat \s and \S specially. 
*/ + + case ESC_s: + for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_space]; + break; + + case ESC_S: + should_flip_negation = TRUE; + for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_space]; + break; + + /* When adding the horizontal or vertical space lists to a class, or + their complements, disable PCRE2_CASELESS, because it justs wastes + time, and in the "not-x" UTF cases can create unwanted duplicates in + the XCLASS list (provoked by characters that have more than one other + case and by both cases being in the same "not-x" sublist). */ + + case ESC_h: + (void)add_list_to_class(classbits, &class_uchardata, + options & ~PCRE2_CASELESS, cb, PRIV(hspace_list), NOTACHAR); + break; + + case ESC_H: + (void)add_not_list_to_class(classbits, &class_uchardata, + options & ~PCRE2_CASELESS, cb, PRIV(hspace_list)); + break; + + case ESC_v: + (void)add_list_to_class(classbits, &class_uchardata, + options & ~PCRE2_CASELESS, cb, PRIV(vspace_list), NOTACHAR); + break; + + case ESC_V: + (void)add_not_list_to_class(classbits, &class_uchardata, + options & ~PCRE2_CASELESS, cb, PRIV(vspace_list)); + break; + + case ESC_p: + case ESC_P: + { + uint32_t ptype = *(++pptr) >> 16; + uint32_t pdata = *pptr & 0xffff; + *class_uchardata++ = (escape == ESC_p)? XCL_PROP : XCL_NOTPROP; + *class_uchardata++ = ptype; + *class_uchardata++ = pdata; +#ifdef SUPPORT_WIDE_CHARS + xclass_has_prop = TRUE; +#endif + class_has_8bitchar--; /* Undo! */ + } + break; + } + + goto CONTINUE_CLASS; + } /* End handling \d-type escapes */ + + /* A literal character may be followed by a range meta. At parse time + there are checks for out-of-order characters, for ranges where the two + characters are equal, and for hyphens that cannot indicate a range. At + this point, therefore, no checking is needed. 
*/ + + else + { + uint32_t c, d; + + CLASS_LITERAL: + c = d = meta; + + /* Remember if \r or \n were explicitly used */ + + if (c == CHAR_CR || c == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF; + + /* Process a character range */ + + if (pptr[1] == META_RANGE_LITERAL || pptr[1] == META_RANGE_ESCAPED) + { +#ifdef EBCDIC + BOOL range_is_literal = (pptr[1] == META_RANGE_LITERAL); +#endif + pptr += 2; + d = *pptr; + if (d == META_BIGVALUE) d = *(++pptr); + + /* Remember an explicit \r or \n, and add the range to the class. */ + + if (d == CHAR_CR || d == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF; + + /* In an EBCDIC environment, Perl treats alphabetic ranges specially + because there are holes in the encoding, and simply using the range + A-Z (for example) would include the characters in the holes. This + applies only to literal ranges; [\xC1-\xE9] is different to [A-Z]. */ + +#ifdef EBCDIC + if (range_is_literal && + (cb->ctypes[c] & ctype_letter) != 0 && + (cb->ctypes[d] & ctype_letter) != 0 && + (d <= CHAR_z) == (d <= CHAR_z)) + { + uint32_t uc = (d <= CHAR_z)? 0 : 64; + uint32_t C = d - uc; + uint32_t D = d - uc; + + if (C <= CHAR_i) + { + class_has_8bitchar += + add_to_class(classbits, &class_uchardata, options, cb, C + uc, + ((D < CHAR_i)? D : CHAR_i) + uc); + C = CHAR_j; + } + + if (C <= D && C <= CHAR_r) + { + class_has_8bitchar += + add_to_class(classbits, &class_uchardata, options, cb, C + uc, + ((D < CHAR_r)? D : CHAR_r) + uc); + C = CHAR_s; + } + + if (C <= D) + { + class_has_8bitchar += + add_to_class(classbits, &class_uchardata, options, cb, C + uc, + D + uc); + } + } + else +#endif + /* Not an EBCDIC special range */ + + class_has_8bitchar += + add_to_class(classbits, &class_uchardata, options, cb, c, d); + goto CONTINUE_CLASS; /* Go get the next char in the class */ + } /* End of range handling */ + + + /* Handle a single character. 
*/ + + class_has_8bitchar += + add_to_class(classbits, &class_uchardata, options, cb, meta, meta); + } + + /* Continue to the next item in the class. */ + + CONTINUE_CLASS: + +#ifdef SUPPORT_WIDE_CHARS + /* If any wide characters or Unicode properties have been encountered, + set xclass = TRUE. Then, in the pre-compile phase, accumulate the length + of the extra data and reset the pointer. This is so that very large + classes that contain a zillion wide characters or Unicode property tests + do not overwrite the work space (which is on the stack). */ + + if (class_uchardata > class_uchardata_base) + { + xclass = TRUE; + if (lengthptr != NULL) + { + *lengthptr += class_uchardata - class_uchardata_base; + class_uchardata = class_uchardata_base; + } + } +#endif + + continue; /* Needed to avoid error when not supporting wide chars */ + } /* End of main class-processing loop */ + + /* If this class is the first thing in the branch, there can be no first + char setting, whatever the repeat count. Any reqcu setting must remain + unchanged after any kind of repeat. */ + + if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; + zerofirstcu = firstcu; + zerofirstcuflags = firstcuflags; + zeroreqcu = reqcu; + zeroreqcuflags = reqcuflags; + + /* If there are characters with values > 255, or Unicode property settings + (\p or \P), we have to compile an extended class, with its own opcode, + unless there were no property settings and there was a negated special such + as \S in the class, and PCRE2_UCP is not set, because in that case all + characters > 255 are in or not in the class, so any that were explicitly + given as well can be ignored. + + In the UCP case, if certain negated POSIX classes ([:^ascii:] or + [^:xdigit:]) were present in a class, we either have to match or not match + all wide characters (depending on whether the whole class is or is not + negated). This requirement is indicated by match_all_or_no_wide_chars being + true. 
We do this by including an explicit range, which works in both cases. + This applies only in UTF and 16-bit and 32-bit non-UTF modes, since there + cannot be any wide characters in 8-bit non-UTF mode. + + When there *are* properties in a positive UTF-8 or any 16-bit or 32_bit + class where \S etc is present without PCRE2_UCP, causing an extended class + to be compiled, we make sure that all characters > 255 are included by + forcing match_all_or_no_wide_chars to be true. + + If, when generating an xclass, there are no characters < 256, we can omit + the bitmap in the actual compiled code. */ + +#ifdef SUPPORT_WIDE_CHARS /* Defined for 16/32 bits, or 8-bit with Unicode */ + if (xclass && ( +#ifdef SUPPORT_UNICODE + (options & PCRE2_UCP) != 0 || +#endif + xclass_has_prop || !should_flip_negation)) + { + if (match_all_or_no_wide_chars || ( +#if PCRE2_CODE_UNIT_WIDTH == 8 + utf && +#endif + should_flip_negation && !negate_class && (options & PCRE2_UCP) == 0)) + { + *class_uchardata++ = XCL_RANGE; + if (utf) /* Will always be utf in the 8-bit library */ + { + class_uchardata += PRIV(ord2utf)(0x100, class_uchardata); + class_uchardata += PRIV(ord2utf)(MAX_UTF_CODE_POINT, class_uchardata); + } + else /* Can only happen for the 16-bit & 32-bit libraries */ + { +#if PCRE2_CODE_UNIT_WIDTH == 16 + *class_uchardata++ = 0x100; + *class_uchardata++ = 0xffffu; +#elif PCRE2_CODE_UNIT_WIDTH == 32 + *class_uchardata++ = 0x100; + *class_uchardata++ = 0xffffffffu; +#endif + } + } + *class_uchardata++ = XCL_END; /* Marks the end of extra data */ + *code++ = OP_XCLASS; + code += LINK_SIZE; + *code = negate_class? XCL_NOT:0; + if (xclass_has_prop) *code |= XCL_HASPROP; + + /* If the map is required, move up the extra data to make room for it; + otherwise just move the code pointer to the end of the extra data. 
*/ + + if (class_has_8bitchar > 0) + { + *code++ |= XCL_MAP; + memmove(code + (32 / sizeof(PCRE2_UCHAR)), code, + CU2BYTES(class_uchardata - code)); + if (negate_class && !xclass_has_prop) + for (i = 0; i < 32; i++) classbits[i] = ~classbits[i]; + memcpy(code, classbits, 32); + code = class_uchardata + (32 / sizeof(PCRE2_UCHAR)); + } + else code = class_uchardata; + + /* Now fill in the complete length of the item */ + + PUT(previous, 1, (int)(code - previous)); + break; /* End of class handling */ + } +#endif /* SUPPORT_WIDE_CHARS */ + + /* If there are no characters > 255, or they are all to be included or + excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the + whole class was negated and whether there were negative specials such as \S + (non-UCP) in the class. Then copy the 32-byte map into the code vector, + negating it if necessary. */ + + *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS; + if (lengthptr == NULL) /* Save time in the pre-compile phase */ + { + if (negate_class) + for (i = 0; i < 32; i++) classbits[i] = ~classbits[i]; + memcpy(code, classbits, 32); + } + code += 32 / sizeof(PCRE2_UCHAR); + break; /* End of class processing */ + + + /* ===================================================================*/ + /* Deal with (*VERB)s. */ + + /* Check for open captures before ACCEPT and convert it to ASSERT_ACCEPT if + in an assertion. In the first pass, just accumulate the length required; + otherwise hitting (*ACCEPT) inside many nested parentheses can cause + workspace overflow. Do not set firstcu after *ACCEPT. */ + + case META_ACCEPT: + cb->had_accept = TRUE; + for (oc = cb->open_caps; oc != NULL; oc = oc->next) + { + if (lengthptr != NULL) + { + *lengthptr += CU2BYTES(1) + IMM2_SIZE; + } + else + { + *code++ = OP_CLOSE; + PUT2INC(code, 0, oc->number); + } + } + *code++ = (cb->assert_depth > 0)? 
OP_ASSERT_ACCEPT : OP_ACCEPT; + if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; + break; + + case META_PRUNE: + case META_SKIP: + cb->had_pruneorskip = TRUE; + /* Fall through */ + case META_COMMIT: + case META_FAIL: + *code++ = verbops[(meta - META_MARK) >> 16]; + break; + + case META_THEN: + cb->external_flags |= PCRE2_HASTHEN; + *code++ = OP_THEN; + break; + + /* Handle verbs with arguments. Arguments can be very long, especially in + 16- and 32-bit modes, and can overflow the workspace in the first pass. + However, the argument length is constrained to be small enough to fit in + one code unit. This check happens in parse_regex(). In the first pass, + instead of putting the argument into memory, we just update the length + counter and set up an empty argument. */ + + case META_THEN_ARG: + cb->external_flags |= PCRE2_HASTHEN; + goto VERB_ARG; + + case META_PRUNE_ARG: + case META_SKIP_ARG: + cb->had_pruneorskip = TRUE; + /* Fall through */ + case META_MARK: + VERB_ARG: + *code++ = verbops[(meta - META_MARK) >> 16]; + /* The length is in characters. */ + verbarglen = *(++pptr); + verbculen = 0; + tempcode = code++; + for (i = 0; i < (int)verbarglen; i++) + { + meta = *(++pptr); +#ifdef SUPPORT_UNICODE + if (utf) mclength = PRIV(ord2utf)(meta, mcbuffer); else +#endif + { + mclength = 1; + mcbuffer[0] = meta; + } + if (lengthptr != NULL) *lengthptr += mclength; else + { + memcpy(code, mcbuffer, CU2BYTES(mclength)); + code += mclength; + verbculen += mclength; + } + } + + *tempcode = verbculen; /* Fill in the code unit length */ + *code++ = 0; /* Terminating zero */ + break; + + + /* ===================================================================*/ + /* Handle options change. The new setting must be passed back for use in + subsequent branches. Reset the greedy defaults and the case value for + firstcu and reqcu. 
*/ + + case META_OPTIONS: + *optionsptr = options = *(++pptr); + greedy_default = ((options & PCRE2_UNGREEDY) != 0); + greedy_non_default = greedy_default ^ 1; + req_caseopt = ((options & PCRE2_CASELESS) != 0)? REQ_CASELESS : 0; + break; + + + /* ===================================================================*/ + /* Handle conditional subpatterns. The case of (?(Rdigits) is ambiguous + because it could be a numerical check on recursion, or a name check on a + group's being set. The pre-pass sets up META_COND_RNUMBER as a name so that + we can handle it either way. We first try for a name; if not found, process + the number. */ + + case META_COND_RNUMBER: /* (?(Rdigits) */ + case META_COND_NAME: /* (?(name) or (?'name') or ?() */ + case META_COND_RNAME: /* (?(R&name) - test for recursion */ + bravalue = OP_COND; + { + int count, index; + PCRE2_SPTR name; + named_group *ng = cb->named_groups; + uint32_t length = *(++pptr); + + GETPLUSOFFSET(offset, pptr); + name = cb->start_pattern + offset; + + /* In the first pass, the names generated in the pre-pass are available, + but the main name table has not yet been created. Scan the list of names + generated in the pre-pass in order to get a number and whether or not + this name is duplicated. If it is not duplicated, we can handle it as a + numerical group. */ + + for (i = 0; i < cb->names_found; i++, ng++) + { + if (length == ng->length && + PRIV(strncmp)(name, ng->name, length) == 0) + { + if (!ng->isdup) + { + code[1+LINK_SIZE] = (meta == META_COND_RNAME)? OP_RREF : OP_CREF; + PUT2(code, 2+LINK_SIZE, ng->number); + if (ng->number > cb->top_backref) cb->top_backref = ng->number; + skipunits = 1+IMM2_SIZE; + goto GROUP_PROCESS_NOTE_EMPTY; + } + break; /* Found a duplicated name */ + } + } + + /* If the name was not found we have a bad reference, unless we are + dealing with R, which is treated as a recursion test by number. 
+ */ + + if (i >= cb->names_found) + { + groupnumber = 0; + if (meta == META_COND_RNUMBER) + { + for (i = 1; i < (int)length; i++) + { + groupnumber = groupnumber * 10 + name[i] - CHAR_0; + if (groupnumber > MAX_GROUP_NUMBER) + { + *errorcodeptr = ERR61; + cb->erroroffset = offset + i; + return 0; + } + } + } + + if (meta != META_COND_RNUMBER || groupnumber > cb->bracount) + { + *errorcodeptr = ERR15; + cb->erroroffset = offset; + return 0; + } + + /* (?Rdigits) treated as a recursion reference by number. A value of + zero (which is the result of both (?R) and (?R0)) means "any", and is + translated into RREF_ANY (which is 0xffff). */ + + if (groupnumber == 0) groupnumber = RREF_ANY; + code[1+LINK_SIZE] = OP_RREF; + PUT2(code, 2+LINK_SIZE, groupnumber); + skipunits = 1+IMM2_SIZE; + goto GROUP_PROCESS_NOTE_EMPTY; + } + + /* A duplicated name was found. Note that if an R name is found + (META_COND_RNUMBER), it is a reference test, not a recursion test. */ + + code[1+LINK_SIZE] = (meta == META_COND_RNAME)? OP_RREF : OP_CREF; + + /* We have a duplicated name. In the compile pass we have to search the + main table in order to get the index and count values. */ + + count = 0; /* Values for first pass (avoids compiler warning) */ + index = 0; + if (lengthptr == NULL && !find_dupname_details(name, length, &index, + &count, errorcodeptr, cb)) return 0; + + /* Add one to the opcode to change CREF/RREF into DNCREF/DNRREF and + insert appropriate data values. */ + + code[1+LINK_SIZE]++; + skipunits = 1+2*IMM2_SIZE; + PUT2(code, 2+LINK_SIZE, index); + PUT2(code, 2+LINK_SIZE+IMM2_SIZE, count); + } + goto GROUP_PROCESS_NOTE_EMPTY; + + /* The DEFINE condition is always false. It's internal groups may never + be called, so matched_char must remain false, hence the jump to + GROUP_PROCESS rather than GROUP_PROCESS_NOTE_EMPTY. 
*/ + + case META_COND_DEFINE: + bravalue = OP_COND; + GETPLUSOFFSET(offset, pptr); + code[1+LINK_SIZE] = OP_DEFINE; + skipunits = 1; + goto GROUP_PROCESS; + + /* Conditional test of a group's being set. */ + + case META_COND_NUMBER: + bravalue = OP_COND; + GETPLUSOFFSET(offset, pptr); + groupnumber = *(++pptr); + if (groupnumber > cb->bracount) + { + *errorcodeptr = ERR15; + cb->erroroffset = offset; + return 0; + } + if (groupnumber > cb->top_backref) cb->top_backref = groupnumber; + offset -= 2; /* Point at initial ( for too many branches error */ + code[1+LINK_SIZE] = OP_CREF; + skipunits = 1+IMM2_SIZE; + PUT2(code, 2+LINK_SIZE, groupnumber); + goto GROUP_PROCESS_NOTE_EMPTY; + + /* Test for the PCRE2 version. */ + + case META_COND_VERSION: + bravalue = OP_COND; + if (pptr[1] > 0) + code[1+LINK_SIZE] = ((PCRE2_MAJOR > pptr[2]) || + (PCRE2_MAJOR == pptr[2] && PCRE2_MINOR >= pptr[3]))? + OP_TRUE : OP_FALSE; + else + code[1+LINK_SIZE] = (PCRE2_MAJOR == pptr[2] && PCRE2_MINOR == pptr[3])? + OP_TRUE : OP_FALSE; + skipunits = 1; + pptr += 3; + goto GROUP_PROCESS_NOTE_EMPTY; + + /* The condition is an assertion, possibly preceded by a callout. */ + + case META_COND_ASSERT: + bravalue = OP_COND; + goto GROUP_PROCESS_NOTE_EMPTY; + + + /* ===================================================================*/ + /* Handle all kinds of nested bracketed groups. The non-capturing, + non-conditional cases are here; others come to GROUP_PROCESS via goto. */ + + case META_LOOKAHEAD: + bravalue = OP_ASSERT; + cb->assert_depth += 1; + goto GROUP_PROCESS; + + /* Optimize (?!) to (*FAIL) unless it is quantified - which is a weird + thing to do, but Perl allows all assertions to be quantified, and when + they contain capturing parentheses there may be a potential use for + this feature. Not that that applies to a quantified (?!) but we allow + it for uniformity. 
*/ + + case META_LOOKAHEADNOT: + if (pptr[1] == META_KET && + (pptr[2] < META_ASTERISK || pptr[2] > META_MINMAX_QUERY)) + { + *code++ = OP_FAIL; + pptr++; + } + else + { + bravalue = OP_ASSERT_NOT; + cb->assert_depth += 1; + goto GROUP_PROCESS; + } + break; + + case META_LOOKBEHIND: + bravalue = OP_ASSERTBACK; + cb->assert_depth += 1; + goto GROUP_PROCESS; + + case META_LOOKBEHINDNOT: + bravalue = OP_ASSERTBACK_NOT; + cb->assert_depth += 1; + goto GROUP_PROCESS; + + case META_ATOMIC: + bravalue = OP_ONCE; + goto GROUP_PROCESS_NOTE_EMPTY; + + case META_NOCAPTURE: + bravalue = OP_BRA; + /* Fall through */ + + /* Process nested bracketed regex. The nesting depth is maintained for the + benefit of the stackguard function. The test for too deep nesting is now + done in parse_regex(). Assertion and DEFINE groups come to GROUP_PROCESS; + others come to GROUP_PROCESS_NOTE_EMPTY, to indicate that we need to take + note of whether or not they may match an empty string. */ + + GROUP_PROCESS_NOTE_EMPTY: + note_group_empty = TRUE; + + GROUP_PROCESS: + cb->parens_depth += 1; + *code = bravalue; + pptr++; + tempcode = code; + tempreqvary = cb->req_varyopt; /* Save value before group */ + templastcapture = cb->lastcapture; /* Save value before group */ + length_prevgroup = 0; /* Initialize for pre-compile phase */ + + if ((group_return = + compile_regex( + options, /* The option state */ + &tempcode, /* Where to put code (updated) */ + &pptr, /* Input pointer (updated) */ + errorcodeptr, /* Where to put an error message */ + skipunits, /* Skip over bracket number */ + &subfirstcu, /* For possible first char */ + &subfirstcuflags, + &subreqcu, /* For possible last char */ + &subreqcuflags, + bcptr, /* Current branch chain */ + cb, /* Compile data block */ + (lengthptr == NULL)? 
NULL : /* Actual compile phase */ + &length_prevgroup /* Pre-compile phase */ + )) == 0) + return 0; /* Error */ + + cb->parens_depth -= 1; + + /* If that was a non-conditional significant group (not an assertion, not a + DEFINE) that matches at least one character, then the current item matches + a character. Conditionals are handled below. */ + + if (note_group_empty && bravalue != OP_COND && group_return > 0) + matched_char = TRUE; + + /* If that was an atomic group and there are no capturing groups within it, + generate OP_ONCE_NC instead of OP_ONCE. */ + + if (bravalue == OP_ONCE && cb->lastcapture <= templastcapture) + *code = OP_ONCE_NC; + + /* If we've just compiled an assertion, pop the assert depth. */ + + if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT) + cb->assert_depth -= 1; + + /* At the end of compiling, code is still pointing to the start of the + group, while tempcode has been updated to point past the end of the group. + The parsed pattern pointer (pptr) is on the closing META_KET. + + If this is a conditional bracket, check that there are no more than + two branches in the group, or just one if it's a DEFINE group. We do this + in the real compile phase, not in the pre-pass, where the whole group may + not be available. */ + + if (bravalue == OP_COND && lengthptr == NULL) + { + PCRE2_UCHAR *tc = code; + int condcount = 0; + + do { + condcount++; + tc += GET(tc,1); + } + while (*tc != OP_KET); + + /* A DEFINE group is never obeyed inline (the "condition" is always + false). It must have only one branch. Having checked this, change the + opcode to OP_FALSE. */ + + if (code[LINK_SIZE+1] == OP_DEFINE) + { + if (condcount > 1) + { + cb->erroroffset = offset; + *errorcodeptr = ERR54; + return 0; + } + code[LINK_SIZE+1] = OP_FALSE; + bravalue = OP_DEFINE; /* A flag to suppress char handling below */ + } + + /* A "normal" conditional group. 
If there is just one branch, we must not + make use of its firstcu or reqcu, because this is equivalent to an + empty second branch. Also, it may match an empty string. If there are two + branches, this item must match a character if the group must. */ + + else + { + if (condcount > 2) + { + cb->erroroffset = offset; + *errorcodeptr = ERR27; + return 0; + } + if (condcount == 1) subfirstcuflags = subreqcuflags = REQ_NONE; + else if (group_return > 0) matched_char = TRUE; + } + } + + /* In the pre-compile phase, update the length by the length of the group, + less the brackets at either end. Then reduce the compiled code to just a + set of non-capturing brackets so that it doesn't use much memory if it is + duplicated by a quantifier.*/ + + if (lengthptr != NULL) + { + if (OFLOW_MAX - *lengthptr < length_prevgroup - 2 - 2*LINK_SIZE) + { + *errorcodeptr = ERR20; + return 0; + } + *lengthptr += length_prevgroup - 2 - 2*LINK_SIZE; + code++; /* This already contains bravalue */ + PUTINC(code, 0, 1 + LINK_SIZE); + *code++ = OP_KET; + PUTINC(code, 0, 1 + LINK_SIZE); + break; /* No need to waste time with special character handling */ + } + + /* Otherwise update the main code pointer to the end of the group. */ + + code = tempcode; + + /* For a DEFINE group, required and first character settings are not + relevant. */ + + if (bravalue == OP_DEFINE) break; + + /* Handle updating of the required and first code units for other types of + group. Update for normal brackets of all kinds, and conditions with two + branches (see code above). If the bracket is followed by a quantifier with + zero repeat, we have to back off. Hence the definition of zeroreqcu and + zerofirstcu outside the main loop so that they can be accessed for the back + off. 
*/ + + zeroreqcu = reqcu; + zeroreqcuflags = reqcuflags; + zerofirstcu = firstcu; + zerofirstcuflags = firstcuflags; + groupsetfirstcu = FALSE; + + if (bravalue >= OP_ONCE) /* Not an assertion */ + { + /* If we have not yet set a firstcu in this branch, take it from the + subpattern, remembering that it was set here so that a repeat of more + than one can replicate it as reqcu if necessary. If the subpattern has + no firstcu, set "none" for the whole branch. In both cases, a zero + repeat forces firstcu to "none". */ + + if (firstcuflags == REQ_UNSET && subfirstcuflags != REQ_UNSET) + { + if (subfirstcuflags >= 0) + { + firstcu = subfirstcu; + firstcuflags = subfirstcuflags; + groupsetfirstcu = TRUE; + } + else firstcuflags = REQ_NONE; + zerofirstcuflags = REQ_NONE; + } + + /* If firstcu was previously set, convert the subpattern's firstcu + into reqcu if there wasn't one, using the vary flag that was in + existence beforehand. */ + + else if (subfirstcuflags >= 0 && subreqcuflags < 0) + { + subreqcu = subfirstcu; + subreqcuflags = subfirstcuflags | tempreqvary; + } + + /* If the subpattern set a required code unit (or set a first code unit + that isn't really the first code unit - see above), set it. */ + + if (subreqcuflags >= 0) + { + reqcu = subreqcu; + reqcuflags = subreqcuflags; + } + } + + /* For a forward assertion, we take the reqcu, if set, provided that the + group has also set a firstcu. This can be helpful if the pattern that + follows the assertion doesn't set a different char. For example, it's + useful for /(?=abcde).+/. We can't set firstcu for an assertion, however + because it leads to incorrect effect for patterns such as /(?=a)a.+/ when + the "real" "a" would then become a reqcu instead of a firstcu. This is + overcome by a scan at the end if there's no firstcu, looking for an + asserted first char. A similar effect for patterns like /(?=.*X)X$/ means + we must only take the reqcu when the group also set a firstcu. 
Otherwise, + in that example, 'X' ends up set for both. */ + + else if (bravalue == OP_ASSERT && subreqcuflags >= 0 && + subfirstcuflags >= 0) + { + reqcu = subreqcu; + reqcuflags = subreqcuflags; + } + + break; /* End of nested group handling */ + + + /* ===================================================================*/ + /* Handle named backreferences and recursions. */ + + case META_BACKREF_BYNAME: + case META_RECURSE_BYNAME: + { + int count, index; + PCRE2_SPTR name; + BOOL is_dupname = FALSE; + named_group *ng = cb->named_groups; + uint32_t length = *(++pptr); + + GETPLUSOFFSET(offset, pptr); + name = cb->start_pattern + offset; + + /* In the first pass, the names generated in the pre-pass are available, + but the main name table has not yet been created. Scan the list of names + generated in the pre-pass in order to get a number and whether or not + this name is duplicated. */ + + groupnumber = 0; + for (i = 0; i < cb->names_found; i++, ng++) + { + if (length == ng->length && + PRIV(strncmp)(name, ng->name, length) == 0) + { + is_dupname = ng->isdup; + groupnumber = ng->number; + + /* For a recursion, that's all that is needed. We can now go to + the code above that handles numerical recursion, applying it to + the first group with the given name. */ + + if (meta == META_RECURSE_BYNAME) + { + meta_arg = groupnumber; + goto HANDLE_NUMERICAL_RECURSION; + } + + /* For a back reference, update the back reference map and the + maximum back reference. Then, for each group, we must check to + see if it is recursive, that is, it is inside the group that it + references. A flag is set so that the group can be made atomic. + */ + + cb->backref_map |= (groupnumber < 32)? (1u << groupnumber) : 1; + if (groupnumber > cb->top_backref) + cb->top_backref = groupnumber; + + for (oc = cb->open_caps; oc != NULL; oc = oc->next) + { + if (oc->number == groupnumber) + { + oc->flag = TRUE; + break; + } + } + } + } + + /* If the name was not found we have a bad reference. 
*/ + + if (groupnumber == 0) + { + *errorcodeptr = ERR15; + cb->erroroffset = offset; + return 0; + } + + /* If a back reference name is not duplicated, we can handle it as + a numerical reference. */ + + if (!is_dupname) + { + meta_arg = groupnumber; + goto HANDLE_SINGLE_REFERENCE; + } + + /* If a back reference name is duplicated, we generate a different + opcode to a numerical back reference. In the second pass we must + search for the index and count in the final name table. */ + + count = 0; /* Values for first pass (avoids compiler warning) */ + index = 0; + if (lengthptr == NULL && !find_dupname_details(name, length, &index, + &count, errorcodeptr, cb)) return 0; + + if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; + *code++ = ((options & PCRE2_CASELESS) != 0)? OP_DNREFI : OP_DNREF; + PUT2INC(code, 0, index); + PUT2INC(code, 0, count); + } + break; + + + /* ===================================================================*/ + /* Handle a numerical callout. */ + + case META_CALLOUT_NUMBER: + code[0] = OP_CALLOUT; + PUT(code, 1, pptr[1]); /* Offset to next pattern item */ + PUT(code, 1 + LINK_SIZE, pptr[2]); /* Length of next pattern item */ + code[1 + 2*LINK_SIZE] = pptr[3]; + pptr += 3; + code += PRIV(OP_lengths)[OP_CALLOUT]; + break; + + + /* ===================================================================*/ + /* Handle a callout with a string argument. In the pre-pass we just compute + the length without generating anything. The length in pptr[3] includes both + delimiters; in the actual compile only the first one is copied, but a + terminating zero is added. Any doubled delimiters within the string make + this an overestimate, but it is not worth bothering about. */ + + case META_CALLOUT_STRING: + if (lengthptr != NULL) + { + *lengthptr += pptr[3] + (1 + 4*LINK_SIZE); + pptr += 3; + SKIPOFFSET(pptr); + } + + /* In the real compile we can copy the string. The starting delimiter is + included so that the client can discover it if they want. 
We also pass the + start offset to help a script language give better error messages. */ + + else + { + PCRE2_SPTR pp; + uint32_t delimiter; + uint32_t length = pptr[3]; + PCRE2_UCHAR *callout_string = code + (1 + 4*LINK_SIZE); + + code[0] = OP_CALLOUT_STR; + PUT(code, 1, pptr[1]); /* Offset to next pattern item */ + PUT(code, 1 + LINK_SIZE, pptr[2]); /* Length of next pattern item */ + + pptr += 3; + GETPLUSOFFSET(offset, pptr); /* Offset to string in pattern */ + pp = cb->start_pattern + offset; + delimiter = *callout_string++ = *pp++; + if (delimiter == CHAR_LEFT_CURLY_BRACKET) + delimiter = CHAR_RIGHT_CURLY_BRACKET; + PUT(code, 1 + 3*LINK_SIZE, (int)(offset + 1)); /* One after delimiter */ + + /* The syntax of the pattern was checked in the parsing scan. The length + includes both delimiters, but we have passed the opening one just above, + so we reduce length before testing it. The test is for > 1 because we do + not want to copy the final delimiter. This also ensures that pp[1] is + accessible. */ + + while (--length > 1) + { + if (*pp == delimiter && pp[1] == delimiter) + { + *callout_string++ = delimiter; + pp += 2; + length--; + } + else *callout_string++ = *pp++; + } + *callout_string++ = CHAR_NULL; + + /* Set the length of the entire item, the advance to its end. */ + + PUT(code, 1 + 2*LINK_SIZE, (int)(callout_string - code)); + code = callout_string; + } + break; + + + /* ===================================================================*/ + /* Handle repetition. The different types are all sorted out in the parsing + pass. 
*/ + + case META_MINMAX_PLUS: + case META_MINMAX_QUERY: + case META_MINMAX: + repeat_min = *(++pptr); + repeat_max = *(++pptr); + goto REPEAT; + + case META_ASTERISK: + case META_ASTERISK_PLUS: + case META_ASTERISK_QUERY: + repeat_min = 0; + repeat_max = REPEAT_UNLIMITED; + goto REPEAT; + + case META_PLUS: + case META_PLUS_PLUS: + case META_PLUS_QUERY: + repeat_min = 1; + repeat_max = REPEAT_UNLIMITED; + goto REPEAT; + + case META_QUERY: + case META_QUERY_PLUS: + case META_QUERY_QUERY: + repeat_min = 0; + repeat_max = 1; + + REPEAT: + if (previous_matched_char && repeat_min > 0) matched_char = TRUE; + + /* Remember whether this is a variable length repeat, and default to + single-char opcodes. */ + + reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY; + op_type = 0; + + /* If the repeat is {1} we can ignore it. */ + + if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT; + + /* Adjust first and required code units for a zero repeat. */ + + if (repeat_min == 0) + { + firstcu = zerofirstcu; + firstcuflags = zerofirstcuflags; + reqcu = zeroreqcu; + reqcuflags = zeroreqcuflags; + } + + /* Note the greediness and possessiveness. 
*/ + + switch (meta) + { + case META_MINMAX_PLUS: + case META_ASTERISK_PLUS: + case META_PLUS_PLUS: + case META_QUERY_PLUS: + repeat_type = 0; /* Force greedy */ + possessive_quantifier = TRUE; break; - case OP_TYPEUPTO: - case OP_TYPEMINUPTO: - case OP_TYPEEXACT: - case OP_TYPEPOSUPTO: - if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) - code += 2; + case META_MINMAX_QUERY: + case META_ASTERISK_QUERY: + case META_PLUS_QUERY: + case META_QUERY_QUERY: + repeat_type = greedy_non_default; + possessive_quantifier = FALSE; break; - case OP_MARK: - case OP_PRUNE_ARG: - case OP_SKIP_ARG: - case OP_THEN_ARG: - code += code[1]; + default: + repeat_type = greedy_default; + possessive_quantifier = FALSE; break; } - /* Add in the fixed length from the table */ + /* Save start of previous item, in case we have to move it up in order to + insert something before it, and remember what it was. */ - code += PRIV(OP_lengths)[c]; + tempcode = previous; + op_previous = *previous; - /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be - followed by a multi-byte character. The length in the table is a minimum, so - we have to arrange to skip the extra bytes. */ + /* If previous was a recursion call, wrap it in atomic brackets so that + previous becomes the atomic group. All recursions were so wrapped in the + past, but it no longer happens for non-repeated recursions. In fact, the + repeated ones could be re-implemented independently so as not to need this, + but for the moment we rely on the code for repeating groups. 
*/ -#ifdef MAYBE_UTF_MULTI - if (utf) switch(c) + if (op_previous == OP_RECURSE) { + memmove(previous + 1 + LINK_SIZE, previous, CU2BYTES(1 + LINK_SIZE)); + op_previous = *previous = OP_ONCE; + PUT(previous, 1, 2 + 2*LINK_SIZE); + previous[2 + 2*LINK_SIZE] = OP_KET; + PUT(previous, 3 + 2*LINK_SIZE, 2 + 2*LINK_SIZE); + code += 2 + 2 * LINK_SIZE; + length_prevgroup = 3 + 3*LINK_SIZE; + group_return = -1; /* Set "may match empty string" */ + } + + /* Now handle repetition for the different types of item. */ + + switch (op_previous) + { + /* If previous was a character or negated character match, abolish the + item and generate a repeat item instead. If a char item has a minimum of + more than one, ensure that it is set in reqcu - it might not be if a + sequence such as x{3} is the first thing in a branch because the x will + have gone into firstcu instead. */ + case OP_CHAR: case OP_CHARI: case OP_NOT: case OP_NOTI: - case OP_EXACT: - case OP_EXACTI: - case OP_NOTEXACT: - case OP_NOTEXACTI: - case OP_UPTO: - case OP_UPTOI: - case OP_NOTUPTO: - case OP_NOTUPTOI: - case OP_MINUPTO: - case OP_MINUPTOI: - case OP_NOTMINUPTO: - case OP_NOTMINUPTOI: - case OP_POSUPTO: - case OP_POSUPTOI: - case OP_NOTPOSUPTO: - case OP_NOTPOSUPTOI: - case OP_STAR: - case OP_STARI: - case OP_NOTSTAR: - case OP_NOTSTARI: - case OP_MINSTAR: - case OP_MINSTARI: - case OP_NOTMINSTAR: - case OP_NOTMINSTARI: - case OP_POSSTAR: - case OP_POSSTARI: - case OP_NOTPOSSTAR: - case OP_NOTPOSSTARI: - case OP_PLUS: - case OP_PLUSI: - case OP_NOTPLUS: - case OP_NOTPLUSI: - case OP_MINPLUS: - case OP_MINPLUSI: - case OP_NOTMINPLUS: - case OP_NOTMINPLUSI: - case OP_POSPLUS: - case OP_POSPLUSI: - case OP_NOTPOSPLUS: - case OP_NOTPOSPLUSI: - case OP_QUERY: - case OP_QUERYI: - case OP_NOTQUERY: - case OP_NOTQUERYI: - case OP_MINQUERY: - case OP_MINQUERYI: - case OP_NOTMINQUERY: - case OP_NOTMINQUERYI: - case OP_POSQUERY: - case OP_POSQUERYI: - case OP_NOTPOSQUERY: - case OP_NOTPOSQUERYI: - if 
(HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]); - break; - } -#else - (void)(utf); /* Keep compiler happy by referencing function argument */ + op_type = chartypeoffset[op_previous - OP_CHAR]; + + /* Deal with UTF characters that take up more than one code unit. */ + +#ifdef MAYBE_UTF_MULTI + if (utf && NOT_FIRSTCU(code[-1])) + { + PCRE2_UCHAR *lastchar = code - 1; + BACKCHAR(lastchar); + mclength = (uint32_t)(code - lastchar); /* Length of UTF character */ + memcpy(mcbuffer, lastchar, CU2BYTES(mclength)); /* Save the char */ + } + else #endif /* MAYBE_UTF_MULTI */ - } + + /* Handle the case of a single code unit - either with no UTF support, or + with UTF disabled, or for a single-code-unit UTF character. */ + { + mcbuffer[0] = code[-1]; + mclength = 1; + if (op_previous <= OP_CHARI && repeat_min > 1) + { + reqcu = mcbuffer[0]; + reqcuflags = req_caseopt | cb->req_varyopt; + } + } + goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */ + + /* If previous was a character class or a back reference, we put the + repeat stuff after it, but just skip the item if the repeat was {0,0}. */ + +#ifdef SUPPORT_WIDE_CHARS + case OP_XCLASS: +#endif + case OP_CLASS: + case OP_NCLASS: + case OP_REF: + case OP_REFI: + case OP_DNREF: + case OP_DNREFI: + + if (repeat_max == 0) + { + code = previous; + goto END_REPEAT; + } + + if (repeat_min == 0 && repeat_max == REPEAT_UNLIMITED) + *code++ = OP_CRSTAR + repeat_type; + else if (repeat_min == 1 && repeat_max == REPEAT_UNLIMITED) + *code++ = OP_CRPLUS + repeat_type; + else if (repeat_min == 0 && repeat_max == 1) + *code++ = OP_CRQUERY + repeat_type; + else + { + *code++ = OP_CRRANGE + repeat_type; + PUT2INC(code, 0, repeat_min); + if (repeat_max == REPEAT_UNLIMITED) repeat_max = 0; /* 2-byte encoding for max */ + PUT2INC(code, 0, repeat_max); + } + break; + + /* If previous is OP_FAIL, it was generated by an empty class [] + (PCRE2_ALLOW_EMPTY_CLASS is set). 
The other ways in which OP_FAIL can be + generated, that is by (*FAIL) or (?!), disallow a quantifier at parse + time. We can just ignore this repeat. */ + + case OP_FAIL: + goto END_REPEAT; + + /* If previous was a bracket group, we may have to replicate it in + certain cases. Note that at this point we can encounter only the "basic" + bracket opcodes such as BRA and CBRA, as this is the place where they get + converted into the more special varieties such as BRAPOS and SBRA. + Originally, PCRE did not allow repetition of assertions, but now it does, + for Perl compatibility. */ + + case OP_ASSERT: + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + case OP_ONCE: + case OP_ONCE_NC: + case OP_BRA: + case OP_CBRA: + case OP_COND: + { + int len = (int)(code - previous); + PCRE2_UCHAR *bralink = NULL; + PCRE2_UCHAR *brazeroptr = NULL; + + /* Repeating a DEFINE group (or any group where the condition is always + FALSE and there is only one branch) is pointless, but Perl allows the + syntax, so we just ignore the repeat. */ + + if (op_previous == OP_COND && previous[LINK_SIZE+1] == OP_FALSE && + previous[GET(previous, 1)] != OP_ALT) + goto END_REPEAT; + + /* There is no sense in actually repeating assertions. The only potential + use of repetition is in cases when the assertion is optional. Therefore, + if the minimum is greater than zero, just ignore the repeat. If the + maximum is not zero or one, set it to 1. */ + + if (op_previous < OP_ONCE) /* Assertion */ + { + if (repeat_min > 0) goto END_REPEAT; + if (repeat_max > 1) repeat_max = 1; + } + + /* The case of a zero minimum is special because of the need to stick + OP_BRAZERO in front of it, and because the group appears once in the + data, whereas in other cases it appears the minimum number of times. For + this reason, it is simplest to treat this case separately, as otherwise + the code gets far too messy. There are several special subcases when the + minimum is zero. 
*/ + + if (repeat_min == 0) + { + /* If the maximum is also zero, we used to just omit the group from + the output altogether, like this: + + ** if (repeat_max == 0) + ** { + ** code = previous; + ** goto END_REPEAT; + ** } + + However, that fails when a group or a subgroup within it is + referenced as a subroutine from elsewhere in the pattern, so now we + stick in OP_SKIPZERO in front of it so that it is skipped on + execution. As we don't have a list of which groups are referenced, we + cannot do this selectively. + + If the maximum is 1 or unlimited, we just have to stick in the + BRAZERO and do no more at this point. */ + + if (repeat_max <= 1 || repeat_max == REPEAT_UNLIMITED) + { + memmove(previous + 1, previous, CU2BYTES(len)); + code++; + if (repeat_max == 0) + { + *previous++ = OP_SKIPZERO; + goto END_REPEAT; + } + brazeroptr = previous; /* Save for possessive optimizing */ + *previous++ = OP_BRAZERO + repeat_type; + } + + /* If the maximum is greater than 1 and limited, we have to replicate + in a nested fashion, sticking OP_BRAZERO before each set of brackets. + The first one has to be handled carefully because it's the original + copy, which has to be moved up. The remainder can be handled by code + that is common with the non-zero minimum case below. We have to + adjust the value or repeat_max, since one less copy is required. */ + + else + { + int linkoffset; + memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len)); + code += 2 + LINK_SIZE; + *previous++ = OP_BRAZERO + repeat_type; + *previous++ = OP_BRA; + + /* We chain together the bracket link offset fields that have to be + filled in later when the ends of the brackets are reached. */ + + linkoffset = (bralink == NULL)? 
0 : (int)(previous - bralink); + bralink = previous; + PUTINC(previous, 0, linkoffset); + } + + if (repeat_max != REPEAT_UNLIMITED) repeat_max--; + } + + /* If the minimum is greater than zero, replicate the group as many + times as necessary, and adjust the maximum to the number of subsequent + copies that we need. */ + + else + { + if (repeat_min > 1) + { + /* In the pre-compile phase, we don't actually do the replication. + We just adjust the length as if we had. Do some paranoid checks for + potential integer overflow. The INT64_OR_DOUBLE type is a 64-bit + integer type when available, otherwise double. */ + + if (lengthptr != NULL) + { + PCRE2_SIZE delta = (repeat_min - 1)*length_prevgroup; + if ((INT64_OR_DOUBLE)(repeat_min - 1)* + (INT64_OR_DOUBLE)length_prevgroup > + (INT64_OR_DOUBLE)INT_MAX || + OFLOW_MAX - *lengthptr < delta) + { + *errorcodeptr = ERR20; + return 0; + } + *lengthptr += delta; + } + + /* This is compiling for real. If there is a set first code unit + for the group, and we have not yet set a "required code unit", set + it. */ + + else + { + if (groupsetfirstcu && reqcuflags < 0) + { + reqcu = firstcu; + reqcuflags = firstcuflags; + } + for (i = 1; (uint32_t)i < repeat_min; i++) + { + memcpy(code, previous, CU2BYTES(len)); + code += len; + } + } + } + + if (repeat_max != REPEAT_UNLIMITED) repeat_max -= repeat_min; + } + + /* This code is common to both the zero and non-zero minimum cases. If + the maximum is limited, it replicates the group in a nested fashion, + remembering the bracket starts on a stack. In the case of a zero + minimum, the first one was set up above. In all cases the repeat_max + now specifies the number of additional copies needed. Again, we must + remember to replicate entries on the forward reference list. */ + + if (repeat_max != REPEAT_UNLIMITED) + { + /* In the pre-compile phase, we don't actually do the replication. We + just adjust the length as if we had. 
For each repetition we must add + 1 to the length for BRAZERO and for all but the last repetition we + must add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some + paranoid checks to avoid integer overflow. The INT64_OR_DOUBLE type + is a 64-bit integer type when available, otherwise double. */ + + if (lengthptr != NULL && repeat_max > 0) + { + PCRE2_SIZE delta = repeat_max*(length_prevgroup + 1 + 2 + 2*LINK_SIZE) - + 2 - 2*LINK_SIZE; /* Last one doesn't nest */ + if ((INT64_OR_DOUBLE)repeat_max * + (INT64_OR_DOUBLE)(length_prevgroup + 1 + 2 + 2*LINK_SIZE) + > (INT64_OR_DOUBLE)INT_MAX || + OFLOW_MAX - *lengthptr < delta) + { + *errorcodeptr = ERR20; + return 0; + } + *lengthptr += delta; + } + + /* This is compiling for real */ + + else for (i = repeat_max - 1; i >= 0; i--) + { + *code++ = OP_BRAZERO + repeat_type; + + /* All but the final copy start a new nesting, maintaining the + chain of brackets outstanding. */ + + if (i != 0) + { + int linkoffset; + *code++ = OP_BRA; + linkoffset = (bralink == NULL)? 0 : (int)(code - bralink); + bralink = code; + PUTINC(code, 0, linkoffset); + } + + memcpy(code, previous, CU2BYTES(len)); + code += len; + } + + /* Now chain through the pending brackets, and fill in their length + fields (which are holding the chain links pro tem). */ + + while (bralink != NULL) + { + int oldlinkoffset; + int linkoffset = (int)(code - bralink + 1); + PCRE2_UCHAR *bra = code - linkoffset; + oldlinkoffset = GET(bra, 1); + bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset; + *code++ = OP_KET; + PUTINC(code, 0, linkoffset); + PUT(bra, 1, linkoffset); + } + } + + /* If the maximum is unlimited, set a repeater in the final copy. For + ONCE brackets, that's all we need to do. However, possessively repeated + ONCE brackets can be converted into non-capturing brackets, as the + behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to + deal with possessive ONCEs specially. 
+ + Otherwise, when we are doing the actual compile phase, check to see + whether this group is one that could match an empty string. If so, + convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so + that runtime checking can be done. [This check is also applied to ONCE + groups at runtime, but in a different way.] + + Then, if the quantifier was possessive and the bracket is not a + conditional, we convert the BRA code to the POS form, and the KET code to + KETRPOS. (It turns out to be convenient at runtime to detect this kind of + subpattern at both the start and at the end.) The use of special opcodes + makes it possible to reduce greatly the stack usage in pcre2_match(). If + the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO. + + Then, if the minimum number of matches is 1 or 0, cancel the possessive + flag so that the default action below, of wrapping everything inside + atomic brackets, does not happen. When the minimum is greater than 1, + there will be earlier copies of the group, and so we still have to wrap + the whole thing. */ + + else + { + PCRE2_UCHAR *ketcode = code - 1 - LINK_SIZE; + PCRE2_UCHAR *bracode = ketcode - GET(ketcode, 1); + + /* Convert possessive ONCE brackets to non-capturing */ + + if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) && + possessive_quantifier) *bracode = OP_BRA; + + /* For non-possessive ONCE brackets, all we need to do is to + set the KET. */ + + if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC) + *ketcode = OP_KETRMAX + repeat_type; + + /* Handle non-ONCE brackets and possessive ONCEs (which have been + converted to non-capturing above). */ + + else + { + /* In the compile phase, adjust the opcode if the group can match + an empty string. For a conditional group with only one branch, the + value of group_return will not show "could be empty", so we must + check that separately. 
*/ + + if (lengthptr == NULL) + { + if (group_return < 0) *bracode += OP_SBRA - OP_BRA; + if (*bracode == OP_COND && bracode[GET(bracode,1)] != OP_ALT) + *bracode = OP_SCOND; + } + + /* Handle possessive quantifiers. */ + + if (possessive_quantifier) + { + /* For COND brackets, we wrap the whole thing in a possessively + repeated non-capturing bracket, because we have not invented POS + versions of the COND opcodes. */ + + if (*bracode == OP_COND || *bracode == OP_SCOND) + { + int nlen = (int)(code - bracode); + memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen)); + code += 1 + LINK_SIZE; + nlen += 1 + LINK_SIZE; + *bracode = (*bracode == OP_COND)? OP_BRAPOS : OP_SBRAPOS; + *code++ = OP_KETRPOS; + PUTINC(code, 0, nlen); + PUT(bracode, 1, nlen); + } + + /* For non-COND brackets, we modify the BRA code and use KETRPOS. */ + + else + { + *bracode += 1; /* Switch to xxxPOS opcodes */ + *ketcode = OP_KETRPOS; + } + + /* If the minimum is zero, mark it as possessive, then unset the + possessive flag when the minimum is 0 or 1. */ + + if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO; + if (repeat_min < 2) possessive_quantifier = FALSE; + } + + /* Non-possessive quantifier */ + + else *ketcode = OP_KETRMAX + repeat_type; + } + } + } + break; + + /* If previous was a character type match (\d or similar), abolish it and + create a suitable repeat item. The code is shared with single-character + repeats by setting op_type to add a suitable offset into repeat_type. + Note the the Unicode property types will be present only when + SUPPORT_UNICODE is defined, but we don't wrap the little bits of code + here because it just makes it horribly messy. 
*/ + + default: + if (op_previous >= OP_EODN) /* Not a character type - internal error */ + { + *errorcodeptr = ERR10; + return 0; + } + else + { + int prop_type, prop_value; + PCRE2_UCHAR *oldcode; + + op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */ + mclength = 0; /* Not a character */ + + if (op_previous == OP_PROP || op_previous == OP_NOTPROP) + { + prop_type = previous[1]; + prop_value = previous[2]; + } + else + { + /* Come here from just above with a character in mcbuffer/mclength. */ + OUTPUT_SINGLE_REPEAT: + prop_type = prop_value = -1; + } + + /* At this point, if prop_type == prop_value == -1 we either have a + character in mcbuffer when mclength is greater than zero, or we have + mclength zero, in which case there is a non-property character type in + op_previous. If prop_type/value are not negative, we have a property + character type in op_previous. */ + + oldcode = code; /* Save where we were */ + code = previous; /* Usually overwrite previous item */ + + /* If the maximum is zero then the minimum must also be zero; Perl allows + this case, so we do too - by simply omitting the item altogether. */ + + if (repeat_max == 0) goto END_REPEAT; + + /* Combine the op_type with the repeat_type */ + + repeat_type += op_type; + + /* A minimum of zero is handled either as the special case * or ?, or as + an UPTO, with the maximum given. */ + + if (repeat_min == 0) + { + if (repeat_max == REPEAT_UNLIMITED) *code++ = OP_STAR + repeat_type; + else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type; + else + { + *code++ = OP_UPTO + repeat_type; + PUT2INC(code, 0, repeat_max); + } + } + + /* A repeat minimum of 1 is optimized into some special cases. If the + maximum is unlimited, we use OP_PLUS. Otherwise, the original item is + left in place and, if the maximum is greater than 1, we use OP_UPTO with + one less than the maximum. 
*/ + + else if (repeat_min == 1) + { + if (repeat_max == REPEAT_UNLIMITED) + *code++ = OP_PLUS + repeat_type; + else + { + code = oldcode; /* Leave previous item in place */ + if (repeat_max == 1) goto END_REPEAT; + *code++ = OP_UPTO + repeat_type; + PUT2INC(code, 0, repeat_max - 1); + } + } + + /* The case {n,n} is just an EXACT, while the general case {n,m} is + handled as an EXACT followed by an UPTO or STAR or QUERY. */ + + else + { + *code++ = OP_EXACT + op_type; /* NB EXACT doesn't have repeat_type */ + PUT2INC(code, 0, repeat_min); + + /* Unless repeat_max equals repeat_min, fill in the data for EXACT, + and then generate the second opcode. For a repeated Unicode property + match, there are two extra values that define the required property, + and mclength is set zero to indicate this. */ + + if (repeat_max != repeat_min) + { + if (mclength > 0) + { + memcpy(code, mcbuffer, CU2BYTES(mclength)); + code += mclength; + } + else + { + *code++ = op_previous; + if (prop_type >= 0) + { + *code++ = prop_type; + *code++ = prop_value; + } + } + + /* Now set up the following opcode */ + + if (repeat_max == REPEAT_UNLIMITED) + *code++ = OP_STAR + repeat_type; + else + { + repeat_max -= repeat_min; + if (repeat_max == 1) + { + *code++ = OP_QUERY + repeat_type; + } + else + { + *code++ = OP_UPTO + repeat_type; + PUT2INC(code, 0, repeat_max); + } + } + } + } + + /* Fill in the character or character type for the final opcode. */ + + if (mclength > 0) + { + memcpy(code, mcbuffer, CU2BYTES(mclength)); + code += mclength; + } + else + { + *code++ = op_previous; + if (prop_type >= 0) + { + *code++ = prop_type; + *code++ = prop_value; + } + } + } + break; + } /* End of switch on different op_previous values */ + + + /* If the character following a repeat is '+', possessive_quantifier is + TRUE. For some opcodes, there are special alternative opcodes for this + case. For anything else, we wrap the entire repeated item inside OP_ONCE + brackets. 
Logically, the '+' notation is just syntactic sugar, taken from + Sun's Java package, but the special opcodes can optimize it. + + Some (but not all) possessively repeated subpatterns have already been + completely handled in the code just above. For them, possessive_quantifier + is always FALSE at this stage. Note that the repeated item starts at + tempcode, not at previous, which might be the first part of a string whose + (former) last char we repeated. */ + + if (possessive_quantifier) + { + int len; + + /* Possessifying an EXACT quantifier has no effect, so we can ignore it. + However, QUERY, STAR, or UPTO may follow (for quantifiers such as {5,6}, + {5,}, or {5,10}). We skip over an EXACT item; if the length of what + remains is greater than zero, there's a further opcode that can be + handled. If not, do nothing, leaving the EXACT alone. */ + + switch(*tempcode) + { + case OP_TYPEEXACT: + tempcode += PRIV(OP_lengths)[*tempcode] + + ((tempcode[1 + IMM2_SIZE] == OP_PROP + || tempcode[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0); + break; + + /* CHAR opcodes are used for exacts whose count is 1. */ + + case OP_CHAR: + case OP_CHARI: + case OP_NOT: + case OP_NOTI: + case OP_EXACT: + case OP_EXACTI: + case OP_NOTEXACT: + case OP_NOTEXACTI: + tempcode += PRIV(OP_lengths)[*tempcode]; +#ifdef SUPPORT_UNICODE + if (utf && HAS_EXTRALEN(tempcode[-1])) + tempcode += GET_EXTRALEN(tempcode[-1]); +#endif + break; + + /* For the class opcodes, the repeat operator appears at the end; + adjust tempcode to point to it. */ + + case OP_CLASS: + case OP_NCLASS: + tempcode += 1 + 32/sizeof(PCRE2_UCHAR); + break; + +#ifdef SUPPORT_WIDE_CHARS + case OP_XCLASS: + tempcode += GET(tempcode, 1); + break; +#endif + } + + /* If tempcode is equal to code (which points to the end of the repeated + item), it means we have skipped an EXACT item but there is no following + QUERY, STAR, or UPTO; the value of len will be 0, and we do nothing. 
In + all other cases, tempcode will be pointing to the repeat opcode, and will + be less than code, so the value of len will be greater than 0. */ + + len = (int)(code - tempcode); + if (len > 0) + { + unsigned int repcode = *tempcode; + + /* There is a table for possessifying opcodes, all of which are less + than OP_CALLOUT. A zero entry means there is no possessified version. + */ + + if (repcode < OP_CALLOUT && opcode_possessify[repcode] > 0) + *tempcode = opcode_possessify[repcode]; + + /* For opcode without a special possessified version, wrap the item in + ONCE brackets. */ + + else + { + memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len)); + code += 1 + LINK_SIZE; + len += 1 + LINK_SIZE; + tempcode[0] = OP_ONCE; + *code++ = OP_KET; + PUTINC(code, 0, len); + PUT(tempcode, 1, len); + } + } + } + + /* We set the "follows varying string" flag for subsequently encountered + reqcus if it isn't already set and we have just passed a varying length + item. */ + + END_REPEAT: + cb->req_varyopt |= reqvary; + break; + + + /* ===================================================================*/ + /* Handle a 32-bit data character with a value greater than META_END. */ + + case META_BIGVALUE: + pptr++; + goto NORMAL_CHAR; + + + /* ===============================================================*/ + /* Handle a back reference by number, which is the meta argument. The + pattern offsets for back references to group numbers less than 10 are held + in a special vector, to avoid using more than two parsed pattern elements + in 64-bit environments. We only need the offset to the first occurrence, + because if that doesn't fail, subsequent ones will also be OK. 
*/ + + case META_BACKREF: + if (meta_arg < 10) offset = cb->small_ref_offset[meta_arg]; + else GETPLUSOFFSET(offset, pptr); + + if (meta_arg > cb->bracount) + { + cb->erroroffset = offset; + *errorcodeptr = ERR15; /* Non-existent subpattern */ + return 0; + } + + /* Come here from named backref handling when the reference is to a + single group (that is, not to a duplicated name). The back reference + data will have already been updated. We must disable firstcu if not + set, to cope with cases like (?=(\w+))\1: which would otherwise set ':' + later. */ + + HANDLE_SINGLE_REFERENCE: + if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; + *code++ = ((options & PCRE2_CASELESS) != 0)? OP_REFI : OP_REF; + PUT2INC(code, 0, meta_arg); + + /* Update the map of back references, and keep the highest one. We + could do this in parse_regex() for numerical back references, but not + for named back references, because we don't know the numbers to which + named back references refer. So we do it all in this function. */ + + cb->backref_map |= (meta_arg < 32)? (1u << meta_arg) : 1; + if (meta_arg > cb->top_backref) cb->top_backref = meta_arg; + + /* Check to see if this back reference is recursive, that it, it + is inside the group that it references. A flag is set so that the + group can be made atomic. */ + + for (oc = cb->open_caps; oc != NULL; oc = oc->next) + { + if (oc->number == meta_arg) + { + oc->flag = TRUE; + break; + } + } + break; + + + /* ===============================================================*/ + /* Handle recursion by inserting the number of the called group (which is + the meta argument) after OP_RECURSE. At the end of compiling the pattern is + scanned and these numbers are replaced by offsets within the pattern. It is + done like this to avoid problems with forward references and adjusting + offsets when groups are duplicated and moved (as discovered in previous + implementations). 
Note that a recursion does not have a set first character + (relevant if it is repeated, because it will then be wrapped with ONCE + brackets). */ + + case META_RECURSE: + GETPLUSOFFSET(offset, pptr); + if (meta_arg > cb->bracount) + { + cb->erroroffset = offset; + *errorcodeptr = ERR15; /* Non-existent subpattern */ + return 0; + } + HANDLE_NUMERICAL_RECURSION: + *code = OP_RECURSE; + PUT(code, 1, meta_arg); + code += 1 + LINK_SIZE; + groupsetfirstcu = FALSE; + cb->had_recurse = TRUE; + if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; + break; + + + /* ===============================================================*/ + /* Handle capturing parentheses; the number is the meta argument. */ + + case META_CAPTURE: + bravalue = OP_CBRA; + skipunits = IMM2_SIZE; + PUT2(code, 1+LINK_SIZE, meta_arg); + cb->lastcapture = meta_arg; + goto GROUP_PROCESS_NOTE_EMPTY; + + + /* ===============================================================*/ + /* Handle escape sequence items. For ones like \d, the ESC_values are + arranged to be the same as the corresponding OP_values in the default case + when PCRE2_UCP is not set (which is the only case in which they will appear + here). + + Note: \Q and \E are never seen here, as they were dealt with in + parse_pattern(). Neither are numerical back references or recursions, which + were turned into META_BACKREF or META_RECURSE items, respectively. \k and + \g, when followed by names, are turned into META_BACKREF_BYNAME or + META_RECURSE_BYNAME. */ + + case META_ESCAPE: + + /* We can test for escape sequences that consume a character because their + values lie between ESC_b and ESC_Z; this may have to change if any new ones + are ever created. For these sequences, we disable the setting of a first + character if it hasn't already been set. 
*/ + + if (meta_arg > ESC_b && meta_arg < ESC_Z) + { + matched_char = TRUE; + if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; + } + + /* Set values to reset to if this is followed by a zero repeat. */ + + zerofirstcu = firstcu; + zerofirstcuflags = firstcuflags; + zeroreqcu = reqcu; + zeroreqcuflags = reqcuflags; + + /* If Unicode is not supported, \P and \p are not allowed and are + faulted at parse time, so will never appear here. */ + +#ifdef SUPPORT_UNICODE + if (meta_arg == ESC_P || meta_arg == ESC_p) + { + uint32_t ptype = *(++pptr) >> 16; + uint32_t pdata = *pptr & 0xffff; + *code++ = (meta_arg == ESC_p)? OP_PROP : OP_NOTPROP; + *code++ = ptype; + *code++ = pdata; + break; /* End META_ESCAPE */ + } +#endif + + /* For the rest (including \X when Unicode is supported - if not it's + faulted at parse time), the OP value is the escape value when PCRE2_UCP is + not set; if it is set, these escapes do not show up here because they are + converted into Unicode property tests in parse_regex(). Note that \b and \B + do a one-character lookbehind, and \A also behaves as if it does. */ + + if (meta_arg == ESC_C) cb->external_flags |= PCRE2_HASBKC; /* Record */ + if ((meta_arg == ESC_b || meta_arg == ESC_B || meta_arg == ESC_A) && + cb->max_lookbehind == 0) + cb->max_lookbehind = 1; + + /* In non-UTF mode, and for both 32-bit modes, we turn \C into OP_ALLANY + instead of OP_ANYBYTE so that it works in DFA mode and in lookbehinds. */ + +#if PCRE2_CODE_UNIT_WIDTH == 32 + *code++ = (meta_arg == ESC_C)? OP_ALLANY : meta_arg; +#else + *code++ = (!utf && meta_arg == ESC_C)? OP_ALLANY : meta_arg; +#endif + break; /* End META_ESCAPE */ + + + /* ===================================================================*/ + /* Handle an unrecognized meta value. A parsed pattern value less than + META_END is a literal. Otherwise we have a problem. 
*/ + + default: + if (meta >= META_END) + { +#ifdef DEBUG_SHOW_PARSED + fprintf(stderr, "** Unrecognized parsed pattern item 0x%.8x\n", *pptr); +#endif + *errorcodeptr = ERR89; /* Internal error - unrecognized. */ + return 0; + } + + /* Handle a literal character. We come here by goto in the case of a + 32-bit, non-UTF character whose value is greater than META_END. */ + + NORMAL_CHAR: + meta = *pptr; /* Get the full 32 bits */ + NORMAL_CHAR_SET: /* Character is already in meta */ + matched_char = TRUE; + + /* For caseless UTF mode, check whether this character has more than one + other case. If so, generate a special OP_PROP item instead of OP_CHARI. */ + +#ifdef SUPPORT_UNICODE + if (utf && (options & PCRE2_CASELESS) != 0) + { + uint32_t caseset = UCD_CASESET(meta); + if (caseset != 0) + { + *code++ = OP_PROP; + *code++ = PT_CLIST; + *code++ = caseset; + if (firstcuflags == REQ_UNSET) + firstcuflags = zerofirstcuflags = REQ_NONE; + break; /* End handling this meta item */ + } + } +#endif + + /* Caseful matches, or not one of the multicase characters. Get the + character's code units into mcbuffer, with the length in mclength. When not + in UTF mode, the length is always 1. */ + +#ifdef SUPPORT_UNICODE + if (utf) mclength = PRIV(ord2utf)(meta, mcbuffer); else +#endif + { + mclength = 1; + mcbuffer[0] = meta; + } + + /* Generate the appropriate code */ + + *code++ = ((options & PCRE2_CASELESS) != 0)? OP_CHARI : OP_CHAR; + memcpy(code, mcbuffer, CU2BYTES(mclength)); + code += mclength; + + /* Remember if \r or \n were seen */ + + if (mcbuffer[0] == CHAR_CR || mcbuffer[0] == CHAR_NL) + cb->external_flags |= PCRE2_HASCRORLF; + + /* Set the first and required code units appropriately. If no previous + first code unit, set it from this character, but revert to none on a zero + repeat. Otherwise, leave the firstcu value alone, and don't change it on + a zero repeat. 
*/ + + if (firstcuflags == REQ_UNSET) + { + zerofirstcuflags = REQ_NONE; + zeroreqcu = reqcu; + zeroreqcuflags = reqcuflags; + + /* If the character is more than one code unit long, we can set firstcu + only if it is not to be matched caselessly. */ + + if (mclength == 1 || req_caseopt == 0) + { + firstcu = mcbuffer[0] | req_caseopt; + firstcu = mcbuffer[0]; + firstcuflags = req_caseopt; + if (mclength != 1) + { + reqcu = code[-1]; + reqcuflags = cb->req_varyopt; + } + } + else firstcuflags = reqcuflags = REQ_NONE; + } + + /* firstcu was previously set; we can set reqcu only if the length is + 1 or the matching is caseful. */ + + else + { + zerofirstcu = firstcu; + zerofirstcuflags = firstcuflags; + zeroreqcu = reqcu; + zeroreqcuflags = reqcuflags; + if (mclength == 1 || req_caseopt == 0) + { + reqcu = code[-1]; + reqcuflags = req_caseopt | cb->req_varyopt; + } + } + break; /* End default meta handling */ + } /* End of big switch */ + } /* End of big loop */ + +/* Control never reaches here. */ +} + + + +/************************************************* +* Compile regex: a sequence of alternatives * +*************************************************/ + +/* On entry, pptr is pointing past the bracket meta, but on return it points to +the closing bracket or META_END. The code variable is pointing at the code unit +into which the BRA operator has been stored. This function is used during the +pre-compile phase when we are trying to find out the amount of memory needed, +as well as during the real compile phase. The value of lengthptr distinguishes +the two phases. 
+ +Arguments: + options option bits, including any changes for this subpattern + codeptr -> the address of the current code pointer + pptrptr -> the address of the current parsed pattern pointer + errorcodeptr -> pointer to error code variable + skipunits skip this many code units at start (for brackets and OP_COND) + firstcuptr place to put the first required code unit + firstcuflagsptr place to put the first code unit flags, or a negative number + reqcuptr place to put the last required code unit + reqcuflagsptr place to put the last required code unit flags, or a negative number + bcptr pointer to the chain of currently open branches + cb points to the data block with tables pointers etc. + lengthptr NULL during the real compile phase + points to length accumulator during pre-compile phase + +Returns: 0 There has been an error + +1 Success, this group must match at least one character + -1 Success, this group may match an empty string +*/ + +static int +compile_regex(uint32_t options, PCRE2_UCHAR **codeptr, uint32_t **pptrptr, + int *errorcodeptr, uint32_t skipunits, uint32_t *firstcuptr, + int32_t *firstcuflagsptr, uint32_t *reqcuptr,int32_t *reqcuflagsptr, + branch_chain *bcptr, compile_block *cb, PCRE2_SIZE *lengthptr) +{ +PCRE2_UCHAR *code = *codeptr; +PCRE2_UCHAR *last_branch = code; +PCRE2_UCHAR *start_bracket = code; +BOOL lookbehind; +open_capitem capitem; +int capnumber = 0; +int okreturn = 1; +uint32_t *pptr = *pptrptr; +uint32_t firstcu, reqcu; +uint32_t lookbehindlength; +int32_t firstcuflags, reqcuflags; +uint32_t branchfirstcu, branchreqcu; +int32_t branchfirstcuflags, branchreqcuflags; +PCRE2_SIZE length; +branch_chain bc; + +/* If set, call the external function that checks for stack availability. 
*/ + +if (cb->cx->stack_guard != NULL && + cb->cx->stack_guard(cb->parens_depth, cb->cx->stack_guard_data)) + { + *errorcodeptr= ERR33; + return 0; } + +/* Miscellaneous initialization */ + +bc.outer = bcptr; +bc.current_branch = code; + +firstcu = reqcu = 0; +firstcuflags = reqcuflags = REQ_UNSET; + +/* Accumulate the length for use in the pre-compile phase. Start with the +length of the BRA and KET and any extra code units that are required at the +beginning. We accumulate in a local variable to save frequent testing of +lengthptr for NULL. We cannot do this by looking at the value of 'code' at the +start and end of each alternative, because compiled items are discarded during +the pre-compile phase so that the work space is not exceeded. */ + +length = 2 + 2*LINK_SIZE + skipunits; + +/* Remember if this is a lookbehind assertion, and if it is, save its length +and skip over the pattern offset. */ + +lookbehind = *code == OP_ASSERTBACK || *code == OP_ASSERTBACK_NOT; +if (lookbehind) + { + lookbehindlength = META_DATA(pptr[-1]); + pptr += SIZEOFFSET; + } +else lookbehindlength = 0; + +/* If this is a capturing subpattern, add to the chain of open capturing items +so that we can detect them if (*ACCEPT) is encountered. Note that only OP_CBRA +need be tested here; changing this opcode to one of its variants, e.g. +OP_SCBRAPOS, happens later, after the group has been compiled. */ + +if (*code == OP_CBRA) + { + capnumber = GET2(code, 1 + LINK_SIZE); + capitem.number = capnumber; + capitem.next = cb->open_caps; + capitem.flag = FALSE; + cb->open_caps = &capitem; + } + +/* Offset is set zero to mark that this bracket is still open */ + +PUT(code, 1, 0); +code += 1 + LINK_SIZE + skipunits; + +/* Loop for each alternative branch */ + +for (;;) + { + int branch_return; + + /* Insert OP_REVERSE if this is as lookbehind assertion. 
*/ + + if (lookbehind && lookbehindlength > 0) + { + *code++ = OP_REVERSE; + PUTINC(code, 0, lookbehindlength); + length += 1 + LINK_SIZE; + } + + /* Now compile the branch; in the pre-compile phase its length gets added + into the length. */ + + if ((branch_return = + compile_branch(&options, &code, &pptr, errorcodeptr, &branchfirstcu, + &branchfirstcuflags, &branchreqcu, &branchreqcuflags, &bc, + cb, (lengthptr == NULL)? NULL : &length)) == 0) + return 0; + + /* If a branch can match an empty string, so can the whole group. */ + + if (branch_return < 0) okreturn = -1; + + /* In the real compile phase, there is some post-processing to be done. */ + + if (lengthptr == NULL) + { + /* If this is the first branch, the firstcu and reqcu values for the + branch become the values for the regex. */ + + if (*last_branch != OP_ALT) + { + firstcu = branchfirstcu; + firstcuflags = branchfirstcuflags; + reqcu = branchreqcu; + reqcuflags = branchreqcuflags; + } + + /* If this is not the first branch, the first char and reqcu have to + match the values from all the previous branches, except that if the + previous value for reqcu didn't have REQ_VARY set, it can still match, + and we set REQ_VARY for the regex. */ + + else + { + /* If we previously had a firstcu, but it doesn't match the new branch, + we have to abandon the firstcu for the regex, but if there was + previously no reqcu, it takes on the value of the old firstcu. */ + + if (firstcuflags != branchfirstcuflags || firstcu != branchfirstcu) + { + if (firstcuflags >= 0) + { + if (reqcuflags < 0) + { + reqcu = firstcu; + reqcuflags = firstcuflags; + } + } + firstcuflags = REQ_NONE; + } + + /* If we (now or from before) have no firstcu, a firstcu from the + branch becomes a reqcu if there isn't a branch reqcu. 
*/ + + if (firstcuflags < 0 && branchfirstcuflags >= 0 && + branchreqcuflags < 0) + { + branchreqcu = branchfirstcu; + branchreqcuflags = branchfirstcuflags; + } + + /* Now ensure that the reqcus match */ + + if (((reqcuflags & ~REQ_VARY) != (branchreqcuflags & ~REQ_VARY)) || + reqcu != branchreqcu) + reqcuflags = REQ_NONE; + else + { + reqcu = branchreqcu; + reqcuflags |= branchreqcuflags; /* To "or" REQ_VARY */ + } + } + } + + /* Handle reaching the end of the expression, either ')' or end of pattern. + In the real compile phase, go back through the alternative branches and + reverse the chain of offsets, with the field in the BRA item now becoming an + offset to the first alternative. If there are no alternatives, it points to + the end of the group. The length in the terminating ket is always the length + of the whole bracketed item. Return leaving the pointer at the terminating + char. */ + + if (META_CODE(*pptr) != META_ALT) + { + if (lengthptr == NULL) + { + PCRE2_SIZE branch_length = code - last_branch; + do + { + PCRE2_SIZE prev_length = GET(last_branch, 1); + PUT(last_branch, 1, branch_length); + branch_length = prev_length; + last_branch -= branch_length; + } + while (branch_length > 0); + } + + /* Fill in the ket */ + + *code = OP_KET; + PUT(code, 1, (int)(code - start_bracket)); + code += 1 + LINK_SIZE; + + /* If it was a capturing subpattern, check to see if it contained any + recursive back references. If so, we must wrap it in atomic brackets. In + any event, remove the block from the chain. 
*/ + + if (capnumber > 0) + { + if (cb->open_caps->flag) + { + memmove(start_bracket + 1 + LINK_SIZE, start_bracket, + CU2BYTES(code - start_bracket)); + *start_bracket = OP_ONCE; + code += 1 + LINK_SIZE; + PUT(start_bracket, 1, (int)(code - start_bracket)); + *code = OP_KET; + PUT(code, 1, (int)(code - start_bracket)); + code += 1 + LINK_SIZE; + length += 2 + 2*LINK_SIZE; + } + cb->open_caps = cb->open_caps->next; + } + + /* Set values to pass back */ + + *codeptr = code; + *pptrptr = pptr; + *firstcuptr = firstcu; + *firstcuflagsptr = firstcuflags; + *reqcuptr = reqcu; + *reqcuflagsptr = reqcuflags; + if (lengthptr != NULL) + { + if (OFLOW_MAX - *lengthptr < length) + { + *errorcodeptr = ERR20; + return 0; + } + *lengthptr += length; + } + return okreturn; + } + + /* Another branch follows. In the pre-compile phase, we can move the code + pointer back to where it was for the start of the first branch. (That is, + pretend that each branch is the only one.) + + In the real compile phase, insert an ALT node. Its length field points back + to the previous branch while the bracket remains open. At the end the chain + is reversed. It's done like this so that the start of the bracket has a + zero offset until it is closed, making it possible to detect recursion. */ + + if (lengthptr != NULL) + { + code = *codeptr + 1 + LINK_SIZE + skipunits; + length += 1 + LINK_SIZE; + } + else + { + *code = OP_ALT; + PUT(code, 1, (int)(code - last_branch)); + bc.current_branch = last_branch = code; + code += 1 + LINK_SIZE; + } + + /* Set the lookbehind length (if not in a lookbehind the value will be zero) + and then advance past the vertical bar. */ + + lookbehindlength = META_DATA(*pptr); + pptr++; + } +/* Control never reaches here */ +} + + + +/************************************************* +* Check for anchored pattern * +*************************************************/ + +/* Try to find out if this is an anchored regular expression. Consider each +alternative branch. 
If they all start with OP_SOD or OP_CIRC, or with a bracket +all of whose alternatives start with OP_SOD or OP_CIRC (recurse ad lib), then +it's anchored. However, if this is a multiline pattern, then only OP_SOD will +be found, because ^ generates OP_CIRCM in that mode. + +We can also consider a regex to be anchored if OP_SOM starts all its branches. +This is the code for \G, which means "match at start of match position, taking +into account the match offset". + +A branch is also implicitly anchored if it starts with .* and DOTALL is set, +because that will try the rest of the pattern at all possible matching points, +so there is no point trying again.... er .... + +.... except when the .* appears inside capturing parentheses, and there is a +subsequent back reference to those parentheses. We haven't enough information +to catch that case precisely. + +At first, the best we could do was to detect when .* was in capturing brackets +and the highest back reference was greater than or equal to that level. +However, by keeping a bitmap of the first 31 back references, we can catch some +of the more common cases more precisely. + +... A second exception is when the .* appears inside an atomic group, because +this prevents the number of characters it matches from being adjusted. 
+ +Arguments: + code points to start of the compiled pattern + bracket_map a bitmap of which brackets we are inside while testing; this + handles up to substring 31; after that we just have to take + the less precise approach + cb points to the compile data block + atomcount atomic group level + inassert TRUE if in an assertion + +Returns: TRUE or FALSE +*/ + +static BOOL +is_anchored(PCRE2_SPTR code, unsigned int bracket_map, compile_block *cb, + int atomcount, BOOL inassert) +{ +do { + PCRE2_SPTR scode = first_significant_code( + code + PRIV(OP_lengths)[*code], FALSE); + int op = *scode; + + /* Non-capturing brackets */ + + if (op == OP_BRA || op == OP_BRAPOS || + op == OP_SBRA || op == OP_SBRAPOS) + { + if (!is_anchored(scode, bracket_map, cb, atomcount, inassert)) + return FALSE; + } + + /* Capturing brackets */ + + else if (op == OP_CBRA || op == OP_CBRAPOS || + op == OP_SCBRA || op == OP_SCBRAPOS) + { + int n = GET2(scode, 1+LINK_SIZE); + int new_map = bracket_map | ((n < 32)? (1u << n) : 1); + if (!is_anchored(scode, new_map, cb, atomcount, inassert)) return FALSE; + } + + /* Positive forward assertion */ + + else if (op == OP_ASSERT) + { + if (!is_anchored(scode, bracket_map, cb, atomcount, TRUE)) return FALSE; + } + + /* Condition */ + + else if (op == OP_COND) + { + if (!is_anchored(scode, bracket_map, cb, atomcount, inassert)) + return FALSE; + } + + /* Atomic groups */ + + else if (op == OP_ONCE || op == OP_ONCE_NC) + { + if (!is_anchored(scode, bracket_map, cb, atomcount + 1, inassert)) + return FALSE; + } + + /* .* is not anchored unless DOTALL is set (which generates OP_ALLANY) and + it isn't in brackets that are or may be referenced or inside an atomic + group or an assertion. Also the pattern must not contain *PRUNE or *SKIP, + because these break the feature. Consider, for example, /(?s).*?(*PRUNE)b/ + with the subject "aab", which matches "b", i.e. not at the start of a line. + There is also an option that disables auto-anchoring. 
*/ + + else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR || + op == OP_TYPEPOSSTAR)) + { + if (scode[1] != OP_ALLANY || (bracket_map & cb->backref_map) != 0 || + atomcount > 0 || cb->had_pruneorskip || inassert || + (cb->external_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0) + return FALSE; + } + + /* Check for explicit anchoring */ + + else if (op != OP_SOD && op != OP_SOM && op != OP_CIRC) return FALSE; + + code += GET(code, 1); + } +while (*code == OP_ALT); /* Loop for each alternative */ +return TRUE; +} + + + +/************************************************* +* Check for starting with ^ or .* * +*************************************************/ + +/* This is called to find out if every branch starts with ^ or .* so that +"first char" processing can be done to speed things up in multiline +matching and for non-DOTALL patterns that start with .* (which must start at +the beginning or after \n). As in the case of is_anchored() (see above), we +have to take account of back references to capturing brackets that contain .* +because in that case we can't make the assumption. Also, the appearance of .* +inside atomic brackets or in an assertion, or in a pattern that contains *PRUNE +or *SKIP does not count, because once again the assumption no longer holds. 
+ +Arguments: + code points to start of the compiled pattern or a group + bracket_map a bitmap of which brackets we are inside while testing; this + handles up to substring 31; after that we just have to take + the less precise approach + cb points to the compile data + atomcount atomic group level + inassert TRUE if in an assertion + +Returns: TRUE or FALSE +*/ + +static BOOL +is_startline(PCRE2_SPTR code, unsigned int bracket_map, compile_block *cb, + int atomcount, BOOL inassert) +{ +do { + PCRE2_SPTR scode = first_significant_code( + code + PRIV(OP_lengths)[*code], FALSE); + int op = *scode; + + /* If we are at the start of a conditional assertion group, *both* the + conditional assertion *and* what follows the condition must satisfy the test + for start of line. Other kinds of condition fail. Note that there may be an + auto-callout at the start of a condition. */ + + if (op == OP_COND) + { + scode += 1 + LINK_SIZE; + + if (*scode == OP_CALLOUT) scode += PRIV(OP_lengths)[OP_CALLOUT]; + else if (*scode == OP_CALLOUT_STR) scode += GET(scode, 1 + 2*LINK_SIZE); + + switch (*scode) + { + case OP_CREF: + case OP_DNCREF: + case OP_RREF: + case OP_DNRREF: + case OP_FAIL: + case OP_FALSE: + case OP_TRUE: + return FALSE; + + default: /* Assertion */ + if (!is_startline(scode, bracket_map, cb, atomcount, TRUE)) return FALSE; + do scode += GET(scode, 1); while (*scode == OP_ALT); + scode += 1 + LINK_SIZE; + break; + } + scode = first_significant_code(scode, FALSE); + op = *scode; + } + + /* Non-capturing brackets */ + + if (op == OP_BRA || op == OP_BRAPOS || + op == OP_SBRA || op == OP_SBRAPOS) + { + if (!is_startline(scode, bracket_map, cb, atomcount, inassert)) + return FALSE; + } + + /* Capturing brackets */ + + else if (op == OP_CBRA || op == OP_CBRAPOS || + op == OP_SCBRA || op == OP_SCBRAPOS) + { + int n = GET2(scode, 1+LINK_SIZE); + int new_map = bracket_map | ((n < 32)? 
(1u << n) : 1); + if (!is_startline(scode, new_map, cb, atomcount, inassert)) return FALSE; + } + + /* Positive forward assertions */ + + else if (op == OP_ASSERT) + { + if (!is_startline(scode, bracket_map, cb, atomcount, TRUE)) + return FALSE; + } + + /* Atomic brackets */ + + else if (op == OP_ONCE || op == OP_ONCE_NC) + { + if (!is_startline(scode, bracket_map, cb, atomcount + 1, inassert)) + return FALSE; + } + + /* .* means "start at start or after \n" if it isn't in atomic brackets or + brackets that may be referenced or an assertion, and as long as the pattern + does not contain *PRUNE or *SKIP, because these break the feature. Consider, + for example, /.*?a(*PRUNE)b/ with the subject "aab", which matches "ab", + i.e. not at the start of a line. There is also an option that disables this + optimization. */ + + else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR || op == OP_TYPEPOSSTAR) + { + if (scode[1] != OP_ANY || (bracket_map & cb->backref_map) != 0 || + atomcount > 0 || cb->had_pruneorskip || inassert || + (cb->external_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0) + return FALSE; + } + + /* Check for explicit circumflex; anything else gives a FALSE result. Note + in particular that this includes atomic brackets OP_ONCE and OP_ONCE_NC + because the number of characters matched by .* cannot be adjusted inside + them. 
*/ + + else if (op != OP_CIRC && op != OP_CIRCM) return FALSE; + + /* Move on to the next alternative */ + + code += GET(code, 1); + } +while (*code == OP_ALT); /* Loop for each alternative */ +return TRUE; } @@ -2521,7 +7755,7 @@ find_recurse(PCRE2_SPTR code, BOOL utf) { for (;;) { - register PCRE2_UCHAR c = *code; + PCRE2_UCHAR c = *code; if (c == OP_END) return NULL; if (c == OP_RECURSE) return code; @@ -2535,8 +7769,8 @@ for (;;) /* Otherwise, we can get the item's length from the table, except that for repeated character types, we have to test for \p and \P, which have an extra - two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we - must add in its length. */ + two code units of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, + we must add in its length. */ else { @@ -2649,5243 +7883,6 @@ for (;;) -/************************************************* -* Adjust OP_RECURSE items in repeated group * -*************************************************/ - -/* OP_RECURSE items contain an offset from the start of the regex to the group -that is referenced. This means that groups can be replicated for fixed -repetition simply by copying (because the recursion is allowed to refer to -earlier groups that are outside the current group). However, when a group is -optional (i.e. the minimum quantifier is zero), OP_BRAZERO or OP_SKIPZERO is -inserted before it, after it has been compiled. This means that any OP_RECURSE -items within it that refer to the group itself or any contained groups have to -have their offsets adjusted. That is one of the jobs of this function. Before -it is called, the partially compiled regex must be temporarily terminated with -OP_END. - -This function has been extended to cope with forward references for recursions -and subroutine calls. It must check the list of such references for the -group we are dealing with. 
If it finds that one of the recursions in the -current group is on this list, it does not adjust the value in the reference -(which is a group number). After the group has been scanned, all the offsets in -the forward reference list for the group are adjusted. - -Arguments: - group points to the start of the group - adjust the amount by which the group is to be moved - utf TRUE in UTF mode - cb compile data - save_hwm_offset the hwm forward reference offset at the start of the group - -Returns: nothing -*/ - -static void -adjust_recurse(PCRE2_UCHAR *group, int adjust, BOOL utf, compile_block *cb, - size_t save_hwm_offset) -{ -uint32_t offset; -PCRE2_UCHAR *hc; -PCRE2_UCHAR *ptr = group; - -/* Scan the group for recursions. For each one found, check the forward -reference list. */ - -while ((ptr = (PCRE2_UCHAR *)find_recurse(ptr, utf)) != NULL) - { - for (hc = (PCRE2_UCHAR *)cb->start_workspace + save_hwm_offset; hc < cb->hwm; - hc += LINK_SIZE) - { - offset = (int)GET(hc, 0); - if (cb->start_code + offset == ptr + 1) break; - } - - /* If we have not found this recursion on the forward reference list, adjust - the recursion's offset if it's after the start of this group. */ - - if (hc >= cb->hwm) - { - offset = (int)GET(ptr, 1); - if (cb->start_code + offset >= group) PUT(ptr, 1, offset + adjust); - } - - ptr += 1 + LINK_SIZE; - } - -/* Now adjust all forward reference offsets for the group. */ - -for (hc = (PCRE2_UCHAR *)cb->start_workspace + save_hwm_offset; hc < cb->hwm; - hc += LINK_SIZE) - { - offset = (int)GET(hc, 0); - PUT(hc, 0, offset + adjust); - } -} - - - -/************************************************* -* Check for POSIX class syntax * -*************************************************/ - -/* This function is called when the sequence "[:" or "[." or "[=" is -encountered in a character class. It checks whether this is followed by a -sequence of characters terminated by a matching ":]" or ".]" or "=]". 
If we -reach an unescaped ']' without the special preceding character, return FALSE. - -Originally, this function only recognized a sequence of letters between the -terminators, but it seems that Perl recognizes any sequence of characters, -though of course unknown POSIX names are subsequently rejected. Perl gives an -"Unknown POSIX class" error for [:f\oo:] for example, where previously PCRE -didn't consider this to be a POSIX class. Likewise for [:1234:]. - -The problem in trying to be exactly like Perl is in the handling of escapes. We -have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX -class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code -below handles the special case of \], but does not try to do any other escape -processing. This makes it different from Perl for cases such as [:l\ower:] -where Perl recognizes it as the POSIX class "lower" but PCRE does not recognize -"l\ower". This is a lesser evil than not diagnosing bad classes when Perl does, -I think. - -A user pointed out that PCRE was rejecting [:a[:digit:]] whereas Perl was not. -It seems that the appearance of a nested POSIX class supersedes an apparent -external class. For example, [:a[:digit:]b:] matches "a", "b", ":", or -a digit. - -In Perl, unescaped square brackets may also appear as part of class names. For -example, [:a[:abc]b:] gives unknown POSIX class "[:abc]b:]". However, for -[:a[:abc]b][b:] it gives unknown POSIX class "[:abc]b][b:]", which does not -seem right at all. PCRE does not allow closing square brackets in POSIX class -names. - -Arguments: - ptr pointer to the initial [ - endptr where to return a pointer to the terminating ':', '.', or '=' - -Returns: TRUE or FALSE -*/ - -static BOOL -check_posix_syntax(PCRE2_SPTR ptr, PCRE2_SPTR *endptr) -{ -PCRE2_UCHAR terminator; /* Don't combine these lines; the Solaris cc */ -terminator = *(++ptr); /* compiler warns about "non-constant" initializer. 
*/ - -for (++ptr; *ptr != CHAR_NULL; ptr++) - { - if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) ptr++; - else if (*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE; - else - { - if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) - { - *endptr = ptr; - return TRUE; - } - if (*ptr == CHAR_LEFT_SQUARE_BRACKET && - (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT || - ptr[1] == CHAR_EQUALS_SIGN) && - check_posix_syntax(ptr, endptr)) - return FALSE; - } - } -return FALSE; -} - - - -/************************************************* -* Check POSIX class name * -*************************************************/ - -/* This function is called to check the name given in a POSIX-style class entry -such as [:alnum:]. - -Arguments: - ptr points to the first letter - len the length of the name - -Returns: a value representing the name, or -1 if unknown -*/ - -static int -check_posix_name(PCRE2_SPTR ptr, int len) -{ -const char *pn = posix_names; -register int yield = 0; -while (posix_name_lengths[yield] != 0) - { - if (len == posix_name_lengths[yield] && - PRIV(strncmp_c8)(ptr, pn, (unsigned int)len) == 0) return yield; - pn += posix_name_lengths[yield] + 1; - yield++; - } -return -1; -} - - - -#ifdef SUPPORT_UNICODE -/************************************************* -* Get othercase range * -*************************************************/ - -/* This function is passed the start and end of a class range in UCT mode. It -searches up the characters, looking for ranges of characters in the "other" -case. Each call returns the next one, updating the start address. A character -with multiple other cases is returned on its own with a special return value. 
- -Arguments: - cptr points to starting character value; updated - d end value - ocptr where to put start of othercase range - odptr where to put end of othercase range - -Yield: -1 when no more - 0 when a range is returned - >0 the CASESET offset for char with multiple other cases - in this case, ocptr contains the original -*/ - -static int -get_othercase_range(uint32_t *cptr, uint32_t d, uint32_t *ocptr, - uint32_t *odptr) -{ -uint32_t c, othercase, next; -unsigned int co; - -/* Find the first character that has an other case. If it has multiple other -cases, return its case offset value. */ - -for (c = *cptr; c <= d; c++) - { - if ((co = UCD_CASESET(c)) != 0) - { - *ocptr = c++; /* Character that has the set */ - *cptr = c; /* Rest of input range */ - return (int)co; - } - if ((othercase = UCD_OTHERCASE(c)) != c) break; - } - -if (c > d) return -1; /* Reached end of range */ - -/* Found a character that has a single other case. Search for the end of the -range, which is either the end of the input range, or a character that has zero -or more than one other cases. */ - -*ocptr = othercase; -next = othercase + 1; - -for (++c; c <= d; c++) - { - if ((co = UCD_CASESET(c)) != 0 || UCD_OTHERCASE(c) != next) break; - next++; - } - -*odptr = next - 1; /* End of othercase range */ -*cptr = c; /* Rest of input range */ -return 0; -} -#endif /* SUPPORT_UNICODE */ - - - -/************************************************* -* Add a character or range to a class * -*************************************************/ - -/* This function packages up the logic of adding a character or range of -characters to a class. The character values in the arguments will be within the -valid values for the current mode (8-bit, 16-bit, UTF, etc). This function is -mutually recursive with the function immediately below. 
- -Arguments: - classbits the bit map for characters < 256 - uchardptr points to the pointer for extra data - options the options word - cb compile data - start start of range character - end end of range character - -Returns: the number of < 256 characters added - the pointer to extra data is updated -*/ - -static int -add_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options, - compile_block *cb, uint32_t start, uint32_t end) -{ -uint32_t c; -uint32_t classbits_end = (end <= 0xff ? end : 0xff); -int n8 = 0; - -/* If caseless matching is required, scan the range and process alternate -cases. In Unicode, there are 8-bit characters that have alternate cases that -are greater than 255 and vice-versa. Sometimes we can just extend the original -range. */ - -if ((options & PCRE2_CASELESS) != 0) - { -#ifdef SUPPORT_UNICODE - if ((options & PCRE2_UTF) != 0) - { - int rc; - uint32_t oc, od; - - options &= ~PCRE2_CASELESS; /* Remove for recursive calls */ - c = start; - - while ((rc = get_othercase_range(&c, end, &oc, &od)) >= 0) - { - /* Handle a single character that has more than one other case. */ - - if (rc > 0) n8 += add_list_to_class(classbits, uchardptr, options, cb, - PRIV(ucd_caseless_sets) + rc, oc); - - /* Do nothing if the other case range is within the original range. */ - - else if (oc >= start && od <= end) continue; - - /* Extend the original range if there is overlap, noting that if oc < c, we - can't have od > end because a subrange is always shorter than the basic - range. Otherwise, use a recursive call to add the additional range. */ - - else if (oc < start && od >= start - 1) start = oc; /* Extend downwards */ - else if (od > end && oc <= end + 1) - { - end = od; /* Extend upwards */ - if (end > classbits_end) classbits_end = (end <= 0xff ? 
end : 0xff); - } - else n8 += add_to_class(classbits, uchardptr, options, cb, oc, od); - } - } - else -#endif /* SUPPORT_UNICODE */ - - /* Not UTF mode */ - - for (c = start; c <= classbits_end; c++) - { - SETBIT(classbits, cb->fcc[c]); - n8++; - } - } - -/* Now handle the original range. Adjust the final value according to the bit -length - this means that the same lists of (e.g.) horizontal spaces can be used -in all cases. */ - -if ((options & PCRE2_UTF) == 0 && end > MAX_NON_UTF_CHAR) - end = MAX_NON_UTF_CHAR; - -/* Use the bitmap for characters < 256. Otherwise use extra data.*/ - -for (c = start; c <= classbits_end; c++) - { - /* Regardless of start, c will always be <= 255. */ - SETBIT(classbits, c); - n8++; - } - -#ifdef SUPPORT_WIDE_CHARS -if (start <= 0xff) start = 0xff + 1; - -if (end >= start) - { - PCRE2_UCHAR *uchardata = *uchardptr; - -#ifdef SUPPORT_UNICODE - if ((options & PCRE2_UTF) != 0) - { - if (start < end) - { - *uchardata++ = XCL_RANGE; - uchardata += PRIV(ord2utf)(start, uchardata); - uchardata += PRIV(ord2utf)(end, uchardata); - } - else if (start == end) - { - *uchardata++ = XCL_SINGLE; - uchardata += PRIV(ord2utf)(start, uchardata); - } - } - else -#endif /* SUPPORT_UNICODE */ - - /* Without UTF support, character values are constrained by the bit length, - and can only be > 256 for 16-bit and 32-bit libraries. 
*/ - -#if PCRE2_CODE_UNIT_WIDTH == 8 - {} -#else - if (start < end) - { - *uchardata++ = XCL_RANGE; - *uchardata++ = start; - *uchardata++ = end; - } - else if (start == end) - { - *uchardata++ = XCL_SINGLE; - *uchardata++ = start; - } -#endif - *uchardptr = uchardata; /* Updata extra data pointer */ - } -#else - (void)uchardptr; /* Avoid compiler warning */ -#endif /* SUPPORT_WIDE_CHARS */ - -return n8; /* Number of 8-bit characters */ -} - - - -/************************************************* -* Add a list of characters to a class * -*************************************************/ - -/* This function is used for adding a list of case-equivalent characters to a -class, and also for adding a list of horizontal or vertical whitespace. If the -list is in order (which it should be), ranges of characters are detected and -handled appropriately. This function is mutually recursive with the function -above. - -Arguments: - classbits the bit map for characters < 256 - uchardptr points to the pointer for extra data - options the options word - cb contains pointers to tables etc. 
- p points to row of 32-bit values, terminated by NOTACHAR - except character to omit; this is used when adding lists of - case-equivalent characters to avoid including the one we - already know about - -Returns: the number of < 256 characters added - the pointer to extra data is updated -*/ - -static int -add_list_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options, - compile_block *cb, const uint32_t *p, unsigned int except) -{ -int n8 = 0; -while (p[0] < NOTACHAR) - { - int n = 0; - if (p[0] != except) - { - while(p[n+1] == p[0] + n + 1) n++; - n8 += add_to_class(classbits, uchardptr, options, cb, p[0], p[n]); - } - p += n + 1; - } -return n8; -} - - - -/************************************************* -* Add characters not in a list to a class * -*************************************************/ - -/* This function is used for adding the complement of a list of horizontal or -vertical whitespace to a class. The list must be in order. - -Arguments: - classbits the bit map for characters < 256 - uchardptr points to the pointer for extra data - options the options word - cb contains pointers to tables etc. - p points to row of 32-bit values, terminated by NOTACHAR - -Returns: the number of < 256 characters added - the pointer to extra data is updated -*/ - -static int -add_not_list_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, - uint32_t options, compile_block *cb, const uint32_t *p) -{ -BOOL utf = (options & PCRE2_UTF) != 0; -int n8 = 0; -if (p[0] > 0) - n8 += add_to_class(classbits, uchardptr, options, cb, 0, p[0] - 1); -while (p[0] < NOTACHAR) - { - while (p[1] == p[0] + 1) p++; - n8 += add_to_class(classbits, uchardptr, options, cb, p[0] + 1, - (p[1] == NOTACHAR) ? (utf ? 
0x10ffffu : 0xffffffffu) : p[1] - 1); - p++; - } -return n8; -} - - - -/************************************************* -* Scan regex to identify named groups * -*************************************************/ - -/* This function is called first of all, to scan for named capturing groups so -that information about them is fully available to both the compiling scans. -It skips over everything except parenthesized items. - -Arguments: - ptrptr points to pointer to the start of the pattern - options compiling dynamic options - cb pointer to the compile data block - -Returns: zero on success or a non-zero error code, with pointer updated -*/ - -typedef struct nest_save { - uint16_t nest_depth; - uint16_t reset_group; - uint16_t max_group; - uint16_t flags; -} nest_save; - -#define NSF_RESET 0x0001u -#define NSF_EXTENDED 0x0002u -#define NSF_DUPNAMES 0x0004u - -static uint32_t scan_for_captures(PCRE2_SPTR *ptrptr, uint32_t options, - compile_block *cb) -{ -uint32_t c; -uint32_t nest_depth = 0; -uint32_t set, unset, *optset; -int errorcode = 0; -int escape; -int namelen; -int i; -BOOL inescq = FALSE; -BOOL isdupname; -BOOL utf = (options & PCRE2_UTF) != 0; -BOOL negate_class; -PCRE2_SPTR name; -PCRE2_SPTR ptr = *ptrptr; -named_group *ng; -nest_save *top_nest = NULL; -nest_save *end_nests = (nest_save *)(cb->start_workspace + cb->workspace_size); - -for (; ptr < cb->end_pattern; ptr++) - { - c = *ptr; - - /* Skip over literals */ - - if (inescq) - { - if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E) - { - inescq = FALSE; - ptr++; - } - continue; - } - - /* Skip over comments and whitespace in extended mode. Need a loop to handle - whitespace after a comment. */ - - if ((options & PCRE2_EXTENDED) != 0) - { - for (;;) - { - while (MAX_255(c) && (cb->ctypes[c] & ctype_space) != 0) c = *(++ptr); - if (c != CHAR_NUMBER_SIGN) break; - ptr++; - while (*ptr != CHAR_NULL) - { - if (IS_NEWLINE(ptr)) /* For non-fixed-length newline cases, */ - { /* IS_NEWLINE sets cb->nllen. 
*/ - ptr += cb->nllen; - break; - } - ptr++; -#ifdef SUPPORT_UNICODE - if (utf) FORWARDCHAR(ptr); -#endif - } - c = *ptr; /* Either NULL or the char after a newline */ - } - } - - /* Process the next pattern item. */ - - switch(c) - { - default: /* Most characters are just skipped */ - break; - - /* Skip escapes except for \Q */ - - case CHAR_BACKSLASH: - errorcode = 0; - escape = check_escape(&ptr, &c, &errorcode, options, FALSE, cb); - if (errorcode != 0) goto FAILED; - if (escape == ESC_Q) inescq = TRUE; - break; - - /* Skip a character class. The syntax is complicated so we have to - replicate some of what happens when a class is processed for real. */ - - case CHAR_LEFT_SQUARE_BRACKET: - if (PRIV(strncmp_c8)(ptr+1, STRING_WEIRD_STARTWORD, 6) == 0 || - PRIV(strncmp_c8)(ptr+1, STRING_WEIRD_ENDWORD, 6) == 0) - { - ptr += 7; - break; - } - - /* If the first character is '^', set the negation flag (not actually used - here, except to recognize only one ^) and skip it. If the first few - characters (either before or after ^) are \Q\E or \E we skip them too. This - makes for compatibility with Perl. 
*/ - - negate_class = FALSE; - for (;;) - { - c = *(++ptr); /* First character in class */ - if (c == CHAR_BACKSLASH) - { - if (ptr[1] == CHAR_E) - ptr++; - else if (PRIV(strncmp_c8)(ptr + 1, STR_Q STR_BACKSLASH STR_E, 3) == 0) - ptr += 3; - else - break; - } - else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT) - negate_class = TRUE; - else break; - } - - if (c == CHAR_RIGHT_SQUARE_BRACKET && - (cb->external_options & PCRE2_ALLOW_EMPTY_CLASS) != 0) - break; - - /* Loop for the contents of the class */ - - for (;;) - { - if (c == CHAR_NULL && ptr >= cb->end_pattern) - { - errorcode = ERR6; /* Missing terminating ']' */ - goto FAILED; - } - -#ifdef SUPPORT_UNICODE - if (utf && HAS_EXTRALEN(c)) - { /* Braces are required because the */ - GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */ - } -#endif - - /* Inside \Q...\E everything is literal except \E */ - - if (inescq) - { - if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E) /* If we are at \E */ - { - inescq = FALSE; /* Reset literal state */ - ptr++; /* Skip the 'E' */ - } - goto CONTINUE_CLASS; - } - - /* Skip POSIX class names. */ - - if (c == CHAR_LEFT_SQUARE_BRACKET && - (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT || - ptr[1] == CHAR_EQUALS_SIGN) && check_posix_syntax(ptr, &ptr)) - ptr ++; - - else if (c == CHAR_BACKSLASH) - { - errorcode = 0; - escape = check_escape(&ptr, &c, &errorcode, options, TRUE, cb); - if (errorcode != 0) goto FAILED; - if (escape == ESC_Q) inescq = TRUE; - } - - CONTINUE_CLASS: - c = *(++ptr); - if (c == CHAR_RIGHT_SQUARE_BRACKET && !inescq) break; - } /* End of class-processing loop */ - break; - - /* This is the real work of this function - handling parentheses. 
*/ - - case CHAR_LEFT_PARENTHESIS: - nest_depth++; - - if (ptr[1] != CHAR_QUESTION_MARK) - { - if (ptr[1] != CHAR_ASTERISK && - (options & PCRE2_NO_AUTO_CAPTURE) == 0) - cb->bracount++; /* Capturing group */ - else /* (*something) - just skip to closing ket */ - { - ptr += 2; - while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; - } - } - - /* Handle (?...) groups */ - - else switch(ptr[2]) - { - default: - ptr += 2; - if (ptr[0] == CHAR_R || /* (?R) */ - ptr[0] == CHAR_C || /* (?C) */ - IS_DIGIT(ptr[0]) || /* (?n) */ - (ptr[0] == CHAR_MINUS && IS_DIGIT(ptr[1]))) break; /* (?-n) */ - - /* Handle (?| and (?imsxJU: which are the only other valid forms. Both - need a new block on the nest stack. */ - - if (top_nest == NULL) top_nest = (nest_save *)(cb->start_workspace); - else if (++top_nest >= end_nests) - { - errorcode = ERR84; - goto FAILED; - } - top_nest->nest_depth = nest_depth; - top_nest->flags = 0; - if ((options & PCRE2_EXTENDED) != 0) top_nest->flags |= NSF_EXTENDED; - if ((options & PCRE2_DUPNAMES) != 0) top_nest->flags |= NSF_DUPNAMES; - - if (*ptr == CHAR_VERTICAL_LINE) - { - top_nest->reset_group = cb->bracount; - top_nest->max_group = cb->bracount; - top_nest->flags |= NSF_RESET; - break; - } - - /* Scan options */ - - top_nest->reset_group = 0; - top_nest->max_group = 0; - - set = unset = 0; - optset = &set; - - /* Need only track (?x: and (?J: at this stage */ - - while (*ptr != CHAR_RIGHT_PARENTHESIS && *ptr != CHAR_COLON) - { - switch (*ptr++) - { - case CHAR_MINUS: optset = &unset; break; - - case CHAR_x: *optset |= PCRE2_EXTENDED; break; - - case CHAR_J: - *optset |= PCRE2_DUPNAMES; - cb->external_flags |= PCRE2_JCHANGED; - break; - - case CHAR_i: - case CHAR_m: - case CHAR_s: - case CHAR_U: - break; - - default: errorcode = ERR11; - ptr--; /* Correct the offset */ - goto FAILED; - } - } - - options = (options | set) & (~unset); - - /* If the options ended with ')' this is not the start of a nested - group with option changes, 
so the options change at this level. If the - previous level set up a nest block, discard the one we have just created. - Otherwise adjust it for the previous level. */ - - if (*ptr == CHAR_RIGHT_PARENTHESIS) - { - nest_depth--; - if (top_nest > (nest_save *)(cb->start_workspace) && - (top_nest-1)->nest_depth == nest_depth) top_nest --; - else top_nest->nest_depth = nest_depth; - } - break; - - case CHAR_NUMBER_SIGN: - ptr += 3; - while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; - if (*ptr != CHAR_RIGHT_PARENTHESIS) - { - errorcode = ERR18; - goto FAILED; - } - break; - - case CHAR_LEFT_PARENTHESIS: - nest_depth++; - /* Fall through */ - - case CHAR_COLON: - case CHAR_GREATER_THAN_SIGN: - case CHAR_EQUALS_SIGN: - case CHAR_EXCLAMATION_MARK: - case CHAR_AMPERSAND: - case CHAR_PLUS: - ptr += 2; - break; - - case CHAR_P: - if (ptr[3] != CHAR_LESS_THAN_SIGN) - { - ptr += 3; - break; - } - ptr++; - c = CHAR_GREATER_THAN_SIGN; /* Terminator */ - goto DEFINE_NAME; - - case CHAR_LESS_THAN_SIGN: - if (ptr[3] == CHAR_EQUALS_SIGN || ptr[3] == CHAR_EXCLAMATION_MARK) - { - ptr += 3; - break; - } - c = CHAR_GREATER_THAN_SIGN; /* Terminator */ - goto DEFINE_NAME; - - case CHAR_APOSTROPHE: - c = CHAR_APOSTROPHE; /* Terminator */ - - DEFINE_NAME: - name = ptr = ptr + 3; - - if (*ptr == c) /* Empty name */ - { - errorcode = ERR62; - goto FAILED; - } - - if (IS_DIGIT(*ptr)) - { - errorcode = ERR44; /* Group name must start with non-digit */ - goto FAILED; - } - - if (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_word) == 0) - { - errorcode = ERR24; - goto FAILED; - } - - while (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_word) != 0) ptr++; - namelen = (int)(ptr - name); - - if (*ptr != c) - { - errorcode = ERR42; - goto FAILED; - } - - if (cb->names_found >= MAX_NAME_COUNT) - { - errorcode = ERR49; - goto FAILED; - } - - if (namelen + IMM2_SIZE + 1 > cb->name_entry_size) - { - cb->name_entry_size = namelen + IMM2_SIZE + 1; - if (namelen > MAX_NAME_SIZE) - { - 
errorcode = ERR48; - goto FAILED; - } - } - - /* We have a valid name for this capturing group. */ - - cb->bracount++; - - /* Scan the list to check for duplicates. For duplicate names, if the - number is the same, break the loop, which causes the name to be - discarded; otherwise, if DUPNAMES is not set, give an error. - If it is set, allow the name with a different number, but continue - scanning in case this is a duplicate with the same number. For - non-duplicate names, give an error if the number is duplicated. */ - - isdupname = FALSE; - ng = cb->named_groups; - for (i = 0; i < cb->names_found; i++, ng++) - { - if (namelen == ng->length && - PRIV(strncmp)(name, ng->name, namelen) == 0) - { - if (ng->number == cb->bracount) break; - if ((options & PCRE2_DUPNAMES) == 0) - { - errorcode = ERR43; - goto FAILED; - } - isdupname = ng->isdup = TRUE; /* Mark as a duplicate */ - cb->dupnames = TRUE; /* Duplicate names exist */ - } - else if (ng->number == cb->bracount) - { - errorcode = ERR65; - goto FAILED; - } - } - - if (i < cb->names_found) break; /* Ignore duplicate with same number */ - - /* Increase the list size if necessary */ - - if (cb->names_found >= cb->named_group_list_size) - { - int newsize = cb->named_group_list_size * 2; - named_group *newspace = - cb->cx->memctl.malloc(newsize * sizeof(named_group), - cb->cx->memctl.memory_data); - if (newspace == NULL) - { - errorcode = ERR21; - goto FAILED; - } - - memcpy(newspace, cb->named_groups, - cb->named_group_list_size * sizeof(named_group)); - if (cb->named_group_list_size > NAMED_GROUP_LIST_SIZE) - cb->cx->memctl.free((void *)cb->named_groups, - cb->cx->memctl.memory_data); - cb->named_groups = newspace; - cb->named_group_list_size = newsize; - } - - /* Add this name to the list */ - - cb->named_groups[cb->names_found].name = name; - cb->named_groups[cb->names_found].length = namelen; - cb->named_groups[cb->names_found].number = cb->bracount; - cb->named_groups[cb->names_found].isdup = isdupname; - 
cb->names_found++; - break; - } /* End of (? switch */ - break; /* End of ( handling */ - - /* At an alternation, reset the capture count if we are in a (?| group. */ - - case CHAR_VERTICAL_LINE: - if (top_nest != NULL && top_nest->nest_depth == nest_depth && - (top_nest->flags & NSF_RESET) != 0) - { - if (cb->bracount > top_nest->max_group) - top_nest->max_group = cb->bracount; - cb->bracount = top_nest->reset_group; - } - break; - - /* At a right parenthesis, reset the capture count to the maximum if we - are in a (?| group and/or reset the extended option. */ - - case CHAR_RIGHT_PARENTHESIS: - if (top_nest != NULL && top_nest->nest_depth == nest_depth) - { - if ((top_nest->flags & NSF_RESET) != 0 && - top_nest->max_group > cb->bracount) - cb->bracount = top_nest->max_group; - if ((top_nest->flags & NSF_EXTENDED) != 0) options |= PCRE2_EXTENDED; - else options &= ~PCRE2_EXTENDED; - if ((top_nest->flags & NSF_DUPNAMES) != 0) options |= PCRE2_DUPNAMES; - else options &= ~PCRE2_DUPNAMES; - if (top_nest == (nest_save *)(cb->start_workspace)) top_nest = NULL; - else top_nest--; - } - nest_depth--; - break; - } - } - -cb->final_bracount = cb->bracount; -return 0; - -FAILED: -*ptrptr = ptr; -return errorcode; -} - - - -/************************************************* -* Compile one branch * -*************************************************/ - -/* Scan the pattern, compiling it into the a vector. If the options are -changed during the branch, the pointer is used to change the external options -bits. This function is used during the pre-compile phase when we are trying -to find out the amount of memory needed, as well as during the real compile -phase. The value of lengthptr distinguishes the two phases. 
- -Arguments: - optionsptr pointer to the option bits - codeptr points to the pointer to the current code point - ptrptr points to the current pattern pointer - errorcodeptr points to error code variable - firstcuptr place to put the first required code unit - firstcuflagsptr place to put the first code unit flags, or a negative number - reqcuptr place to put the last required code unit - reqcuflagsptr place to put the last required code unit flags, or a negative number - bcptr points to current branch chain - cond_depth conditional nesting depth - cb contains pointers to tables etc. - lengthptr NULL during the real compile phase - points to length accumulator during pre-compile phase - -Returns: TRUE on success - FALSE, with *errorcodeptr set non-zero on error -*/ - -static BOOL -compile_branch(uint32_t *optionsptr, PCRE2_UCHAR **codeptr, - PCRE2_SPTR *ptrptr, int *errorcodeptr, - uint32_t *firstcuptr, int32_t *firstcuflagsptr, - uint32_t *reqcuptr, int32_t *reqcuflagsptr, - branch_chain *bcptr, int cond_depth, - compile_block *cb, size_t *lengthptr) -{ -int repeat_min = 0, repeat_max = 0; /* To please picky compilers */ -int bravalue = 0; -uint32_t greedy_default, greedy_non_default; -uint32_t repeat_type, op_type; -uint32_t options = *optionsptr; /* May change dynamically */ -uint32_t firstcu, reqcu; -int32_t firstcuflags, reqcuflags; -uint32_t zeroreqcu, zerofirstcu; -int32_t zeroreqcuflags, zerofirstcuflags; -int32_t req_caseopt, reqvary, tempreqvary; -int after_manual_callout = 0; -int escape; -size_t length_prevgroup = 0; -size_t item_hwm_offset = 0; -register uint32_t c; -register PCRE2_UCHAR *code = *codeptr; -PCRE2_UCHAR *last_code = code; -PCRE2_UCHAR *orig_code = code; -PCRE2_UCHAR *tempcode; -BOOL inescq = FALSE; -BOOL groupsetfirstcu = FALSE; -PCRE2_SPTR ptr = *ptrptr; -PCRE2_SPTR tempptr; -PCRE2_SPTR nestptr = NULL; -PCRE2_UCHAR *previous = NULL; -PCRE2_UCHAR *previous_callout = NULL; -uint8_t classbits[32]; - -/* We can fish out the UTF setting once 
and for all into a BOOL, but we must -not do this for other options (e.g. PCRE2_EXTENDED) because they may change -dynamically as we process the pattern. */ - -#ifdef SUPPORT_UNICODE -BOOL utf = (options & PCRE2_UTF) != 0; -#if PCRE2_CODE_UNIT_WIDTH != 32 -PCRE2_UCHAR utf_units[6]; /* For setting up multi-cu chars */ -#endif - -#else /* No UTF support */ -BOOL utf = FALSE; -#endif - -/* Helper variables for OP_XCLASS opcode (for characters > 255). We define -class_uchardata always so that it can be passed to add_to_class() always, -though it will not be used in non-UTF 8-bit cases. This avoids having to supply -alternative calls for the different cases. */ - -PCRE2_UCHAR *class_uchardata; -#ifdef SUPPORT_WIDE_CHARS -BOOL xclass; -PCRE2_UCHAR *class_uchardata_base; -#endif - -/* Set up the default and non-default settings for greediness */ - -greedy_default = ((options & PCRE2_UNGREEDY) != 0); -greedy_non_default = greedy_default ^ 1; - -/* Initialize no first unit, no required unit. REQ_UNSET means "no char -matching encountered yet". It gets changed to REQ_NONE if we hit something that -matches a non-fixed first unit; reqcu just remains unset if we never find one. - -When we hit a repeat whose minimum is zero, we may have to adjust these values -to take the zero repeat into account. This is implemented by setting them to -zerofirstcu and zeroreqcu when such a repeat is encountered. The individual -item types that can be repeated set these backoff variables appropriately. */ - -firstcu = reqcu = zerofirstcu = zeroreqcu = 0; -firstcuflags = reqcuflags = zerofirstcuflags = zeroreqcuflags = REQ_UNSET; - -/* The variable req_caseopt contains either the REQ_CASELESS value or zero, -according to the current setting of the caseless flag. The REQ_CASELESS value -leaves the lower 28 bit empty. It is added into the firstcu or reqcu variables -to record the case status of the value. This is used only for ASCII characters. 
-*/ - -req_caseopt = ((options & PCRE2_CASELESS) != 0)? REQ_CASELESS:0; - -/* Switch on next character until the end of the branch */ - -for (;; ptr++) - { - BOOL negate_class; - BOOL should_flip_negation; - BOOL possessive_quantifier; - BOOL is_quantifier; - BOOL is_recurse; - BOOL is_dupname; - BOOL reset_bracount; - int class_has_8bitchar; - int class_one_char; -#ifdef SUPPORT_WIDE_CHARS - BOOL xclass_has_prop; -#endif - int recno; /* Must be signed */ - int refsign; /* Must be signed */ - int terminator; /* Must be signed */ - unsigned int mclength; - unsigned int tempbracount; - uint32_t ec; - uint32_t newoptions; - uint32_t skipunits; - uint32_t subreqcu, subfirstcu; - int32_t subreqcuflags, subfirstcuflags; /* Must be signed */ - PCRE2_UCHAR mcbuffer[8]; - - /* Get next character in the pattern */ - - c = *ptr; - - /* If we are at the end of a nested substitution, revert to the outer level - string. Nesting only happens one level deep. */ - - if (c == CHAR_NULL && nestptr != NULL) - { - ptr = nestptr; - nestptr = NULL; - c = *ptr; - } - - /* If we are in the pre-compile phase, accumulate the length used for the - previous cycle of this loop. */ - - if (lengthptr != NULL) - { - if (code > cb->start_workspace + cb->workspace_size - - WORK_SIZE_SAFETY_MARGIN) /* Check for overrun */ - { - *errorcodeptr = ERR52; - goto FAILED; - } - - /* There is at least one situation where code goes backwards: this is the - case of a zero quantifier after a class (e.g. [ab]{0}). At compile time, - the class is simply eliminated. However, it is created first, so we have to - allow memory for it. Therefore, don't ever reduce the length at this point. 
- */ - - if (code < last_code) code = last_code; - - /* Paranoid check for integer overflow */ - - if (OFLOW_MAX - *lengthptr < (size_t)(code - last_code)) - { - *errorcodeptr = ERR20; - goto FAILED; - } - *lengthptr += code - last_code; - - /* If "previous" is set and it is not at the start of the work space, move - it back to there, in order to avoid filling up the work space. Otherwise, - if "previous" is NULL, reset the current code pointer to the start. */ - - if (previous != NULL) - { - if (previous > orig_code) - { - memmove(orig_code, previous, CU2BYTES(code - previous)); - code -= previous - orig_code; - previous = orig_code; - } - } - else code = orig_code; - - /* Remember where this code item starts so we can pick up the length - next time round. */ - - last_code = code; - } - - /* In the real compile phase, just check the workspace used by the forward - reference list. */ - - else if (cb->hwm > cb->start_workspace + cb->workspace_size - - WORK_SIZE_SAFETY_MARGIN) - { - *errorcodeptr = ERR52; - goto FAILED; - } - - /* If in \Q...\E, check for the end; if not, we have a literal */ - - if (inescq && (c != CHAR_NULL || ptr < cb->end_pattern)) - { - if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E) - { - inescq = FALSE; - ptr++; - continue; - } - else - { - if (previous_callout != NULL) - { - if (lengthptr == NULL) /* Don't attempt in pre-compile phase */ - complete_callout(previous_callout, ptr, cb); - previous_callout = NULL; - } - if ((options & PCRE2_AUTO_CALLOUT) != 0) - { - previous_callout = code; - code = auto_callout(code, ptr, cb); - } - goto NORMAL_CHAR; - } - /* Control does not reach here. */ - } - - /* In extended mode, skip white space and comments. We need a loop in order - to check for more white space and more comments after a comment. 
*/ - - if ((options & PCRE2_EXTENDED) != 0) - { - for (;;) - { - while (MAX_255(c) && (cb->ctypes[c] & ctype_space) != 0) c = *(++ptr); - if (c != CHAR_NUMBER_SIGN) break; - ptr++; - while (*ptr != CHAR_NULL) - { - if (IS_NEWLINE(ptr)) /* For non-fixed-length newline cases, */ - { /* IS_NEWLINE sets cb->nllen. */ - ptr += cb->nllen; - break; - } - ptr++; -#ifdef SUPPORT_UNICODE - if (utf) FORWARDCHAR(ptr); -#endif - } - c = *ptr; /* Either NULL or the char after a newline */ - } - } - - /* See if the next thing is a quantifier. */ - - is_quantifier = - c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK || - (c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1)); - - /* Fill in length of a previous callout, except when the next thing is a - quantifier or when processing a property substitution string in UCP mode. */ - - if (!is_quantifier && previous_callout != NULL && nestptr == NULL && - after_manual_callout-- <= 0) - { - if (lengthptr == NULL) /* Don't attempt in pre-compile phase */ - complete_callout(previous_callout, ptr, cb); - previous_callout = NULL; - } - - /* Create auto callout, except for quantifiers, or while processing property - strings that are substituted for \w etc in UCP mode. */ - - if ((options & PCRE2_AUTO_CALLOUT) != 0 && !is_quantifier && nestptr == NULL) - { - previous_callout = code; - code = auto_callout(code, ptr, cb); - } - - /* Process the next pattern item. 
*/ - - switch(c) - { - /* ===================================================================*/ - /* The branch terminates at string end or | or ) */ - - case CHAR_NULL: - if (ptr < cb->end_pattern) goto NORMAL_CHAR; /* Zero data character */ - /* Fall through */ - - case CHAR_VERTICAL_LINE: - case CHAR_RIGHT_PARENTHESIS: - *firstcuptr = firstcu; - *firstcuflagsptr = firstcuflags; - *reqcuptr = reqcu; - *reqcuflagsptr = reqcuflags; - *codeptr = code; - *ptrptr = ptr; - if (lengthptr != NULL) - { - if (OFLOW_MAX - *lengthptr < (size_t)(code - last_code)) - { - *errorcodeptr = ERR20; - goto FAILED; - } - *lengthptr += code - last_code; /* To include callout length */ - } - return TRUE; - - - /* ===================================================================*/ - /* Handle single-character metacharacters. In multiline mode, ^ disables - the setting of any following char as a first character. */ - - case CHAR_CIRCUMFLEX_ACCENT: - previous = NULL; - if ((options & PCRE2_MULTILINE) != 0) - { - if (firstcuflags == REQ_UNSET) - zerofirstcuflags = firstcuflags = REQ_NONE; - *code++ = OP_CIRCM; - } - else *code++ = OP_CIRC; - break; - - case CHAR_DOLLAR_SIGN: - previous = NULL; - *code++ = ((options & PCRE2_MULTILINE) != 0)? OP_DOLLM : OP_DOLL; - break; - - /* There can never be a first char if '.' is first, whatever happens about - repeats. The value of reqcu doesn't change either. */ - - case CHAR_DOT: - if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; - zerofirstcu = firstcu; - zerofirstcuflags = firstcuflags; - zeroreqcu = reqcu; - zeroreqcuflags = reqcuflags; - previous = code; - item_hwm_offset = cb->hwm - cb->start_workspace; - *code++ = ((options & PCRE2_DOTALL) != 0)? OP_ALLANY: OP_ANY; - break; - - - /* ===================================================================*/ - /* Character classes. 
If the included characters are all < 256, we build a - 32-byte bitmap of the permitted characters, except in the special case - where there is only one such character. For negated classes, we build the - map as usual, then invert it at the end. However, we use a different opcode - so that data characters > 255 can be handled correctly. - - If the class contains characters outside the 0-255 range, a different - opcode is compiled. It may optionally have a bit map for characters < 256, - but those above are are explicitly listed afterwards. A flag byte tells - whether the bitmap is present, and whether this is a negated class or not. - - An isolated ']' character is not treated specially, so is just another data - character. In earlier versions of PCRE that used the original API there was - a "JavaScript compatibility mode" in which it gave an error. However, - JavaScript itself has changed in this respect so there is no longer any - need for this special handling. - - In another (POSIX) regex library, the ugly syntax [[:<:]] and [[:>:]] is - used for "start of word" and "end of word". As these are otherwise illegal - sequences, we don't break anything by recognizing them. They are replaced - by \b(?=\w) and \b(?<=\w) respectively. Sequences like [a[:<:]] are - erroneous and are handled by the normal code below. */ - - case CHAR_LEFT_SQUARE_BRACKET: - if (PRIV(strncmp_c8)(ptr+1, STRING_WEIRD_STARTWORD, 6) == 0) - { - nestptr = ptr + 7; - ptr = sub_start_of_word - 1; - continue; - } - - if (PRIV(strncmp_c8)(ptr+1, STRING_WEIRD_ENDWORD, 6) == 0) - { - nestptr = ptr + 7; - ptr = sub_end_of_word - 1; - continue; - } - - /* Handle a real character class. */ - - previous = code; - item_hwm_offset = cb->hwm - cb->start_workspace; - - /* PCRE supports POSIX class stuff inside a class. Perl gives an error if - they are encountered at the top level, so we'll do that too. 
*/ - - if ((ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT || - ptr[1] == CHAR_EQUALS_SIGN) && - check_posix_syntax(ptr, &tempptr)) - { - *errorcodeptr = (ptr[1] == CHAR_COLON)? ERR12 : ERR13; - goto FAILED; - } - - /* If the first character is '^', set the negation flag and skip it. Also, - if the first few characters (either before or after ^) are \Q\E or \E we - skip them too. This makes for compatibility with Perl. */ - - negate_class = FALSE; - for (;;) - { - c = *(++ptr); - if (c == CHAR_BACKSLASH) - { - if (ptr[1] == CHAR_E) - ptr++; - else if (PRIV(strncmp_c8)(ptr + 1, STR_Q STR_BACKSLASH STR_E, 3) == 0) - ptr += 3; - else - break; - } - else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT) - negate_class = TRUE; - else break; - } - - /* Empty classes are allowed if PCRE2_ALLOW_EMPTY_CLASS is set. Otherwise, - an initial ']' is taken as a data character -- the code below handles - that. When empty classes are allowed, [] must always fail, so generate - OP_FAIL, whereas [^] must match any character, so generate OP_ALLANY. */ - - if (c == CHAR_RIGHT_SQUARE_BRACKET && - (cb->external_options & PCRE2_ALLOW_EMPTY_CLASS) != 0) - { - *code++ = negate_class? OP_ALLANY : OP_FAIL; - if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; - zerofirstcu = firstcu; - zerofirstcuflags = firstcuflags; - break; - } - - /* If a class contains a negative special such as \S, we need to flip the - negation flag at the end, so that support for characters > 255 works - correctly (they are all included in the class). */ - - should_flip_negation = FALSE; - - /* Extended class (xclass) will be used when characters > 255 - might match. 
*/ - -#ifdef SUPPORT_WIDE_CHARS - xclass = FALSE; - class_uchardata = code + LINK_SIZE + 2; /* For XCLASS items */ - class_uchardata_base = class_uchardata; /* Save the start */ -#endif - - /* For optimization purposes, we track some properties of the class: - class_has_8bitchar will be non-zero if the class contains at least one 256 - character with a code point less than 256; class_one_char will be 1 if the - class contains just one character; xclass_has_prop will be TRUE if Unicode - property checks are present in the class. */ - - class_has_8bitchar = 0; - class_one_char = 0; -#ifdef SUPPORT_WIDE_CHARS - xclass_has_prop = FALSE; -#endif - - /* Initialize the 256-bit (32-byte) bit map to all zeros. We build the map - in a temporary bit of memory, in case the class contains fewer than two - 8-bit characters because in that case the compiled code doesn't use the bit - map. */ - - memset(classbits, 0, 32 * sizeof(uint8_t)); - - /* Process characters until ] is reached. As the test is at the end of the - loop, an initial ] is taken as a data character. At the start of the loop, - c contains the first code unit of the character. If it is zero, check for - the end of the pattern, to allow binary zero as data. */ - - for(;;) - { - PCRE2_SPTR oldptr; - - if (c == CHAR_NULL && ptr >= cb->end_pattern) - { - *errorcodeptr = ERR6; /* Missing terminating ']' */ - goto FAILED; - } - -#ifdef SUPPORT_UNICODE - if (utf && HAS_EXTRALEN(c)) - { /* Braces are required because the */ - GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */ - } -#endif - - /* Inside \Q...\E everything is literal except \E */ - - if (inescq) - { - if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E) /* If we are at \E */ - { - inescq = FALSE; /* Reset literal state */ - ptr++; /* Skip the 'E' */ - goto CONTINUE_CLASS; /* Carry on with next char */ - } - goto CHECK_RANGE; /* Could be range if \E follows */ - } - - /* Handle POSIX class names. 
Perl allows a negation extension of the - form [:^name:]. A square bracket that doesn't match the syntax is - treated as a literal. We also recognize the POSIX constructions - [.ch.] and [=ch=] ("collating elements") and fault them, as Perl - 5.6 and 5.8 do. */ - - if (c == CHAR_LEFT_SQUARE_BRACKET && - (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT || - ptr[1] == CHAR_EQUALS_SIGN) && check_posix_syntax(ptr, &tempptr)) - { - BOOL local_negate = FALSE; - int posix_class, taboffset, tabopt; - register const uint8_t *cbits = cb->cbits; - uint8_t pbits[32]; - - if (ptr[1] != CHAR_COLON) - { - *errorcodeptr = ERR13; - goto FAILED; - } - - ptr += 2; - if (*ptr == CHAR_CIRCUMFLEX_ACCENT) - { - local_negate = TRUE; - should_flip_negation = TRUE; /* Note negative special */ - ptr++; - } - - posix_class = check_posix_name(ptr, (int)(tempptr - ptr)); - if (posix_class < 0) - { - *errorcodeptr = ERR30; - goto FAILED; - } - - /* If matching is caseless, upper and lower are converted to - alpha. This relies on the fact that the class table starts with - alpha, lower, upper as the first 3 entries. */ - - if ((options & PCRE2_CASELESS) != 0 && posix_class <= 2) - posix_class = 0; - - /* When PCRE2_UCP is set, some of the POSIX classes are converted to - different escape sequences that use Unicode properties \p or \P. Others - that are not available via \p or \P generate XCL_PROP/XCL_NOTPROP - directly. UCP support is not available unless UTF support is.*/ - -#ifdef SUPPORT_UNICODE - if ((options & PCRE2_UCP) != 0) - { - unsigned int ptype = 0; - int pc = posix_class + ((local_negate)? POSIX_SUBSIZE/2 : 0); - - /* The posix_substitutes table specifies which POSIX classes can be - converted to \p or \P items. */ - - if (posix_substitutes[pc] != NULL) - { - nestptr = tempptr + 1; - ptr = posix_substitutes[pc] - 1; - goto CONTINUE_CLASS; - } - - /* There are three other classes that generate special property calls - that are recognized only in an XCLASS. 
*/ - - else switch(posix_class) - { - case PC_GRAPH: - ptype = PT_PXGRAPH; - /* Fall through */ - case PC_PRINT: - if (ptype == 0) ptype = PT_PXPRINT; - /* Fall through */ - case PC_PUNCT: - if (ptype == 0) ptype = PT_PXPUNCT; - *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP; - *class_uchardata++ = ptype; - *class_uchardata++ = 0; - xclass_has_prop = TRUE; - ptr = tempptr + 1; - goto CONTINUE_CLASS; - - /* For all other POSIX classes, no special action is taken in UCP - mode. Fall through to the non_UCP case. */ - - default: - break; - } - } -#endif /* SUPPORT_UNICODE */ - - /* In the non-UCP case, or when UCP makes no difference, we build the - bit map for the POSIX class in a chunk of local store because we may be - adding and subtracting from it, and we don't want to subtract bits that - may be in the main map already. At the end we or the result into the - bit map that is being built. */ - - posix_class *= 3; - - /* Copy in the first table (always present) */ - - memcpy(pbits, cbits + posix_class_maps[posix_class], - 32 * sizeof(uint8_t)); - - /* If there is a second table, add or remove it as required. */ - - taboffset = posix_class_maps[posix_class + 1]; - tabopt = posix_class_maps[posix_class + 2]; - - if (taboffset >= 0) - { - if (tabopt >= 0) - for (c = 0; c < 32; c++) pbits[c] |= cbits[c + taboffset]; - else - for (c = 0; c < 32; c++) pbits[c] &= ~cbits[c + taboffset]; - } - - /* Now see if we need to remove any special characters. An option - value of 1 removes vertical space and 2 removes underscore. */ - - if (tabopt < 0) tabopt = -tabopt; - if (tabopt == 1) pbits[1] &= ~0x3c; - else if (tabopt == 2) pbits[11] &= 0x7f; - - /* Add the POSIX table or its complement into the main table that is - being built and we are done. */ - - if (local_negate) - for (c = 0; c < 32; c++) classbits[c] |= ~pbits[c]; - else - for (c = 0; c < 32; c++) classbits[c] |= pbits[c]; - - ptr = tempptr + 1; - /* Every class contains at least one < 256 character. 
*/ - class_has_8bitchar = 1; - /* Every class contains at least two characters. */ - class_one_char = 2; - goto CONTINUE_CLASS; /* End of POSIX syntax handling */ - } - - /* Backslash may introduce a single character, or it may introduce one - of the specials, which just set a flag. The sequence \b is a special - case. Inside a class (and only there) it is treated as backspace. We - assume that other escapes have more than one character in them, so - speculatively set both class_has_8bitchar and class_one_char bigger - than one. Unrecognized escapes fall through and are faulted. */ - - if (c == CHAR_BACKSLASH) - { - escape = check_escape(&ptr, &ec, errorcodeptr, options, TRUE, cb); - if (*errorcodeptr != 0) goto FAILED; - if (escape == 0) c = ec; /* Escaped single char */ - else if (escape == ESC_b) c = CHAR_BS; /* \b is backspace in a class */ - else if (escape == ESC_N) /* \N is not supported in a class */ - { - *errorcodeptr = ERR71; - goto FAILED; - } - else if (escape == ESC_Q) /* Handle start of quoted string */ - { - if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E) - { - ptr += 2; /* avoid empty string */ - } - else inescq = TRUE; - goto CONTINUE_CLASS; - } - else if (escape == ESC_E) goto CONTINUE_CLASS; /* Ignore orphan \E */ - - else /* Handle \d-type escapes */ - { - register const uint8_t *cbits = cb->cbits; - /* Every class contains at least two < 256 characters. */ - class_has_8bitchar++; - /* Every class contains at least two characters. */ - class_one_char += 2; - - switch (escape) - { -#ifdef SUPPORT_UNICODE - case ESC_du: /* These are the values given for \d etc */ - case ESC_DU: /* when PCRE2_UCP is set. We replace the */ - case ESC_wu: /* escape sequence with an appropriate \p */ - case ESC_WU: /* or \P to test Unicode properties instead */ - case ESC_su: /* of the default ASCII testing. */ - case ESC_SU: - nestptr = ptr; - ptr = substitutes[escape - ESC_DU] - 1; /* Just before substitute */ - class_has_8bitchar--; /* Undo! 
*/ - break; -#endif - case ESC_d: - for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_digit]; - break; - - case ESC_D: - should_flip_negation = TRUE; - for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit]; - break; - - case ESC_w: - for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_word]; - break; - - case ESC_W: - should_flip_negation = TRUE; - for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word]; - break; - - /* Perl 5.004 onwards omitted VT from \s, but restored it at Perl - 5.18. Before PCRE 8.34, we had to preserve the VT bit if it was - previously set by something earlier in the character class. - Luckily, the value of CHAR_VT is 0x0b in both ASCII and EBCDIC, so - we could just adjust the appropriate bit. From PCRE 8.34 we no - longer treat \s and \S specially. */ - - case ESC_s: - for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_space]; - break; - - case ESC_S: - should_flip_negation = TRUE; - for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space]; - break; - - /* The rest apply in both UCP and non-UCP cases. */ - - case ESC_h: - (void)add_list_to_class(classbits, &class_uchardata, options, cb, - PRIV(hspace_list), NOTACHAR); - break; - - case ESC_H: - (void)add_not_list_to_class(classbits, &class_uchardata, options, - cb, PRIV(hspace_list)); - break; - - case ESC_v: - (void)add_list_to_class(classbits, &class_uchardata, options, cb, - PRIV(vspace_list), NOTACHAR); - break; - - case ESC_V: - (void)add_not_list_to_class(classbits, &class_uchardata, options, - cb, PRIV(vspace_list)); - break; - - case ESC_p: - case ESC_P: -#ifdef SUPPORT_UNICODE - { - BOOL negated; - unsigned int ptype = 0, pdata = 0; - if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr, cb)) - goto FAILED; - *class_uchardata++ = ((escape == ESC_p) != negated)? - XCL_PROP : XCL_NOTPROP; - *class_uchardata++ = ptype; - *class_uchardata++ = pdata; - xclass_has_prop = TRUE; - class_has_8bitchar--; /* Undo! 
*/ - } - break; -#else - *errorcodeptr = ERR45; - goto FAILED; -#endif - /* Unrecognized escapes are faulted. */ - - default: - *errorcodeptr = ERR7; - goto FAILED; - } - - /* Handled \d-type escape */ - - goto CONTINUE_CLASS; - } - - /* Control gets here if the escape just defined a single character. - This is in c and may be greater than 256. */ - - escape = 0; - } /* End of backslash handling */ - - /* A character may be followed by '-' to form a range. However, Perl does - not permit ']' to be the end of the range. A '-' character at the end is - treated as a literal. Perl ignores orphaned \E sequences entirely. The - code for handling \Q and \E is messy. */ - - CHECK_RANGE: - while (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E) - { - inescq = FALSE; - ptr += 2; - } - oldptr = ptr; - - /* Remember if \r or \n were explicitly used */ - - if (c == CHAR_CR || c == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF; - - /* Check for range */ - - if (!inescq && ptr[1] == CHAR_MINUS) - { - uint32_t d; - ptr += 2; - while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) ptr += 2; - - /* If we hit \Q (not followed by \E) at this point, go into escaped - mode. */ - - while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_Q) - { - ptr += 2; - if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) - { ptr += 2; continue; } - inescq = TRUE; - break; - } - - /* Minus (hyphen) at the end of a class is treated as a literal, so put - back the pointer and jump to handle the character that preceded it. 
*/ - - if (*ptr == CHAR_NULL || (!inescq && *ptr == CHAR_RIGHT_SQUARE_BRACKET)) - { - ptr = oldptr; - goto CLASS_SINGLE_CHARACTER; - } - - /* Otherwise, we have a potential range; pick up the next character */ - -#ifdef SUPPORT_UNICODE - if (utf) - { /* Braces are required because the */ - GETCHARLEN(d, ptr, ptr); /* macro generates multiple statements */ - } - else -#endif - d = *ptr; /* Not UTF mode */ - - /* The second part of a range can be a single-character escape - sequence, but not any of the other escapes. Perl treats a hyphen as a - literal in such circumstances. However, in Perl's warning mode, a - warning is given, so PCRE now faults it as it is almost certainly a - mistake on the user's part. */ - - if (!inescq) - { - if (d == CHAR_BACKSLASH) - { - int descape; - descape = check_escape(&ptr, &d, errorcodeptr, options, TRUE, cb); - if (*errorcodeptr != 0) goto FAILED; - - /* 0 means a character was put into d; \b is backspace; any other - special causes an error. */ - - if (descape != 0) - { - if (descape == ESC_b) d = CHAR_BS; else - { - *errorcodeptr = ERR50; - goto FAILED; - } - } - } - - /* A hyphen followed by a POSIX class is treated in the same way. */ - - else if (d == CHAR_LEFT_SQUARE_BRACKET && - (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT || - ptr[1] == CHAR_EQUALS_SIGN) && - check_posix_syntax(ptr, &tempptr)) - { - *errorcodeptr = ERR50; - goto FAILED; - } - } - - /* Check that the two values are in the correct order. Optimize - one-character ranges. */ - - if (d < c) - { - *errorcodeptr = ERR8; - goto FAILED; - } - if (d == c) goto CLASS_SINGLE_CHARACTER; /* A few lines below */ - - /* We have found a character range, so single character optimizations - cannot be done anymore. Any value greater than 1 indicates that there - is more than one character. */ - - class_one_char = 2; - - /* Remember an explicit \r or \n, and add the range to the class. 
*/ - - if (d == CHAR_CR || d == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF; - - class_has_8bitchar += - add_to_class(classbits, &class_uchardata, options, cb, c, d); - - goto CONTINUE_CLASS; /* Go get the next char in the class */ - } - - /* Handle a single character - we can get here for a normal non-escape - char, or after \ that introduces a single character or for an apparent - range that isn't. Only the value 1 matters for class_one_char, so don't - increase it if it is already 2 or more ... just in case there's a class - with a zillion characters in it. */ - - CLASS_SINGLE_CHARACTER: - if (class_one_char < 2) class_one_char++; - - /* If class_one_char is 1, we have the first single character in the - class, and there have been no prior ranges, or XCLASS items generated by - escapes. If this is the final character in the class, we can optimize by - turning the item into a 1-character OP_CHAR[I] if it's positive, or - OP_NOT[I] if it's negative. In the positive case, it can cause firstcu - to be set. Otherwise, there can be no first char if this item is first, - whatever repeat count may follow. In the case of reqcu, save the - previous value for reinstating. */ - - if (!inescq && class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) - { - ptr++; - zeroreqcu = reqcu; - zeroreqcuflags = reqcuflags; - - if (negate_class) - { -#ifdef SUPPORT_UNICODE - int d; -#endif - if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; - zerofirstcu = firstcu; - zerofirstcuflags = firstcuflags; - - /* For caseless UTF mode, check whether this character has more than - one other case. If so, generate a special OP_NOTPROP item instead of - OP_NOTI. */ - -#ifdef SUPPORT_UNICODE - if (utf && (options & PCRE2_CASELESS) != 0 && - (d = UCD_CASESET(c)) != 0) - { - *code++ = OP_NOTPROP; - *code++ = PT_CLIST; - *code++ = d; - } - else -#endif - /* Char has only one other case, or UCP not available */ - - { - *code++ = ((options & PCRE2_CASELESS) != 0)? 
OP_NOTI: OP_NOT; - code += PUTCHAR(c, code); - } - - /* We are finished with this character class */ - - goto END_CLASS; - } - - /* For a single, positive character, get the value into mcbuffer, and - then we can handle this with the normal one-character code. */ - - mclength = PUTCHAR(c, mcbuffer); - goto ONE_CHAR; - } /* End of 1-char optimization */ - - /* There is more than one character in the class, or an XCLASS item - has been generated. Add this character to the class. */ - - class_has_8bitchar += - add_to_class(classbits, &class_uchardata, options, cb, c, c); - - /* Continue to the next character in the class. Closing square bracket - not within \Q..\E ends the class. A NULL character terminates a - nested substitution string, but may be a data character in the main - pattern (tested at the start of this loop). */ - - CONTINUE_CLASS: - c = *(++ptr); - if (c == 0 && nestptr != NULL) - { - ptr = nestptr; - nestptr = NULL; - c = *(++ptr); - } - -#ifdef SUPPORT_WIDE_CHARS - /* If any wide characters have been encountered, set xclass = TRUE. Then, - in the pre-compile phase, accumulate the length of the wide characters - and reset the pointer. This is so that very large classes that contain a - zillion wide characters do not overwrite the work space (which is on the - stack). */ - - if (class_uchardata > class_uchardata_base) - { - xclass = TRUE; - if (lengthptr != NULL) - { - *lengthptr += class_uchardata - class_uchardata_base; - class_uchardata = class_uchardata_base; - } - } -#endif - /* An unescaped ] ends the class */ - - if (c == CHAR_RIGHT_SQUARE_BRACKET && !inescq) break; - } /* End of main class-processing loop */ - - /* If this is the first thing in the branch, there can be no first char - setting, whatever the repeat count. Any reqcu setting must remain - unchanged after any kind of repeat. 
*/ - - if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; - zerofirstcu = firstcu; - zerofirstcuflags = firstcuflags; - zeroreqcu = reqcu; - zeroreqcuflags = reqcuflags; - - /* If there are characters with values > 255, we have to compile an - extended class, with its own opcode, unless there was a negated special - such as \S in the class, and PCRE2_UCP is not set, because in that case all - characters > 255 are in the class, so any that were explicitly given as - well can be ignored. If (when there are explicit characters > 255 that must - be listed) there are no characters < 256, we can omit the bitmap in the - actual compiled code. */ - -#ifdef SUPPORT_WIDE_CHARS -#ifdef SUPPORT_UNICODE - if (xclass && (!should_flip_negation || (options & PCRE2_UCP) != 0)) -#elif PCRE2_CODE_UNIT_WIDTH != 8 - if (xclass && !should_flip_negation) -#endif - { - *class_uchardata++ = XCL_END; /* Marks the end of extra data */ - *code++ = OP_XCLASS; - code += LINK_SIZE; - *code = negate_class? XCL_NOT:0; - if (xclass_has_prop) *code |= XCL_HASPROP; - - /* If the map is required, move up the extra data to make room for it; - otherwise just move the code pointer to the end of the extra data. */ - - if (class_has_8bitchar > 0) - { - *code++ |= XCL_MAP; - memmove(code + (32 / sizeof(PCRE2_UCHAR)), code, - CU2BYTES(class_uchardata - code)); - if (negate_class && !xclass_has_prop) - for (c = 0; c < 32; c++) classbits[c] = ~classbits[c]; - memcpy(code, classbits, 32); - code = class_uchardata + (32 / sizeof(PCRE2_UCHAR)); - } - else code = class_uchardata; - - /* Now fill in the complete length of the item */ - - PUT(previous, 1, (int)(code - previous)); - break; /* End of class handling */ - } -#endif - - /* If there are no characters > 255, or they are all to be included or - excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the - whole class was negated and whether there were negative specials such as \S - (non-UCP) in the class. 
Then copy the 32-byte map into the code vector, - negating it if necessary. */ - - *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS; - if (lengthptr == NULL) /* Save time in the pre-compile phase */ - { - if (negate_class) - for (c = 0; c < 32; c++) classbits[c] = ~classbits[c]; - memcpy(code, classbits, 32); - } - code += 32 / sizeof(PCRE2_UCHAR); - - END_CLASS: - break; - - - /* ===================================================================*/ - /* Various kinds of repeat; '{' is not necessarily a quantifier, but this - has been tested above. */ - - case CHAR_LEFT_CURLY_BRACKET: - if (!is_quantifier) goto NORMAL_CHAR; - ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr); - if (*errorcodeptr != 0) goto FAILED; - goto REPEAT; - - case CHAR_ASTERISK: - repeat_min = 0; - repeat_max = -1; - goto REPEAT; - - case CHAR_PLUS: - repeat_min = 1; - repeat_max = -1; - goto REPEAT; - - case CHAR_QUESTION_MARK: - repeat_min = 0; - repeat_max = 1; - - REPEAT: - if (previous == NULL) - { - *errorcodeptr = ERR9; - goto FAILED; - } - - if (repeat_min == 0) - { - firstcu = zerofirstcu; /* Adjust for zero repeat */ - firstcuflags = zerofirstcuflags; - reqcu = zeroreqcu; /* Ditto */ - reqcuflags = zeroreqcuflags; - } - - /* Remember whether this is a variable length repeat */ - - reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY; - - op_type = 0; /* Default single-char op codes */ - possessive_quantifier = FALSE; /* Default not possessive quantifier */ - - /* Save start of previous item, in case we have to move it up in order to - insert something before it. */ - - tempcode = previous; - - /* Before checking for a possessive quantifier, we must skip over - whitespace and comments in extended mode because Perl allows white space at - this point. 
*/ - - if ((options & PCRE2_EXTENDED) != 0) - { - PCRE2_SPTR p = ptr + 1; - for (;;) - { - while (MAX_255(*p) && (cb->ctypes[*p] & ctype_space) != 0) p++; - if (*p != CHAR_NUMBER_SIGN) break; - p++; - while (*p != CHAR_NULL) - { - if (IS_NEWLINE(p)) /* For non-fixed-length newline cases, */ - { /* IS_NEWLINE sets cb->nllen. */ - p += cb->nllen; - break; - } - p++; -#ifdef SUPPORT_UNICODE - if (utf) FORWARDCHAR(p); -#endif - } /* Loop for comment characters */ - } /* Loop for multiple comments */ - ptr = p - 1; /* Character before the next significant one. */ - } - - /* If the next character is '+', we have a possessive quantifier. This - implies greediness, whatever the setting of the PCRE2_UNGREEDY option. - If the next character is '?' this is a minimizing repeat, by default, - but if PCRE2_UNGREEDY is set, it works the other way round. We change the - repeat type to the non-default. */ - - if (ptr[1] == CHAR_PLUS) - { - repeat_type = 0; /* Force greedy */ - possessive_quantifier = TRUE; - ptr++; - } - else if (ptr[1] == CHAR_QUESTION_MARK) - { - repeat_type = greedy_non_default; - ptr++; - } - else repeat_type = greedy_default; - - /* If previous was a recursion call, wrap it in atomic brackets so that - previous becomes the atomic group. All recursions were so wrapped in the - past, but it no longer happens for non-repeated recursions. In fact, the - repeated ones could be re-implemented independently so as not to need this, - but for the moment we rely on the code for repeating groups. */ - - if (*previous == OP_RECURSE) - { - memmove(previous + 1 + LINK_SIZE, previous, CU2BYTES(1 + LINK_SIZE)); - *previous = OP_ONCE; - PUT(previous, 1, 2 + 2*LINK_SIZE); - previous[2 + 2*LINK_SIZE] = OP_KET; - PUT(previous, 3 + 2*LINK_SIZE, 2 + 2*LINK_SIZE); - code += 2 + 2 * LINK_SIZE; - length_prevgroup = 3 + 3*LINK_SIZE; - - /* When actually compiling, we need to check whether this was a forward - reference, and if so, adjust the offset. 
*/ - - if (lengthptr == NULL && cb->hwm >= cb->start_workspace + LINK_SIZE) - { - int offset = GET(cb->hwm, -LINK_SIZE); - if (offset == previous + 1 - cb->start_code) - PUT(cb->hwm, -LINK_SIZE, offset + 1 + LINK_SIZE); - } - } - - /* Now handle repetition for the different types of item. */ - - /* If previous was a character or negated character match, abolish the item - and generate a repeat item instead. If a char item has a minimum of more - than one, ensure that it is set in reqcu - it might not be if a sequence - such as x{3} is the first thing in a branch because the x will have gone - into firstcu instead. */ - - if (*previous == OP_CHAR || *previous == OP_CHARI - || *previous == OP_NOT || *previous == OP_NOTI) - { - switch (*previous) - { - default: /* Make compiler happy. */ - case OP_CHAR: op_type = OP_STAR - OP_STAR; break; - case OP_CHARI: op_type = OP_STARI - OP_STAR; break; - case OP_NOT: op_type = OP_NOTSTAR - OP_STAR; break; - case OP_NOTI: op_type = OP_NOTSTARI - OP_STAR; break; - } - - /* Deal with UTF characters that take up more than one code unit. It's - easier to write this out separately than try to macrify it. Use c to - hold the length of the character in code units, plus UTF_LENGTH to flag - that it's a length rather than a small character. */ - -#ifdef MAYBE_UTF_MULTI - if (utf && NOT_FIRSTCHAR(code[-1])) - { - PCRE2_UCHAR *lastchar = code - 1; - BACKCHAR(lastchar); - c = (int)(code - lastchar); /* Length of UTF character */ - memcpy(utf_units, lastchar, CU2BYTES(c)); /* Save the char */ - c |= UTF_LENGTH; /* Flag c as a length */ - } - else -#endif /* MAYBE_UTF_MULTI */ - - /* Handle the case of a single charater - either with no UTF support, or - with UTF disabled, or for a single-code-unit UTF character. 
*/ - { - c = code[-1]; - if (*previous <= OP_CHARI && repeat_min > 1) - { - reqcu = c; - reqcuflags = req_caseopt | cb->req_varyopt; - } - } - - goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */ - } - - /* If previous was a character type match (\d or similar), abolish it and - create a suitable repeat item. The code is shared with single-character - repeats by setting op_type to add a suitable offset into repeat_type. Note - the the Unicode property types will be present only when SUPPORT_UNICODE is - defined, but we don't wrap the little bits of code here because it just - makes it horribly messy. */ - - else if (*previous < OP_EODN) - { - PCRE2_UCHAR *oldcode; - int prop_type, prop_value; - op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */ - c = *previous; /* Save previous opcode */ - if (c == OP_PROP || c == OP_NOTPROP) - { - prop_type = previous[1]; - prop_value = previous[2]; - } - else - { - /* Come here from just above with a character in c */ - OUTPUT_SINGLE_REPEAT: - prop_type = prop_value = -1; - } - - /* At this point we either have prop_type == prop_value == -1 and either - a code point or a character type that is not OP_[NOT]PROP in c, or we - have OP_[NOT]PROP in c and prop_type/prop_value not negative. */ - - oldcode = code; /* Save where we were */ - code = previous; /* Usually overwrite previous item */ - - /* If the maximum is zero then the minimum must also be zero; Perl allows - this case, so we do too - by simply omitting the item altogether. */ - - if (repeat_max == 0) goto END_REPEAT; - - /* Combine the op_type with the repeat_type */ - - repeat_type += op_type; - - /* A minimum of zero is handled either as the special case * or ?, or as - an UPTO, with the maximum given. 
*/ - - if (repeat_min == 0) - { - if (repeat_max == -1) *code++ = OP_STAR + repeat_type; - else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type; - else - { - *code++ = OP_UPTO + repeat_type; - PUT2INC(code, 0, repeat_max); - } - } - - /* A repeat minimum of 1 is optimized into some special cases. If the - maximum is unlimited, we use OP_PLUS. Otherwise, the original item is - left in place and, if the maximum is greater than 1, we use OP_UPTO with - one less than the maximum. */ - - else if (repeat_min == 1) - { - if (repeat_max == -1) - *code++ = OP_PLUS + repeat_type; - else - { - code = oldcode; /* Leave previous item in place */ - if (repeat_max == 1) goto END_REPEAT; - *code++ = OP_UPTO + repeat_type; - PUT2INC(code, 0, repeat_max - 1); - } - } - - /* The case {n,n} is just an EXACT, while the general case {n,m} is - handled as an EXACT followed by an UPTO or STAR or QUERY. */ - - else - { - *code++ = OP_EXACT + op_type; /* NB EXACT doesn't have repeat_type */ - PUT2INC(code, 0, repeat_min); - - /* Unless repeat_max equals repeat_min, fill in the data for EXACT, and - then generate the second opcode. In UTF mode, multi-code-unit - characters have their length in c, with the UTF_LENGTH bit as a flag, - and the code units in utf_units. For a repeated Unicode property match, - there are two extra values that define the required property, and c - never has the UTF_LENGTH bit set. 
*/ - - if (repeat_max != repeat_min) - { -#ifdef MAYBE_UTF_MULTI - if (utf && (c & UTF_LENGTH) != 0) - { - memcpy(code, utf_units, CU2BYTES(c & 7)); - code += c & 7; - } - else -#endif /* MAYBE_UTF_MULTI */ - { - *code++ = c; - if (prop_type >= 0) - { - *code++ = prop_type; - *code++ = prop_value; - } - } - - /* Now set up the following opcode */ - - if (repeat_max < 0) *code++ = OP_STAR + repeat_type; else - { - repeat_max -= repeat_min; - if (repeat_max == 1) - { - *code++ = OP_QUERY + repeat_type; - } - else - { - *code++ = OP_UPTO + repeat_type; - PUT2INC(code, 0, repeat_max); - } - } - } - } - - /* Fill in the character or character type for the final opcode. */ - -#ifdef MAYBE_UTF_MULTI - if (utf && (c & UTF_LENGTH) != 0) - { - memcpy(code, utf_units, CU2BYTES(c & 7)); - code += c & 7; - } - else -#endif /* MAYBEW_UTF_MULTI */ - { - *code++ = c; - if (prop_type >= 0) - { - *code++ = prop_type; - *code++ = prop_value; - } - } - } - - /* If previous was a character class or a back reference, we put the repeat - stuff after it, but just skip the item if the repeat was {0,0}. */ - - else if (*previous == OP_CLASS || *previous == OP_NCLASS || -#ifdef SUPPORT_WIDE_CHARS - *previous == OP_XCLASS || -#endif - *previous == OP_REF || *previous == OP_REFI || - *previous == OP_DNREF || *previous == OP_DNREFI) - { - if (repeat_max == 0) - { - code = previous; - goto END_REPEAT; - } - - if (repeat_min == 0 && repeat_max == -1) - *code++ = OP_CRSTAR + repeat_type; - else if (repeat_min == 1 && repeat_max == -1) - *code++ = OP_CRPLUS + repeat_type; - else if (repeat_min == 0 && repeat_max == 1) - *code++ = OP_CRQUERY + repeat_type; - else - { - *code++ = OP_CRRANGE + repeat_type; - PUT2INC(code, 0, repeat_min); - if (repeat_max == -1) repeat_max = 0; /* 2-byte encoding for max */ - PUT2INC(code, 0, repeat_max); - } - } - - /* If previous was a bracket group, we may have to replicate it in certain - cases. 
Note that at this point we can encounter only the "basic" bracket - opcodes such as BRA and CBRA, as this is the place where they get converted - into the more special varieties such as BRAPOS and SBRA. A test for >= - OP_ASSERT and <= OP_COND includes ASSERT, ASSERT_NOT, ASSERTBACK, - ASSERTBACK_NOT, ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND. - Originally, PCRE did not allow repetition of assertions, but now it does, - for Perl compatibility. */ - - else if (*previous >= OP_ASSERT && *previous <= OP_COND) - { - register int i; - int len = (int)(code - previous); - size_t base_hwm_offset = item_hwm_offset; - PCRE2_UCHAR *bralink = NULL; - PCRE2_UCHAR *brazeroptr = NULL; - - /* Repeating a DEFINE group (or any group where the condition is always - FALSE and there is only one branch) is pointless, but Perl allows the - syntax, so we just ignore the repeat. */ - - if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_FALSE && - previous[GET(previous, 1)] != OP_ALT) - goto END_REPEAT; - - /* There is no sense in actually repeating assertions. The only potential - use of repetition is in cases when the assertion is optional. Therefore, - if the minimum is greater than zero, just ignore the repeat. If the - maximum is not zero or one, set it to 1. */ - - if (*previous < OP_ONCE) /* Assertion */ - { - if (repeat_min > 0) goto END_REPEAT; - if (repeat_max < 0 || repeat_max > 1) repeat_max = 1; - } - - /* The case of a zero minimum is special because of the need to stick - OP_BRAZERO in front of it, and because the group appears once in the - data, whereas in other cases it appears the minimum number of times. For - this reason, it is simplest to treat this case separately, as otherwise - the code gets far too messy. There are several special subcases when the - minimum is zero. 
*/ - - if (repeat_min == 0) - { - /* If the maximum is also zero, we used to just omit the group from the - output altogether, like this: - - ** if (repeat_max == 0) - ** { - ** code = previous; - ** goto END_REPEAT; - ** } - - However, that fails when a group or a subgroup within it is referenced - as a subroutine from elsewhere in the pattern, so now we stick in - OP_SKIPZERO in front of it so that it is skipped on execution. As we - don't have a list of which groups are referenced, we cannot do this - selectively. - - If the maximum is 1 or unlimited, we just have to stick in the BRAZERO - and do no more at this point. However, we do need to adjust any - OP_RECURSE calls inside the group that refer to the group itself or any - internal or forward referenced group, because the offset is from the - start of the whole regex. Temporarily terminate the pattern while doing - this. */ - - if (repeat_max <= 1) /* Covers 0, 1, and unlimited */ - { - *code = OP_END; - adjust_recurse(previous, 1, utf, cb, item_hwm_offset); - memmove(previous + 1, previous, CU2BYTES(len)); - code++; - if (repeat_max == 0) - { - *previous++ = OP_SKIPZERO; - goto END_REPEAT; - } - brazeroptr = previous; /* Save for possessive optimizing */ - *previous++ = OP_BRAZERO + repeat_type; - } - - /* If the maximum is greater than 1 and limited, we have to replicate - in a nested fashion, sticking OP_BRAZERO before each set of brackets. - The first one has to be handled carefully because it's the original - copy, which has to be moved up. The remainder can be handled by code - that is common with the non-zero minimum case below. We have to - adjust the value or repeat_max, since one less copy is required. Once - again, we may have to adjust any OP_RECURSE calls inside the group. 
*/ - - else - { - int offset; - *code = OP_END; - adjust_recurse(previous, 2 + LINK_SIZE, utf, cb, item_hwm_offset); - memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len)); - code += 2 + LINK_SIZE; - *previous++ = OP_BRAZERO + repeat_type; - *previous++ = OP_BRA; - - /* We chain together the bracket offset fields that have to be - filled in later when the ends of the brackets are reached. */ - - offset = (bralink == NULL)? 0 : (int)(previous - bralink); - bralink = previous; - PUTINC(previous, 0, offset); - } - - repeat_max--; - } - - /* If the minimum is greater than zero, replicate the group as many - times as necessary, and adjust the maximum to the number of subsequent - copies that we need. If we set a first char from the group, and didn't - set a required char, copy the latter from the former. If there are any - forward reference subroutine calls in the group, there will be entries on - the workspace list; replicate these with an appropriate increment. */ - - else - { - if (repeat_min > 1) - { - /* In the pre-compile phase, we don't actually do the replication. We - just adjust the length as if we had. Do some paranoid checks for - potential integer overflow. The INT64_OR_DOUBLE type is a 64-bit - integer type when available, otherwise double. */ - - if (lengthptr != NULL) - { - size_t delta = (repeat_min - 1)*length_prevgroup; - if ((INT64_OR_DOUBLE)(repeat_min - 1)* - (INT64_OR_DOUBLE)length_prevgroup > - (INT64_OR_DOUBLE)INT_MAX || - OFLOW_MAX - *lengthptr < delta) - { - *errorcodeptr = ERR20; - goto FAILED; - } - *lengthptr += delta; - } - - /* This is compiling for real. If there is a set first byte for - the group, and we have not yet set a "required byte", set it. Make - sure there is enough workspace for copying forward references before - doing the copy. 
*/ - - else - { - if (groupsetfirstcu && reqcuflags < 0) - { - reqcu = firstcu; - reqcuflags = firstcuflags; - } - - for (i = 1; i < repeat_min; i++) - { - PCRE2_UCHAR *hc; - size_t this_hwm_offset = cb->hwm - cb->start_workspace; - memcpy(code, previous, CU2BYTES(len)); - - while (cb->hwm > cb->start_workspace + cb->workspace_size - - WORK_SIZE_SAFETY_MARGIN - - (this_hwm_offset - base_hwm_offset)) - { - *errorcodeptr = expand_workspace(cb); - if (*errorcodeptr != 0) goto FAILED; - } - - for (hc = (PCRE2_UCHAR *)cb->start_workspace + base_hwm_offset; - hc < (PCRE2_UCHAR *)cb->start_workspace + this_hwm_offset; - hc += LINK_SIZE) - { - PUT(cb->hwm, 0, GET(hc, 0) + len); - cb->hwm += LINK_SIZE; - } - base_hwm_offset = this_hwm_offset; - code += len; - } - } - } - - if (repeat_max > 0) repeat_max -= repeat_min; - } - - /* This code is common to both the zero and non-zero minimum cases. If - the maximum is limited, it replicates the group in a nested fashion, - remembering the bracket starts on a stack. In the case of a zero minimum, - the first one was set up above. In all cases the repeat_max now specifies - the number of additional copies needed. Again, we must remember to - replicate entries on the forward reference list. */ - - if (repeat_max >= 0) - { - /* In the pre-compile phase, we don't actually do the replication. We - just adjust the length as if we had. For each repetition we must add 1 - to the length for BRAZERO and for all but the last repetition we must - add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some - paranoid checks to avoid integer overflow. The INT64_OR_DOUBLE type is - a 64-bit integer type when available, otherwise double. 
*/ - - if (lengthptr != NULL && repeat_max > 0) - { - size_t delta = repeat_max*(length_prevgroup + 1 + 2 + 2*LINK_SIZE) - - 2 - 2*LINK_SIZE; /* Last one doesn't nest */ - if ((INT64_OR_DOUBLE)repeat_max * - (INT64_OR_DOUBLE)(length_prevgroup + 1 + 2 + 2*LINK_SIZE) - > (INT64_OR_DOUBLE)INT_MAX || - OFLOW_MAX - *lengthptr < delta) - { - *errorcodeptr = ERR20; - goto FAILED; - } - *lengthptr += delta; - } - - /* This is compiling for real */ - - else for (i = repeat_max - 1; i >= 0; i--) - { - PCRE2_UCHAR *hc; - size_t this_hwm_offset = cb->hwm - cb->start_workspace; - - *code++ = OP_BRAZERO + repeat_type; - - /* All but the final copy start a new nesting, maintaining the - chain of brackets outstanding. */ - - if (i != 0) - { - int offset; - *code++ = OP_BRA; - offset = (bralink == NULL)? 0 : (int)(code - bralink); - bralink = code; - PUTINC(code, 0, offset); - } - - memcpy(code, previous, CU2BYTES(len)); - - /* Ensure there is enough workspace for forward references before - copying them. */ - - while (cb->hwm > cb->start_workspace + cb->workspace_size - - WORK_SIZE_SAFETY_MARGIN - - (this_hwm_offset - base_hwm_offset)) - { - *errorcodeptr = expand_workspace(cb); - if (*errorcodeptr != 0) goto FAILED; - } - - for (hc = (PCRE2_UCHAR *)cb->start_workspace + base_hwm_offset; - hc < (PCRE2_UCHAR *)cb->start_workspace + this_hwm_offset; - hc += LINK_SIZE) - { - PUT(cb->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1)); - cb->hwm += LINK_SIZE; - } - base_hwm_offset = this_hwm_offset; - code += len; - } - - /* Now chain through the pending brackets, and fill in their length - fields (which are holding the chain links pro tem). */ - - while (bralink != NULL) - { - int oldlinkoffset; - int offset = (int)(code - bralink + 1); - PCRE2_UCHAR *bra = code - offset; - oldlinkoffset = GET(bra, 1); - bralink = (oldlinkoffset == 0)? 
NULL : bralink - oldlinkoffset; - *code++ = OP_KET; - PUTINC(code, 0, offset); - PUT(bra, 1, offset); - } - } - - /* If the maximum is unlimited, set a repeater in the final copy. For - ONCE brackets, that's all we need to do. However, possessively repeated - ONCE brackets can be converted into non-capturing brackets, as the - behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to - deal with possessive ONCEs specially. - - Otherwise, when we are doing the actual compile phase, check to see - whether this group is one that could match an empty string. If so, - convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so - that runtime checking can be done. [This check is also applied to ONCE - groups at runtime, but in a different way.] - - Then, if the quantifier was possessive and the bracket is not a - conditional, we convert the BRA code to the POS form, and the KET code to - KETRPOS. (It turns out to be convenient at runtime to detect this kind of - subpattern at both the start and at the end.) The use of special opcodes - makes it possible to reduce greatly the stack usage in pcre_exec(). If - the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO. - - Then, if the minimum number of matches is 1 or 0, cancel the possessive - flag so that the default action below, of wrapping everything inside - atomic brackets, does not happen. When the minimum is greater than 1, - there will be earlier copies of the group, and so we still have to wrap - the whole thing. */ - - else - { - PCRE2_UCHAR *ketcode = code - 1 - LINK_SIZE; - PCRE2_UCHAR *bracode = ketcode - GET(ketcode, 1); - - /* Convert possessive ONCE brackets to non-capturing */ - - if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) && - possessive_quantifier) *bracode = OP_BRA; - - /* For non-possessive ONCE brackets, all we need to do is to - set the KET. 
*/ - - if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC) - *ketcode = OP_KETRMAX + repeat_type; - - /* Handle non-ONCE brackets and possessive ONCEs (which have been - converted to non-capturing above). */ - - else - { - /* In the compile phase, check for empty string matching. */ - - if (lengthptr == NULL) - { - PCRE2_UCHAR *scode = bracode; - do - { - if (could_be_empty_branch(scode, ketcode, utf, cb, NULL)) - { - *bracode += OP_SBRA - OP_BRA; - break; - } - scode += GET(scode, 1); - } - while (*scode == OP_ALT); - } - - /* Handle possessive quantifiers. */ - - if (possessive_quantifier) - { - /* For COND brackets, we wrap the whole thing in a possessively - repeated non-capturing bracket, because we have not invented POS - versions of the COND opcodes. Because we are moving code along, we - must ensure that any pending recursive references are updated. */ - - if (*bracode == OP_COND || *bracode == OP_SCOND) - { - int nlen = (int)(code - bracode); - *code = OP_END; - adjust_recurse(bracode, 1 + LINK_SIZE, utf, cb, item_hwm_offset); - memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen)); - code += 1 + LINK_SIZE; - nlen += 1 + LINK_SIZE; - *bracode = OP_BRAPOS; - *code++ = OP_KETRPOS; - PUTINC(code, 0, nlen); - PUT(bracode, 1, nlen); - } - - /* For non-COND brackets, we modify the BRA code and use KETRPOS. */ - - else - { - *bracode += 1; /* Switch to xxxPOS opcodes */ - *ketcode = OP_KETRPOS; - } - - /* If the minimum is zero, mark it as possessive, then unset the - possessive flag when the minimum is 0 or 1. */ - - if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO; - if (repeat_min < 2) possessive_quantifier = FALSE; - } - - /* Non-possessive quantifier */ - - else *ketcode = OP_KETRMAX + repeat_type; - } - } - } - - /* If previous is OP_FAIL, it was generated by an empty class [] - (PCRE2_ALLOW_EMPTY_CLASS is set). 
The other ways in which OP_FAIL can be - generated, that is by (*FAIL) or (?!), set previous to NULL, which gives a - "nothing to repeat" error above. We can just ignore the repeat in empty - class case. */ - - else if (*previous == OP_FAIL) goto END_REPEAT; - - /* Else there's some kind of shambles */ - - else - { - *errorcodeptr = ERR10; - goto FAILED; - } - - /* If the character following a repeat is '+', possessive_quantifier is - TRUE. For some opcodes, there are special alternative opcodes for this - case. For anything else, we wrap the entire repeated item inside OP_ONCE - brackets. Logically, the '+' notation is just syntactic sugar, taken from - Sun's Java package, but the special opcodes can optimize it. - - Some (but not all) possessively repeated subpatterns have already been - completely handled in the code just above. For them, possessive_quantifier - is always FALSE at this stage. Note that the repeated item starts at - tempcode, not at previous, which might be the first part of a string whose - (former) last char we repeated. */ - - if (possessive_quantifier) - { - int len; - - /* Possessifying an EXACT quantifier has no effect, so we can ignore it. - However, QUERY, STAR, or UPTO may follow (for quantifiers such as {5,6}, - {5,}, or {5,10}). We skip over an EXACT item; if the length of what - remains is greater than zero, there's a further opcode that can be - handled. If not, do nothing, leaving the EXACT alone. */ - - switch(*tempcode) - { - case OP_TYPEEXACT: - tempcode += PRIV(OP_lengths)[*tempcode] + - ((tempcode[1 + IMM2_SIZE] == OP_PROP - || tempcode[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0); - break; - - /* CHAR opcodes are used for exacts whose count is 1. 
*/ - - case OP_CHAR: - case OP_CHARI: - case OP_NOT: - case OP_NOTI: - case OP_EXACT: - case OP_EXACTI: - case OP_NOTEXACT: - case OP_NOTEXACTI: - tempcode += PRIV(OP_lengths)[*tempcode]; -#ifdef SUPPORT_UNICODE - if (utf && HAS_EXTRALEN(tempcode[-1])) - tempcode += GET_EXTRALEN(tempcode[-1]); -#endif - break; - - /* For the class opcodes, the repeat operator appears at the end; - adjust tempcode to point to it. */ - - case OP_CLASS: - case OP_NCLASS: - tempcode += 1 + 32/sizeof(PCRE2_UCHAR); - break; - -#ifdef SUPPORT_WIDE_CHARS - case OP_XCLASS: - tempcode += GET(tempcode, 1); - break; -#endif - } - - /* If tempcode is equal to code (which points to the end of the repeated - item), it means we have skipped an EXACT item but there is no following - QUERY, STAR, or UPTO; the value of len will be 0, and we do nothing. In - all other cases, tempcode will be pointing to the repeat opcode, and will - be less than code, so the value of len will be greater than 0. */ - - len = (int)(code - tempcode); - if (len > 0) - { - unsigned int repcode = *tempcode; - - /* There is a table for possessifying opcodes, all of which are less - than OP_CALLOUT. A zero entry means there is no possessified version. - */ - - if (repcode < OP_CALLOUT && opcode_possessify[repcode] > 0) - *tempcode = opcode_possessify[repcode]; - - /* For opcode without a special possessified version, wrap the item in - ONCE brackets. Because we are moving code along, we must ensure that - any pending recursive references are updated. */ - - else - { - *code = OP_END; - adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cb, item_hwm_offset); - memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len)); - code += 1 + LINK_SIZE; - len += 1 + LINK_SIZE; - tempcode[0] = OP_ONCE; - *code++ = OP_KET; - PUTINC(code, 0, len); - PUT(tempcode, 1, len); - } - } - } - - /* In all case we no longer have a previous item. 
We also set the - "follows varying string" flag for subsequently encountered reqcus if - it isn't already set and we have just passed a varying length item. */ - - END_REPEAT: - previous = NULL; - cb->req_varyopt |= reqvary; - break; - - - /* ===================================================================*/ - /* Start of nested parenthesized sub-expression, or comment or lookahead or - lookbehind or option setting or condition or all the other extended - parenthesis forms. We must save the current high-water-mark for the - forward reference list so that we know where they start for this group. - However, because the list may be extended when there are very many forward - references (usually the result of a replicated inner group), we must use - an offset rather than an absolute address. */ - - case CHAR_LEFT_PARENTHESIS: - ptr++; - - /* First deal with comments. Putting this code right at the start ensures - that comments have no bad side effects. */ - - if (ptr[0] == CHAR_QUESTION_MARK && ptr[1] == CHAR_NUMBER_SIGN) - { - ptr += 2; - while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; - if (*ptr != CHAR_RIGHT_PARENTHESIS) - { - *errorcodeptr = ERR18; - goto FAILED; - } - continue; - } - - /* Now deal with various "verbs" that can be introduced by '*'. */ - - if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':' - || (MAX_255(ptr[1]) && ((cb->ctypes[ptr[1]] & ctype_letter) != 0)))) - { - int i, namelen; - int arglen = 0; - const char *vn = verbnames; - PCRE2_SPTR name = ptr + 1; - PCRE2_SPTR arg = NULL; - previous = NULL; - ptr++; - while (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_letter) != 0) ptr++; - namelen = (int)(ptr - name); - - /* It appears that Perl allows any characters whatsoever, other than - a closing parenthesis, to appear in arguments, so we no longer insist on - letters, digits, and underscores. 
*/ - - if (*ptr == CHAR_COLON) - { - arg = ++ptr; - while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; - arglen = (int)(ptr - arg); - if ((unsigned int)arglen > MAX_MARK) - { - *errorcodeptr = ERR76; - goto FAILED; - } - } - - if (*ptr != CHAR_RIGHT_PARENTHESIS) - { - *errorcodeptr = ERR60; - goto FAILED; - } - - /* Scan the table of verb names */ - - for (i = 0; i < verbcount; i++) - { - if (namelen == verbs[i].len && - PRIV(strncmp_c8)(name, vn, namelen) == 0) - { - int setverb; - - /* Check for open captures before ACCEPT and convert it to - ASSERT_ACCEPT if in an assertion. */ - - if (verbs[i].op == OP_ACCEPT) - { - open_capitem *oc; - if (arglen != 0) - { - *errorcodeptr = ERR59; - goto FAILED; - } - cb->had_accept = TRUE; - for (oc = cb->open_caps; oc != NULL; oc = oc->next) - { - *code++ = OP_CLOSE; - PUT2INC(code, 0, oc->number); - } - setverb = *code++ = - (cb->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT; - - /* Do not set firstcu after *ACCEPT */ - if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; - } - - /* Handle other cases with/without an argument */ - - else if (arglen == 0) - { - if (verbs[i].op < 0) /* Argument is mandatory */ - { - *errorcodeptr = ERR66; - goto FAILED; - } - setverb = *code++ = verbs[i].op; - } - - else - { - if (verbs[i].op_arg < 0) /* Argument is forbidden */ - { - *errorcodeptr = ERR59; - goto FAILED; - } - setverb = *code++ = verbs[i].op_arg; - *code++ = arglen; - memcpy(code, arg, CU2BYTES(arglen)); - code += arglen; - *code++ = 0; - } - - switch (setverb) - { - case OP_THEN: - case OP_THEN_ARG: - cb->external_flags |= PCRE2_HASTHEN; - break; - - case OP_PRUNE: - case OP_PRUNE_ARG: - case OP_SKIP: - case OP_SKIP_ARG: - cb->had_pruneorskip = TRUE; - break; - } - - break; /* Found verb, exit loop */ - } - - vn += verbs[i].len + 1; - } - - if (i < verbcount) continue; /* Successfully handled a verb */ - *errorcodeptr = ERR60; /* Verb not recognized */ - goto FAILED; - } - - /* Initialization for "real" 
parentheses */ - - newoptions = options; - skipunits = 0; - bravalue = OP_CBRA; - reset_bracount = FALSE; - - /* Deal with the extended parentheses; all are introduced by '?', and the - appearance of any of them means that this is not a capturing group. */ - - if (*ptr == CHAR_QUESTION_MARK) - { - int i, count; - int namelen; /* Must be signed */ - uint32_t index; - uint32_t set, unset, *optset; - named_group *ng; - PCRE2_SPTR name; - PCRE2_UCHAR *slot; - - switch (*(++ptr)) - { - /* ------------------------------------------------------------ */ - case CHAR_VERTICAL_LINE: /* Reset capture count for each branch */ - reset_bracount = TRUE; - /* Fall through */ - - /* ------------------------------------------------------------ */ - case CHAR_COLON: /* Non-capturing bracket */ - bravalue = OP_BRA; - ptr++; - break; - - /* ------------------------------------------------------------ */ - case CHAR_LEFT_PARENTHESIS: - bravalue = OP_COND; /* Conditional group */ - tempptr = ptr; - - /* A condition can be an assertion, a number (referring to a numbered - group's having been set), a name (referring to a named group), or 'R', - referring to recursion. R and R&name are also permitted for - recursion tests. - - There are ways of testing a named group: (?(name)) is used by Python; - Perl 5.10 onwards uses (?() or (?('name')). - - There is one unfortunate ambiguity, caused by history. 'R' can be the - recursive thing or the name 'R' (and similarly for 'R' followed by - digits). We look for a name first; if not found, we try the other case. - - For compatibility with auto-callouts, we allow a callout to be - specified before a condition that is an assertion. First, check for the - syntax of a callout; if found, adjust the temporary pointer that is - used to check for an assertion condition. That's all that is needed! 
*/ - - if (ptr[1] == CHAR_QUESTION_MARK && ptr[2] == CHAR_C) - { - if (IS_DIGIT(ptr[3]) || ptr[3] == CHAR_RIGHT_PARENTHESIS) - { - for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break; - if (ptr[i] == CHAR_RIGHT_PARENTHESIS) - tempptr += i + 1; - } - else - { - uint32_t delimiter = 0; - for (i = 0; PRIV(callout_start_delims)[i] != 0; i++) - { - if (ptr[3] == PRIV(callout_start_delims)[i]) - { - delimiter = PRIV(callout_end_delims)[i]; - break; - } - } - if (delimiter != 0) - { - for (i = 4; ptr + i < cb->end_pattern; i++) - { - if (ptr[i] == delimiter) - { - if (ptr[i+1] == delimiter) i++; - else - { - if (ptr[i+1] == CHAR_RIGHT_PARENTHESIS) tempptr += i + 2; - break; - } - } - } - } - } - - /* tempptr should now be pointing to the opening parenthesis of the - assertion condition. */ - - if (*tempptr != CHAR_LEFT_PARENTHESIS) - { - *errorcodeptr = ERR28; - goto FAILED; - } - } - - /* For conditions that are assertions, check the syntax, and then exit - the switch. This will take control down to where bracketed groups - are processed. The assertion will be handled as part of the group, - but we need to identify this case because the conditional assertion may - not be quantifier. */ - - if (tempptr[1] == CHAR_QUESTION_MARK && - (tempptr[2] == CHAR_EQUALS_SIGN || - tempptr[2] == CHAR_EXCLAMATION_MARK || - (tempptr[2] == CHAR_LESS_THAN_SIGN && - (tempptr[3] == CHAR_EQUALS_SIGN || - tempptr[3] == CHAR_EXCLAMATION_MARK)))) - { - cb->iscondassert = TRUE; - break; - } - - /* Other conditions use OP_CREF/OP_DNCREF/OP_RREF/OP_DNRREF, and all - need to skip at least 1+IMM2_SIZE bytes at the start of the group. 
*/ - - code[1+LINK_SIZE] = OP_CREF; - skipunits = 1+IMM2_SIZE; - refsign = -1; /* => not a number */ - namelen = -1; /* => not a name; must set to avoid warning */ - name = NULL; /* Always set to avoid warning */ - recno = 0; /* Always set to avoid warning */ - - /* Point at character after (?( */ - - ptr++; - - /* Check for (?(VERSION[>]=n.m), which is a facility whereby indirect - users of PCRE2 via an application can discover which release of PCRE2 - is being used. */ - - if (PRIV(strncmp_c8)(ptr, STRING_VERSION, 7) == 0 && - ptr[7] != CHAR_RIGHT_PARENTHESIS) - { - BOOL ge = FALSE; - int major = 0; - int minor = 0; - - ptr += 7; - if (*ptr == CHAR_GREATER_THAN_SIGN) - { - ge = TRUE; - ptr++; - } - - /* NOTE: cannot write IS_DIGIT(*(++ptr)) here because IS_DIGIT - references its argument twice. */ - - if (*ptr != CHAR_EQUALS_SIGN || (ptr++, !IS_DIGIT(*ptr))) - { - *errorcodeptr = ERR79; - goto FAILED; - } - - while (IS_DIGIT(*ptr)) major = major * 10 + *ptr++ - '0'; - if (*ptr == CHAR_DOT) - { - ptr++; - while (IS_DIGIT(*ptr)) minor = minor * 10 + *ptr++ - '0'; - } - - if (*ptr != CHAR_RIGHT_PARENTHESIS) - { - *errorcodeptr = ERR79; - goto FAILED; - } - - if (ge) - code[1+LINK_SIZE] = ((PCRE2_MAJOR > major) || - (PCRE2_MAJOR == major && PCRE2_MINOR >= minor))? - OP_TRUE : OP_FALSE; - else - code[1+LINK_SIZE] = (PCRE2_MAJOR == major && PCRE2_MINOR == minor)? - OP_TRUE : OP_FALSE; - - ptr++; - skipunits = 1; - break; /* End of condition processing */ - } - - /* Check for a test for recursion in a named group. */ - - if (*ptr == CHAR_R && ptr[1] == CHAR_AMPERSAND) - { - terminator = -1; - ptr += 2; - code[1+LINK_SIZE] = OP_RREF; /* Change the type of test */ - } - - /* Check for a test for a named group's having been set, using the Perl - syntax (?() or (?('name'), and also allow for the original PCRE - syntax of (?(name) or for (?(+n), (?(-n), and just (?(n). 
*/ - - else if (*ptr == CHAR_LESS_THAN_SIGN) - { - terminator = CHAR_GREATER_THAN_SIGN; - ptr++; - } - else if (*ptr == CHAR_APOSTROPHE) - { - terminator = CHAR_APOSTROPHE; - ptr++; - } - else - { - terminator = CHAR_NULL; - if (*ptr == CHAR_MINUS || *ptr == CHAR_PLUS) refsign = *ptr++; - else if (IS_DIGIT(*ptr)) refsign = 0; - } - - /* Handle a number */ - - if (refsign >= 0) - { - while (IS_DIGIT(*ptr)) - { - recno = recno * 10 + (int)(*ptr - CHAR_0); - ptr++; - } - } - - /* Otherwise we expect to read a name; anything else is an error. When - the referenced name is one of a number of duplicates, a different - opcode is used and it needs more memory. Unfortunately we cannot tell - whether this is the case in the first pass, so we have to allow for - more memory always. In the second pass, the additional to skipunits - happens later. */ - - else - { - if (IS_DIGIT(*ptr)) - { - *errorcodeptr = ERR44; /* Group name must start with non-digit */ - goto FAILED; - } - if (!MAX_255(*ptr) || (cb->ctypes[*ptr] & ctype_word) == 0) - { - *errorcodeptr = ERR28; /* Assertion expected */ - goto FAILED; - } - name = ptr++; - while (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_word) != 0) - { - ptr++; - } - namelen = (int)(ptr - name); - if (lengthptr != NULL) skipunits += IMM2_SIZE; - } - - /* Check the terminator */ - - if ((terminator > 0 && *ptr++ != (PCRE2_UCHAR)terminator) || - *ptr++ != CHAR_RIGHT_PARENTHESIS) - { - ptr--; /* Error offset */ - *errorcodeptr = ERR26; /* Malformed number or name */ - goto FAILED; - } - - /* Do no further checking in the pre-compile phase. */ - - if (lengthptr != NULL) break; - - /* In the real compile we do the work of looking for the actual - reference. If refsign is not negative, it means we have a number in - recno. */ - - if (refsign >= 0) - { - if (recno <= 0) - { - *errorcodeptr = ERR35; - goto FAILED; - } - if (refsign != 0) recno = (refsign == CHAR_MINUS)? 
- cb->bracount - recno + 1 : recno + cb->bracount; - if (recno <= 0 || (uint32_t)recno > cb->final_bracount) - { - *errorcodeptr = ERR15; - goto FAILED; - } - PUT2(code, 2+LINK_SIZE, recno); - if ((uint32_t)recno > cb->top_backref) cb->top_backref = recno; - break; - } - - /* Otherwise look for the name. */ - - slot = cb->name_table; - for (i = 0; i < cb->names_found; i++) - { - if (PRIV(strncmp)(name, slot+IMM2_SIZE, namelen) == 0) break; - slot += cb->name_entry_size; - } - - /* Found the named subpattern. If the name is duplicated, add one to - the opcode to change CREF/RREF into DNCREF/DNRREF and insert - appropriate data values. Otherwise, just insert the unique subpattern - number. */ - - if (i < cb->names_found) - { - int offset = i; /* Offset of first name found */ - - count = 0; - for (;;) - { - recno = GET2(slot, 0); /* Number for last found */ - if ((uint32_t)recno > cb->top_backref) cb->top_backref = recno; - count++; - if (++i >= cb->names_found) break; - slot += cb->name_entry_size; - if (PRIV(strncmp)(name, slot+IMM2_SIZE, namelen) != 0 || - (slot+IMM2_SIZE)[namelen] != 0) break; - } - - if (count > 1) - { - PUT2(code, 2+LINK_SIZE, offset); - PUT2(code, 2+LINK_SIZE+IMM2_SIZE, count); - skipunits += IMM2_SIZE; - code[1+LINK_SIZE]++; - } - else /* Not a duplicated name */ - { - PUT2(code, 2+LINK_SIZE, recno); - } - } - - /* If terminator == CHAR_NULL it means that the name followed directly - after the opening parenthesis [e.g. (?(abc)...] and in this case there - are some further alternatives to try. For the cases where terminator != - CHAR_NULL [things like (?(... or (?('name')... or (?(R&name)... ] - we have now checked all the possibilities, so give an error. */ - - else if (terminator != CHAR_NULL) - { - *errorcodeptr = ERR15; - goto FAILED; - } - - /* Check for (?(R) for recursion. Allow digits after R to specify a - specific group number. 
*/ - - else if (*name == CHAR_R) - { - recno = 0; - for (i = 1; i < namelen; i++) - { - if (!IS_DIGIT(name[i])) - { - *errorcodeptr = ERR15; - goto FAILED; - } - recno = recno * 10 + name[i] - CHAR_0; - } - if (recno == 0) recno = RREF_ANY; - code[1+LINK_SIZE] = OP_RREF; /* Change test type */ - PUT2(code, 2+LINK_SIZE, recno); - } - - /* Similarly, check for the (?(DEFINE) "condition", which is always - false. During compilation we set OP_DEFINE to distinguish this from - other OP_FALSE conditions so that it can be checked for having only one - branch, but after that the opcode is changed to OP_FALSE. */ - - else if (namelen == 6 && PRIV(strncmp_c8)(name, STRING_DEFINE, 6) == 0) - { - code[1+LINK_SIZE] = OP_DEFINE; - skipunits = 1; - } - - /* Reference to an unidentified subpattern. */ - - else - { - *errorcodeptr = ERR15; - goto FAILED; - } - break; - - - /* ------------------------------------------------------------ */ - case CHAR_EQUALS_SIGN: /* Positive lookahead */ - bravalue = OP_ASSERT; - cb->assert_depth += 1; - ptr++; - break; - - /* Optimize (?!) to (*FAIL) unless it is quantified - which is a weird - thing to do, but Perl allows all assertions to be quantified, and when - they contain capturing parentheses there may be a potential use for - this feature. Not that that applies to a quantified (?!) but we allow - it for uniformity. 
*/ - - /* ------------------------------------------------------------ */ - case CHAR_EXCLAMATION_MARK: /* Negative lookahead */ - ptr++; - if (*ptr == CHAR_RIGHT_PARENTHESIS && ptr[1] != CHAR_ASTERISK && - ptr[1] != CHAR_PLUS && ptr[1] != CHAR_QUESTION_MARK && - (ptr[1] != CHAR_LEFT_CURLY_BRACKET || !is_counted_repeat(ptr+2))) - { - *code++ = OP_FAIL; - previous = NULL; - continue; - } - bravalue = OP_ASSERT_NOT; - cb->assert_depth += 1; - break; - - - /* ------------------------------------------------------------ */ - case CHAR_LESS_THAN_SIGN: /* Lookbehind or named define */ - switch (ptr[1]) - { - case CHAR_EQUALS_SIGN: /* Positive lookbehind */ - bravalue = OP_ASSERTBACK; - cb->assert_depth += 1; - ptr += 2; - break; - - case CHAR_EXCLAMATION_MARK: /* Negative lookbehind */ - bravalue = OP_ASSERTBACK_NOT; - cb->assert_depth += 1; - ptr += 2; - break; - - /* Must be a name definition - as the syntax was checked in the - pre-pass, we can assume here that it is valid. Skip over the name - and go to handle the numbered group. */ - - default: - while (*(++ptr) != CHAR_GREATER_THAN_SIGN); - ptr++; - goto NUMBERED_GROUP; - } - break; - - - /* ------------------------------------------------------------ */ - case CHAR_GREATER_THAN_SIGN: /* One-time brackets */ - bravalue = OP_ONCE; - ptr++; - break; - - - /* ------------------------------------------------------------ */ - case CHAR_C: /* Callout */ - previous_callout = code; /* Save for later completion */ - after_manual_callout = 1; /* Skip one item before completing */ - ptr++; /* Character after (?C */ - - /* A callout may have a string argument, delimited by one of a fixed - number of characters, or an undelimited numerical argument, or no - argument, which is the same as (?C0). Different opcodes are used for - the two cases. 
*/ - - if (*ptr != CHAR_RIGHT_PARENTHESIS && !IS_DIGIT(*ptr)) - { - uint32_t delimiter = 0; - - for (i = 0; PRIV(callout_start_delims)[i] != 0; i++) - { - if (*ptr == PRIV(callout_start_delims)[i]) - { - delimiter = PRIV(callout_end_delims)[i]; - break; - } - } - - if (delimiter == 0) - { - *errorcodeptr = ERR82; - goto FAILED; - } - - /* During the pre-compile phase, we parse the string and update the - length. There is no need to generate any code. */ - - if (lengthptr != NULL) /* Only check the string */ - { - PCRE2_SPTR start = ptr; - do - { - if (++ptr >= cb->end_pattern) - { - *errorcodeptr = ERR81; - ptr = start; /* To give a more useful message */ - goto FAILED; - } - if (ptr[0] == delimiter && ptr[1] == delimiter) ptr += 2; - } - while (ptr[0] != delimiter); - - /* Start points to the opening delimiter, ptr points to the - closing delimiter. We must allow for including the delimiter and - for the terminating zero. Any doubled delimiters within the string - make this an overestimate, but it is not worth bothering about. */ - - (*lengthptr) += (ptr - start) + 2 + (1 + 4*LINK_SIZE); - } - - /* In the real compile we can copy the string, knowing that it is - syntactically OK. The starting delimiter is included so that the - client can discover it if they want. We also pass the start offset to - help a script language give better error messages. 
*/ - - else - { - PCRE2_UCHAR *callout_string = code + (1 + 4*LINK_SIZE); - *callout_string++ = *ptr++; - PUT(code, 1 + 3*LINK_SIZE, (int)(ptr - cb->start_pattern)); /* Start offset */ - for(;;) - { - if (*ptr == delimiter) - { - if (ptr[1] == delimiter) ptr++; else break; - } - *callout_string++ = *ptr++; - } - *callout_string++ = CHAR_NULL; - code[0] = OP_CALLOUT_STR; - PUT(code, 1, (int)(ptr + 2 - cb->start_pattern)); /* Next offset */ - PUT(code, 1 + LINK_SIZE, 0); /* Default length */ - PUT(code, 1 + 2*LINK_SIZE, /* Compute size */ - (int)(callout_string - code)); - code = callout_string; - } - - /* Advance to what should be the closing parenthesis, which is - checked below. */ - - ptr++; - } - - /* Handle a callout with an optional numerical argument, which must be - less than or equal to 255. A missing argument gives 0. */ - - else - { - int n = 0; - code[0] = OP_CALLOUT; /* Numerical callout */ - while (IS_DIGIT(*ptr)) - { - n = n * 10 + *ptr++ - CHAR_0; - if (n > 255) - { - *errorcodeptr = ERR38; - goto FAILED; - } - } - PUT(code, 1, (int)(ptr - cb->start_pattern + 1)); /* Next offset */ - PUT(code, 1 + LINK_SIZE, 0); /* Default length */ - code[1 + 2*LINK_SIZE] = n; /* Callout number */ - code += PRIV(OP_lengths)[OP_CALLOUT]; - } - - /* Both formats must have a closing parenthesis */ - - if (*ptr != CHAR_RIGHT_PARENTHESIS) - { - *errorcodeptr = ERR39; - goto FAILED; - } - - /* Callouts cannot be quantified. 
*/ - - previous = NULL; - continue; - - - /* ------------------------------------------------------------ */ - case CHAR_P: /* Python-style named subpattern handling */ - if (*(++ptr) == CHAR_EQUALS_SIGN || - *ptr == CHAR_GREATER_THAN_SIGN) /* Reference or recursion */ - { - is_recurse = *ptr == CHAR_GREATER_THAN_SIGN; - terminator = CHAR_RIGHT_PARENTHESIS; - goto NAMED_REF_OR_RECURSE; - } - else if (*ptr != CHAR_LESS_THAN_SIGN) /* Test for Python-style defn */ - { - *errorcodeptr = ERR41; - goto FAILED; - } - /* Fall through to handle (?P< as (?< is handled */ - - - /* ------------------------------------------------------------ */ - case CHAR_APOSTROPHE: /* Define a name - note fall through above */ - - /* The syntax was checked and the list of names was set up in the - pre-pass, so there is nothing to be done now except to skip over the - name. */ - - terminator = (*ptr == CHAR_LESS_THAN_SIGN)? - CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE; - while (*(++ptr) != (unsigned int)terminator); - ptr++; - goto NUMBERED_GROUP; /* Set up numbered group */ - - - /* ------------------------------------------------------------ */ - case CHAR_AMPERSAND: /* Perl recursion/subroutine syntax */ - terminator = CHAR_RIGHT_PARENTHESIS; - is_recurse = TRUE; - /* Fall through */ - - /* We come here from the Python syntax above that handles both - references (?P=name) and recursion (?P>name), as well as falling - through from the Perl recursion syntax (?&name). We also come here from - the Perl \k or \k'name' back reference syntax and the \k{name} - .NET syntax, and the Oniguruma \g<...> and \g'...' subroutine syntax. */ - - NAMED_REF_OR_RECURSE: - name = ++ptr; - if (IS_DIGIT(*ptr)) - { - *errorcodeptr = ERR44; /* Group name must start with non-digit */ - goto FAILED; - } - while (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_word) != 0) ptr++; - namelen = (int)(ptr - name); - - /* In the pre-compile phase, do a syntax check. 
*/ - - if (lengthptr != NULL) - { - if (namelen == 0) - { - *errorcodeptr = ERR62; - goto FAILED; - } - if (*ptr != (PCRE2_UCHAR)terminator) - { - *errorcodeptr = ERR42; - goto FAILED; - } - if (namelen > MAX_NAME_SIZE) - { - *errorcodeptr = ERR48; - goto FAILED; - } - } - - /* Scan the list of names generated in the pre-pass in order to get - a number and whether or not this name is duplicated. */ - - recno = 0; - is_dupname = FALSE; - ng = cb->named_groups; - - for (i = 0; i < cb->names_found; i++, ng++) - { - if (namelen == ng->length && - PRIV(strncmp)(name, ng->name, namelen) == 0) - { - open_capitem *oc; - is_dupname = ng->isdup; - recno = ng->number; - - /* For a recursion, that's all that is needed. We can now go to the - code that handles numerical recursion. */ - - if (is_recurse) goto HANDLE_RECURSION; - - /* For a back reference, update the back reference map and the - maximum back reference. Then for each group we must check to see if - it is recursive, that is, it is inside the group that it - references. A flag is set so that the group can be made atomic. */ - - cb->backref_map |= (recno < 32)? (1u << recno) : 1; - if ((uint32_t)recno > cb->top_backref) cb->top_backref = recno; - - for (oc = cb->open_caps; oc != NULL; oc = oc->next) - { - if (oc->number == recno) - { - oc->flag = TRUE; - break; - } - } - } - } - - /* If the name was not found we have a bad reference. */ - - if (recno == 0) - { - *errorcodeptr = ERR15; - goto FAILED; - } - - /* If a back reference name is not duplicated, we can handle it as a - numerical reference. */ - - if (!is_dupname) goto HANDLE_REFERENCE; - - /* If a back reference name is duplicated, we generate a different - opcode to a numerical back reference. In the second pass we must search - for the index and count in the final name table. 
*/ - - count = 0; - index = 0; - - if (lengthptr == NULL) - { - slot = cb->name_table; - for (i = 0; i < cb->names_found; i++) - { - if (PRIV(strncmp)(name, slot+IMM2_SIZE, namelen) == 0 && - slot[IMM2_SIZE+namelen] == 0) - { - if (count == 0) index = i; - count++; - } - slot += cb->name_entry_size; - } - - if (count == 0) - { - *errorcodeptr = ERR15; - goto FAILED; - } - } - - if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; - previous = code; - item_hwm_offset = cb->hwm - cb->start_workspace; - *code++ = ((options & PCRE2_CASELESS) != 0)? OP_DNREFI : OP_DNREF; - PUT2INC(code, 0, index); - PUT2INC(code, 0, count); - continue; /* End of back ref handling */ - - - /* ------------------------------------------------------------ */ - case CHAR_R: /* Recursion */ - ptr++; /* Same as (?0) */ - /* Fall through */ - - - /* ------------------------------------------------------------ */ - case CHAR_MINUS: case CHAR_PLUS: /* Recursion or subroutine */ - case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: - case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9: - { - PCRE2_SPTR called; - terminator = CHAR_RIGHT_PARENTHESIS; - - /* Come here from the \g<...> and \g'...' code (Oniguruma - compatibility). However, the syntax has been checked to ensure that - the ... are a (signed) number, so that neither ERR63 nor ERR29 will - be called on this path, nor with the jump to OTHER_CHAR_AFTER_QUERY - ever be taken. 
*/ - - HANDLE_NUMERICAL_RECURSION: - - if ((refsign = *ptr) == CHAR_PLUS) - { - ptr++; - if (!IS_DIGIT(*ptr)) - { - *errorcodeptr = ERR63; - goto FAILED; - } - } - else if (refsign == CHAR_MINUS) - { - if (!IS_DIGIT(ptr[1])) - goto OTHER_CHAR_AFTER_QUERY; - ptr++; - } - - recno = 0; - while (IS_DIGIT(*ptr)) - { - if (recno > INT_MAX / 10 - 1) /* Integer overflow */ - { - while (IS_DIGIT(*ptr)) ptr++; - *errorcodeptr = ERR61; - goto FAILED; - } - recno = recno * 10 + *ptr++ - CHAR_0; - } - - if (*ptr != (PCRE2_UCHAR)terminator) - { - *errorcodeptr = ERR29; - goto FAILED; - } - - if (refsign == CHAR_MINUS) - { - if (recno == 0) - { - *errorcodeptr = ERR58; - goto FAILED; - } - recno = cb->bracount - recno + 1; - if (recno <= 0) - { - *errorcodeptr = ERR15; - goto FAILED; - } - } - else if (refsign == CHAR_PLUS) - { - if (recno == 0) - { - *errorcodeptr = ERR58; - goto FAILED; - } - recno += cb->bracount; - } - - /* Come here from code above that handles a named recursion */ - - HANDLE_RECURSION: - previous = code; - item_hwm_offset = cb->hwm - cb->start_workspace; - called = cb->start_code; - - /* When we are actually compiling, find the bracket that is being - referenced. Temporarily end the regex in case it doesn't exist before - this point. If we end up with a forward reference, first check that - the bracket does occur later so we can give the error (and position) - now. Then remember this forward reference in the workspace so it can - be filled in at the end. */ - - if (lengthptr == NULL) - { - *code = OP_END; - if (recno != 0) - called = PRIV(find_bracket)(cb->start_code, utf, recno); - - /* Forward reference */ - - if (called == NULL) - { - if ((uint32_t)recno > cb->final_bracount) - { - *errorcodeptr = ERR15; - goto FAILED; - } - - /* Fudge the value of "called" so that when it is inserted as an - offset below, what it actually inserted is the reference number - of the group. 
Then remember the forward reference, expanding the - working space where the list is kept if necessary. */ - - called = cb->start_code + recno; - if (cb->hwm >= cb->start_workspace + cb->workspace_size - - WORK_SIZE_SAFETY_MARGIN) - { - *errorcodeptr = expand_workspace(cb); - if (*errorcodeptr != 0) goto FAILED; - } - PUTINC(cb->hwm, 0, (int)(code + 1 - cb->start_code)); - } - - /* If not a forward reference, and the subpattern is still open, - this is a recursive call. We check to see if this is a left - recursion that could loop for ever, and diagnose that case. We - must not, however, do this check if we are in a conditional - subpattern because the condition might be testing for recursion in - a pattern such as /(?(R)a+|(?R)b)/, which is perfectly valid. - Forever loops are also detected at runtime, so those that occur in - conditional subpatterns will be picked up then. */ - - else if (GET(called, 1) == 0 && cond_depth <= 0 && - could_be_empty(called, code, bcptr, utf, cb)) - { - *errorcodeptr = ERR40; - goto FAILED; - } - } - - /* Insert the recursion/subroutine item. It does not have a set first - character (relevant if it is repeated, because it will then be - wrapped with ONCE brackets). 
*/ - - *code = OP_RECURSE; - PUT(code, 1, (int)(called - cb->start_code)); - code += 1 + LINK_SIZE; - groupsetfirstcu = FALSE; - } - - /* Can't determine a first byte now */ - - if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; - continue; - - - /* ------------------------------------------------------------ */ - default: /* Other characters: check option setting */ - OTHER_CHAR_AFTER_QUERY: - set = unset = 0; - optset = &set; - - while (*ptr != CHAR_RIGHT_PARENTHESIS && *ptr != CHAR_COLON) - { - switch (*ptr++) - { - case CHAR_MINUS: optset = &unset; break; - - case CHAR_J: /* Record that it changed in the external options */ - *optset |= PCRE2_DUPNAMES; - cb->external_flags |= PCRE2_JCHANGED; - break; - - case CHAR_i: *optset |= PCRE2_CASELESS; break; - case CHAR_m: *optset |= PCRE2_MULTILINE; break; - case CHAR_s: *optset |= PCRE2_DOTALL; break; - case CHAR_x: *optset |= PCRE2_EXTENDED; break; - case CHAR_U: *optset |= PCRE2_UNGREEDY; break; - - default: *errorcodeptr = ERR11; - ptr--; /* Correct the offset */ - goto FAILED; - } - } - - /* Set up the changed option bits, but don't change anything yet. */ - - newoptions = (options | set) & (~unset); - - /* If the options ended with ')' this is not the start of a nested - group with option changes, so the options change at this level. If this - item is right at the start of the pattern, the options can be - abstracted and made external in the pre-compile phase, and ignored in - the compile phase. This can be helpful when matching -- for instance in - caseless checking of required bytes. - - If the code pointer is not (cb->start_code + 1 + LINK_SIZE), we are - definitely *not* at the start of the pattern because something has been - compiled. In the pre-compile phase, however, the code pointer can have - that value after the start, because it gets reset as code is discarded - during the pre-compile. 
However, this can happen only at top level - if - we are within parentheses, the starting BRA will still be present. At - any parenthesis level, the length value can be used to test if anything - has been compiled at that level. Thus, a test for both these conditions - is necessary to ensure we correctly detect the start of the pattern in - both phases. - - If we are not at the pattern start, reset the greedy defaults and the - case value for firstcu and reqcu. */ - - if (*ptr == CHAR_RIGHT_PARENTHESIS) - { - if (code == cb->start_code + 1 + LINK_SIZE && - (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE)) - { - cb->external_options = newoptions; - } - else - { - greedy_default = ((newoptions & PCRE2_UNGREEDY) != 0); - greedy_non_default = greedy_default ^ 1; - req_caseopt = ((newoptions & PCRE2_CASELESS) != 0)? REQ_CASELESS:0; - } - - /* Change options at this level, and pass them back for use - in subsequent branches. */ - - *optionsptr = options = newoptions; - previous = NULL; /* This item can't be repeated */ - continue; /* It is complete */ - } - - /* If the options ended with ':' we are heading into a nested group - with possible change of options. Such groups are non-capturing and are - not assertions of any kind. All we need to do is skip over the ':'; - the newoptions value is handled below. */ - - bravalue = OP_BRA; - ptr++; - } /* End of switch for character following (? */ - } /* End of (? handling */ - - /* Opening parenthesis not followed by '*' or '?'. If PCRE2_NO_AUTO_CAPTURE - is set, all unadorned brackets become non-capturing and behave like (?:...) - brackets. */ - - else if ((options & PCRE2_NO_AUTO_CAPTURE) != 0) - { - bravalue = OP_BRA; - } - - /* Else we have a capturing group. */ - - else - { - NUMBERED_GROUP: - cb->bracount += 1; - PUT2(code, 1+LINK_SIZE, cb->bracount); - skipunits = IMM2_SIZE; - } - - /* Process nested bracketed regex. First check for parentheses nested too - deeply. 
*/ - - if ((cb->parens_depth += 1) > (int)(cb->cx->parens_nest_limit)) - { - *errorcodeptr = ERR19; - goto FAILED; - } - - /* All assertions used not to be repeatable, but this was changed for Perl - compatibility. All kinds can now be repeated except for assertions that are - conditions (Perl also forbids these to be repeated). We copy code into a - non-register variable (tempcode) in order to be able to pass its address - because some compilers complain otherwise. At the start of a conditional - group whose condition is an assertion, cb->iscondassert is set. We unset it - here so as to allow assertions later in the group to be quantified. */ - - if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT && - cb->iscondassert) - { - previous = NULL; - cb->iscondassert = FALSE; - } - else - { - previous = code; - item_hwm_offset = cb->hwm - cb->start_workspace; - } - - *code = bravalue; - tempcode = code; - tempreqvary = cb->req_varyopt; /* Save value before bracket */ - tempbracount = cb->bracount; /* Save value before bracket */ - length_prevgroup = 0; /* Initialize for pre-compile phase */ - - if (!compile_regex( - newoptions, /* The complete new option state */ - &tempcode, /* Where to put code (updated) */ - &ptr, /* Input pointer (updated) */ - errorcodeptr, /* Where to put an error message */ - (bravalue == OP_ASSERTBACK || - bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */ - reset_bracount, /* True if (?| group */ - skipunits, /* Skip over bracket number */ - cond_depth + - ((bravalue == OP_COND)?1:0), /* Depth of condition subpatterns */ - &subfirstcu, /* For possible first char */ - &subfirstcuflags, - &subreqcu, /* For possible last char */ - &subreqcuflags, - bcptr, /* Current branch chain */ - cb, /* Compile data block */ - (lengthptr == NULL)? 
NULL : /* Actual compile phase */ - &length_prevgroup /* Pre-compile phase */ - )) - goto FAILED; - - cb->parens_depth -= 1; - - /* If this was an atomic group and there are no capturing groups within it, - generate OP_ONCE_NC instead of OP_ONCE. */ - - if (bravalue == OP_ONCE && cb->bracount <= tempbracount) - *code = OP_ONCE_NC; - - if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT) - cb->assert_depth -= 1; - - /* At the end of compiling, code is still pointing to the start of the - group, while tempcode has been updated to point past the end of the group. - The pattern pointer (ptr) is on the bracket. - - If this is a conditional bracket, check that there are no more than - two branches in the group, or just one if it's a DEFINE group. We do this - in the real compile phase, not in the pre-pass, where the whole group may - not be available. */ - - if (bravalue == OP_COND && lengthptr == NULL) - { - PCRE2_UCHAR *tc = code; - int condcount = 0; - - do { - condcount++; - tc += GET(tc,1); - } - while (*tc != OP_KET); - - /* A DEFINE group is never obeyed inline (the "condition" is always - false). It must have only one branch. Having checked this, change the - opcode to OP_FALSE. */ - - if (code[LINK_SIZE+1] == OP_DEFINE) - { - if (condcount > 1) - { - *errorcodeptr = ERR54; - goto FAILED; - } - code[LINK_SIZE+1] = OP_FALSE; - bravalue = OP_DEFINE; /* Just a flag to suppress char handling below */ - } - - /* A "normal" conditional group. If there is just one branch, we must not - make use of its firstcu or reqcu, because this is equivalent to an - empty second branch. 
*/ - - else - { - if (condcount > 2) - { - *errorcodeptr = ERR27; - goto FAILED; - } - if (condcount == 1) subfirstcuflags = subreqcuflags = REQ_NONE; - } - } - - /* Error if hit end of pattern */ - - if (*ptr != CHAR_RIGHT_PARENTHESIS) - { - *errorcodeptr = ERR14; - goto FAILED; - } - - /* In the pre-compile phase, update the length by the length of the group, - less the brackets at either end. Then reduce the compiled code to just a - set of non-capturing brackets so that it doesn't use much memory if it is - duplicated by a quantifier.*/ - - if (lengthptr != NULL) - { - if (OFLOW_MAX - *lengthptr < length_prevgroup - 2 - 2*LINK_SIZE) - { - *errorcodeptr = ERR20; - goto FAILED; - } - *lengthptr += length_prevgroup - 2 - 2*LINK_SIZE; - code++; /* This already contains bravalue */ - PUTINC(code, 0, 1 + LINK_SIZE); - *code++ = OP_KET; - PUTINC(code, 0, 1 + LINK_SIZE); - break; /* No need to waste time with special character handling */ - } - - /* Otherwise update the main code pointer to the end of the group. */ - - code = tempcode; - - /* For a DEFINE group, required and first character settings are not - relevant. */ - - if (bravalue == OP_DEFINE) break; - - /* Handle updating of the required and first characters for other types of - group. Update for normal brackets of all kinds, and conditions with two - branches (see code above). If the bracket is followed by a quantifier with - zero repeat, we have to back off. Hence the definition of zeroreqcu and - zerofirstcu outside the main loop so that they can be accessed for the - back off. */ - - zeroreqcu = reqcu; - zeroreqcuflags = reqcuflags; - zerofirstcu = firstcu; - zerofirstcuflags = firstcuflags; - groupsetfirstcu = FALSE; - - if (bravalue >= OP_ONCE) - { - /* If we have not yet set a firstcu in this branch, take it from the - subpattern, remembering that it was set here so that a repeat of more - than one can replicate it as reqcu if necessary. 
If the subpattern has - no firstcu, set "none" for the whole branch. In both cases, a zero - repeat forces firstcu to "none". */ - - if (firstcuflags == REQ_UNSET && subfirstcuflags != REQ_UNSET) - { - if (subfirstcuflags >= 0) - { - firstcu = subfirstcu; - firstcuflags = subfirstcuflags; - groupsetfirstcu = TRUE; - } - else firstcuflags = REQ_NONE; - zerofirstcuflags = REQ_NONE; - } - - /* If firstcu was previously set, convert the subpattern's firstcu - into reqcu if there wasn't one, using the vary flag that was in - existence beforehand. */ - - else if (subfirstcuflags >= 0 && subreqcuflags < 0) - { - subreqcu = subfirstcu; - subreqcuflags = subfirstcuflags | tempreqvary; - } - - /* If the subpattern set a required byte (or set a first byte that isn't - really the first byte - see above), set it. */ - - if (subreqcuflags >= 0) - { - reqcu = subreqcu; - reqcuflags = subreqcuflags; - } - } - - /* For a forward assertion, we take the reqcu, if set. This can be - helpful if the pattern that follows the assertion doesn't set a different - char. For example, it's useful for /(?=abcde).+/. We can't set firstcu - for an assertion, however because it leads to incorrect effect for patterns - such as /(?=a)a.+/ when the "real" "a" would then become a reqcu instead - of a firstcu. This is overcome by a scan at the end if there's no - firstcu, looking for an asserted first char. */ - - else if (bravalue == OP_ASSERT && subreqcuflags >= 0) - { - reqcu = subreqcu; - reqcuflags = subreqcuflags; - } - break; /* End of processing '(' */ - - - /* ===================================================================*/ - /* Handle metasequences introduced by \. For ones like \d, the ESC_ values - are arranged to be the negation of the corresponding OP_values in the - default case when PCRE2_UCP is not set. For the back references, the values - are negative the reference number. Only back references and those types - that consume a character may be repeated. 
We can test for values between - ESC_b and ESC_Z for the latter; this may have to change if any new ones are - ever created. */ - - case CHAR_BACKSLASH: - tempptr = ptr; - escape = check_escape(&ptr, &ec, errorcodeptr, options, FALSE, cb); - if (*errorcodeptr != 0) goto FAILED; - - if (escape == 0) /* The escape coded a single character */ - c = ec; - else - { - if (escape == ESC_Q) /* Handle start of quoted string */ - { - if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E) - ptr += 2; /* avoid empty string */ - else inescq = TRUE; - continue; - } - - if (escape == ESC_E) continue; /* Perl ignores an orphan \E */ - - /* For metasequences that actually match a character, we disable the - setting of a first character if it hasn't already been set. */ - - if (firstcuflags == REQ_UNSET && escape > ESC_b && escape < ESC_Z) - firstcuflags = REQ_NONE; - - /* Set values to reset to if this is followed by a zero repeat. */ - - zerofirstcu = firstcu; - zerofirstcuflags = firstcuflags; - zeroreqcu = reqcu; - zeroreqcuflags = reqcuflags; - - /* \g or \g'name' is a subroutine call by name and \g or \g'n' - is a subroutine call by number (Oniguruma syntax). In fact, the value - ESC_g is returned only for these cases. So we don't need to check for < - or ' if the value is ESC_g. For the Perl syntax \g{n} the value is - -n, and for the Perl syntax \g{name} the result is ESC_k (as - that is a synonym for a named back reference). */ - - if (escape == ESC_g) - { - PCRE2_SPTR p; - uint32_t cf; - - terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)? - CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE; - - /* These two statements stop the compiler for warning about possibly - unset variables caused by the jump to HANDLE_NUMERICAL_RECURSION. In - fact, because we do the check for a number below, the paths that - would actually be in error are never taken. */ - - skipunits = 0; - reset_bracount = FALSE; - - /* If it's not a signed or unsigned number, treat it as a name. 
*/ - - cf = ptr[1]; - if (cf != CHAR_PLUS && cf != CHAR_MINUS && !IS_DIGIT(cf)) - { - is_recurse = TRUE; - goto NAMED_REF_OR_RECURSE; - } - - /* Signed or unsigned number (cf = ptr[1]) is known to be plus or minus - or a digit. */ - - p = ptr + 2; - while (IS_DIGIT(*p)) p++; - if (*p != (PCRE2_UCHAR)terminator) - { - *errorcodeptr = ERR57; - break; - } - ptr++; - goto HANDLE_NUMERICAL_RECURSION; - } - - /* \k or \k'name' is a back reference by name (Perl syntax). - We also support \k{name} (.NET syntax). */ - - if (escape == ESC_k) - { - if ((ptr[1] != CHAR_LESS_THAN_SIGN && - ptr[1] != CHAR_APOSTROPHE && ptr[1] != CHAR_LEFT_CURLY_BRACKET)) - { - *errorcodeptr = ERR69; - break; - } - is_recurse = FALSE; - terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)? - CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)? - CHAR_APOSTROPHE : CHAR_RIGHT_CURLY_BRACKET; - goto NAMED_REF_OR_RECURSE; - } - - /* Back references are handled specially; must disable firstcu if - not set to cope with cases like (?=(\w+))\1: which would otherwise set - ':' later. */ - - if (escape < 0) - { - open_capitem *oc; - recno = -escape; - - /* Come here from named backref handling when the reference is to a - single group (i.e. not to a duplicated name). */ - - HANDLE_REFERENCE: - if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; - previous = code; - item_hwm_offset = cb->hwm - cb->start_workspace; - *code++ = ((options & PCRE2_CASELESS) != 0)? OP_REFI : OP_REF; - PUT2INC(code, 0, recno); - cb->backref_map |= (recno < 32)? (1u << recno) : 1; - if ((uint32_t)recno > cb->top_backref) cb->top_backref = recno; - - /* Check to see if this back reference is recursive, that it, it - is inside the group that it references. A flag is set so that the - group can be made atomic. */ - - for (oc = cb->open_caps; oc != NULL; oc = oc->next) - { - if (oc->number == recno) - { - oc->flag = TRUE; - break; - } - } - } - - /* So are Unicode property matches, if supported. 
*/ - -#ifdef SUPPORT_UNICODE - else if (escape == ESC_P || escape == ESC_p) - { - BOOL negated; - unsigned int ptype = 0, pdata = 0; - if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr, cb)) - goto FAILED; - previous = code; - item_hwm_offset = cb->hwm - cb->start_workspace; - *code++ = ((escape == ESC_p) != negated)? OP_PROP : OP_NOTPROP; - *code++ = ptype; - *code++ = pdata; - } -#else - - /* If Unicode properties are not supported, \X, \P, and \p are not - allowed. */ - - else if (escape == ESC_X || escape == ESC_P || escape == ESC_p) - { - *errorcodeptr = ERR45; - goto FAILED; - } -#endif - - /* The use of \C can be locked out. */ - - else if (escape == ESC_C && (options & PCRE2_NEVER_BACKSLASH_C) != 0) - { - *errorcodeptr = ERR83; - goto FAILED; - } - - /* For the rest (including \X when Unicode properties are supported), we - can obtain the OP value by negating the escape value in the default - situation when PCRE2_UCP is not set. When it *is* set, we substitute - Unicode property tests. Note that \b and \B do a one-character - lookbehind, and \A also behaves as if it does. */ - - else - { - if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) && - cb->max_lookbehind == 0) - cb->max_lookbehind = 1; -#ifdef SUPPORT_UNICODE - if (escape >= ESC_DU && escape <= ESC_wu) - { - nestptr = ptr + 1; /* Where to resume */ - ptr = substitutes[escape - ESC_DU] - 1; /* Just before substitute */ - } - else -#endif - /* In non-UTF mode, we turn \C into OP_ALLANY instead of OP_ANYBYTE - so that it works in DFA mode and in lookbehinds. */ - - { - previous = (escape > ESC_b && escape < ESC_Z)? code : NULL; - item_hwm_offset = cb->hwm - cb->start_workspace; - *code++ = (!utf && escape == ESC_C)? OP_ALLANY : escape; - } - } - continue; - } - - /* We have a data character whose value is in c. In UTF-8 mode it may have - a value > 127. We set its representation in the length/buffer, and then - handle it as a data character. 
*/ - - mclength = PUTCHAR(c, mcbuffer); - goto ONE_CHAR; - - - /* ===================================================================*/ - /* Handle a literal character. It is guaranteed not to be whitespace or # - when the extended flag is set. If we are in a UTF mode, it may be a - multi-unit literal character. */ - - default: - NORMAL_CHAR: - mclength = 1; - mcbuffer[0] = c; - -#ifdef SUPPORT_UNICODE - if (utf && HAS_EXTRALEN(c)) - ACROSSCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr)); -#endif - - /* At this point we have the character's bytes in mcbuffer, and the length - in mclength. When not in UTF mode, the length is always 1. */ - - ONE_CHAR: - previous = code; - item_hwm_offset = cb->hwm - cb->start_workspace; - - /* For caseless UTF mode, check whether this character has more than one - other case. If so, generate a special OP_PROP item instead of OP_CHARI. */ - -#ifdef SUPPORT_UNICODE - if (utf && (options & PCRE2_CASELESS) != 0) - { - GETCHAR(c, mcbuffer); - if ((c = UCD_CASESET(c)) != 0) - { - *code++ = OP_PROP; - *code++ = PT_CLIST; - *code++ = c; - if (firstcuflags == REQ_UNSET) - firstcuflags = zerofirstcuflags = REQ_NONE; - break; - } - } -#endif - - /* Caseful matches, or not one of the multicase characters. */ - - *code++ = ((options & PCRE2_CASELESS) != 0)? OP_CHARI : OP_CHAR; - for (c = 0; c < mclength; c++) *code++ = mcbuffer[c]; - - /* Remember if \r or \n were seen */ - - if (mcbuffer[0] == CHAR_CR || mcbuffer[0] == CHAR_NL) - cb->external_flags |= PCRE2_HASCRORLF; - - /* Set the first and required bytes appropriately. If no previous first - byte, set it from this character, but revert to none on a zero repeat. - Otherwise, leave the firstcu value alone, and don't change it on a zero - repeat. */ - - if (firstcuflags == REQ_UNSET) - { - zerofirstcuflags = REQ_NONE; - zeroreqcu = reqcu; - zeroreqcuflags = reqcuflags; - - /* If the character is more than one byte long, we can set firstcu - only if it is not to be matched caselessly. 
*/ - - if (mclength == 1 || req_caseopt == 0) - { - firstcu = mcbuffer[0] | req_caseopt; - firstcu = mcbuffer[0]; - firstcuflags = req_caseopt; - - if (mclength != 1) - { - reqcu = code[-1]; - reqcuflags = cb->req_varyopt; - } - } - else firstcuflags = reqcuflags = REQ_NONE; - } - - /* firstcu was previously set; we can set reqcu only if the length is - 1 or the matching is caseful. */ - - else - { - zerofirstcu = firstcu; - zerofirstcuflags = firstcuflags; - zeroreqcu = reqcu; - zeroreqcuflags = reqcuflags; - if (mclength == 1 || req_caseopt == 0) - { - reqcu = code[-1]; - reqcuflags = req_caseopt | cb->req_varyopt; - } - } - - break; /* End of literal character handling */ - } - } /* end of big loop */ - -/* Control never reaches here by falling through, only by a goto for all the -error states. Pass back the position in the pattern so that it can be displayed -to the user for diagnosing the error. */ - -FAILED: -*ptrptr = ptr; -return FALSE; -} - - - -/************************************************* -* Compile regex: a sequence of alternatives * -*************************************************/ - -/* On entry, ptr is pointing past the bracket character, but on return it -points to the closing bracket, or vertical bar, or end of string. The code -variable is pointing at the byte into which the BRA operator has been stored. -This function is used during the pre-compile phase when we are trying to find -out the amount of memory needed, as well as during the real compile phase. The -value of lengthptr distinguishes the two phases. 
- -Arguments: - options option bits, including any changes for this subpattern - codeptr -> the address of the current code pointer - ptrptr -> the address of the current pattern pointer - errorcodeptr -> pointer to error code variable - lookbehind TRUE if this is a lookbehind assertion - reset_bracount TRUE to reset the count for each branch - skipunits skip this many code units at start (for brackets and OP_COND) - cond_depth depth of nesting for conditional subpatterns - firstcuptr place to put the first required code unit - firstcuflagsptr place to put the first code unit flags, or a negative number - reqcuptr place to put the last required code unit - reqcuflagsptr place to put the last required code unit flags, or a negative number - bcptr pointer to the chain of currently open branches - cb points to the data block with tables pointers etc. - lengthptr NULL during the real compile phase - points to length accumulator during pre-compile phase - -Returns: TRUE on success -*/ - -static BOOL -compile_regex(uint32_t options, PCRE2_UCHAR **codeptr, PCRE2_SPTR *ptrptr, - int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, uint32_t skipunits, - int cond_depth, uint32_t *firstcuptr, int32_t *firstcuflagsptr, - uint32_t *reqcuptr, int32_t *reqcuflagsptr, branch_chain *bcptr, - compile_block *cb, size_t *lengthptr) -{ -PCRE2_SPTR ptr = *ptrptr; -PCRE2_UCHAR *code = *codeptr; -PCRE2_UCHAR *last_branch = code; -PCRE2_UCHAR *start_bracket = code; -PCRE2_UCHAR *reverse_count = NULL; -open_capitem capitem; -int capnumber = 0; -uint32_t firstcu, reqcu; -int32_t firstcuflags, reqcuflags; -uint32_t branchfirstcu, branchreqcu; -int32_t branchfirstcuflags, branchreqcuflags; -size_t length; -size_t save_hwm_offset; -unsigned int orig_bracount; -unsigned int max_bracount; -branch_chain bc; - -/* If set, call the external function that checks for stack availability. 
*/ - -if (cb->cx->stack_guard != NULL && - cb->cx->stack_guard(cb->parens_depth, cb->cx->stack_guard_data)) - { - *errorcodeptr= ERR33; - return FALSE; - } - -/* Miscellaneous initialization */ - -bc.outer = bcptr; -bc.current_branch = code; - -firstcu = reqcu = 0; -firstcuflags = reqcuflags = REQ_UNSET; - -save_hwm_offset = cb->hwm - cb->start_workspace; /* hwm at start of group */ - -/* Accumulate the length for use in the pre-compile phase. Start with the -length of the BRA and KET and any extra code units that are required at the -beginning. We accumulate in a local variable to save frequent testing of -lengthptr for NULL. We cannot do this by looking at the value of 'code' at the -start and end of each alternative, because compiled items are discarded during -the pre-compile phase so that the work space is not exceeded. */ - -length = 2 + 2*LINK_SIZE + skipunits; - -/* WARNING: If the above line is changed for any reason, you must also change -the code that abstracts option settings at the start of the pattern and makes -them global. It tests the value of length for (2 + 2*LINK_SIZE) in the -pre-compile phase to find out whether or not anything has yet been compiled. - -If this is a capturing subpattern, add to the chain of open capturing items -so that we can detect them if (*ACCEPT) is encountered. This is also used to -detect groups that contain recursive back references to themselves. Note that -only OP_CBRA need be tested here; changing this opcode to one of its variants, -e.g. OP_SCBRAPOS, happens later, after the group has been compiled. 
*/ - -if (*code == OP_CBRA) - { - capnumber = GET2(code, 1 + LINK_SIZE); - capitem.number = capnumber; - capitem.next = cb->open_caps; - capitem.flag = FALSE; - cb->open_caps = &capitem; - } - -/* Offset is set zero to mark that this bracket is still open */ - -PUT(code, 1, 0); -code += 1 + LINK_SIZE + skipunits; - -/* Loop for each alternative branch */ - -orig_bracount = max_bracount = cb->bracount; - -for (;;) - { - /* For a (?| group, reset the capturing bracket count so that each branch - uses the same numbers. */ - - if (reset_bracount) cb->bracount = orig_bracount; - - /* Set up dummy OP_REVERSE if lookbehind assertion */ - - if (lookbehind) - { - *code++ = OP_REVERSE; - reverse_count = code; - PUTINC(code, 0, 0); - length += 1 + LINK_SIZE; - } - - /* Now compile the branch; in the pre-compile phase its length gets added - into the length. */ - - if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstcu, - &branchfirstcuflags, &branchreqcu, &branchreqcuflags, &bc, - cond_depth, cb, (lengthptr == NULL)? NULL : &length)) - { - *ptrptr = ptr; - return FALSE; - } - - /* Keep the highest bracket count in case (?| was used and some branch - has fewer than the rest. */ - - if (cb->bracount > max_bracount) max_bracount = cb->bracount; - - /* In the real compile phase, there is some post-processing to be done. */ - - if (lengthptr == NULL) - { - /* If this is the first branch, the firstcu and reqcu values for the - branch become the values for the regex. */ - - if (*last_branch != OP_ALT) - { - firstcu = branchfirstcu; - firstcuflags = branchfirstcuflags; - reqcu = branchreqcu; - reqcuflags = branchreqcuflags; - } - - /* If this is not the first branch, the first char and reqcu have to - match the values from all the previous branches, except that if the - previous value for reqcu didn't have REQ_VARY set, it can still match, - and we set REQ_VARY for the regex. 
*/ - - else - { - /* If we previously had a firstcu, but it doesn't match the new branch, - we have to abandon the firstcu for the regex, but if there was - previously no reqcu, it takes on the value of the old firstcu. */ - - if (firstcuflags != branchfirstcuflags || firstcu != branchfirstcu) - { - if (firstcuflags >= 0) - { - if (reqcuflags < 0) - { - reqcu = firstcu; - reqcuflags = firstcuflags; - } - } - firstcuflags = REQ_NONE; - } - - /* If we (now or from before) have no firstcu, a firstcu from the - branch becomes a reqcu if there isn't a branch reqcu. */ - - if (firstcuflags < 0 && branchfirstcuflags >= 0 && - branchreqcuflags < 0) - { - branchreqcu = branchfirstcu; - branchreqcuflags = branchfirstcuflags; - } - - /* Now ensure that the reqcus match */ - - if (((reqcuflags & ~REQ_VARY) != (branchreqcuflags & ~REQ_VARY)) || - reqcu != branchreqcu) - reqcuflags = REQ_NONE; - else - { - reqcu = branchreqcu; - reqcuflags |= branchreqcuflags; /* To "or" REQ_VARY */ - } - } - - /* If lookbehind, check that this branch matches a fixed-length string, and - put the length into the OP_REVERSE item. Temporarily mark the end of the - branch with OP_END. If the branch contains OP_RECURSE, the result is -3 - because there may be forward references that we can't check here. Set a - flag to cause another lookbehind check at the end. Why not do it all at the - end? Because common, erroneous checks are picked up here and the offset of - the problem can be shown. */ - - if (lookbehind) - { - int fixed_length; - *code = OP_END; - fixed_length = find_fixedlength(last_branch, (options & PCRE2_UTF) != 0, - FALSE, cb, NULL); - if (fixed_length == -3) - { - cb->check_lookbehind = TRUE; - } - else if (fixed_length < 0) - { - *errorcodeptr = (fixed_length == -2)? ERR36 : - (fixed_length == -4)? 
ERR70: ERR25; - *ptrptr = ptr; - return FALSE; - } - else - { - if (fixed_length > cb->max_lookbehind) - cb->max_lookbehind = fixed_length; - PUT(reverse_count, 0, fixed_length); - } - } - } - - /* Reached end of expression, either ')' or end of pattern. In the real - compile phase, go back through the alternative branches and reverse the chain - of offsets, with the field in the BRA item now becoming an offset to the - first alternative. If there are no alternatives, it points to the end of the - group. The length in the terminating ket is always the length of the whole - bracketed item. Return leaving the pointer at the terminating char. */ - - if (*ptr != CHAR_VERTICAL_LINE) - { - if (lengthptr == NULL) - { - size_t branch_length = code - last_branch; - do - { - size_t prev_length = GET(last_branch, 1); - PUT(last_branch, 1, branch_length); - branch_length = prev_length; - last_branch -= branch_length; - } - while (branch_length > 0); - } - - /* Fill in the ket */ - - *code = OP_KET; - PUT(code, 1, (int)(code - start_bracket)); - code += 1 + LINK_SIZE; - - /* If it was a capturing subpattern, check to see if it contained any - recursive back references. If so, we must wrap it in atomic brackets. - Because we are moving code along, we must ensure that any pending recursive - or forward subroutine references are updated. In any event, remove the - block from the chain. 
*/ - - if (capnumber > 0) - { - if (cb->open_caps->flag) - { - *code = OP_END; - adjust_recurse(start_bracket, 1 + LINK_SIZE, - (options & PCRE2_UTF) != 0, cb, save_hwm_offset); - memmove(start_bracket + 1 + LINK_SIZE, start_bracket, - CU2BYTES(code - start_bracket)); - *start_bracket = OP_ONCE; - code += 1 + LINK_SIZE; - PUT(start_bracket, 1, (int)(code - start_bracket)); - *code = OP_KET; - PUT(code, 1, (int)(code - start_bracket)); - code += 1 + LINK_SIZE; - length += 2 + 2*LINK_SIZE; - } - cb->open_caps = cb->open_caps->next; - } - - /* Retain the highest bracket number, in case resetting was used. */ - - cb->bracount = max_bracount; - - /* Set values to pass back */ - - *codeptr = code; - *ptrptr = ptr; - *firstcuptr = firstcu; - *firstcuflagsptr = firstcuflags; - *reqcuptr = reqcu; - *reqcuflagsptr = reqcuflags; - if (lengthptr != NULL) - { - if (OFLOW_MAX - *lengthptr < length) - { - *errorcodeptr = ERR20; - return FALSE; - } - *lengthptr += length; - } - return TRUE; - } - - /* Another branch follows. In the pre-compile phase, we can move the code - pointer back to where it was for the start of the first branch. (That is, - pretend that each branch is the only one.) - - In the real compile phase, insert an ALT node. Its length field points back - to the previous branch while the bracket remains open. At the end the chain - is reversed. It's done like this so that the start of the bracket has a - zero offset until it is closed, making it possible to detect recursion. 
*/ - - if (lengthptr != NULL) - { - code = *codeptr + 1 + LINK_SIZE + skipunits; - length += 1 + LINK_SIZE; - } - else - { - *code = OP_ALT; - PUT(code, 1, (int)(code - last_branch)); - bc.current_branch = last_branch = code; - code += 1 + LINK_SIZE; - } - - /* Advance past the vertical bar */ - - ptr++; - } -/* Control never reaches here */ -} - - - -/************************************************* -* Check for anchored pattern * -*************************************************/ - -/* Try to find out if this is an anchored regular expression. Consider each -alternative branch. If they all start with OP_SOD or OP_CIRC, or with a bracket -all of whose alternatives start with OP_SOD or OP_CIRC (recurse ad lib), then -it's anchored. However, if this is a multiline pattern, then only OP_SOD will -be found, because ^ generates OP_CIRCM in that mode. - -We can also consider a regex to be anchored if OP_SOM starts all its branches. -This is the code for \G, which means "match at start of match position, taking -into account the match offset". - -A branch is also implicitly anchored if it starts with .* and DOTALL is set, -because that will try the rest of the pattern at all possible matching points, -so there is no point trying again.... er .... - -.... except when the .* appears inside capturing parentheses, and there is a -subsequent back reference to those parentheses. We haven't enough information -to catch that case precisely. - -At first, the best we could do was to detect when .* was in capturing brackets -and the highest back reference was greater than or equal to that level. -However, by keeping a bitmap of the first 31 back references, we can catch some -of the more common cases more precisely. - -... A second exception is when the .* appears inside an atomic group, because -this prevents the number of characters it matches from being adjusted. 
- -Arguments: - code points to start of the compiled pattern - bracket_map a bitmap of which brackets we are inside while testing; this - handles up to substring 31; after that we just have to take - the less precise approach - cb points to the compile data block - atomcount atomic group level - -Returns: TRUE or FALSE -*/ - -static BOOL -is_anchored(register PCRE2_SPTR code, unsigned int bracket_map, - compile_block *cb, int atomcount) -{ -do { - PCRE2_SPTR scode = first_significant_code( - code + PRIV(OP_lengths)[*code], FALSE); - register int op = *scode; - - /* Non-capturing brackets */ - - if (op == OP_BRA || op == OP_BRAPOS || - op == OP_SBRA || op == OP_SBRAPOS) - { - if (!is_anchored(scode, bracket_map, cb, atomcount)) return FALSE; - } - - /* Capturing brackets */ - - else if (op == OP_CBRA || op == OP_CBRAPOS || - op == OP_SCBRA || op == OP_SCBRAPOS) - { - int n = GET2(scode, 1+LINK_SIZE); - int new_map = bracket_map | ((n < 32)? (1u << n) : 1); - if (!is_anchored(scode, new_map, cb, atomcount)) return FALSE; - } - - /* Positive forward assertions and conditions */ - - else if (op == OP_ASSERT || op == OP_COND) - { - if (!is_anchored(scode, bracket_map, cb, atomcount)) return FALSE; - } - - /* Atomic groups */ - - else if (op == OP_ONCE || op == OP_ONCE_NC) - { - if (!is_anchored(scode, bracket_map, cb, atomcount + 1)) - return FALSE; - } - - /* .* is not anchored unless DOTALL is set (which generates OP_ALLANY) and - it isn't in brackets that are or may be referenced or inside an atomic - group. There is also an option that disables auto-anchoring. 
*/ - - else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR || - op == OP_TYPEPOSSTAR)) - { - if (scode[1] != OP_ALLANY || (bracket_map & cb->backref_map) != 0 || - atomcount > 0 || cb->had_pruneorskip || - (cb->external_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0) - return FALSE; - } - - /* Check for explicit anchoring */ - - else if (op != OP_SOD && op != OP_SOM && op != OP_CIRC) return FALSE; - - code += GET(code, 1); - } -while (*code == OP_ALT); /* Loop for each alternative */ -return TRUE; -} - - - -/************************************************* -* Check for starting with ^ or .* * -*************************************************/ - -/* This is called to find out if every branch starts with ^ or .* so that -"first char" processing can be done to speed things up in multiline -matching and for non-DOTALL patterns that start with .* (which must start at -the beginning or after \n). As in the case of is_anchored() (see above), we -have to take account of back references to capturing brackets that contain .* -because in that case we can't make the assumption. Also, the appearance of .* -inside atomic brackets or in a pattern that contains *PRUNE or *SKIP does not -count, because once again the assumption no longer holds. - -Arguments: - code points to start of the compiled pattern or a group - bracket_map a bitmap of which brackets we are inside while testing; this - handles up to substring 31; after that we just have to take - the less precise approach - cb points to the compile data - atomcount atomic group level - -Returns: TRUE or FALSE -*/ - -static BOOL -is_startline(PCRE2_SPTR code, unsigned int bracket_map, compile_block *cb, - int atomcount) -{ -do { - PCRE2_SPTR scode = first_significant_code( - code + PRIV(OP_lengths)[*code], FALSE); - register int op = *scode; - - /* If we are at the start of a conditional assertion group, *both* the - conditional assertion *and* what follows the condition must satisfy the test - for start of line. 
Other kinds of condition fail. Note that there may be an - auto-callout at the start of a condition. */ - - if (op == OP_COND) - { - scode += 1 + LINK_SIZE; - - if (*scode == OP_CALLOUT) scode += PRIV(OP_lengths)[OP_CALLOUT]; - else if (*scode == OP_CALLOUT_STR) scode += GET(scode, 1 + 2*LINK_SIZE); - - switch (*scode) - { - case OP_CREF: - case OP_DNCREF: - case OP_RREF: - case OP_DNRREF: - case OP_FAIL: - case OP_FALSE: - case OP_TRUE: - return FALSE; - - default: /* Assertion */ - if (!is_startline(scode, bracket_map, cb, atomcount)) return FALSE; - do scode += GET(scode, 1); while (*scode == OP_ALT); - scode += 1 + LINK_SIZE; - break; - } - scode = first_significant_code(scode, FALSE); - op = *scode; - } - - /* Non-capturing brackets */ - - if (op == OP_BRA || op == OP_BRAPOS || - op == OP_SBRA || op == OP_SBRAPOS) - { - if (!is_startline(scode, bracket_map, cb, atomcount)) return FALSE; - } - - /* Capturing brackets */ - - else if (op == OP_CBRA || op == OP_CBRAPOS || - op == OP_SCBRA || op == OP_SCBRAPOS) - { - int n = GET2(scode, 1+LINK_SIZE); - int new_map = bracket_map | ((n < 32)? (1u << n) : 1); - if (!is_startline(scode, new_map, cb, atomcount)) return FALSE; - } - - /* Positive forward assertions */ - - else if (op == OP_ASSERT) - { - if (!is_startline(scode, bracket_map, cb, atomcount)) return FALSE; - } - - /* Atomic brackets */ - - else if (op == OP_ONCE || op == OP_ONCE_NC) - { - if (!is_startline(scode, bracket_map, cb, atomcount + 1)) return FALSE; - } - - /* .* means "start at start or after \n" if it isn't in atomic brackets or - brackets that may be referenced, as long as the pattern does not contain - *PRUNE or *SKIP, because these break the feature. Consider, for example, - /.*?a(*PRUNE)b/ with the subject "aab", which matches "ab", i.e. not at the - start of a line. There is also an option that disables this optimization. 
*/ - - else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR || op == OP_TYPEPOSSTAR) - { - if (scode[1] != OP_ANY || (bracket_map & cb->backref_map) != 0 || - atomcount > 0 || cb->had_pruneorskip || - (cb->external_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0) - return FALSE; - } - - /* Check for explicit circumflex; anything else gives a FALSE result. Note - in particular that this includes atomic brackets OP_ONCE and OP_ONCE_NC - because the number of characters matched by .* cannot be adjusted inside - them. */ - - else if (op != OP_CIRC && op != OP_CIRCM) return FALSE; - - /* Move on to the next alternative */ - - code += GET(code, 1); - } -while (*code == OP_ALT); /* Loop for each alternative */ -return TRUE; -} - - - /************************************************* * Check for asserted fixed first code unit * *************************************************/ @@ -7911,7 +7908,7 @@ Returns: the fixed first code unit, or 0 with REQ_NONE in flags static uint32_t find_firstassertedcu(PCRE2_SPTR code, int32_t *flags, BOOL inassert) { -register uint32_t c = 0; +uint32_t c = 0; int cflags = REQ_NONE; *flags = REQ_NONE; @@ -7921,7 +7918,7 @@ do { int xl = (*code == OP_CBRA || *code == OP_SCBRA || *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? 
IMM2_SIZE:0; PCRE2_SPTR scode = first_significant_code(code + 1+LINK_SIZE + xl, TRUE); - register PCRE2_UCHAR op = *scode; + PCRE2_UCHAR op = *scode; switch(op) { @@ -7994,18 +7991,19 @@ Arguments: name the name to add length the length of the name groupno the group number + tablecount the count of names in the table so far Returns: nothing */ static void add_name_to_table(compile_block *cb, PCRE2_SPTR name, int length, - unsigned int groupno) + unsigned int groupno, uint32_t tablecount) { -int i; +uint32_t i; PCRE2_UCHAR *slot = cb->name_table; -for (i = 0; i < cb->names_found; i++) +for (i = 0; i < tablecount; i++) { int crc = memcmp(name, slot+IMM2_SIZE, CU2BYTES(length)); if (crc == 0 && slot[IMM2_SIZE+length] != 0) @@ -8019,7 +8017,7 @@ for (i = 0; i < cb->names_found; i++) if (crc < 0) { memmove(slot + cb->name_entry_size, slot, - CU2BYTES((cb->names_found - i) * cb->name_entry_size)); + CU2BYTES((tablecount - i) * cb->name_entry_size)); break; } @@ -8030,7 +8028,6 @@ for (i = 0; i < cb->names_found; i++) PUT2(slot, 0, groupno); memcpy(slot + IMM2_SIZE, name, CU2BYTES(length)); -cb->names_found++; /* Add a terminating zero and fill the rest of the slot with zeroes so that the memory is all initialized. Otherwise valgrind moans about uninitialized @@ -8042,6 +8039,703 @@ memset(slot + IMM2_SIZE + length, 0, +/************************************************* +* Skip in parsed pattern * +*************************************************/ + +/* This function is called to skip parts of the parsed pattern when finding the +length of a lookbehind branch. It is called after (*ACCEPT) and (*FAIL) to find +the end of the branch, it is called to skip over an internal lookaround, and it +is also called to skip to the end of a class, during which it will never +encounter nested groups (but there's no need to have special code for that). 
+ +Arguments: + pptr current pointer to skip from + skiptype PSKIP_CLASS when skipping to end of class + PSKIP_ALT when META_ALT ends the skip + PSKIP_KET when only META_KET ends the skip + +Returns: new value of pptr + NULL if META_END is reached - should never occur + or for an unknown meta value - likewise +*/ + +static uint32_t * +parsed_skip(uint32_t *pptr, uint32_t skiptype) +{ +uint32_t nestlevel = 0; + +for (pptr += 1;; pptr++) + { + uint32_t meta = META_CODE(*pptr); + + switch(meta) + { + default: /* Just skip over most items */ + if (meta < META_END) continue; /* Literal */ + break; + + /* This should never occur. */ + + case META_END: + return NULL; + + /* The data for these items is variable in length. */ + + case META_BACKREF: /* Offset is present only if group >= 10 */ + if (META_DATA(*pptr) >= 10) pptr += SIZEOFFSET; + break; + + case META_ESCAPE: /* A few escapes are followed by data items. */ + switch (META_DATA(*pptr)) + { + case ESC_P: + case ESC_p: + pptr += 1; + break; + + case ESC_g: + case ESC_k: + pptr += 1 + SIZEOFFSET; + break; + } + break; + + case META_MARK: /* Add the length of the name. */ + case META_PRUNE_ARG: + case META_SKIP_ARG: + case META_THEN_ARG: + pptr += pptr[1]; + break; + + /* These are the "active" items in this loop. */ + + case META_CLASS_END: + if (skiptype == PSKIP_CLASS) return pptr; + break; + + case META_ATOMIC: + case META_CAPTURE: + case META_COND_ASSERT: + case META_COND_DEFINE: + case META_COND_NAME: + case META_COND_NUMBER: + case META_COND_RNAME: + case META_COND_RNUMBER: + case META_COND_VERSION: + case META_LOOKAHEAD: + case META_LOOKAHEADNOT: + case META_LOOKBEHIND: + case META_LOOKBEHINDNOT: + case META_NOCAPTURE: + nestlevel++; + break; + + case META_ALT: + if (nestlevel == 0 && skiptype == PSKIP_ALT) return pptr; + break; + + case META_KET: + if (nestlevel == 0) return pptr; + nestlevel--; + break; + } + + /* The extra data item length for each meta is in a table. 
*/ + + meta = (meta >> 16) & 0x7fff; + if (meta >= sizeof(meta_extra_lengths)) return NULL; + pptr += meta_extra_lengths[meta]; + } +/* Control never reaches here */ +return pptr; +} + + + +/************************************************* +* Find length of a parsed group * +*************************************************/ + +/* This is called for nested groups within a branch of a lookbehind whose +length is being computed. If all the branches in the nested group have the same +length, that is OK. On entry, the pointer must be at the first element after +the group initializing code. Caching is used to improve processing speed when +the same capturing group occurs many times. + +Arguments: + pptrptr pointer to pointer in the parsed pattern + errcodeptr pointer to the errorcode + lcptr pointer to the loop counter + group number of captured group or -1 for a non-capturing group + recurses chain of recurse_check to catch mutual recursion + cb pointer to the compile data + +Returns: the group length or a negative number +*/ + +static int +get_grouplength(uint32_t **pptrptr, int *errcodeptr, int *lcptr, + int group, parsed_recurse_check *recurses, compile_block *cb) +{ +int branchlength; +int grouplength = -1; + +/* The cache can be used only if there is no possibility of there being two +groups with the same number. 
*/ + +if (group > 0) + { + uint32_t groupinfo = cb->groupinfo[group]; + if ((cb->external_flags & PCRE2_DUPCAPUSED) == 0) + { + if ((groupinfo & GI_NOT_FIXED_LENGTH) != 0) return -1; + if ((groupinfo & GI_SET_FIXED_LENGTH) != 0) + return groupinfo & GI_FIXED_LENGTH_MASK; + } + } + +/* Scan the group */ + +for(;;) + { + branchlength = get_branchlength(pptrptr, errcodeptr, lcptr, recurses, cb); + if (branchlength < 0) goto ISNOTFIXED; + if (grouplength == -1) grouplength = branchlength; + else if (grouplength != branchlength) goto ISNOTFIXED; + if (**pptrptr == META_KET) break; + *pptrptr += 1; /* Skip META_ALT */ + } + +if (group > 0) + cb->groupinfo[group] |= (uint32_t)(GI_SET_FIXED_LENGTH | grouplength); +return grouplength; + +ISNOTFIXED: +if (group > 0) cb->groupinfo[group] |= GI_NOT_FIXED_LENGTH; +return -1; +} + + + +/************************************************* +* Find length of a parsed branch * +*************************************************/ + +/* Return a fixed length for a branch in a lookbehind, giving an error if the +length is not fixed. If any lookbehinds are encountered on the way, they get +their length set. On entry, *pptrptr points to the first element inside the +branch. On exit it is set to point to the ALT or KET. + +Arguments: + pptrptr pointer to pointer in the parsed pattern + errcodeptr pointer to error code + lcptr pointer to loop counter + recurses chain of recurse_check to catch mutual recursion + cb pointer to compile block + +Returns: the length, or a negative value on error +*/ + +static int +get_branchlength(uint32_t **pptrptr, int *errcodeptr, int *lcptr, + parsed_recurse_check *recurses, compile_block *cb) +{ +int branchlength = 0; +int grouplength; +uint32_t lastitemlength = 0; +uint32_t *pptr = *pptrptr; +PCRE2_SIZE offset; +parsed_recurse_check this_recurse; + +/* A large and/or complex regex can take too long to process. 
This can happen +more often when (?| groups are present in the pattern because their length +cannot be cached. */ + +if ((*lcptr)++ > 2000) + { + *errcodeptr = ERR35; /* Lookbehind is too complicated */ + return -1; + } + +/* Scan the branch, accumulating the length. */ + +for (;; pptr++) + { + parsed_recurse_check *r; + uint32_t *gptr, *gptrend; + uint32_t escape; + uint32_t group = 0; + uint32_t itemlength = 0; + + if (*pptr < META_END) + { + itemlength = 1; + } + + else switch (META_CODE(*pptr)) + { + case META_KET: + case META_ALT: + goto EXIT; + + /* (*ACCEPT) and (*FAIL) terminate the branch, but we must skip to the + actual termination. */ + + case META_ACCEPT: + case META_FAIL: + pptr = parsed_skip(pptr, PSKIP_ALT); + if (pptr == NULL) goto PARSED_SKIP_FAILED; + goto EXIT; + + case META_MARK: + case META_PRUNE_ARG: + case META_SKIP_ARG: + case META_THEN_ARG: + pptr += pptr[1] + 1; + break; + + case META_CIRCUMFLEX: + case META_COMMIT: + case META_DOLLAR: + case META_PRUNE: + case META_SKIP: + case META_THEN: + break; + + case META_OPTIONS: + pptr += 1; + break; + + case META_BIGVALUE: + itemlength = 1; + pptr += 1; + break; + + case META_CLASS: + case META_CLASS_NOT: + itemlength = 1; + pptr = parsed_skip(pptr, PSKIP_CLASS); + if (pptr == NULL) goto PARSED_SKIP_FAILED; + break; + + case META_CLASS_EMPTY_NOT: + case META_DOT: + itemlength = 1; + break; + + case META_CALLOUT_NUMBER: + pptr += 3; + break; + + case META_CALLOUT_STRING: + pptr += 3 + SIZEOFFSET; + break; + + /* Only some escapes consume a character. Of those, \R and \X are never + allowed because they might match more than one character. \C is allowed only in + 32-bit and non-UTF 8/16-bit modes.
*/ + + case META_ESCAPE: + escape = META_DATA(*pptr); + if (escape == ESC_R || escape == ESC_X) return -1; + if (escape > ESC_b && escape < ESC_Z) + { +#if PCRE2_CODE_UNIT_WIDTH != 32 + if ((cb->external_options & PCRE2_UTF) != 0 && escape == ESC_C) + { + *errcodeptr = ERR36; + return -1; + } +#endif + itemlength = 1; + if (escape == ESC_p || escape == ESC_P) pptr++; /* Skip prop data */ + } + break; + + /* Lookaheads can be ignored. */ + + case META_LOOKAHEAD: + case META_LOOKAHEADNOT: + pptr = parsed_skip(pptr, PSKIP_KET); + if (pptr == NULL) goto PARSED_SKIP_FAILED; + break; + + /* Lookbehinds can be ignored, but must themselves be checked. */ + + case META_LOOKBEHIND: + case META_LOOKBEHINDNOT: + if (!set_lookbehind_lengths(&pptr, errcodeptr, lcptr, recurses, cb)) + return -1; + break; + + /* Back references and recursions are handled by very similar code. At this + stage, the names generated in the parsing pass are available, but the main + name table has not yet been created. So for the named varieties, scan the + list of names in order to get the number of the first one in the pattern, + and whether or not this name is duplicated. */ + + case META_BACKREF_BYNAME: + if ((cb->external_options & PCRE2_MATCH_UNSET_BACKREF) != 0) + goto ISNOTFIXED; + + case META_RECURSE_BYNAME: + { + int i; + PCRE2_SPTR name; + BOOL is_dupname = FALSE; + named_group *ng = cb->named_groups; + uint32_t meta_code = META_CODE(*pptr); + uint32_t length = *(++pptr); + + GETPLUSOFFSET(offset, pptr); + name = cb->start_pattern + offset; + for (i = 0; i < cb->names_found; i++, ng++) + { + if (length == ng->length && PRIV(strncmp)(name, ng->name, length) == 0) + { + group = ng->number; + is_dupname = ng->isdup; + break; + } + } + + if (group == 0) + { + *errcodeptr = ERR15; /* Non-existent subpattern */ + cb->erroroffset = offset; + return -1; + } + + /* A numerical back reference can be fixed length if duplicate capturing + groups are not being used. 
A non-duplicate named back reference can also + be handled. */ + + if (meta_code == META_RECURSE_BYNAME || + (!is_dupname && (cb->external_flags & PCRE2_DUPCAPUSED) == 0)) + goto RECURSE_OR_BACKREF_LENGTH; /* Handle as a numbered version. */ + } + goto ISNOTFIXED; /* Duplicate name or number */ + + /* The offset values for back references < 10 are in a separate vector + because otherwise they would use more than two parsed pattern elements on + 64-bit systems. */ + + case META_BACKREF: + if ((cb->external_options & PCRE2_MATCH_UNSET_BACKREF) != 0 || + (cb->external_flags & PCRE2_DUPCAPUSED) != 0) + goto ISNOTFIXED; + group = META_DATA(*pptr); + if (group < 10) + { + offset = cb->small_ref_offset[group]; + goto RECURSE_OR_BACKREF_LENGTH; + } + + /* Fall through for groups >= 10 - picking up group twice does no harm. */ + + /* A true recursion implies not fixed length, but a subroutine call may + be OK. Back reference "recursions" are also failed. */ + + case META_RECURSE: + group = META_DATA(*pptr); + GETPLUSOFFSET(offset, pptr); + + RECURSE_OR_BACKREF_LENGTH: + if (group > cb->bracount) + { + cb->erroroffset = offset; + *errcodeptr = ERR15; /* Non-existent subpattern */ + return -1; + } + if (group == 0) goto ISNOTFIXED; /* Local recursion */ + for (gptr = cb->parsed_pattern; *gptr != META_END; gptr++) + { + if (META_CODE(*gptr) == META_BIGVALUE) gptr++; + else if (*gptr == (META_CAPTURE | group)) break; + } + + gptrend = parsed_skip(gptr, PSKIP_KET); + if (gptrend == NULL) goto PARSED_SKIP_FAILED; + if (pptr > gptr && pptr < gptrend) goto ISNOTFIXED; /* Local recursion */ + for (r = recurses; r != NULL; r = r->prev) if (r->groupptr == gptr) break; + if (r != NULL) goto ISNOTFIXED; /* Mutual recursion */ + this_recurse.prev = recurses; + this_recurse.groupptr = gptr; + gptr++; + grouplength = get_grouplength(&gptr, errcodeptr, lcptr, group, + &this_recurse, cb); + if (grouplength < 0) + { + if (*errcodeptr == 0) goto ISNOTFIXED; + return -1; /* Error already set */ 
+ } + itemlength = grouplength; + break; + + /* Check nested groups - advance past the initial data for each type and + then seek a fixed length with get_grouplength(). */ + + case META_COND_NAME: + case META_COND_NUMBER: + case META_COND_RNAME: + case META_COND_RNUMBER: + case META_COND_DEFINE: + pptr += 2 + SIZEOFFSET; + goto CHECK_GROUP; + + case META_COND_ASSERT: + pptr += 1; + goto CHECK_GROUP; + + case META_COND_VERSION: + pptr += 4; + goto CHECK_GROUP; + + case META_CAPTURE: + group = META_DATA(*pptr); + /* Fall through */ + + case META_ATOMIC: + case META_NOCAPTURE: + pptr++; + CHECK_GROUP: + grouplength = get_grouplength(&pptr, errcodeptr, lcptr, group, recurses, cb); + if (grouplength < 0) return -1; + itemlength = grouplength; + break; + + /* Exact repetition is OK; variable repetition is not. A repetition of zero + must subtract the length that has already been added. */ + + case META_MINMAX: + case META_MINMAX_PLUS: + case META_MINMAX_QUERY: + if (pptr[1] == pptr[2]) + { + if (pptr[1] == 0) branchlength -= lastitemlength; + else itemlength = (pptr[1] - 1) * lastitemlength; + pptr += 2; + break; + } + /* Fall through */ + + /* Any other item means this branch does not have a fixed length. */ + + default: + ISNOTFIXED: + *errcodeptr = ERR25; /* Not fixed length */ + return -1; + } + + /* Add the item length to the branchlength, and save it for use if the next + thing is a quantifier. */ + + branchlength += itemlength; + lastitemlength = itemlength; + + /* Ensure that the length does not overflow the limit. 
*/ + + if (branchlength > LOOKBEHIND_MAX) + { + *errcodeptr = ERR87; + return -1; + } + } + +EXIT: +*pptrptr = pptr; +if (branchlength > cb->max_lookbehind) cb->max_lookbehind = branchlength; +return branchlength; + +PARSED_SKIP_FAILED: +*errcodeptr = ERR90; +return -1; +} + + + +/************************************************* +* Set lengths in a lookbehind * +*************************************************/ + +/* This function is called for each lookbehind, to set the lengths in its +branches. An error occurs if any branch does not have a fixed length that is +less than the maximum (65535). On exit, the pointer must be left on the final +ket. + +Arguments: + pptrptr pointer to pointer in the parsed pattern + errcodeptr pointer to error code + lcptr pointer to loop counter + recurses chain of recurse_check to catch mutual recursion + cb pointer to compile block + +Returns: TRUE if all is well + FALSE otherwise, with error code and offset set +*/ + +static BOOL +set_lookbehind_lengths(uint32_t **pptrptr, int *errcodeptr, int *lcptr, + parsed_recurse_check *recurses, compile_block *cb) +{ +PCRE2_SIZE offset; +int branchlength; +uint32_t *bptr = *pptrptr; + +READPLUSOFFSET(offset, bptr); /* Offset for error messages */ +*pptrptr += SIZEOFFSET; + +do + { + *pptrptr += 1; + branchlength = get_branchlength(pptrptr, errcodeptr, lcptr, recurses, cb); + if (branchlength < 0) + { + /* The errorcode and offset may already be set from a nested lookbehind. */ + if (*errcodeptr == 0) *errcodeptr = ERR25; + if (cb->erroroffset == PCRE2_UNSET) cb->erroroffset = offset; + return FALSE; + } + *bptr |= branchlength; /* branchlength never more than 65535 */ + bptr = *pptrptr; + } +while (*bptr == META_ALT); + +return TRUE; +} + + + +/************************************************* +* Check parsed pattern lookbehinds * +*************************************************/ + +/* This function is called at the end of parsing a pattern if any lookbehinds +were encountered. 
It scans the parsed pattern for them, calling +set_lookbehind_lengths() for each one. At the start, the errorcode is zero and +the error offset is marked unset. This enables the functions above not to +override settings from deeper nestings. + +Arguments cb points to the compile block +Returns: 0 on success, or an errorcode (cb->erroroffset will be set) +*/ + +static int +check_lookbehinds(compile_block *cb) +{ +uint32_t *pptr; +int errorcode = 0; +int loopcount = 0; + +cb->erroroffset = PCRE2_UNSET; + +for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++) + { + if (*pptr < META_END) continue; /* Literal */ + + switch (META_CODE(*pptr)) + { + default: + return ERR70; /* Unrecognized meta code */ + + case META_ESCAPE: + if (*pptr - META_ESCAPE == ESC_P || *pptr - META_ESCAPE == ESC_p) + pptr += 1; + break; + + case META_ACCEPT: + case META_ALT: + case META_ASTERISK: + case META_ASTERISK_PLUS: + case META_ASTERISK_QUERY: + case META_ATOMIC: + case META_BACKREF: + case META_CAPTURE: + case META_CIRCUMFLEX: + case META_CLASS: + case META_CLASS_EMPTY: + case META_CLASS_EMPTY_NOT: + case META_CLASS_END: + case META_CLASS_NOT: + case META_COMMIT: + case META_COND_ASSERT: + case META_DOLLAR: + case META_DOT: + case META_FAIL: + case META_KET: + case META_LOOKAHEAD: + case META_LOOKAHEADNOT: + case META_NOCAPTURE: + case META_PLUS: + case META_PLUS_PLUS: + case META_PLUS_QUERY: + case META_PRUNE: + case META_QUERY: + case META_QUERY_PLUS: + case META_QUERY_QUERY: + case META_RANGE_ESCAPED: + case META_RANGE_LITERAL: + case META_SKIP: + case META_THEN: + break; + + case META_RECURSE: + pptr += SIZEOFFSET; + break; + + case META_BACKREF_BYNAME: + case META_COND_DEFINE: + case META_COND_NAME: + case META_COND_NUMBER: + case META_COND_RNAME: + case META_COND_RNUMBER: + case META_RECURSE_BYNAME: + pptr += 1 + SIZEOFFSET; + break; + + case META_CALLOUT_STRING: + pptr += 3 + SIZEOFFSET; + break; + + case META_BIGVALUE: + case META_OPTIONS: + case META_POSIX: + case
META_POSIX_NEG: + pptr += 1; + break; + + case META_MINMAX: + case META_MINMAX_QUERY: + case META_MINMAX_PLUS: + pptr += 2; + break; + + case META_CALLOUT_NUMBER: + case META_COND_VERSION: + pptr += 3; + break; + + case META_MARK: + case META_PRUNE_ARG: + case META_SKIP_ARG: + case META_THEN_ARG: + pptr += 1 + pptr[1]; + break; + + case META_LOOKBEHIND: + case META_LOOKBEHINDNOT: + if (!set_lookbehind_lengths(&pptr, &errorcode, &loopcount, NULL, cb)) + return errorcode; + break; + } + } + +return 0; +} + + + /************************************************* * External function to compile a pattern * *************************************************/ @@ -8051,7 +8745,7 @@ a pointer to a block of store holding a compiled version of the expression. Arguments: pattern the regular expression - patlen the length of the pattern, or < 0 for zero-terminated + patlen the length of the pattern, or PCRE2_ZERO_TERMINATED options option bits errorptr pointer to errorcode erroroffset pointer to error offset @@ -8065,41 +8759,49 @@ PCRE2_EXP_DEFN pcre2_code * PCRE2_CALL_CONVENTION pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE patlen, uint32_t options, int *errorptr, PCRE2_SIZE *erroroffset, pcre2_compile_context *ccontext) { -BOOL utf; /* Set TRUE for UTF mode */ -pcre2_real_code *re = NULL; /* What we will return */ -compile_block cb; /* "Static" compile-time data */ -const uint8_t *tables; /* Char tables base pointer */ +BOOL utf; /* Set TRUE for UTF mode */ +BOOL has_lookbehind; /* Set TRUE if a lookbehind is found */ +BOOL zero_terminated; /* Set TRUE for zero-terminated pattern */ +pcre2_real_code *re = NULL; /* What we will return */ +compile_block cb; /* "Static" compile-time data */ +const uint8_t *tables; /* Char tables base pointer */ -PCRE2_UCHAR *code; /* Current pointer in compiled code */ -PCRE2_SPTR codestart; /* Start of compiled code */ -PCRE2_SPTR ptr; /* Current pointer in pattern */ +PCRE2_UCHAR *code; /* Current pointer in compiled code */ +PCRE2_SPTR 
codestart; /* Start of compiled code */ +PCRE2_SPTR ptr; /* Current pointer in pattern */ +uint32_t *pptr; /* Current pointer in parsed pattern */ -size_t length = 1; /* Allow or final END opcode */ -size_t usedlength; /* Actual length used */ -size_t re_blocksize; /* Size of memory block */ +PCRE2_SIZE length = 1; /* Allow for final END opcode */ +PCRE2_SIZE usedlength; /* Actual length used */ +PCRE2_SIZE re_blocksize; /* Size of memory block */ +PCRE2_SIZE big32count = 0; /* 32-bit literals >= 0x80000000 */ +PCRE2_SIZE parsed_size_needed; /* Needed for parsed pattern */ -int32_t firstcuflags, reqcuflags; /* Type of first/req code unit */ -uint32_t firstcu, reqcu; /* Value of first/req code unit */ -uint32_t setflags = 0; /* NL and BSR set flags */ +int32_t firstcuflags, reqcuflags; /* Type of first/req code unit */ +uint32_t firstcu, reqcu; /* Value of first/req code unit */ +uint32_t setflags = 0; /* NL and BSR set flags */ -uint32_t skipatstart; /* When checking (*UTF) etc */ -uint32_t limit_match = UINT32_MAX; /* Unset match limits */ +uint32_t skipatstart; /* When checking (*UTF) etc */ +uint32_t limit_match = UINT32_MAX; /* Unset match limits */ uint32_t limit_recursion = UINT32_MAX; -int newline = 0; /* Unset; can be set by the pattern */ -int bsr = 0; /* Unset; can be set by the pattern */ -int errorcode = 0; /* Initialize to avoid compiler warn */ +int newline = 0; /* Unset; can be set by the pattern */ +int bsr = 0; /* Unset; can be set by the pattern */ +int errorcode = 0; /* Initialize to avoid compiler warn */ +int regexrc; /* Return from compile */ + +uint32_t i; /* Local loop counter */ /* Comments at the head of this file explain about these variables. 
*/ -PCRE2_UCHAR *copied_pattern = NULL; -PCRE2_UCHAR stack_copied_pattern[COPIED_PATTERN_SIZE]; +uint32_t stack_groupinfo[GROUPINFO_DEFAULT_SIZE]; +uint32_t stack_parsed_pattern[PARSED_PATTERN_DEFAULT_SIZE]; named_group named_groups[NAMED_GROUP_LIST_SIZE]; /* The workspace is used in different ways in the different compiling phases. -Ensure that it is 16-bit aligned for the preliminary group scan. */ +It needs to be 16-bit aligned for the preliminary parsing scan. */ -uint16_t c16workspace[(COMPILE_WORK_SIZE * sizeof(PCRE2_UCHAR))/sizeof(uint16_t)]; +uint32_t c16workspace[C16_WORK_SIZE]; PCRE2_UCHAR *cworkspace = (PCRE2_UCHAR *)c16workspace; @@ -8133,29 +8835,21 @@ if (ccontext == NULL) ccontext = (pcre2_compile_context *)(&PRIV(default_compile_context)); /* A zero-terminated pattern is indicated by the special length value -PCRE2_ZERO_TERMINATED. Otherwise, we make a copy of the pattern and add a zero, -to ensure that it is always possible to look one code unit beyond the end of -the pattern's characters. */ +PCRE2_ZERO_TERMINATED. Check for an overlong pattern. */ -if (patlen == PCRE2_ZERO_TERMINATED) patlen = PRIV(strlen)(pattern); else +if ((zero_terminated = (patlen == PCRE2_ZERO_TERMINATED))) + patlen = PRIV(strlen)(pattern); + +if (patlen > ccontext->max_pattern_length) { - if (patlen < COPIED_PATTERN_SIZE) - copied_pattern = stack_copied_pattern; - else - { - copied_pattern = ccontext->memctl.malloc(CU2BYTES(patlen + 1), - ccontext->memctl.memory_data); - if (copied_pattern == NULL) - { - *errorptr = ERR21; - return NULL; - } - } - memcpy(copied_pattern, pattern, CU2BYTES(patlen)); - copied_pattern[patlen] = 0; - pattern = copied_pattern; + *errorptr = ERR88; + return NULL; } +/* From here on, all returns from this function should end up going via the +EXIT label. */ + + /* ------------ Initialize the "static" compile data -------------- */ tables = (ccontext->tables != NULL)? 
ccontext->tables : PRIV(default_tables); @@ -8166,14 +8860,16 @@ cb.cbits = tables + cbits_offset; /* tables */ cb.ctypes = tables + ctypes_offset; cb.assert_depth = 0; -cb.bracount = cb.final_bracount = 0; +cb.bracount = 0; cb.cx = ccontext; cb.dupnames = FALSE; cb.end_pattern = pattern + patlen; +cb.erroroffset = 0; cb.external_flags = 0; cb.external_options = options; -cb.hwm = cworkspace; -cb.iscondassert = FALSE; +cb.groupinfo = stack_groupinfo; +cb.had_recurse = FALSE; +cb.lastcapture = 0; cb.max_lookbehind = 0; cb.name_entry_size = 0; cb.name_table = NULL; @@ -8182,6 +8878,7 @@ cb.named_group_list_size = NAMED_GROUP_LIST_SIZE; cb.names_found = 0; cb.open_caps = NULL; cb.parens_depth = 0; +cb.parsed_pattern = stack_parsed_pattern; cb.req_varyopt = 0; cb.start_code = cworkspace; cb.start_pattern = pattern; @@ -8195,23 +8892,43 @@ references to help in deciding whether (.*) can be treated as anchored or not. cb.top_backref = 0; cb.backref_map = 0; +/* Escape sequences \1 to \9 are always back references, but as they are only +two characters long, only two elements can be used in the parsed_pattern +vector. The first contains the reference, and we'd like to use the second to +record the offset in the pattern, so that forward references to non-existent +groups can be diagnosed later with an offset. However, on 64-bit systems, +PCRE2_SIZE won't fit. Instead, we have a vector of offsets for the first +occurrence of \1 to \9, indexed by the second parsed_pattern value. All other +references have enough space for the offset to be put into the parsed pattern. +*/ + +for (i = 0; i < 10; i++) cb.small_ref_offset[i] = PCRE2_UNSET; + + /* --------------- Start looking at the pattern --------------- */ /* Check for global one-time option settings at the start of the pattern, and -remember the offset to the actual regex. */ +remember the offset to the actual regex. With valgrind support, make the +terminator of a zero-terminated pattern inaccessible. 
This catches bugs that +would otherwise only show up for non-zero-terminated patterns. */ + +#ifdef SUPPORT_VALGRIND +if (zero_terminated) VALGRIND_MAKE_MEM_NOACCESS(pattern + patlen, CU2BYTES(1)); +#endif ptr = pattern; skipatstart = 0; -while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS && +while (patlen - skipatstart >= 2 && + ptr[skipatstart] == CHAR_LEFT_PARENTHESIS && ptr[skipatstart+1] == CHAR_ASTERISK) { - unsigned int i; for (i = 0; i < sizeof(pso_list)/sizeof(pso); i++) { pso *p = pso_list + i; - if (PRIV(strncmp_c8)(ptr+skipatstart+2, (char *)(p->name), p->length) == 0) + if (patlen - skipatstart - 2 >= p->length && + PRIV(strncmp_c8)(ptr+skipatstart+2, (char *)(p->name), p->length) == 0) { uint32_t c, pp; @@ -8240,15 +8957,22 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS && case PSO_LIMR: c = 0; pp = skipatstart; + if (!IS_DIGIT(ptr[pp])) + { + errorcode = ERR60; + ptr += pp; + goto HAD_EARLY_ERROR; + } while (IS_DIGIT(ptr[pp])) { if (c > UINT32_MAX / 10 - 1) break; /* Integer overflow */ - c = c*10 + ptr[pp++] - CHAR_0; + c = c*10 + (ptr[pp++] - CHAR_0); } if (ptr[pp++] != CHAR_RIGHT_PARENTHESIS) { errorcode = ERR60; - goto HAD_ERROR; + ptr += pp; + goto HAD_EARLY_ERROR; } if (p->type == PSO_LIMM) limit_match = c; else limit_recursion = c; @@ -8271,7 +8995,7 @@ ptr += skipatstart; if ((cb.external_options & (PCRE2_UTF|PCRE2_UCP)) != 0) { errorcode = ERR32; - goto HAD_ERROR; + goto HAD_EARLY_ERROR; } #endif @@ -8284,11 +9008,11 @@ if (utf) if ((options & PCRE2_NEVER_UTF) != 0) { errorcode = ERR74; - goto HAD_ERROR; + goto HAD_EARLY_ERROR; } if ((options & PCRE2_NO_UTF_CHECK) == 0 && (errorcode = PRIV(valid_utf)(pattern, patlen, erroroffset)) != 0) - goto HAD_ERROR; + goto HAD_ERROR; /* Offset was set by valid_utf() */ } /* Check UCP lockout. */ @@ -8297,7 +9021,7 @@ if ((cb.external_options & (PCRE2_UCP|PCRE2_NEVER_UCP)) == (PCRE2_UCP|PCRE2_NEVER_UCP)) { errorcode = ERR75; - goto HAD_ERROR; + goto HAD_EARLY_ERROR; } /* Process the BSR setting. 
*/ @@ -8336,23 +9060,102 @@ switch(newline) default: errorcode = ERR56; - goto HAD_ERROR; + goto HAD_EARLY_ERROR; } -/* Before we do anything else, do a pre-scan of the pattern in order to -discover the named groups and their numerical equivalents, so that this -information is always available for the remaining processing. */ +/* Pre-scan the pattern to do two things: (1) Discover the named groups and +their numerical equivalents, so that this information is always available for +the remaining processing. (2) At the same time, parse the pattern and put a +processed version into the parsed_pattern vector. This has escapes interpreted +and comments removed (amongst other things). -errorcode = scan_for_captures(&ptr, cb.external_options, &cb); -if (errorcode != 0) goto HAD_ERROR; +In all but one case, when PCRE2_AUTO_CALLOUT is not set, the number of unsigned +32-bit ints in the parsed pattern is bounded by the length of the pattern plus +one (for the terminator). The exceptional case is when running in 32-bit, +non-UTF mode, when literal characters greater than META_END (0x80000000) have +to be coded as two units. In this case, therefore, we scan the pattern to check +for such values. */ -/* For obscure debugging this code can be enabled. */ - -#if 0 +#if PCRE2_CODE_UNIT_WIDTH == 32 +if (!utf) + { + PCRE2_SPTR p; + for (p = ptr; p < cb.end_pattern; p++) if (*p >= META_END) big32count++; + } +#endif + +/* Ensure that the parsed pattern buffer is big enough. When PCRE2_AUTO_CALLOUT +is set we have to assume a numerical callout (4 elements) for each character +plus one at the end. This is overkill, but memory is plentiful these days. For +many smaller patterns the vector on the stack (which was set up above) can be +used. 
*/ + +parsed_size_needed = patlen - skipatstart + big32count; +if ((options & PCRE2_AUTO_CALLOUT) != 0) + parsed_size_needed = (parsed_size_needed + 1) * 5; + +if (parsed_size_needed >= PARSED_PATTERN_DEFAULT_SIZE) + { + uint32_t *heap_parsed_pattern = ccontext->memctl.malloc( + (parsed_size_needed + 1) * sizeof(uint32_t), ccontext->memctl.memory_data); + if (heap_parsed_pattern == NULL) + { + *errorptr = ERR21; + goto EXIT; + } + cb.parsed_pattern = heap_parsed_pattern; + } +cb.parsed_pattern_end = cb.parsed_pattern + parsed_size_needed + 1; + +/* Do the parsing scan. */ + +errorcode = parse_regex(ptr, cb.external_options, &has_lookbehind, &cb); +if (errorcode != 0) goto HAD_CB_ERROR; + +/* Workspace is needed to remember information about numbered groups: whether a +group can match an empty string and what its fixed length is. This is done to +avoid the possibility of recursive references causing very long compile times +when checking these features. Unnumbered groups do not have this exposure since +they cannot be referenced. We use an indexed vector for this purpose. If there +are sufficiently few groups, the default vector on the stack, as set up above, +can be used. Otherwise we have to get/free a special vector. The vector must be +initialized to zero. */ + +if (cb.bracount >= GROUPINFO_DEFAULT_SIZE) + { + cb.groupinfo = ccontext->memctl.malloc( + (cb.bracount + 1)*sizeof(uint32_t), ccontext->memctl.memory_data); + if (cb.groupinfo == NULL) + { + errorcode = ERR21; + cb.erroroffset = 0; + goto HAD_CB_ERROR; + } + } +memset(cb.groupinfo, 0, (cb.bracount + 1) * sizeof(uint32_t)); + +/* If there were any lookbehinds, scan the parsed pattern to figure out their +lengths. */ + +if (has_lookbehind) + { + errorcode = check_lookbehinds(&cb); + if (errorcode != 0) goto HAD_CB_ERROR; + } + +/* For debugging, there is a function that shows the parsed data vector. 
*/ + +#ifdef DEBUG_SHOW_PARSED +fprintf(stderr, "+++ Pre-scan complete:\n"); +show_parsed(&cb); +#endif + +/* For debugging capturing information this code can be enabled. */ + +#ifdef DEBUG_SHOW_CAPTURES { - int i; named_group *ng = cb.named_groups; - fprintf(stderr, "+++Captures: %d\n", cb.final_bracount); + fprintf(stderr, "+++Captures: %d\n", cb.bracount); for (i = 0; i < cb.names_found; i++, ng++) { fprintf(stderr, "+++%3d %.*s\n", ng->number, ng->length, ng->name); @@ -8360,12 +9163,6 @@ if (errorcode != 0) goto HAD_ERROR; } #endif -/* Reset current bracket count to zero and current pointer to the start of the -pattern. */ - -cb.bracount = 0; -ptr = pattern + skipatstart; - /* Pretend to compile the pattern while actually just accumulating the amount of memory required in the 'length' variable. This behaviour is triggered by passing a non-NULL final argument to compile_regex(). We pass a block of @@ -8374,24 +9171,26 @@ compiled code is discarded when it is no longer needed, so hopefully this workspace will never overflow, though there is a test for its doing so. On error, errorcode will be set non-zero, so we don't need to look at the -result of the function. The initial options have been put into the cb block so -that they can be changed if an option setting is found within the regex right -at the beginning. Bringing initial option settings outside can help speed up -starting point checks. We still have to pass a separate options variable (the -first argument) because that may change as the pattern is processed. */ +result of the function. The initial options have been put into the cb block, +but we still have to pass a separate options variable (the first argument) +because the options may change as the pattern is processed. 
*/ +cb.erroroffset = patlen; /* For any subsequent errors that do not set it */ +pptr = cb.parsed_pattern; code = cworkspace; *code = OP_BRA; -(void)compile_regex(cb.external_options, &code, &ptr, &errorcode, FALSE, - FALSE, 0, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, - &cb, &length); +(void)compile_regex(cb.external_options, &code, &pptr, &errorcode, 0, &firstcu, + &firstcuflags, &reqcu, &reqcuflags, NULL, &cb, &length); + +if (errorcode != 0) goto HAD_CB_ERROR; /* Offset is in cb.erroroffset */ + +/* This should be caught in compile_regex(), but just in case... */ -if (errorcode != 0) goto HAD_ERROR; if (length > MAX_PATTERN_SIZE) { errorcode = ERR20; - goto HAD_ERROR; + goto HAD_CB_ERROR; } /* Compute the size of, and then get and initialize, the data block for storing @@ -8406,7 +9205,7 @@ re = (pcre2_real_code *) if (re == NULL) { errorcode = ERR21; - goto HAD_ERROR; + goto HAD_CB_ERROR; } re->memctl = ccontext->memctl; @@ -8424,7 +9223,7 @@ re->first_codeunit = 0; re->last_codeunit = 0; re->bsr_convention = bsr; re->newline_convention = newline; -re->max_lookbehind = +re->max_lookbehind = 0; re->minlength = 0; re->top_bracket = 0; re->top_backref = 0; @@ -8440,22 +9239,16 @@ codestart = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code)) + /* Update the compile data block for the actual compile. The starting points of the name/number translation table and of the code are passed around in the compile data block. The start/end pattern and initial options are already set -from the pre-compile phase, as is the name_entry_size field. Reset the bracket -count and the names_found field. Also reset the hwm field; this time it's used -for remembering forward references to subpatterns. */ +from the pre-compile phase, as is the name_entry_size field. 
*/ cb.parens_depth = 0; cb.assert_depth = 0; -cb.bracount = 0; -cb.max_lookbehind = 0; +cb.lastcapture = 0; cb.name_table = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)); cb.start_code = codestart; -cb.hwm = (PCRE2_UCHAR *)(cb.start_workspace); -cb.iscondassert = FALSE; cb.req_varyopt = 0; cb.had_accept = FALSE; cb.had_pruneorskip = FALSE; -cb.check_lookbehind = FALSE; cb.open_caps = NULL; /* If any named groups were found, create the name/number table from the list @@ -8463,23 +9256,21 @@ created in the pre-pass. */ if (cb.names_found > 0) { - int i = cb.names_found; named_group *ng = cb.named_groups; - cb.names_found = 0; - for (; i > 0; i--, ng++) - add_name_to_table(&cb, ng->name, ng->length, ng->number); + for (i = 0; i < cb.names_found; i++, ng++) + add_name_to_table(&cb, ng->name, ng->length, ng->number, i); } /* Set up a starting, non-extracting bracket, then compile the expression. On error, errorcode will be set non-zero, so we don't need to look at the result of the function here. */ -ptr = pattern + skipatstart; +pptr = cb.parsed_pattern; code = (PCRE2_UCHAR *)codestart; *code = OP_BRA; -(void)compile_regex(re->overall_options, &code, &ptr, &errorcode, FALSE, FALSE, - 0, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, &cb, NULL); - +regexrc = compile_regex(re->overall_options, &code, &pptr, &errorcode, 0, + &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, &cb, NULL); +if (regexrc < 0) re->flags |= PCRE2_MATCH_EMPTY; re->top_bracket = cb.bracount; re->top_backref = cb.top_backref; re->max_lookbehind = cb.max_lookbehind; @@ -8490,14 +9281,11 @@ if (cb.had_accept) reqcuflags = REQ_NONE; } -/* If we have not reached end of pattern after a successful compile, there's an -excess bracket. Fill in the final opcode and check for disastrous overflow. 
-If no overflow, but the estimated length exceeds the really used length, adjust -the value of re->blocksize, and if valgrind support is configured, mark the -extra allocated memory as unaddressable, so that any out-of-bound reads can be -detected. */ +/* Fill in the final opcode and check for disastrous overflow. If no overflow, +but the estimated length exceeds the really used length, adjust the value of +re->blocksize, and if valgrind support is configured, mark the extra allocated +memory as unaddressable, so that any out-of-bound reads can be detected. */ -if (errorcode == 0 && ptr < cb.end_pattern) errorcode = ERR22; *code++ = OP_END; usedlength = code - codestart; if (usedlength > length) errorcode = ERR23; else @@ -8508,119 +9296,89 @@ if (usedlength > length) errorcode = ERR23; else #endif } +/* Scan the pattern for recursion/subroutine calls and convert the group +numbers into offsets. Maintain a small cache so that repeated groups containing +recursions are efficiently handled. */ + +#define RSCAN_CACHE_SIZE 8 + +if (errorcode == 0 && cb.had_recurse) + { + PCRE2_UCHAR *rcode; + PCRE2_SPTR rgroup; + unsigned int ccount = 0; + int start = RSCAN_CACHE_SIZE; + recurse_cache rc[RSCAN_CACHE_SIZE]; + + for (rcode = (PCRE2_UCHAR *)find_recurse(codestart, utf); + rcode != NULL; + rcode = (PCRE2_UCHAR *)find_recurse(rcode + 1 + LINK_SIZE, utf)) + { + int p, groupnumber; + + groupnumber = (int)GET(rcode, 1); + if (groupnumber == 0) rgroup = codestart; else + { + PCRE2_SPTR search_from = codestart; + rgroup = NULL; + for (i = 0, p = start; i < ccount; i++, p = (p + 1) & 7) + { + if (groupnumber == rc[p].groupnumber) + { + rgroup = rc[p].group; + break; + } + + /* Group n+1 must always start to the right of group n, so we can save + search time below when the new group number is greater than any of the + previously found groups. 
*/ + + if (groupnumber > rc[p].groupnumber) search_from = rc[p].group; + } + + if (rgroup == NULL) + { + rgroup = PRIV(find_bracket)(search_from, utf, groupnumber); + if (rgroup == NULL) + { + errorcode = ERR53; + break; + } + if (--start < 0) start = RSCAN_CACHE_SIZE - 1; + rc[start].groupnumber = groupnumber; + rc[start].group = rgroup; + if (ccount < RSCAN_CACHE_SIZE) ccount++; + } + } + + PUT(rcode, 1, rgroup - codestart); + } + } + /* In rare debugging situations we sometimes need to look at the compiled code at this stage. */ -#ifdef CALL_PRINTINT +#ifdef DEBUG_CALL_PRINTINT pcre2_printint(re, stderr, TRUE); fprintf(stderr, "Length=%lu Used=%lu\n", length, usedlength); #endif -/* Fill in any forward references that are required. There may be repeated -references; optimize for them, as searching a large regex takes time. The -test of errorcode inside the loop means that nothing is done if it is already -non-zero. */ +/* Unless disabled, check whether any single character iterators can be +auto-possessified. The function overwrites the appropriate opcode values, so +the type of the pointer must be cast. NOTE: the intermediate variable "temp" is +used in this code because at least one compiler gives a warning about loss of +"const" attribute if the cast (PCRE2_UCHAR *)codestart is used directly in the +function call. 
*/ -if (cb.hwm > cb.start_workspace) +if (errorcode == 0 && (re->overall_options & PCRE2_NO_AUTO_POSSESS) == 0) { - int prev_recno = -1; - PCRE2_SPTR groupptr = NULL; - while (errorcode == 0 && cb.hwm > cb.start_workspace) - { - int offset, recno; - cb.hwm -= LINK_SIZE; - offset = GET(cb.hwm, 0); - recno = GET(codestart, offset); - if (recno != prev_recno) - { - groupptr = PRIV(find_bracket)(codestart, utf, recno); - prev_recno = recno; - } - if (groupptr == NULL) errorcode = ERR53; - else PUT(((PCRE2_UCHAR *)codestart), offset, (int)(groupptr - codestart)); - } + PCRE2_UCHAR *temp = (PCRE2_UCHAR *)codestart; + if (PRIV(auto_possessify)(temp, utf, &cb) != 0) errorcode = ERR80; } -/* If the workspace had to be expanded, free the new memory. */ +/* Failed to compile, or error while post-processing. */ -if (cb.workspace_size > COMPILE_WORK_SIZE) - ccontext->memctl.free((void *)cb.start_workspace, - ccontext->memctl.memory_data); - -/* After a successful compile, give an error if there's back reference to a -non-existent capturing subpattern. Then, unless disabled, check whether any -single character iterators can be auto-possessified. The function overwrites -the appropriate opcode values, so the type of the pointer must be cast. NOTE: -the intermediate variable "temp" is used in this code because at least one -compiler gives a warning about loss of "const" attribute if the cast -(PCRE2_UCHAR *)codestart is used directly in the function call. */ - -if (errorcode == 0) - { - if (re->top_backref > re->top_bracket) errorcode = ERR15; - else if ((options & PCRE2_NO_AUTO_POSSESS) == 0) - { - PCRE2_UCHAR *temp = (PCRE2_UCHAR *)codestart; - if (PRIV(auto_possessify)(temp, utf, &cb) != 0) errorcode = ERR80; - } - } - -/* If there were any lookbehind assertions that contained OP_RECURSE -(recursions or subroutine calls), a flag is set for them to be checked here, -because they may contain forward references. Actual recursions cannot be fixed -length, but subroutine calls can. 
It is done like this so that those without -OP_RECURSE that are not fixed length get a diagnosic with a useful offset. The -exceptional ones forgo this. We scan the pattern to check that they are fixed -length, and set their lengths. */ - -if (errorcode == 0 && cb.check_lookbehind) - { - PCRE2_UCHAR *cc = (PCRE2_UCHAR *)codestart; - - /* Loop, searching for OP_REVERSE items, and process those that do not have - their length set. (Actually, it will also re-process any that have a length - of zero, but that is a pathological case, and it does no harm.) When we find - one, we temporarily terminate the branch it is in while we scan it. Note that - calling find_bracket() with a negative group number returns a pointer to the - OP_REVERSE item, not the actual lookbehind. */ - - for (cc = (PCRE2_UCHAR *)PRIV(find_bracket)(codestart, utf, -1); - cc != NULL; - cc = (PCRE2_UCHAR *)PRIV(find_bracket)(cc, utf, -1)) - { - if (GET(cc, 1) == 0) - { - int fixed_length; - PCRE2_UCHAR *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE); - int end_op = *be; - *be = OP_END; - fixed_length = find_fixedlength(cc, utf, TRUE, &cb, NULL); - *be = end_op; - if (fixed_length < 0) - { - errorcode = (fixed_length == -2)? ERR36 : - (fixed_length == -4)? ERR70 : ERR25; - break; - } - if (fixed_length > cb.max_lookbehind) cb.max_lookbehind = fixed_length; - PUT(cc, 1, fixed_length); - } - cc += 1 + LINK_SIZE; - } - } - -/* Failed to compile, or error while post-processing. Earlier errors get here -via the dreaded goto. */ - -if (errorcode != 0) - { - HAD_ERROR: - pcre2_code_free(re); - re = NULL; - *errorptr = errorcode; - *erroroffset = (int)(ptr - pattern); - goto EXIT; - } +if (errorcode != 0) goto HAD_CB_ERROR; /* Successful compile. If the anchored option was not passed, set it if we can determine that the pattern is anchored by virtue of ^ characters or \A @@ -8629,7 +9387,7 @@ there are no occurrences of *PRUNE or *SKIP (though there is an option to disable this case). 
*/ if ((re->overall_options & PCRE2_ANCHORED) == 0 && - is_anchored(codestart, 0, &cb, 0)) + is_anchored(codestart, 0, &cb, 0, FALSE)) re->overall_options |= PCRE2_ANCHORED; /* If the pattern is still not anchored and we do not have a first code unit, @@ -8678,7 +9436,8 @@ if ((re->overall_options & (PCRE2_ANCHORED|PCRE2_NO_START_OPTIMIZE)) == 0) when *PRUNE and SKIP are not present. (There is an option that disables this case.) */ - else if (is_startline(codestart, 0, &cb, 0)) re->flags |= PCRE2_STARTLINE; + else if (is_startline(codestart, 0, &cb, 0, FALSE)) + re->flags |= PCRE2_STARTLINE; } /* Handle the "required code unit", if one is set. In the case of an anchored @@ -8707,20 +9466,6 @@ if (reqcuflags >= 0 && } } -/* Check for a pattern than can match an empty string, so that this information -can be provided to applications. */ - -do - { - if (could_be_empty_branch(codestart, code, utf, &cb, NULL)) - { - re->flags |= PCRE2_MATCH_EMPTY; - break; - } - codestart += GET(codestart, 1); - } -while (*codestart == OP_ALT); - /* Finally, unless PCRE2_NO_START_OPTIMIZE is set, study the compiled pattern to set up information such as a bitmap of starting code units and a minimum matching length. */ @@ -8729,20 +9474,44 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && PRIV(study)(re) != 0) { errorcode = ERR31; - goto HAD_ERROR; + goto HAD_CB_ERROR; } -/* Control ends up here in all cases. If memory was obtained for a -zero-terminated copy of the pattern, remember to free it before returning. Also -free the list of named groups if a larger one had to be obtained. */ +/* Control ends up here in all cases. When running under valgrind, make a +pattern's terminating zero defined again. If memory was obtained for the parsed +version of the pattern, free it before returning. Also free the list of named +groups if a larger one had to be obtained, and likewise the group information +vector. 
*/ EXIT: -if (copied_pattern != stack_copied_pattern) - ccontext->memctl.free(copied_pattern, ccontext->memctl.memory_data); +#ifdef SUPPORT_VALGRIND +if (zero_terminated) VALGRIND_MAKE_MEM_DEFINED(pattern + patlen, CU2BYTES(1)); +#endif +if (cb.parsed_pattern != stack_parsed_pattern) + ccontext->memctl.free(cb.parsed_pattern, ccontext->memctl.memory_data); if (cb.named_group_list_size > NAMED_GROUP_LIST_SIZE) ccontext->memctl.free((void *)cb.named_groups, ccontext->memctl.memory_data); - +if (cb.groupinfo != stack_groupinfo) + ccontext->memctl.free((void *)cb.groupinfo, ccontext->memctl.memory_data); return re; /* Will be NULL after an error */ + +/* Errors discovered in parse_regex() set the offset value in the compile +block. Errors discovered before it is called must compute it from the ptr +value. After parse_regex() is called, the offset in the compile block is set to +the end of the pattern, but certain errors in compile_regex() may reset it if +an offset is available in the parsed pattern. */ + +HAD_CB_ERROR: +ptr = pattern + cb.erroroffset; + +HAD_EARLY_ERROR: +*erroroffset = ptr - pattern; + +HAD_ERROR: +*errorptr = errorcode; +pcre2_code_free(re); +re = NULL; +goto EXIT; } /* End of pcre2_compile.c */ diff --git a/pcre2/src/pcre2_config.c b/pcre2/src/pcre2_config.c index 22aa3587d..e99272f57 100644 --- a/pcre2/src/pcre2_config.c +++ b/pcre2/src/pcre2_config.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2014 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -61,15 +61,16 @@ convenient for user programs that want to test their values. 
*/ * Return info about what features are configured * *************************************************/ -/* +/* If where is NULL, the length of memory required is returned. + Arguments: what what information is required where where to put the information -Returns: 0 if data returned - >= 0 if where is NULL, giving length required +Returns: 0 if a numerical value is returned + >= 0 if a string value PCRE2_ERROR_BADOPTION if "where" not recognized - or JIT target requested when JIT not enabled + or JIT target requested when JIT not enabled */ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION @@ -127,15 +128,15 @@ switch (what) #ifdef SUPPORT_JIT { const char *v = PRIV(jit_get_target)(); - return 1 + ((where == NULL)? - strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)); + return (int)(1 + ((where == NULL)? + strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v))); } #else return PCRE2_ERROR_BADOPTION; #endif case PCRE2_CONFIG_LINKSIZE: - *((uint32_t *)where) = configured_link_size; + *((uint32_t *)where) = (uint32_t)configured_link_size; break; case PCRE2_CONFIG_MATCHLIMIT: @@ -169,8 +170,8 @@ switch (what) #else const char *v = "Unicode not supported"; #endif - return 1 + ((where == NULL)? - strlen(v): PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)); + return (int)(1 + ((where == NULL)? + strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v))); } break; @@ -206,8 +207,8 @@ switch (what) const char *v = (XSTRING(Z PCRE2_PRERELEASE)[1] == 0)? XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) : XSTRING(PCRE2_MAJOR.PCRE2_MINOR) XSTRING(PCRE2_PRERELEASE PCRE2_DATE); - return 1 + ((where == NULL)? - strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)); + return (int)(1 + ((where == NULL)? 
+ strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v))); } } diff --git a/pcre2/src/pcre2_context.c b/pcre2/src/pcre2_context.c index 6146999df..ae050fe92 100644 --- a/pcre2/src/pcre2_context.c +++ b/pcre2/src/pcre2_context.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2014 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -131,13 +131,14 @@ return gcontext; when no context is supplied to the compile function. */ const pcre2_compile_context PRIV(default_compile_context) = { - { default_malloc, default_free, NULL }, - NULL, - NULL, - PRIV(default_tables), - BSR_DEFAULT, - NEWLINE_DEFAULT, - PARENS_NEST_LIMIT }; + { default_malloc, default_free, NULL }, /* Default memory handling */ + NULL, /* Stack guard */ + NULL, /* Stack guard data */ + PRIV(default_tables), /* Character tables */ + PCRE2_UNSET, /* Max pattern length */ + BSR_DEFAULT, /* Backslash R default */ + NEWLINE_DEFAULT, /* Newline convention */ + PARENS_NEST_LIMIT }; /* As it says */ /* The create function copies the default into the new memory, but must override the default memory handling functions if a gcontext was provided. 
*/ @@ -169,6 +170,7 @@ const pcre2_match_context PRIV(default_match_context) = { #endif NULL, NULL, + PCRE2_UNSET, /* Offset limit */ MATCH_LIMIT, MATCH_LIMIT_RECURSION }; @@ -294,6 +296,13 @@ switch(value) } } +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, PCRE2_SIZE length) +{ +ccontext->max_pattern_length = length; +return 0; +} + PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t newline) { @@ -347,6 +356,13 @@ mcontext->match_limit = limit; return 0; } +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_set_offset_limit(pcre2_match_context *mcontext, PCRE2_SIZE limit) +{ +mcontext->offset_limit = limit; +return 0; +} + PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit) { diff --git a/pcre2/src/pcre2_dfa_match.c b/pcre2/src/pcre2_dfa_match.c index b14477def..c909d6128 100644 --- a/pcre2/src/pcre2_dfa_match.c +++ b/pcre2/src/pcre2_dfa_match.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2014 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -371,7 +371,7 @@ internal_dfa_match( uint32_t offsetcount, int *workspace, int wscount, - int rlevel) + uint32_t rlevel) { stateblock *active_states, *new_states, *temp_states; stateblock *next_active_state, *next_new_state; @@ -400,8 +400,8 @@ BOOL utf = FALSE; BOOL reset_could_continue = FALSE; -rlevel++; -offsetcount &= (-2); +if (rlevel++ > mb->match_limit_recursion) return PCRE2_ERROR_RECURSIONLIMIT; +offsetcount &= (uint32_t)(-2); /* Round down */ wscount -= 2; wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) / @@ -433,13 +433,13 @@ move back, and set up each alternative appropriately. */ if (*first_op == OP_REVERSE) { - int max_back = 0; - int gone_back; + size_t max_back = 0; + size_t gone_back; end_code = this_start_code; do { - int back = GET(end_code, 2+LINK_SIZE); + size_t back = (size_t)GET(end_code, 2+LINK_SIZE); if (back > max_back) max_back = back; end_code += GET(end_code, 1); } @@ -466,8 +466,8 @@ if (*first_op == OP_REVERSE) /* In byte-mode we can do this quickly. */ { - gone_back = (current_subject - max_back < start_subject)? - (int)(current_subject - start_subject) : max_back; + size_t current_offset = (size_t)(current_subject - start_subject); + gone_back = (current_offset < max_back)? 
current_offset : max_back; current_subject -= gone_back; } @@ -481,11 +481,11 @@ if (*first_op == OP_REVERSE) end_code = this_start_code; do { - int back = GET(end_code, 2+LINK_SIZE); + size_t back = (size_t)GET(end_code, 2+LINK_SIZE); if (back <= gone_back) { int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE); - ADD_NEW_DATA(-bstate, 0, gone_back - back); + ADD_NEW_DATA(-bstate, 0, (int)(gone_back - back)); } end_code += GET(end_code, 1); } @@ -509,7 +509,7 @@ else do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT); new_count = workspace[1]; if (!workspace[0]) - memcpy(new_states, active_states, new_count * sizeof(stateblock)); + memcpy(new_states, active_states, (size_t)new_count * sizeof(stateblock)); } /* Not restarting */ @@ -593,8 +593,9 @@ for (;;) stateblock *current_state = active_states + i; BOOL caseless = FALSE; PCRE2_SPTR code; + uint32_t codevalue; int state_offset = current_state->offset; - int codevalue, rrc; + int rrc; int count; /* A negative offset is a special case meaning "hold off going to this @@ -719,7 +720,7 @@ for (;;) ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0); if (codevalue != OP_KET) { - ADD_ACTIVE(state_offset - GET(code, 1), 0); + ADD_ACTIVE(state_offset - (int)GET(code, 1), 0); } } else @@ -733,11 +734,12 @@ for (;;) else if (match_count > 0 && ++match_count * 2 > (int)offsetcount) match_count = 0; count = ((match_count == 0)? 
(int)offsetcount : match_count * 2) - 2; - if (count > 0) memmove(offsets + 2, offsets, count * sizeof(PCRE2_SIZE)); + if (count > 0) memmove(offsets + 2, offsets, + (size_t)count * sizeof(PCRE2_SIZE)); if (offsetcount >= 2) { - offsets[0] = (int)(current_subject - start_subject); - offsets[1] = (int)(ptr - start_subject); + offsets[0] = (PCRE2_SIZE)(current_subject - start_subject); + offsets[1] = (PCRE2_SIZE)(ptr - start_subject); } if ((mb->moptions & PCRE2_DFA_SHORTEST) != 0) return match_count; } @@ -959,7 +961,7 @@ for (;;) { if (d == '_') left_word = TRUE; else { - int cat = UCD_CATEGORY(d); + uint32_t cat = UCD_CATEGORY(d); left_word = (cat == ucp_L || cat == ucp_N); } } @@ -984,7 +986,7 @@ for (;;) { if (c == '_') right_word = TRUE; else { - int cat = UCD_CATEGORY(c); + uint32_t cat = UCD_CATEGORY(c); right_word = (cat == ucp_L || cat == ucp_N); } } @@ -1369,7 +1371,7 @@ for (;;) if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } if (clen > 0) { - int lgb, rgb; + uint32_t lgb, rgb; PCRE2_SPTR nptr = ptr + clen; int ncount = 0; if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS) @@ -1383,7 +1385,7 @@ for (;;) dlen = 1; if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); } rgb = UCD_GRAPHBREAK(d); - if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; + if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break; ncount++; lgb = rgb; nptr += dlen; @@ -1630,7 +1632,7 @@ for (;;) ADD_ACTIVE(state_offset + 2, 0); if (clen > 0) { - int lgb, rgb; + uint32_t lgb, rgb; PCRE2_SPTR nptr = ptr + clen; int ncount = 0; if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR || @@ -1645,7 +1647,7 @@ for (;;) dlen = 1; if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); } rgb = UCD_GRAPHBREAK(d); - if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; + if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break; ncount++; lgb = rgb; nptr += dlen; @@ -1902,7 +1904,7 @@ for (;;) count = current_state->count; /* Number already matched */ if (clen > 0) { - int lgb, 
rgb; + uint32_t lgb, rgb; PCRE2_SPTR nptr = ptr + clen; int ncount = 0; if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO) @@ -1916,7 +1918,7 @@ for (;;) dlen = 1; if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); } rgb = UCD_GRAPHBREAK(d); - if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; + if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break; ncount++; lgb = rgb; nptr += dlen; @@ -2097,7 +2099,7 @@ for (;;) case OP_EXTUNI: if (clen > 0) { - int lgb, rgb; + uint32_t lgb, rgb; PCRE2_SPTR nptr = ptr + clen; int ncount = 0; lgb = UCD_GRAPHBREAK(c); @@ -2106,7 +2108,7 @@ for (;;) dlen = 1; if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); } rgb = UCD_GRAPHBREAK(d); - if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; + if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break; ncount++; lgb = rgb; nptr += dlen; @@ -2582,14 +2584,14 @@ for (;;) mb, /* static match data */ code, /* this subexpression's code */ ptr, /* where we currently are */ - (int)(ptr - start_subject), /* start offset */ + (PCRE2_SIZE)(ptr - start_subject), /* start offset */ local_offsets, /* offset vector */ sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */ local_workspace, /* workspace vector */ sizeof(local_workspace)/sizeof(int), /* size of same */ rlevel); /* function recursion level */ - if (rc == PCRE2_ERROR_DFA_UITEM) return rc; + if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc; if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK)) { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); } } @@ -2601,8 +2603,8 @@ for (;;) { PCRE2_SIZE local_offsets[1000]; int local_workspace[1000]; - int codelink = GET(code, 1); - int condcode; + int codelink = (int)GET(code, 1); + PCRE2_UCHAR condcode; /* Because of the way auto-callout works during compile, a callout item is inserted between OP_COND and an assertion condition. 
This does not @@ -2611,8 +2613,10 @@ for (;;) if (code[LINK_SIZE + 1] == OP_CALLOUT || code[LINK_SIZE + 1] == OP_CALLOUT_STR) { - unsigned int callout_length = (code[LINK_SIZE + 1] == OP_CALLOUT) - ? PRIV(OP_lengths)[OP_CALLOUT] : GET(code, 2 + 3*LINK_SIZE); + PCRE2_SIZE callout_length = (code[LINK_SIZE + 1] == OP_CALLOUT)? + (PCRE2_SIZE)PRIV(OP_lengths)[OP_CALLOUT] : + (PCRE2_SIZE)GET(code, 2 + 3*LINK_SIZE); + rrc = 0; if (mb->callout != NULL) { @@ -2678,7 +2682,7 @@ for (;;) else if (condcode == OP_RREF) { - int value = GET2(code, LINK_SIZE + 2); + unsigned int value = GET2(code, LINK_SIZE + 2); if (value != RREF_ANY) return PCRE2_ERROR_DFA_UCOND; if (mb->recursive != NULL) { ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); } @@ -2699,14 +2703,14 @@ for (;;) mb, /* fixed match data */ asscode, /* this subexpression's code */ ptr, /* where we currently are */ - (int)(ptr - start_subject), /* start offset */ + (PCRE2_SIZE)(ptr - start_subject), /* start offset */ local_offsets, /* offset vector */ sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */ local_workspace, /* workspace vector */ sizeof(local_workspace)/sizeof(int), /* size of same */ rlevel); /* function recursion level */ - if (rc == PCRE2_ERROR_DFA_UITEM) return rc; + if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc; if ((rc >= 0) == (condcode == OP_ASSERT || condcode == OP_ASSERTBACK)) { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); } @@ -2747,7 +2751,7 @@ for (;;) mb, /* fixed match data */ callpat, /* this subexpression's code */ ptr, /* where we currently are */ - (int)(ptr - start_subject), /* start offset */ + (PCRE2_SIZE)(ptr - start_subject), /* start offset */ local_offsets, /* offset vector */ sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */ local_workspace, /* workspace vector */ @@ -2768,18 +2772,19 @@ for (;;) { for (rc = rc*2 - 2; rc >= 0; rc -= 2) { - int charcount = local_offsets[rc+1] - local_offsets[rc]; + PCRE2_SIZE charcount = 
local_offsets[rc+1] - local_offsets[rc]; #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 if (utf) { PCRE2_SPTR p = start_subject + local_offsets[rc]; PCRE2_SPTR pp = start_subject + local_offsets[rc+1]; - while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--; + while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--; } #endif if (charcount > 0) { - ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1)); + ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, + (int)(charcount - 1)); } else { @@ -2798,7 +2803,7 @@ for (;;) case OP_SCBRAPOS: case OP_BRAPOSZERO: { - int charcount, matched_count; + PCRE2_SIZE charcount, matched_count; PCRE2_SPTR local_ptr = ptr; BOOL allow_zero; @@ -2821,7 +2826,7 @@ for (;;) mb, /* fixed match data */ code, /* this subexpression's code */ local_ptr, /* where we currently are */ - (int)(ptr - start_subject), /* start offset */ + (PCRE2_SIZE)(ptr - start_subject), /* start offset */ local_offsets, /* offset vector */ sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */ local_workspace, /* workspace vector */ @@ -2872,11 +2877,11 @@ for (;;) { PCRE2_SPTR p = ptr; PCRE2_SPTR pp = local_ptr; - charcount = (int)(pp - p); + charcount = (PCRE2_SIZE)(pp - p); #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 - if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--; + if (utf) while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--; #endif - ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1)); + ADD_NEW_DATA(-next_state_offset, 0, (int)(charcount - 1)); } } } @@ -2893,7 +2898,7 @@ for (;;) mb, /* fixed match data */ code, /* this subexpression's code */ ptr, /* where we currently are */ - (int)(ptr - start_subject), /* start offset */ + (PCRE2_SIZE)(ptr - start_subject), /* start offset */ local_offsets, /* offset vector */ sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */ local_workspace, /* workspace vector */ @@ -2903,7 +2908,7 @@ for (;;) if (rc >= 0) { PCRE2_SPTR end_subpattern = code; - int charcount 
= local_offsets[1] - local_offsets[0]; + PCRE2_SIZE charcount = local_offsets[1] - local_offsets[0]; int next_state_offset, repeat_state_offset; do { end_subpattern += GET(end_subpattern, 1); } @@ -2960,12 +2965,12 @@ for (;;) { PCRE2_SPTR p = start_subject + local_offsets[0]; PCRE2_SPTR pp = start_subject + local_offsets[1]; - while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--; + while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--; } #endif - ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1)); + ADD_NEW_DATA(-next_state_offset, 0, (int)(charcount - 1)); if (repeat_state_offset >= 0) - { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); } + { ADD_NEW_DATA(-repeat_state_offset, 0, (int)(charcount - 1)); } } } else if (rc != PCRE2_ERROR_NOMATCH) return rc; @@ -3018,7 +3023,7 @@ for (;;) return rrc; /* Abandon */ } if (rrc == 0) - { ADD_ACTIVE(state_offset + callout_length, 0); } + { ADD_ACTIVE(state_offset + (int)callout_length, 0); } } break; @@ -3110,12 +3115,13 @@ Returns: > 0 => number of match offset pairs placed in offsets PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data, - pcre2_match_context *mcontext, int *workspace, size_t wscount) + pcre2_match_context *mcontext, int *workspace, PCRE2_SIZE wscount) { const pcre2_real_code *re = (const pcre2_real_code *)code; PCRE2_SPTR start_match; PCRE2_SPTR end_subject; +PCRE2_SPTR bumpalong_limit; PCRE2_SPTR req_cu_ptr; BOOL utf, anchored, startline, firstline; @@ -3172,15 +3178,10 @@ occur. 
*/ #define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET) #define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART) -options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO)); +options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1))); #undef FF #undef OO -/* A NULL match context means "use a default context" */ - -if (mcontext == NULL) - mcontext = (pcre2_match_context *)(&PRIV(default_match_context)); - /* If restarting after a partial match, do some sanity checks on the contents of the workspace. */ @@ -3205,20 +3206,33 @@ where to start. */ startline = (re->flags & PCRE2_STARTLINE) != 0; firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0; +bumpalong_limit = end_subject; -/* Fill in the fields in the match block. */ +/* Get data from the match context, if present, and fill in the fields in the +match block. It is an error to set an offset limit without setting the flag at +compile time. */ if (mcontext == NULL) { mb->callout = NULL; mb->memctl = re->memctl; + mb->match_limit_recursion = PRIV(default_match_context).recursion_limit; } else { + if (mcontext->offset_limit != PCRE2_UNSET) + { + if ((re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0) + return PCRE2_ERROR_BADOFFSETLIMIT; + bumpalong_limit = subject + mcontext->offset_limit; + } mb->callout = mcontext->callout; mb->callout_data = mcontext->callout_data; mb->memctl = mcontext->memctl; + mb->match_limit_recursion = mcontext->recursion_limit; } +if (mb->match_limit_recursion > re->limit_recursion) + mb->match_limit_recursion = re->limit_recursion; mb->start_code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) + re->name_count * re->name_entry_size; @@ -3264,18 +3278,50 @@ switch(re->newline_convention) /* Check a UTF string for validity if required. For 8-bit and 16-bit strings, we must also check that a starting offset does not point into the middle of a -multiunit character. */ +multiunit character. 
We check only the portion of the subject that is going to +be inspected during matching - from the offset minus the maximum lookbehind +to the given length. This saves time when a small part of a large subject is +being matched by the use of a starting offset. Note that the maximum lookbehind +is a number of characters, not code units. */ #ifdef SUPPORT_UNICODE if (utf && (options & PCRE2_NO_UTF_CHECK) == 0) { - match_data->rc = PRIV(valid_utf)(subject, length, &(match_data->startchar)); - if (match_data->rc != 0) return match_data->rc; + PCRE2_SPTR check_subject = start_match; /* start_match includes offset */ + + if (start_offset > 0) + { #if PCRE2_CODE_UNIT_WIDTH != 32 - if (start_offset > 0 && start_offset < length && - NOT_FIRSTCHAR(subject[start_offset])) - return PCRE2_ERROR_BADUTFOFFSET; + unsigned int i; + if (start_match < end_subject && NOT_FIRSTCU(*start_match)) + return PCRE2_ERROR_BADUTFOFFSET; + for (i = re->max_lookbehind; i > 0 && check_subject > subject; i--) + { + check_subject--; + while (check_subject > subject && +#if PCRE2_CODE_UNIT_WIDTH == 8 + (*check_subject & 0xc0) == 0x80) +#else /* 16-bit */ + (*check_subject & 0xfc00) == 0xdc00) +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + check_subject--; + } +#else /* In the 32-bit library, one code unit equals one character. */ + check_subject -= re->max_lookbehind; + if (check_subject < subject) check_subject = subject; #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ + } + + /* Validate the relevant portion of the subject. After an error, adjust the + offset to be an absolute offset in the whole string. 
*/ + + match_data->rc = PRIV(valid_utf)(check_subject, + length - (PCRE2_SIZE)(check_subject - subject), &(match_data->startchar)); + if (match_data->rc != 0) + { + match_data->startchar += (PCRE2_SIZE)(check_subject - subject); + return match_data->rc; + } } #endif /* SUPPORT_UNICODE */ @@ -3295,7 +3341,8 @@ if (!anchored) { first_cu2 = TABLE_GET(first_cu, mb->tables + fcc_offset, first_cu); #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8 - if (utf && first_cu > 127) first_cu2 = UCD_OTHERCASE(first_cu); + if (utf && first_cu > 127) + first_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(first_cu); #endif } } @@ -3315,7 +3362,7 @@ if ((re->flags & PCRE2_LASTSET) != 0) { req_cu2 = TABLE_GET(req_cu, mb->tables + fcc_offset, req_cu); #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8 - if (utf && req_cu > 127) req_cu2 = UCD_OTHERCASE(req_cu); + if (utf && req_cu > 127) req_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(req_cu); #endif } } @@ -3427,7 +3474,7 @@ for (;;) { while (start_match < end_subject) { - register uint32_t c = UCHAR21TEST(start_match); + uint32_t c = UCHAR21TEST(start_match); #if PCRE2_CODE_UNIT_WIDTH != 8 if (c > 255) c = 255; #endif @@ -3467,7 +3514,7 @@ for (;;) if (has_req_cu && end_subject - start_match < REQ_CU_MAX) { - register PCRE2_SPTR p = start_match + (has_first_cu? 1:0); + PCRE2_SPTR p = start_match + (has_first_cu? 1:0); /* We don't need to repeat the search if we haven't yet reached the place we found it at last time. */ @@ -3478,7 +3525,7 @@ for (;;) { while (p < end_subject) { - register uint32_t pp = UCHAR21INCTEST(p); + uint32_t pp = UCHAR21INCTEST(p); if (pp == req_cu || pp == req_cu2) { p--; break; } } } @@ -3507,6 +3554,10 @@ for (;;) /* ------------ End of start of match optimizations ------------ */ + /* Give no match if we have passed the bumpalong limit. 
*/ + + if (start_match > bumpalong_limit) break; + /* OK, now we can do the business */ mb->start_used_ptr = start_match; @@ -3519,9 +3570,9 @@ for (;;) start_match, /* where we currently are */ start_offset, /* start offset in subject */ match_data->ovector, /* offset vector */ - match_data->oveccount * 2, /* actual size of same */ + (uint32_t)match_data->oveccount * 2, /* actual size of same */ workspace, /* workspace vector */ - wscount, /* size of same */ + (int)wscount, /* size of same */ 0); /* function recurse level */ /* Anything other than "no match" means we are done, always; otherwise, carry @@ -3535,7 +3586,7 @@ for (;;) match_data->ovector[1] = (PCRE2_SIZE)(end_subject - subject); } match_data->leftchar = (PCRE2_SIZE)(mb->start_used_ptr - subject); - match_data->rightchar = mb->last_used_ptr - subject; + match_data->rightchar = (PCRE2_SIZE)( mb->last_used_ptr - subject); match_data->startchar = (PCRE2_SIZE)(start_match - subject); match_data->rc = rc; return rc; diff --git a/pcre2/src/pcre2_error.c b/pcre2/src/pcre2_error.c index c539bd23e..437bdfd20 100644 --- a/pcre2/src/pcre2_error.c +++ b/pcre2/src/pcre2_error.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2015 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -51,11 +51,10 @@ POSSIBILITY OF SUCH DAMAGE. /* The texts of compile-time error messages. Compile-time error numbers start at COMPILE_ERROR_BASE (100). -Do not ever re-use any error number, because they are documented. Always add a -new error instead. 
This used to be a table of strings, but in order to reduce -the number of relocations needed when a shared library is loaded dynamically, -it is now one long string. We cannot use a table of offsets, because the -lengths of inserts such as XSTRING(MAX_NAME_SIZE) are not known. Instead, +This used to be a table of strings, but in order to reduce the number of +relocations needed when a shared library is loaded dynamically, it is now one +long string. We cannot use a table of offsets, because the lengths of inserts +such as XSTRING(MAX_NAME_SIZE) are not known. Instead, pcre2_get_error_message() counts through to the one it wants - this isn't a performance issue because these strings are used only when there is an error. @@ -63,7 +62,7 @@ Each substring ends with \0 to insert a null character. This includes the final substring, so that the whole string ends with \0\0, which can be detected when counting through. */ -static const char compile_error_texts[] = +static const unsigned char compile_error_texts[] = "no error\0" "\\ at end of pattern\0" "\\c at end of pattern\0" @@ -92,13 +91,13 @@ static const char compile_error_texts[] = "failed to allocate heap memory\0" "unmatched closing parenthesis\0" "internal error: code overflow\0" - "unrecognized character after (?<\0" + "missing closing parenthesis for condition\0" /* 25 */ "lookbehind assertion is not fixed length\0" - "malformed number or name after (?(\0" + "a relative value of zero is not allowed\0" "conditional group contains more than two branches\0" "assertion expected after (?( or (?(?C)\0" - "(?R or (?[+-]digits must be followed by )\0" + "digit expected after (?+ or (?-\0" /* 30 */ "unknown POSIX class name\0" "internal error in pcre2_study(): should not occur\0" @@ -106,13 +105,13 @@ static const char compile_error_texts[] = "parentheses are too deeply nested (stack check)\0" "character code point value in \\x{} or \\o{} is too large\0" /* 35 */ - "invalid condition (?(0)\0" - "\\C is not allowed in a 
lookbehind assertion\0" + "lookbehind is too complicated\0" + "\\C is not allowed in a lookbehind assertion in UTF-" XSTRING(PCRE2_CODE_UNIT_WIDTH) " mode\0" "PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0" "number after (?C is greater than 255\0" "closing parenthesis for (?C expected\0" /* 40 */ - "recursion could loop indefinitely\0" + "invalid escape sequence in (*VERB) name\0" "unrecognized character after (?P\0" "syntax error in subpattern name (missing terminator)\0" "two named subpatterns have the same name (PCRE2_DUPNAMES not set)\0" @@ -133,13 +132,13 @@ static const char compile_error_texts[] = "missing opening brace after \\o\0" "internal error: unknown newline setting\0" "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0" - "a numbered reference must not be zero\0" + "(?R (recursive pattern call) must be followed by a closing parenthesis\0" "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0" /* 60 */ "(*VERB) not recognized or malformed\0" - "number is too big\0" + "group number is too big\0" "subpattern name expected\0" - "digit expected after (?+\0" + "internal error: parsed pattern overflow\0" "non-octal character in \\o{} (closing brace missing?)\0" /* 65 */ "different names for subpatterns of the same number are not allowed\0" @@ -152,9 +151,9 @@ static const char compile_error_texts[] = #endif "\\k is not followed by a braced, angle-bracketed, or quoted name\0" /* 70 */ - "internal error: unknown opcode in find_fixedlength()\0" + "internal error: unknown meta code in check_lookbehinds()\0" "\\N is not supported in a class\0" - "too many forward references\0" + "callout string is too long\0" "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0" "using UTF is disabled by the application\0" /* 75 */ @@ -162,18 +161,26 @@ static const char compile_error_texts[] = "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0" "character code point value in \\u.... 
sequence is too large\0" "digits missing in \\x{} or \\o{}\0" - "syntax error in (?(VERSION condition\0" + "syntax error or number too big in (?(VERSION condition\0" /* 80 */ "internal error: unknown opcode in auto_possessify()\0" "missing terminating delimiter for callout with string argument\0" "unrecognized string delimiter follows (?C\0" "using \\C is disabled by the application\0" "(?| and/or (?J: or (?x: parentheses are too deeply nested\0" + /* 85 */ + "using \\C is disabled in this PCRE2 library\0" + "regular expression is too complicated\0" + "lookbehind assertion is too long\0" + "pattern string is longer than the limit set by the application\0" + "internal error: unknown code in parsed pattern\0" + /* 90 */ + "internal error: bad code value in parsed_skip()\0" ; /* Match-time and UTF error texts are in the same format. */ -static const char match_error_texts[] = +static const unsigned char match_error_texts[] = "no error\0" "no match\0" "partial match\0" @@ -200,7 +207,7 @@ static const char match_error_texts[] = /* 20 */ "UTF-8 error: overlong 5-byte sequence\0" "UTF-8 error: overlong 6-byte sequence\0" - "UTF-8 error: isolated 0x80 byte\0" + "UTF-8 error: isolated byte with 0x80 bit set\0" "UTF-8 error: illegal byte (0xfe or 0xff)\0" "UTF-16 error: missing low surrogate at end\0" /* 25 */ @@ -239,7 +246,16 @@ static const char match_error_texts[] = "nested recursion at the same subject position\0" "recursion limit exceeded\0" "requested value is not available\0" + /* 55 */ "requested value is not set\0" + "offset limit set without PCRE2_USE_OFFSET_LIMIT\0" + "bad escape sequence in replacement string\0" + "expected closing curly bracket in replacement string\0" + "bad substitution in replacement string\0" + /* 60 */ + "match with end before start is not supported\0" + "too many replacements (more than INT_MAX)\0" + "bad serialized data\0" ; @@ -262,34 +278,34 @@ Returns: length of message if all is well */ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION 
-pcre2_get_error_message(int enumber, PCRE2_UCHAR *buffer, size_t size) +pcre2_get_error_message(int enumber, PCRE2_UCHAR *buffer, PCRE2_SIZE size) { -char xbuff[128]; -const char *message; -size_t i; -uint32_t n; +const unsigned char *message; +PCRE2_SIZE i; +int n; if (size == 0) return PCRE2_ERROR_NOMEMORY; -if (enumber > COMPILE_ERROR_BASE) /* Compile error */ +if (enumber >= COMPILE_ERROR_BASE) /* Compile error */ { message = compile_error_texts; n = enumber - COMPILE_ERROR_BASE; } -else /* Match or UTF error */ +else if (enumber < 0) /* Match or UTF error */ { message = match_error_texts; n = -enumber; } +else /* Invalid error number */ + { + message = (unsigned char *)"\0"; /* Empty message list */ + n = 1; + } for (; n > 0; n--) { while (*message++ != CHAR_NULL) {}; - if (*message == CHAR_NULL) - { - sprintf(xbuff, "No text for error %d", enumber); - break; - } + if (*message == CHAR_NULL) return PCRE2_ERROR_BADDATA; } for (i = 0; *message != 0; i++) @@ -303,7 +319,7 @@ for (i = 0; *message != 0; i++) } buffer[i] = 0; -return i; +return (int)i; } /* End of pcre2_error.c */ diff --git a/pcre2/src/pcre2_find_bracket.c b/pcre2/src/pcre2_find_bracket.c new file mode 100644 index 000000000..357385a11 --- /dev/null +++ b/pcre2/src/pcre2_find_bracket.c @@ -0,0 +1,218 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. 
+ + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+----------------------------------------------------------------------------- +*/ + + +/* This module contains a single function that scans through a compiled pattern +until it finds a capturing bracket with the given number, or, if the number is +negative, an instance of OP_REVERSE for a lookbehind. The function is called +from pcre2_compile.c and also from pcre2_study.c when finding the minimum +matching length. */ + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "pcre2_internal.h" + + +/************************************************* +* Scan compiled regex for specific bracket * +*************************************************/ + +/* +Arguments: + code points to start of expression + utf TRUE in UTF mode + number the required bracket number or negative to find a lookbehind + +Returns: pointer to the opcode for the bracket, or NULL if not found +*/ + +PCRE2_SPTR +PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number) +{ +for (;;) + { + PCRE2_UCHAR c = *code; + + if (c == OP_END) return NULL; + + /* XCLASS is used for classes that cannot be represented just by a bit map. + This includes negated single high-valued characters. CALLOUT_STR is used for + callouts with string arguments. In both cases the length in the table is + zero; the actual length is stored in the compiled code. 
*/ + + if (c == OP_XCLASS) code += GET(code, 1); + else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE); + + /* Handle lookbehind */ + + else if (c == OP_REVERSE) + { + if (number < 0) return (PCRE2_UCHAR *)code; + code += PRIV(OP_lengths)[c]; + } + + /* Handle capturing bracket */ + + else if (c == OP_CBRA || c == OP_SCBRA || + c == OP_CBRAPOS || c == OP_SCBRAPOS) + { + int n = (int)GET2(code, 1+LINK_SIZE); + if (n == number) return (PCRE2_UCHAR *)code; + code += PRIV(OP_lengths)[c]; + } + + /* Otherwise, we can get the item's length from the table, except that for + repeated character types, we have to test for \p and \P, which have an extra + two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we + must add in its length. */ + + else + { + switch(c) + { + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: + case OP_TYPEPOSSTAR: + case OP_TYPEPOSPLUS: + case OP_TYPEPOSQUERY: + if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; + break; + + case OP_TYPEUPTO: + case OP_TYPEMINUPTO: + case OP_TYPEEXACT: + case OP_TYPEPOSUPTO: + if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) + code += 2; + break; + + case OP_MARK: + case OP_PRUNE_ARG: + case OP_SKIP_ARG: + case OP_THEN_ARG: + code += code[1]; + break; + } + + /* Add in the fixed length from the table */ + + code += PRIV(OP_lengths)[c]; + + /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be + followed by a multi-byte character. The length in the table is a minimum, so + we have to arrange to skip the extra bytes. 
*/ + +#ifdef MAYBE_UTF_MULTI + if (utf) switch(c) + { + case OP_CHAR: + case OP_CHARI: + case OP_NOT: + case OP_NOTI: + case OP_EXACT: + case OP_EXACTI: + case OP_NOTEXACT: + case OP_NOTEXACTI: + case OP_UPTO: + case OP_UPTOI: + case OP_NOTUPTO: + case OP_NOTUPTOI: + case OP_MINUPTO: + case OP_MINUPTOI: + case OP_NOTMINUPTO: + case OP_NOTMINUPTOI: + case OP_POSUPTO: + case OP_POSUPTOI: + case OP_NOTPOSUPTO: + case OP_NOTPOSUPTOI: + case OP_STAR: + case OP_STARI: + case OP_NOTSTAR: + case OP_NOTSTARI: + case OP_MINSTAR: + case OP_MINSTARI: + case OP_NOTMINSTAR: + case OP_NOTMINSTARI: + case OP_POSSTAR: + case OP_POSSTARI: + case OP_NOTPOSSTAR: + case OP_NOTPOSSTARI: + case OP_PLUS: + case OP_PLUSI: + case OP_NOTPLUS: + case OP_NOTPLUSI: + case OP_MINPLUS: + case OP_MINPLUSI: + case OP_NOTMINPLUS: + case OP_NOTMINPLUSI: + case OP_POSPLUS: + case OP_POSPLUSI: + case OP_NOTPOSPLUS: + case OP_NOTPOSPLUSI: + case OP_QUERY: + case OP_QUERYI: + case OP_NOTQUERY: + case OP_NOTQUERYI: + case OP_MINQUERY: + case OP_MINQUERYI: + case OP_NOTMINQUERY: + case OP_NOTMINQUERYI: + case OP_POSQUERY: + case OP_POSQUERYI: + case OP_NOTPOSQUERY: + case OP_NOTPOSQUERYI: + if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]); + break; + } +#else + (void)(utf); /* Keep compiler happy by referencing function argument */ +#endif /* MAYBE_UTF_MULTI */ + } + } +} + +/* End of pcre2_find_bracket.c */ diff --git a/pcre2/src/pcre2_fuzzsupport.c b/pcre2/src/pcre2_fuzzsupport.c new file mode 100644 index 000000000..462b48a93 --- /dev/null +++ b/pcre2/src/pcre2_fuzzsupport.c @@ -0,0 +1,316 @@ +/*************************************************************************** +Fuzzer driver for PCRE2. Given an arbitrary string of bytes and a length, it +tries to compile and match it, deriving options from the string itself. If +STANDALONE is defined, a main program that calls the driver with the contents +of specified files is compiled, and commentary on what is happening is output. 
+If an argument starts with '=' the rest of it it is taken as a literal string +rather than a file name. This allows easy testing of short strings. + +Written by Philip Hazel, October 2016 +***************************************************************************/ + +#include +#include +#include +#include + +#define PCRE2_CODE_UNIT_WIDTH 8 +#include "pcre2.h" + +#define MAX_MATCH_SIZE 1000 + +#define ALLOWED_COMPILE_OPTIONS \ + (PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \ + PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \ + PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_EXTENDED|PCRE2_FIRSTLINE| \ + PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \ + PCRE2_NO_AUTO_CAPTURE| \ + PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \ + PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_USE_OFFSET_LIMIT| \ + PCRE2_UTF) + +#define ALLOWED_MATCH_OPTIONS \ + (PCRE2_ANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \ + PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \ + PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT) + +/* This is the callout function. Its only purpose is to halt matching if there +are more than 100 callouts, as one way of stopping too much time being spent on +fruitless matches. The callout data is a pointer to the counter. */ + +static int callout_function(pcre2_callout_block *cb, void *callout_data) +{ +(void)cb; /* Avoid unused parameter warning */ +*((uint32_t *)callout_data) += 1; +return (*((uint32_t *)callout_data) > 100)? PCRE2_ERROR_CALLOUT : 0; +} + +/* Putting in this apparently unnecessary prototype prevents gcc from giving a +"no previous prototype" warning when compiling at high warning level. */ + +int LLVMFuzzerTestOneInput(const unsigned char *, size_t); + +/* Here's the driving function. 
*/ + +int LLVMFuzzerTestOneInput(const unsigned char *data, size_t size) +{ +uint32_t compile_options; +uint32_t match_options; +pcre2_match_data *match_data = NULL; +pcre2_match_context *match_context = NULL; +size_t match_size; +int r1, r2; +int i; + +if (size < 1) return 0; + +/* Limiting the length of the subject for matching stops fruitless searches +in large trees taking too much time. */ + +match_size = (size > MAX_MATCH_SIZE)? MAX_MATCH_SIZE : size; + +/* Figure out some options to use. Initialize the random number to ensure +repeatability. Ensure that we get a 32-bit unsigned random number for testing +options. (RAND_MAX is required to be at least 32767, but is commonly +2147483647, which excludes the top bit.) */ + +srand((unsigned int)(data[size/2])); +r1 = rand(); +r2 = rand(); + +/* Ensure that all undefined option bits are zero (waste of time trying them) +and also that PCRE2_NO_UTF_CHECK is unset, as there is no guarantee that the +input is UTF-8. Also unset PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as there is no +reason to disallow UTF and UCP. Force PCRE2_NEVER_BACKSLASH_C to be set because +\C in random patterns is highly likely to cause a crash. */ + +compile_options = + ((((uint32_t)r1 << 16) | ((uint32_t)r2 & 0xffff)) & ALLOWED_COMPILE_OPTIONS) | + PCRE2_NEVER_BACKSLASH_C; + +match_options = + ((((uint32_t)r1 << 16) | ((uint32_t)r2 & 0xffff)) & ALLOWED_MATCH_OPTIONS); + +/* Do the compile with and without the options, and after a successful compile, +likewise do the match with and without the options. */ + +for (i = 0; i < 2; i++) + { + uint32_t callout_count; + int errorcode; + PCRE2_SIZE erroroffset; + pcre2_code *code; + +#ifdef STANDALONE + printf("Compile options %.8x never_backslash_c", compile_options); + printf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + ((compile_options & PCRE2_ALT_BSUX) != 0)? ",alt_bsux" : "", + ((compile_options & PCRE2_ALT_CIRCUMFLEX) != 0)? 
",alt_circumflex" : "", + ((compile_options & PCRE2_ALT_VERBNAMES) != 0)? ",alt_verbnames" : "", + ((compile_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? ",allow_empty_class" : "", + ((compile_options & PCRE2_ANCHORED) != 0)? ",anchored" : "", + ((compile_options & PCRE2_AUTO_CALLOUT) != 0)? ",auto_callout" : "", + ((compile_options & PCRE2_CASELESS) != 0)? ",caseless" : "", + ((compile_options & PCRE2_DOLLAR_ENDONLY) != 0)? ",dollar_endonly" : "", + ((compile_options & PCRE2_DOTALL) != 0)? ",dotall" : "", + ((compile_options & PCRE2_DUPNAMES) != 0)? ",dupnames" : "", + ((compile_options & PCRE2_EXTENDED) != 0)? ",extended" : "", + ((compile_options & PCRE2_FIRSTLINE) != 0)? ",firstline" : "", + ((compile_options & PCRE2_MATCH_UNSET_BACKREF) != 0)? ",match_unset_backref" : "", + ((compile_options & PCRE2_MULTILINE) != 0)? ",multiline" : "", + ((compile_options & PCRE2_NEVER_UCP) != 0)? ",never_ucp" : "", + ((compile_options & PCRE2_NEVER_UTF) != 0)? ",never_utf" : "", + ((compile_options & PCRE2_NO_AUTO_CAPTURE) != 0)? ",no_auto_capture" : "", + ((compile_options & PCRE2_NO_AUTO_POSSESS) != 0)? ",no_auto_possess" : "", + ((compile_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? ",no_dotstar_anchor" : "", + ((compile_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "", + ((compile_options & PCRE2_NO_START_OPTIMIZE) != 0)? ",no_start_optimize" : "", + ((compile_options & PCRE2_UCP) != 0)? ",ucp" : "", + ((compile_options & PCRE2_UNGREEDY) != 0)? ",ungreedy" : "", + ((compile_options & PCRE2_USE_OFFSET_LIMIT) != 0)? ",use_offset_limit" : "", + ((compile_options & PCRE2_UTF) != 0)? ",utf" : ""); +#endif + + code = pcre2_compile((PCRE2_SPTR)data, (PCRE2_SIZE)size, compile_options, + &errorcode, &erroroffset, NULL); + + /* Compilation succeeded */ + + if (code != NULL) + { + int j; + uint32_t save_match_options = match_options; + + /* Create match data and context blocks only when we first need them. 
Set + low match and recursion limits to avoid wasting too much searching large + pattern trees. Almost all matches are going to fail. */ + + if (match_data == NULL) + { + match_data = pcre2_match_data_create(32, NULL); + if (match_data == NULL) + { +#ifdef STANDALONE + printf("** Failed to create match data block\n"); +#endif + return 0; + } + } + + if (match_context == NULL) + { + match_context = pcre2_match_context_create(NULL); + if (match_context == NULL) + { +#ifdef STANDALONE + printf("** Failed to create match context block\n"); +#endif + return 0; + } + (void)pcre2_set_match_limit(match_context, 100); + (void)pcre2_set_recursion_limit(match_context, 100); + (void)pcre2_set_callout(match_context, callout_function, &callout_count); + } + + /* Match twice, with and without options */ + + for (j = 0; j < 2; j++) + { +#ifdef STANDALONE + printf("Match options %.8x", match_options); + printf("%s%s%s%s%s%s%s%s\n", + ((match_options & PCRE2_ANCHORED) != 0)? ",anchored" : "", + ((match_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "", + ((match_options & PCRE2_NOTBOL) != 0)? ",notbol" : "", + ((match_options & PCRE2_NOTEMPTY) != 0)? ",notempty" : "", + ((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? ",notempty_atstart" : "", + ((match_options & PCRE2_NOTEOL) != 0)? ",noteol" : "", + ((match_options & PCRE2_PARTIAL_HARD) != 0)? ",partial_hard" : "", + ((match_options & PCRE2_PARTIAL_SOFT) != 0)? 
",partial_soft" : ""); +#endif + + callout_count = 0; + errorcode = pcre2_match(code, (PCRE2_SPTR)data, (PCRE2_SIZE)match_size, 0, + match_options, match_data, match_context); + +#ifdef STANDALONE + if (errorcode >= 0) printf("Match returned %d\n", errorcode); else + { + unsigned char buffer[256]; + pcre2_get_error_message(errorcode, buffer, 256); + printf("Match failed: error %d: %s\n", errorcode, buffer); + } +#endif + + match_options = 0; /* For second time */ + } + + match_options = save_match_options; /* Reset for the second compile */ + pcre2_code_free(code); + } + + /* Compilation failed */ + + else + { + unsigned char buffer[256]; + pcre2_get_error_message(errorcode, buffer, 256); +#ifdef STANDALONE + printf("Error %d at offset %lu: %s\n", errorcode, erroroffset, buffer); +#else + if (strstr((const char *)buffer, "internal error") != NULL) abort(); +#endif + } + + compile_options = PCRE2_NEVER_BACKSLASH_C; /* For second time */ + } + +if (match_data != NULL) pcre2_match_data_free(match_data); +if (match_context != NULL) pcre2_match_context_free(match_context); + +return 0; +} + + +/* Optional main program. */ + +#ifdef STANDALONE +int main(int argc, char **argv) +{ +int i; + +if (argc < 2) + { + printf("** No arguments given\n"); + return 0; + } + +for (i = 1; i < argc; i++) + { + size_t filelen; + size_t readsize; + unsigned char *buffer; + FILE *f; + + /* Handle a literal string. Copy to an exact size buffer so that checks for + overrunning work. 
*/ + + if (argv[i][0] == '=') + { + readsize = strlen(argv[i]) - 1; + printf("------ ------\n"); + printf("Length = %lu\n", readsize); + printf("%.*s\n", (int)readsize, argv[i]+1); + buffer = (unsigned char *)malloc(readsize); + if (buffer == NULL) + printf("** Failed to allocate %lu bytes of memory\n", readsize); + else + { + memcpy(buffer, argv[i]+1, readsize); + LLVMFuzzerTestOneInput(buffer, readsize); + free(buffer); + } + continue; + } + + /* Handle a string given in a file */ + + f = fopen(argv[i], "rb"); + if (f == NULL) + { + printf("** Failed to open %s: %s\n", argv[i], strerror(errno)); + continue; + } + + printf("------ %s ------\n", argv[i]); + + fseek(f, 0, SEEK_END); + filelen = ftell(f); + fseek(f, 0, SEEK_SET); + + buffer = (unsigned char *)malloc(filelen); + if (buffer == NULL) + { + printf("** Failed to allocate %lu bytes of memory\n", filelen); + fclose(f); + continue; + } + + readsize = fread(buffer, 1, filelen, f); + fclose(f); + + if (readsize != filelen) + printf("** File size is %lu but fread() returned %lu\n", filelen, readsize); + else + { + printf("Length = %lu\n", filelen); + LLVMFuzzerTestOneInput(buffer, filelen); + } + free(buffer); + } + +return 0; +} +#endif /* STANDALONE */ + +/* End */ diff --git a/pcre2/src/pcre2_internal.h b/pcre2/src/pcre2_internal.h index a4cf1e08c..6a8774ce8 100644 --- a/pcre2/src/pcre2_internal.h +++ b/pcre2/src/pcre2_internal.h @@ -2,12 +2,12 @@ * Perl-Compatible Regular Expressions * *************************************************/ -/* PCRE is a library of functions to support regular expressions whose syntax +/* PCRE2 is a library of functions to support regular expressions whose syntax and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2015 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -39,7 +39,10 @@ POSSIBILITY OF SUCH DAMAGE. */ /* We do not support both EBCDIC and Unicode at the same time. The "configure" -script prevents both being selected, but not everybody uses "configure". */ +script prevents both being selected, but not everybody uses "configure". EBCDIC +is only supported for the 8-bit library, but the check for this has to be later +in this file, because the first part is not width-dependent, and is included by +pcre2test.c with CODE_UNIT_WIDTH == 0. */ #if defined EBCDIC && defined SUPPORT_UNICODE #error The use of both EBCDIC and SUPPORT_UNICODE is not supported. @@ -70,6 +73,14 @@ typedef int BOOL; #include #endif +/* Older versions of MSVC lack snprintf(). This define allows for +warning/error-free compilation and testing with MSVC compilers back to at least +MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */ + +#if defined(_MSC_VER) && (_MSC_VER < 1900) +#define snprintf _snprintf +#endif + /* When compiling a DLL for Windows, the exported symbols have to be declared using some MS magic. I found some useful information on this web page: http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the @@ -131,20 +142,6 @@ pcre2_match() because of the way it backtracks. */ #define PCRE2_SPTR CUSTOM_SUBJECT_PTR #endif -/* When compiling with the MSVC compiler, it is sometimes necessary to include -a "calling convention" before exported function names. (This is secondhand -information; I know nothing about MSVC myself). For example, something like - - void __cdecl function(....) - -might be needed. 
In order so make this easy, all the exported functions have -PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not -set, we ensure here that it has no effect. */ - -#ifndef PCRE2_CALL_CONVENTION -#define PCRE2_CALL_CONVENTION -#endif - /* When checking for integer overflow in pcre2_compile(), we need to handle large integers. If a 64-bit integer type is available, we can use that. Otherwise we have to cast to double, which of course requires floating point @@ -166,7 +163,7 @@ by "configure". */ #endif /* When compiling for use with the Virtual Pascal compiler, these functions -need to have their names changed. PCRE must be compiled with the -DVPCOMPAT +need to have their names changed. PCRE2 must be compiled with the -DVPCOMPAT option on the command line. */ #ifdef VPCOMPAT @@ -189,7 +186,7 @@ neither (there some non-Unix environments where this is the case). */ #define memmove(a, b, c) bcopy(b, a, c) #else /* HAVE_BCOPY */ static void * -pcre_memmove(void *d, const void *s, size_t n) +pcre2_memmove(void *d, const void *s, size_t n) { size_t i; unsigned char *dest = (unsigned char *)d; @@ -207,7 +204,7 @@ else return (void *)(dest - n); } } -#define memmove(a, b, c) pcre_memmove(a, b, c) +#define memmove(a, b, c) pcre2_memmove(a, b, c) #endif /* not HAVE_BCOPY */ #endif /* not HAVE_MEMMOVE */ #endif /* not VPCOMPAT */ @@ -231,8 +228,15 @@ Unicode doesn't go beyond 0x0010ffff. */ #define MAX_UTF_CODE_POINT 0x10ffff -/* Compile-time errors are added to this value. As they are documented, it -should probably never be changed. */ +/* Compile-time positive error numbers (all except UTF errors, which are +negative) start at this value. It should probably never be changed, in case +some application is checking for specific numbers. There is a copy of this +#define in pcre2posix.c (which now no longer includes this file). 
Ideally, a +way of having a single definition should be found, but as the number is +unlikely to change, this is not a pressing issue. The original reason for +having a base other than 0 was to keep the absolute values of compile-time and +run-time error numbers numerically different, but in the event the code does +not rely on this. */ #define COMPILE_ERROR_BASE 100 @@ -266,21 +270,21 @@ advancing the pointer. */ #define GETUTF8(c, eptr) \ { \ - if ((c & 0x20) == 0) \ - c = ((c & 0x1f) << 6) | (eptr[1] & 0x3f); \ - else if ((c & 0x10) == 0) \ - c = ((c & 0x0f) << 12) | ((eptr[1] & 0x3f) << 6) | (eptr[2] & 0x3f); \ - else if ((c & 0x08) == 0) \ - c = ((c & 0x07) << 18) | ((eptr[1] & 0x3f) << 12) | \ - ((eptr[2] & 0x3f) << 6) | (eptr[3] & 0x3f); \ - else if ((c & 0x04) == 0) \ - c = ((c & 0x03) << 24) | ((eptr[1] & 0x3f) << 18) | \ - ((eptr[2] & 0x3f) << 12) | ((eptr[3] & 0x3f) << 6) | \ - (eptr[4] & 0x3f); \ + if ((c & 0x20u) == 0) \ + c = ((c & 0x1fu) << 6) | (eptr[1] & 0x3fu); \ + else if ((c & 0x10u) == 0) \ + c = ((c & 0x0fu) << 12) | ((eptr[1] & 0x3fu) << 6) | (eptr[2] & 0x3fu); \ + else if ((c & 0x08u) == 0) \ + c = ((c & 0x07u) << 18) | ((eptr[1] & 0x3fu) << 12) | \ + ((eptr[2] & 0x3fu) << 6) | (eptr[3] & 0x3fu); \ + else if ((c & 0x04u) == 0) \ + c = ((c & 0x03u) << 24) | ((eptr[1] & 0x3fu) << 18) | \ + ((eptr[2] & 0x3fu) << 12) | ((eptr[3] & 0x3fu) << 6) | \ + (eptr[4] & 0x3fu); \ else \ - c = ((c & 0x01) << 30) | ((eptr[1] & 0x3f) << 24) | \ - ((eptr[2] & 0x3f) << 18) | ((eptr[3] & 0x3f) << 12) | \ - ((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \ + c = ((c & 0x01u) << 30) | ((eptr[1] & 0x3fu) << 24) | \ + ((eptr[2] & 0x3fu) << 18) | ((eptr[3] & 0x3fu) << 12) | \ + ((eptr[4] & 0x3fu) << 6) | (eptr[5] & 0x3fu); \ } /* Base macro to pick up the remaining bytes of a UTF-8 character, advancing @@ -288,31 +292,31 @@ the pointer. 
*/ #define GETUTF8INC(c, eptr) \ { \ - if ((c & 0x20) == 0) \ - c = ((c & 0x1f) << 6) | (*eptr++ & 0x3f); \ - else if ((c & 0x10) == 0) \ + if ((c & 0x20u) == 0) \ + c = ((c & 0x1fu) << 6) | (*eptr++ & 0x3fu); \ + else if ((c & 0x10u) == 0) \ { \ - c = ((c & 0x0f) << 12) | ((*eptr & 0x3f) << 6) | (eptr[1] & 0x3f); \ + c = ((c & 0x0fu) << 12) | ((*eptr & 0x3fu) << 6) | (eptr[1] & 0x3fu); \ eptr += 2; \ } \ - else if ((c & 0x08) == 0) \ + else if ((c & 0x08u) == 0) \ { \ - c = ((c & 0x07) << 18) | ((*eptr & 0x3f) << 12) | \ - ((eptr[1] & 0x3f) << 6) | (eptr[2] & 0x3f); \ + c = ((c & 0x07u) << 18) | ((*eptr & 0x3fu) << 12) | \ + ((eptr[1] & 0x3fu) << 6) | (eptr[2] & 0x3fu); \ eptr += 3; \ } \ - else if ((c & 0x04) == 0) \ + else if ((c & 0x04u) == 0) \ { \ - c = ((c & 0x03) << 24) | ((*eptr & 0x3f) << 18) | \ - ((eptr[1] & 0x3f) << 12) | ((eptr[2] & 0x3f) << 6) | \ - (eptr[3] & 0x3f); \ + c = ((c & 0x03u) << 24) | ((*eptr & 0x3fu) << 18) | \ + ((eptr[1] & 0x3fu) << 12) | ((eptr[2] & 0x3fu) << 6) | \ + (eptr[3] & 0x3fu); \ eptr += 4; \ } \ else \ { \ - c = ((c & 0x01) << 30) | ((*eptr & 0x3f) << 24) | \ - ((eptr[1] & 0x3f) << 18) | ((eptr[2] & 0x3f) << 12) | \ - ((eptr[3] & 0x3f) << 6) | (eptr[4] & 0x3f); \ + c = ((c & 0x01u) << 30) | ((*eptr & 0x3fu) << 24) | \ + ((eptr[1] & 0x3fu) << 18) | ((eptr[2] & 0x3fu) << 12) | \ + ((eptr[3] & 0x3fu) << 6) | (eptr[4] & 0x3fu); \ eptr += 5; \ } \ } @@ -322,34 +326,34 @@ advancing the pointer, incrementing the length. 
*/ #define GETUTF8LEN(c, eptr, len) \ { \ - if ((c & 0x20) == 0) \ + if ((c & 0x20u) == 0) \ { \ - c = ((c & 0x1f) << 6) | (eptr[1] & 0x3f); \ + c = ((c & 0x1fu) << 6) | (eptr[1] & 0x3fu); \ len++; \ } \ - else if ((c & 0x10) == 0) \ + else if ((c & 0x10u) == 0) \ { \ - c = ((c & 0x0f) << 12) | ((eptr[1] & 0x3f) << 6) | (eptr[2] & 0x3f); \ + c = ((c & 0x0fu) << 12) | ((eptr[1] & 0x3fu) << 6) | (eptr[2] & 0x3fu); \ len += 2; \ } \ - else if ((c & 0x08) == 0) \ + else if ((c & 0x08u) == 0) \ {\ - c = ((c & 0x07) << 18) | ((eptr[1] & 0x3f) << 12) | \ - ((eptr[2] & 0x3f) << 6) | (eptr[3] & 0x3f); \ + c = ((c & 0x07u) << 18) | ((eptr[1] & 0x3fu) << 12) | \ + ((eptr[2] & 0x3fu) << 6) | (eptr[3] & 0x3fu); \ len += 3; \ } \ - else if ((c & 0x04) == 0) \ + else if ((c & 0x04u) == 0) \ { \ - c = ((c & 0x03) << 24) | ((eptr[1] & 0x3f) << 18) | \ - ((eptr[2] & 0x3f) << 12) | ((eptr[3] & 0x3f) << 6) | \ - (eptr[4] & 0x3f); \ + c = ((c & 0x03u) << 24) | ((eptr[1] & 0x3fu) << 18) | \ + ((eptr[2] & 0x3fu) << 12) | ((eptr[3] & 0x3fu) << 6) | \ + (eptr[4] & 0x3fu); \ len += 4; \ } \ else \ {\ - c = ((c & 0x01) << 30) | ((eptr[1] & 0x3f) << 24) | \ - ((eptr[2] & 0x3f) << 18) | ((eptr[3] & 0x3f) << 12) | \ - ((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \ + c = ((c & 0x01u) << 30) | ((eptr[1] & 0x3fu) << 24) | \ + ((eptr[2] & 0x3fu) << 18) | ((eptr[3] & 0x3fu) << 12) | \ + ((eptr[4] & 0x3fu) << 6) | (eptr[5] & 0x3fu); \ len += 5; \ } \ } @@ -379,7 +383,7 @@ other. NOTE: The values also appear in pcre2_jit_compile.c. */ /* Character U+180E (Mongolian Vowel Separator) is not included in the list of spaces in the Unicode file PropList.txt, and Perl does not recognize it as a space. However, in many other sources it is listed as a space and has been in -PCRE for a long time. */ +PCRE (both APIs) for a long time. */ #define HSPACE_LIST \ CHAR_HT, CHAR_SPACE, CHAR_NBSP, \ @@ -524,9 +528,11 @@ bytes in a code unit in that mode. 
*/ #define PCRE2_NL_SET 0x00008000 /* newline was set in the pattern */ #define PCRE2_NOTEMPTY_SET 0x00010000 /* (*NOTEMPTY) used ) keep */ #define PCRE2_NE_ATST_SET 0x00020000 /* (*NOTEMPTY_ATSTART) used) together */ -#define PCRE2_DEREF_TABLES 0x00040000 /* Release character tables. */ +#define PCRE2_DEREF_TABLES 0x00040000 /* release character tables */ #define PCRE2_NOJIT 0x00080000 /* (*NOJIT) used */ #define PCRE2_HASBKPORX 0x00100000 /* contains \P, \p, or \X */ +#define PCRE2_DUPCAPUSED 0x00200000 /* contains (?| */ +#define PCRE2_HASBKC 0x00400000 /* contains \C */ #define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32) @@ -545,17 +551,9 @@ req_unit match. */ #define REQ_CU_MAX 1000 -/* Bit definitions for entries in the pcre_ctypes table. */ - -#define ctype_space 0x01 -#define ctype_letter 0x02 -#define ctype_digit 0x04 -#define ctype_xdigit 0x08 -#define ctype_word 0x10 /* alphanumeric or '_' */ -#define ctype_meta 0x80 /* regexp meta char or zero (end pattern) */ - -/* Offsets for the bitmap tables in pcre_cbits. Each table contains a set -of bits for a class map. Some classes are built by combining these tables. */ +/* Offsets for the bitmap tables in the cbits set of tables. Each table +contains a set of bits for a class map. Some classes are built by combining +these tables. */ #define cbit_space 0 /* [:space:] or \s */ #define cbit_xdigit 32 /* [:xdigit:] */ @@ -569,19 +567,28 @@ of bits for a class map. Some classes are built by combining these tables. */ #define cbit_cntrl 288 /* [:cntrl:] */ #define cbit_length 320 /* Length of the cbits table */ -/* Offsets of the various tables from the base tables pointer, and -total length. */ +/* Bit definitions for entries in the ctypes table. 
*/ -#define lcc_offset 0 -#define fcc_offset 256 -#define cbits_offset 512 -#define ctypes_offset (cbits_offset + cbit_length) +#define ctype_space 0x01 +#define ctype_letter 0x02 +#define ctype_digit 0x04 +#define ctype_xdigit 0x08 +#define ctype_word 0x10 /* alphanumeric or '_' */ +#define ctype_meta 0x80 /* regexp meta char or zero (end pattern) */ + +/* Offsets of the various tables from the base tables pointer, and +total length of the tables. */ + +#define lcc_offset 0 /* Lower case */ +#define fcc_offset 256 /* Flip case */ +#define cbits_offset 512 /* Character classes */ +#define ctypes_offset (cbits_offset + cbit_length) /* Character types */ #define tables_length (ctypes_offset + 256) /* -------------------- Character and string names ------------------------ */ -/* If PCRE is to support UTF-8 on EBCDIC platforms, we cannot use normal +/* If PCRE2 is to support UTF-8 on EBCDIC platforms, we cannot use normal character constants like '*' because the compiler would emit their EBCDIC code, which is different from their ASCII/UTF-8 code. Instead we define macros for the characters so that they always use the ASCII/UTF-8 code when UTF-8 support @@ -589,7 +596,7 @@ is enabled. When UTF-8 support is not enabled, the definitions use character literals. Both character and string versions of each character are needed, and there are some longer strings as well. -This means that, on EBCDIC platforms, the PCRE library can handle either +This means that, on EBCDIC platforms, the PCRE2 library can handle either EBCDIC, or UTF-8, but not both. To support both in the same compiled library would need different lookups depending on whether PCRE2_UTF was set or not. This would make it impossible to use characters in switch/case statements, @@ -601,7 +608,7 @@ macros to give the functions distinct names. 
*/ #ifndef SUPPORT_UNICODE /* UTF-8 support is not enabled; use the platform-dependent character literals -so that PCRE works in both ASCII and EBCDIC environments, but only in non-UTF +so that PCRE2 works in both ASCII and EBCDIC environments, but only in non-UTF mode. Newline characters are problematic in EBCDIC. Though it has CR and LF characters, a common practice has been to use its NL (0x15) character as the line terminator in C-like processing environments. However, sometimes the LF @@ -609,7 +616,7 @@ line terminator in C-like processing environments. However, sometimes the LF http://unicode.org/standard/reports/tr13/tr13-5.html -PCRE defaults EBCDIC NL to 0x15, but has a build-time option to select 0x25 +PCRE2 defaults EBCDIC NL to 0x15, but has a build-time option to select 0x25 instead. Whichever is *not* chosen is defined as NEL. In both ASCII and EBCDIC environments, CHAR_NL and CHAR_LF are synonyms for the @@ -917,6 +924,7 @@ a positive value. */ #define STRING_NOTEMPTY_ATSTART_RIGHTPAR "NOTEMPTY_ATSTART)" #define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH=" #define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION=" +#define STRING_MARK "MARK" #else /* SUPPORT_UNICODE */ @@ -1189,6 +1197,7 @@ only. */ #define STRING_NOTEMPTY_ATSTART_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS #define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN #define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN +#define STRING_MARK STR_M STR_A STR_R STR_K #endif /* SUPPORT_UNICODE */ @@ -1212,7 +1221,7 @@ only. 
*/ #define PT_TABSIZE 11 /* Size of square table for autopossessify tests */ /* The following special properties are used only in XCLASS items, when POSIX -classes are specified and PCRE_UCP is set - in other words, for Unicode +classes are specified and PCRE2_UCP is set - in other words, for Unicode handling of these classes. They are not available via the \p or \P escapes like those in the above list, and so they do not take part in the autopossessifying table. */ @@ -1275,23 +1284,16 @@ mode rather than an escape sequence. It is also used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In non-DOTALL mode, "." behaves like \N. -The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc. -when PCRE_UCP is set and replacement of \d etc by \p sequences is required. -They must be contiguous, and remain in order so that the replacements can be -looked up from a table. - Negative numbers are used to encode a backreference (\1, \2, \3, etc.) in -check_escape(). There are two tests in the code for an escape -greater than ESC_b and less than ESC_Z to detect the types that may be -repeated. These are the types that consume characters. If any new escapes are -put in between that don't consume a character, that code will have to change. -*/ +check_escape(). There are tests in the code for an escape greater than ESC_b +and less than ESC_Z to detect the types that may be repeated. These are the +types that consume characters. If any new escapes are put in between that don't +consume a character, that code will have to change. 
*/ enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W, ESC_w, ESC_N, ESC_dum, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H, ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, - ESC_E, ESC_Q, ESC_g, ESC_k, - ESC_DU, ESC_du, ESC_SU, ESC_su, ESC_WU, ESC_wu }; + ESC_E, ESC_Q, ESC_g, ESC_k }; /********************** Opcode definitions ******************/ @@ -1301,12 +1303,12 @@ enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, Starting from 1 (i.e. after OP_END), the values up to OP_EOD must correspond in order to the list of escapes immediately above. Furthermore, values up to OP_DOLLM must not be changed without adjusting the table called autoposstab in -pcre_compile.c +pcre2_auto_possess.c Whenever this list is updated, the two macro definitions that follow must be updated to match. The possessification table called "opcode_possessify" in -pcre_compile.c must also be updated, and also the tables called "coptable" -and "poptable" in pcre_dfa_exec.c. +pcre2_compile.c must also be updated, and also the tables called "coptable" +and "poptable" in pcre2_dfa_match.c. ****** NOTE NOTE NOTE ******/ @@ -1357,7 +1359,8 @@ enum { OP_CIRC, /* 27 Start of line - not multiline */ OP_CIRCM, /* 28 Start of line - multiline */ - /* Single characters; caseful must precede the caseless ones */ + /* Single characters; caseful must precede the caseless ones, and these + must remain in this order, and adjacent. */ OP_CHAR, /* 29 Match one character, casefully */ OP_CHARI, /* 30 Match one character, caselessly */ @@ -1800,11 +1803,16 @@ typedef struct pcre2_serialized_data { #if defined PCRE2_CODE_UNIT_WIDTH && PCRE2_CODE_UNIT_WIDTH != 0 +/* EBCDIC is supported only for the 8-bit library. */ + +#if defined EBCDIC && PCRE2_CODE_UNIT_WIDTH != 8 +#error EBCDIC is not supported for the 16-bit or 32-bit libraries +#endif + /* This is the largest non-UTF code point. 
*/ #define MAX_NON_UTF_CHAR (0xffffffffU >> (32 - PCRE2_CODE_UNIT_WIDTH)) - /* Internal shared data tables and variables. These are used by more than one of the exported public functions. They have to be "external" in the C sense, but are not part of the PCRE2 public API. Although the data for some of them is @@ -1878,11 +1886,12 @@ private structures. */ /* Private "external" functions. These are internal functions that are called from modules other than the one in which they are defined. They have to be -"external" in the C sense, but are not part of the PCRE public API. They are +"external" in the C sense, but are not part of the PCRE2 public API. They are not referenced from pcre2test, and must not be defined when no code unit width is available. */ #define _pcre2_auto_possessify PCRE2_SUFFIX(_pcre2_auto_possessify_) +#define _pcre2_check_escape PCRE2_SUFFIX(_pcre2_check_escape_) #define _pcre2_find_bracket PCRE2_SUFFIX(_pcre2_find_bracket_) #define _pcre2_is_newline PCRE2_SUFFIX(_pcre2_is_newline_) #define _pcre2_jit_free_rodata PCRE2_SUFFIX(_pcre2_jit_free_rodata_) @@ -1904,6 +1913,8 @@ is available. */ extern int _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL, const compile_block *); +extern int _pcre2_check_escape(PCRE2_SPTR *, PCRE2_SPTR, uint32_t *, + int *, uint32_t, BOOL, compile_block *); extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int); extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *, BOOL); diff --git a/pcre2/src/pcre2_intmodedep.h b/pcre2/src/pcre2_intmodedep.h index f20f71e1e..ebff7e306 100644 --- a/pcre2/src/pcre2_intmodedep.h +++ b/pcre2/src/pcre2_intmodedep.h @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2014 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -72,7 +72,7 @@ just to undefine them all. */ #undef MAX_MARK #undef MAX_PATTERN_SIZE #undef MAX_UTF_SINGLE_CU -#undef NOT_FIRSTCHAR +#undef NOT_FIRSTCU #undef PUT #undef PUT2 #undef PUT2INC @@ -94,7 +94,7 @@ easier to maintain, the storing and loading of offsets from the compiled code unit string is now handled by the macros that are defined here. The macros are controlled by the value of LINK_SIZE. This defaults to 2, but -values of 2 or 4 are also supported. */ +values of 3 or 4 are also supported. */ /* ------------------- 8-bit support ------------------ */ @@ -102,29 +102,29 @@ values of 2 or 4 are also supported. */ #if LINK_SIZE == 2 #define PUT(a,n,d) \ - (a[n] = (d) >> 8), \ - (a[(n)+1] = (d) & 255) + (a[n] = (PCRE2_UCHAR)((d) >> 8)), \ + (a[(n)+1] = (PCRE2_UCHAR)((d) & 255)) #define GET(a,n) \ - (((a)[n] << 8) | (a)[(n)+1]) + (unsigned int)(((a)[n] << 8) | (a)[(n)+1]) #define MAX_PATTERN_SIZE (1 << 16) #elif LINK_SIZE == 3 #define PUT(a,n,d) \ - (a[n] = (d) >> 16), \ - (a[(n)+1] = (d) >> 8), \ - (a[(n)+2] = (d) & 255) + (a[n] = (PCRE2_UCHAR)((d) >> 16)), \ + (a[(n)+1] = (PCRE2_UCHAR)((d) >> 8)), \ + (a[(n)+2] = (PCRE2_UCHAR)((d) & 255)) #define GET(a,n) \ - (((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2]) + (unsigned int)(((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2]) #define MAX_PATTERN_SIZE (1 << 24) #elif LINK_SIZE == 4 #define PUT(a,n,d) \ - (a[n] = (d) >> 24), \ - (a[(n)+1] = (d) >> 16), \ - (a[(n)+2] = (d) >> 8), \ - (a[(n)+3] = (d) & 255) + (a[n] = (PCRE2_UCHAR)((d) >> 24)), \ + (a[(n)+1] = (PCRE2_UCHAR)((d) >> 16)), \ + (a[(n)+2] = (PCRE2_UCHAR)((d) >> 8)), \ + (a[(n)+3] = (PCRE2_UCHAR)((d) & 
255)) #define GET(a,n) \ - (((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3]) + (unsigned int)(((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3]) #define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */ #else @@ -140,7 +140,7 @@ values of 2 or 4 are also supported. */ #undef LINK_SIZE #define LINK_SIZE 1 #define PUT(a,n,d) \ - (a[n] = (d)) + (a[n] = (PCRE2_UCHAR)(d)) #define GET(a,n) \ (a[n]) #define MAX_PATTERN_SIZE (1 << 16) @@ -149,10 +149,10 @@ values of 2 or 4 are also supported. */ #undef LINK_SIZE #define LINK_SIZE 2 #define PUT(a,n,d) \ - (a[n] = (d) >> 16), \ - (a[(n)+1] = (d) & 65535) + (a[n] = (PCRE2_UCHAR)((d) >> 16)), \ + (a[(n)+1] = (PCRE2_UCHAR)((d) & 65535)) #define GET(a,n) \ - (((a)[n] << 16) | (a)[(n)+1]) + (unsigned int)(((a)[n] << 16) | (a)[(n)+1]) #define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */ #else @@ -200,11 +200,11 @@ arithmetic results in a signed value. Hence the cast. */ #endif /* Other macros that are different for 8-bit mode. The MAX_255 macro checks -whether its argument is less than 256. The maximum length of a MARK name must -fit in one code unit; currently it is set to 255 or 65535. The TABLE_GET macro -is used to access elements of tables containing exactly 256 items. When code -points can be greater than 255, a check is needed before accessing these -tables. */ +whether its argument, which is assumed to be one code unit, is less than 256. +The maximum length of a MARK name must fit in one code unit; currently it is +set to 255 or 65535. The TABLE_GET macro is used to access elements of tables +containing exactly 256 items. When code points can be greater than 255, a check +is needed before accessing these tables. */ #if PCRE2_CODE_UNIT_WIDTH == 8 #define MAX_255(c) TRUE @@ -252,7 +252,7 @@ UTF support is omitted, we don't even define them. 
*/ /* #define MAX_UTF_SINGLE_CU */ /* #define HAS_EXTRALEN(c) */ /* #define GET_EXTRALEN(c) */ -/* #define NOT_FIRSTCHAR(c) */ +/* #define NOT_FIRSTCU(c) */ #define GETCHAR(c, eptr) c = *eptr; #define GETCHARTEST(c, eptr) c = *eptr; #define GETCHARINC(c, eptr) c = *eptr++; @@ -283,47 +283,47 @@ UTF support is omitted, we don't even define them. */ /* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE. Otherwise it has an undefined behaviour. */ -#define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3f]) +#define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3fu]) -/* Returns TRUE, if the given character is not the first character -of a UTF sequence. */ +/* Returns TRUE, if the given value is not the first code unit of a UTF +sequence. */ -#define NOT_FIRSTCHAR(c) (((c) & 0xc0) == 0x80) +#define NOT_FIRSTCU(c) (((c) & 0xc0u) == 0x80u) /* Get the next UTF-8 character, not advancing the pointer. This is called when we know we are in UTF-8 mode. */ #define GETCHAR(c, eptr) \ c = *eptr; \ - if (c >= 0xc0) GETUTF8(c, eptr); + if (c >= 0xc0u) GETUTF8(c, eptr); /* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the pointer. */ #define GETCHARTEST(c, eptr) \ c = *eptr; \ - if (utf && c >= 0xc0) GETUTF8(c, eptr); + if (utf && c >= 0xc0u) GETUTF8(c, eptr); /* Get the next UTF-8 character, advancing the pointer. This is called when we know we are in UTF-8 mode. */ #define GETCHARINC(c, eptr) \ c = *eptr++; \ - if (c >= 0xc0) GETUTF8INC(c, eptr); + if (c >= 0xc0u) GETUTF8INC(c, eptr); /* Get the next character, testing for UTF-8 mode, and advancing the pointer. This is called when we don't know if we are in UTF-8 mode. */ #define GETCHARINCTEST(c, eptr) \ c = *eptr++; \ - if (utf && c >= 0xc0) GETUTF8INC(c, eptr); + if (utf && c >= 0xc0u) GETUTF8INC(c, eptr); /* Get the next UTF-8 character, not advancing the pointer, incrementing length if there are extra bytes. This is called when we know we are in UTF-8 mode. 
*/ #define GETCHARLEN(c, eptr, len) \ c = *eptr; \ - if (c >= 0xc0) GETUTF8LEN(c, eptr, len); + if (c >= 0xc0u) GETUTF8LEN(c, eptr, len); /* Get the next UTF-8 character, testing for UTF-8 mode, not advancing the pointer, incrementing length if there are extra bytes. This is called when we @@ -331,21 +331,21 @@ do not know if we are in UTF-8 mode. */ #define GETCHARLENTEST(c, eptr, len) \ c = *eptr; \ - if (utf && c >= 0xc0) GETUTF8LEN(c, eptr, len); + if (utf && c >= 0xc0u) GETUTF8LEN(c, eptr, len); /* If the pointer is not at the start of a character, move it back until it is. This is called only in UTF-8 mode - we don't put a test within the macro because almost all calls are already within a block of UTF-8 only code. */ -#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr-- +#define BACKCHAR(eptr) while((*eptr & 0xc0u) == 0x80u) eptr-- /* Same as above, just in the other direction. */ -#define FORWARDCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr++ -#define FORWARDCHARTEST(eptr,end) while(eptr < end && (*eptr & 0xc0) == 0x80) eptr++ +#define FORWARDCHAR(eptr) while((*eptr & 0xc0u) == 0x80u) eptr++ +#define FORWARDCHARTEST(eptr,end) while(eptr < end && (*eptr & 0xc0u) == 0x80u) eptr++ /* Same as above, but it allows a fully customizable form. */ #define ACROSSCHAR(condition, eptr, action) \ - while((condition) && ((eptr) & 0xc0) == 0x80) action + while((condition) && ((eptr) & 0xc0u) == 0x80u) action /* Deposit a character into memory, returning the number of code units. */ @@ -364,63 +364,63 @@ because almost all calls are already within a block of UTF-8 only code. */ /* Tests whether the code point needs extra characters to decode. */ -#define HAS_EXTRALEN(c) (((c) & 0xfc00) == 0xd800) +#define HAS_EXTRALEN(c) (((c) & 0xfc00u) == 0xd800u) /* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE. Otherwise it has an undefined behaviour. 
*/ #define GET_EXTRALEN(c) 1 -/* Returns TRUE, if the given character is not the first character -of a UTF sequence. */ +/* Returns TRUE, if the given value is not the first code unit of a UTF +sequence. */ -#define NOT_FIRSTCHAR(c) (((c) & 0xfc00) == 0xdc00) +#define NOT_FIRSTCU(c) (((c) & 0xfc00u) == 0xdc00u) /* Base macro to pick up the low surrogate of a UTF-16 character, not advancing the pointer. */ #define GETUTF16(c, eptr) \ - { c = (((c & 0x3ff) << 10) | (eptr[1] & 0x3ff)) + 0x10000; } + { c = (((c & 0x3ffu) << 10) | (eptr[1] & 0x3ffu)) + 0x10000u; } /* Get the next UTF-16 character, not advancing the pointer. This is called when we know we are in UTF-16 mode. */ #define GETCHAR(c, eptr) \ c = *eptr; \ - if ((c & 0xfc00) == 0xd800) GETUTF16(c, eptr); + if ((c & 0xfc00u) == 0xd800u) GETUTF16(c, eptr); /* Get the next UTF-16 character, testing for UTF-16 mode, and not advancing the pointer. */ #define GETCHARTEST(c, eptr) \ c = *eptr; \ - if (utf && (c & 0xfc00) == 0xd800) GETUTF16(c, eptr); + if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16(c, eptr); /* Base macro to pick up the low surrogate of a UTF-16 character, advancing the pointer. */ #define GETUTF16INC(c, eptr) \ - { c = (((c & 0x3ff) << 10) | (*eptr++ & 0x3ff)) + 0x10000; } + { c = (((c & 0x3ffu) << 10) | (*eptr++ & 0x3ffu)) + 0x10000u; } /* Get the next UTF-16 character, advancing the pointer. This is called when we know we are in UTF-16 mode. */ #define GETCHARINC(c, eptr) \ c = *eptr++; \ - if ((c & 0xfc00) == 0xd800) GETUTF16INC(c, eptr); + if ((c & 0xfc00u) == 0xd800u) GETUTF16INC(c, eptr); /* Get the next character, testing for UTF-16 mode, and advancing the pointer. This is called when we don't know if we are in UTF-16 mode. 
*/ #define GETCHARINCTEST(c, eptr) \ c = *eptr++; \ - if (utf && (c & 0xfc00) == 0xd800) GETUTF16INC(c, eptr); + if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16INC(c, eptr); /* Base macro to pick up the low surrogate of a UTF-16 character, not advancing the pointer, incrementing the length. */ #define GETUTF16LEN(c, eptr, len) \ - { c = (((c & 0x3ff) << 10) | (eptr[1] & 0x3ff)) + 0x10000; len++; } + { c = (((c & 0x3ffu) << 10) | (eptr[1] & 0x3ffu)) + 0x10000u; len++; } /* Get the next UTF-16 character, not advancing the pointer, incrementing length if there is a low surrogate. This is called when we know we are in @@ -428,7 +428,7 @@ UTF-16 mode. */ #define GETCHARLEN(c, eptr, len) \ c = *eptr; \ - if ((c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len); + if ((c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len); /* Get the next UTF-816character, testing for UTF-16 mode, not advancing the pointer, incrementing length if there is a low surrogate. This is called when @@ -436,22 +436,22 @@ we do not know if we are in UTF-16 mode. */ #define GETCHARLENTEST(c, eptr, len) \ c = *eptr; \ - if (utf && (c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len); + if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len); /* If the pointer is not at the start of a character, move it back until it is. This is called only in UTF-16 mode - we don't put a test within the macro because almost all calls are already within a block of UTF-16 only code. */ -#define BACKCHAR(eptr) if ((*eptr & 0xfc00) == 0xdc00) eptr-- +#define BACKCHAR(eptr) if ((*eptr & 0xfc00u) == 0xdc00u) eptr-- /* Same as above, just in the other direction. 
*/ -#define FORWARDCHAR(eptr) if ((*eptr & 0xfc00) == 0xdc00) eptr++ -#define FORWARDCHARTEST(eptr,end) if (eptr < end && (*eptr & 0xfc00) == 0xdc00) eptr++ +#define FORWARDCHAR(eptr) if ((*eptr & 0xfc00u) == 0xdc00u) eptr++ +#define FORWARDCHARTEST(eptr,end) if (eptr < end && (*eptr & 0xfc00u) == 0xdc00u) eptr++ /* Same as above, but it allows a fully customizable form. */ #define ACROSSCHAR(condition, eptr, action) \ - if ((condition) && ((eptr) & 0xfc00) == 0xdc00) action + if ((condition) && ((eptr) & 0xfc00u) == 0xdc00u) action /* Deposit a character into memory, returning the number of code units. */ @@ -469,7 +469,7 @@ into one PCRE2_UCHAR unit. */ #define MAX_UTF_SINGLE_CU (0x10ffffu) #define HAS_EXTRALEN(c) (0) #define GET_EXTRALEN(c) (0) -#define NOT_FIRSTCHAR(c) (0) +#define NOT_FIRSTCU(c) (0) /* Get the next UTF-32 character, not advancing the pointer. This is called when we know we are in UTF-32 mode. */ @@ -562,6 +562,7 @@ typedef struct pcre2_real_compile_context { int (*stack_guard)(uint32_t, void *); void *stack_guard_data; const uint8_t *tables; + PCRE2_SIZE max_pattern_length; uint16_t bsr_convention; uint16_t newline_convention; uint32_t parens_nest_limit; @@ -580,6 +581,7 @@ typedef struct pcre2_real_match_context { #endif int (*callout)(pcre2_callout_block *, void *); void *callout_data; + PCRE2_SIZE offset_limit; uint32_t match_limit; uint32_t recursion_limit; } pcre2_real_match_context; @@ -588,11 +590,17 @@ typedef struct pcre2_real_match_context { defined specially because it is required in pcre2_serialize_decode() when copying the size from possibly unaligned memory into a variable of the same type. Use a macro rather than a typedef to avoid compiler warnings when this -file is included multiple times by pcre2test. */ +file is included multiple times by pcre2test. LOOKBEHIND_MAX specifies the +largest lookbehind that is supported. 
(OP_REVERSE in a pattern has a 16-bit +argument in 8-bit and 16-bit modes, so we need no more than a 16-bit field +here.) */ #undef CODE_BLOCKSIZE_TYPE #define CODE_BLOCKSIZE_TYPE size_t +#undef LOOKBEHIND_MAX +#define LOOKBEHIND_MAX UINT16_MAX + typedef struct pcre2_real_code { pcre2_memctl memctl; /* Memory control fields */ const uint8_t *tables; /* The character tables */ @@ -640,13 +648,26 @@ typedef struct pcre2_real_match_data { #ifndef PCRE2_PCRE2TEST -/* Structure for checking for mutual recursion when scanning compiled code. */ +/* Structures for checking for mutual recursion when scanning compiled or +parsed code. */ typedef struct recurse_check { struct recurse_check *prev; PCRE2_SPTR group; } recurse_check; +typedef struct parsed_recurse_check { + struct parsed_recurse_check *prev; + uint32_t *groupptr; +} parsed_recurse_check; + +/* Structure for building a cache when filling in recursion offsets. */ + +typedef struct recurse_cache { + PCRE2_SPTR group; + int groupnumber; +} recurse_cache; + /* Structure for maintaining a chain of pointers to the currently incomplete branches, for testing for left recursion while compiling. 
*/ @@ -678,9 +699,10 @@ typedef struct compile_block { PCRE2_SPTR start_code; /* The start of the compiled code */ PCRE2_SPTR start_pattern; /* The start of the pattern */ PCRE2_SPTR end_pattern; /* The end of the pattern */ - PCRE2_UCHAR *hwm; /* High watermark of workspace */ PCRE2_UCHAR *name_table; /* The name/number table */ - size_t workspace_size; /* Size of workspace */ + PCRE2_SIZE workspace_size; /* Size of workspace */ + PCRE2_SIZE small_ref_offset[10]; /* Offsets for \1 to \9 */ + PCRE2_SIZE erroroffset; /* Offset of error in pattern */ uint16_t names_found; /* Number of entries so far */ uint16_t name_entry_size; /* Size of each entry */ open_capitem *open_caps; /* Chain of open capture items */ @@ -688,12 +710,17 @@ typedef struct compile_block { uint32_t named_group_list_size; /* Number of entries in the list */ uint32_t external_options; /* External (initial) options */ uint32_t external_flags; /* External flag bits to be set */ - uint32_t bracount; /* Count of capturing parens as we compile */ - uint32_t final_bracount; /* Saved value after first pass */ + uint32_t bracount; /* Count of capturing parentheses */ + uint32_t lastcapture; /* Last capture encountered */ + uint32_t *parsed_pattern; /* Parsed pattern buffer */ + uint32_t *parsed_pattern_end; /* Parsed pattern should not get here */ + uint32_t *groupinfo; /* Group info vector */ uint32_t top_backref; /* Maximum back reference */ uint32_t backref_map; /* Bitmap of low back refs */ uint32_t nltype; /* Newline type */ uint32_t nllen; /* Newline string length */ + uint32_t class_range_start; /* Overall class range start */ + uint32_t class_range_end; /* Overall class range end */ PCRE2_UCHAR nl[4]; /* Newline string when fixed length */ int max_lookbehind; /* Maximum lookbehind (characters) */ int parens_depth; /* Depth of nested parentheses */ @@ -701,9 +728,8 @@ typedef struct compile_block { int req_varyopt; /* "After variable item" flag for reqbyte */ BOOL had_accept; /* (*ACCEPT) 
encountered */ BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */ - BOOL check_lookbehind; /* Lookbehinds need later checking */ + BOOL had_recurse; /* Had a recursion or subroutine call */ BOOL dupnames; /* Duplicate names exist */ - BOOL iscondassert; /* Next assert is a condition */ } compile_block; /* Structure for keeping the properties of the in-memory stack used @@ -819,6 +845,7 @@ typedef struct dfa_match_block { PCRE2_SPTR last_used_ptr; /* Latest consulted character */ const uint8_t *tables; /* Character tables */ PCRE2_SIZE start_offset; /* The start offset value */ + uint32_t match_limit_recursion; /* As it says */ uint32_t moptions; /* Match options */ uint32_t poptions; /* Pattern options */ uint32_t nltype; /* Newline type */ diff --git a/pcre2/src/pcre2_jit_compile.c b/pcre2/src/pcre2_jit_compile.c index 272ab2857..8dea90a1c 100644 --- a/pcre2/src/pcre2_jit_compile.c +++ b/pcre2/src/pcre2_jit_compile.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2014 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -38,7 +38,6 @@ POSSIBILITY OF SUCH DAMAGE. ----------------------------------------------------------------------------- */ - #ifdef HAVE_CONFIG_H #include "config.h" #endif @@ -186,9 +185,10 @@ typedef struct jit_arguments { int (*callout)(pcre2_callout_block *, void *); void *callout_data; /* Everything else after. 
*/ - sljit_ui limit_match; - uint32_t oveccount; - uint32_t options; + sljit_uw offset_limit; + sljit_u32 limit_match; + sljit_u32 oveccount; + sljit_u32 options; } jit_arguments; #define JIT_NUMBER_OF_COMPILE_MODES 3 @@ -197,8 +197,8 @@ typedef struct executable_functions { void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES]; void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES]; sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES]; - sljit_ui top_bracket; - sljit_ui limit_match; + sljit_u32 top_bracket; + sljit_u32 limit_match; } executable_functions; typedef struct jump_list { @@ -349,43 +349,48 @@ typedef struct compiler_common { /* First byte code. */ PCRE2_SPTR start; /* Maps private data offset to each opcode. */ - sljit_si *private_data_ptrs; + sljit_s32 *private_data_ptrs; /* Chain list of read-only data ptrs. */ void *read_only_data_head; /* Tells whether the capturing bracket is optimized. */ - sljit_ub *optimized_cbracket; + sljit_u8 *optimized_cbracket; /* Tells whether the starting offset is a target of then. */ - sljit_ub *then_offsets; + sljit_u8 *then_offsets; /* Current position where a THEN must jump. */ then_trap_backtrack *then_trap; /* Starting offset of private data for capturing brackets. */ - int cbra_ptr; + sljit_s32 cbra_ptr; /* Output vector starting point. Must be divisible by 2. */ - int ovector_start; + sljit_s32 ovector_start; /* Points to the starting character of the current match. */ - int start_ptr; + sljit_s32 start_ptr; /* Last known position of the requested byte. */ - int req_char_ptr; + sljit_s32 req_char_ptr; /* Head of the last recursion. */ - int recursive_head_ptr; + sljit_s32 recursive_head_ptr; /* First inspected character for partial matching. (Needed for avoiding zero length partial matches.) */ - int start_used_ptr; + sljit_s32 start_used_ptr; /* Starting pointer for partial soft matches. */ - int hit_start; - /* End pointer of the first line. 
*/ - int first_line_end; + sljit_s32 hit_start; + /* Pointer of the match end position. */ + sljit_s32 match_end_ptr; /* Points to the marked string. */ - int mark_ptr; + sljit_s32 mark_ptr; /* Recursive control verb management chain. */ - int control_head_ptr; + sljit_s32 control_head_ptr; /* Points to the last matched capture block index. */ - int capture_last_ptr; + sljit_s32 capture_last_ptr; + /* Fast forward skipping byte code pointer. */ + PCRE2_SPTR fast_forward_bc_ptr; + /* Locals used by fast fail optimization. */ + sljit_s32 fast_fail_start_ptr; + sljit_s32 fast_fail_end_ptr; /* Flipped and lower case tables. */ - const sljit_ub *fcc; + const sljit_u8 *fcc; sljit_sw lcc; - /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */ + /* Mode can be PCRE2_JIT_COMPLETE and others. */ int mode; /* TRUE, when minlength is greater than 0. */ BOOL might_be_empty; @@ -395,18 +400,20 @@ typedef struct compiler_common { BOOL has_skip_arg; /* (*THEN) is found in the pattern. */ BOOL has_then; + /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */ + BOOL has_skip_in_assert_back; /* Currently in recurse or negative assert. */ BOOL local_exit; /* Currently in a positive assert. */ BOOL positive_assert; /* Newline control. */ int nltype; - sljit_ui nlmax; - sljit_ui nlmin; + sljit_u32 nlmax; + sljit_u32 nlmin; int newline; int bsr_nltype; - sljit_ui bsr_nlmax; - sljit_ui bsr_nlmin; + sljit_u32 bsr_nlmax; + sljit_u32 bsr_nlmin; /* Dollar endonly. */ int endonly; /* Tables. 
*/ @@ -463,27 +470,27 @@ typedef struct compare_context { #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED int ucharptr; union { - sljit_si asint; - sljit_uh asushort; + sljit_s32 asint; + sljit_u16 asushort; #if PCRE2_CODE_UNIT_WIDTH == 8 - sljit_ub asbyte; - sljit_ub asuchars[4]; + sljit_u8 asbyte; + sljit_u8 asuchars[4]; #elif PCRE2_CODE_UNIT_WIDTH == 16 - sljit_uh asuchars[2]; + sljit_u16 asuchars[2]; #elif PCRE2_CODE_UNIT_WIDTH == 32 - sljit_ui asuchars[1]; + sljit_u32 asuchars[1]; #endif } c; union { - sljit_si asint; - sljit_uh asushort; + sljit_s32 asint; + sljit_u16 asushort; #if PCRE2_CODE_UNIT_WIDTH == 8 - sljit_ub asbyte; - sljit_ub asuchars[4]; + sljit_u8 asbyte; + sljit_u8 asuchars[4]; #elif PCRE2_CODE_UNIT_WIDTH == 16 - sljit_uh asuchars[2]; + sljit_u16 asuchars[2]; #elif PCRE2_CODE_UNIT_WIDTH == 32 - sljit_ui asuchars[1]; + sljit_u32 asuchars[1]; #endif } oc; #endif @@ -525,19 +532,19 @@ the start pointers when the end of the capturing group has not yet reached. */ #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start]) #if PCRE2_CODE_UNIT_WIDTH == 8 -#define MOV_UCHAR SLJIT_MOV_UB -#define MOVU_UCHAR SLJIT_MOVU_UB +#define MOV_UCHAR SLJIT_MOV_U8 +#define MOVU_UCHAR SLJIT_MOVU_U8 #define IN_UCHARS(x) (x) #elif PCRE2_CODE_UNIT_WIDTH == 16 -#define MOV_UCHAR SLJIT_MOV_UH -#define MOVU_UCHAR SLJIT_MOVU_UH +#define MOV_UCHAR SLJIT_MOV_U16 +#define MOVU_UCHAR SLJIT_MOVU_U16 #define UCHAR_SHIFT (1) -#define IN_UCHARS(x) ((x) << UCHAR_SHIFT) +#define IN_UCHARS(x) ((x) * 2) #elif PCRE2_CODE_UNIT_WIDTH == 32 -#define MOV_UCHAR SLJIT_MOV_UI -#define MOVU_UCHAR SLJIT_MOVU_UI +#define MOV_UCHAR SLJIT_MOV_U32 +#define MOVU_UCHAR SLJIT_MOVU_U32 #define UCHAR_SHIFT (2) -#define IN_UCHARS(x) ((x) << UCHAR_SHIFT) +#define IN_UCHARS(x) ((x) * 4) #else #error Unsupported compiling mode #endif @@ -593,11 +600,6 @@ SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS); return count; } -static int ones_in_half_byte[16] = { - /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 
2, 3, - /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4 -}; - /* Functions whose might need modification for all new supported opcodes: next_opcode check_opcode_types @@ -813,6 +815,7 @@ static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPT { int count; PCRE2_SPTR slot; +PCRE2_SPTR assert_back_end = cc - 1; /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */ while (cc < ccend) @@ -884,6 +887,13 @@ while (cc < ccend) cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE); break; + case OP_ASSERTBACK: + slot = bracketend(cc); + if (slot > assert_back_end) + assert_back_end = slot; + cc += 1 + LINK_SIZE; + break; + case OP_THEN_ARG: common->has_then = TRUE; common->control_head_ptr = 1; @@ -905,9 +915,17 @@ while (cc < ccend) cc += 1; break; + case OP_SKIP: + if (cc < assert_back_end) + common->has_skip_in_assert_back = TRUE; + cc += 1; + break; + case OP_SKIP_ARG: common->control_head_ptr = 1; common->has_skip_arg = TRUE; + if (cc < assert_back_end) + common->has_skip_in_assert_back = TRUE; cc += 1 + 2 + cc[1]; break; @@ -921,10 +939,189 @@ while (cc < ccend) return TRUE; } +static BOOL is_accelerated_repeat(PCRE2_SPTR cc) +{ +switch(*cc) + { + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + case OP_TYPEPOSSTAR: + case OP_TYPEPOSPLUS: + return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI); + + case OP_STAR: + case OP_MINSTAR: + case OP_PLUS: + case OP_MINPLUS: + case OP_POSSTAR: + case OP_POSPLUS: + + case OP_STARI: + case OP_MINSTARI: + case OP_PLUSI: + case OP_MINPLUSI: + case OP_POSSTARI: + case OP_POSPLUSI: + + case OP_NOTSTAR: + case OP_NOTMINSTAR: + case OP_NOTPLUS: + case OP_NOTMINPLUS: + case OP_NOTPOSSTAR: + case OP_NOTPOSPLUS: + + case OP_NOTSTARI: + case OP_NOTMINSTARI: + case OP_NOTPLUSI: + case OP_NOTMINPLUSI: + case OP_NOTPOSSTARI: + case OP_NOTPOSPLUSI: + return TRUE; + + case OP_CLASS: + case OP_NCLASS: +#if defined 
SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 + case OP_XCLASS: + cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(PCRE2_UCHAR))); +#else + cc += (1 + (32 / sizeof(PCRE2_UCHAR))); +#endif + + switch(*cc) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRPOSSTAR: + case OP_CRPOSPLUS: + return TRUE; + } + break; + } +return FALSE; +} + +static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start) +{ +PCRE2_SPTR cc = common->start; +PCRE2_SPTR end; + +/* Skip not repeated brackets. */ +while (TRUE) + { + switch(*cc) + { + case OP_SOD: + case OP_SOM: + case OP_SET_SOM: + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + case OP_EODN: + case OP_EOD: + case OP_CIRC: + case OP_CIRCM: + case OP_DOLL: + case OP_DOLLM: + /* Zero width assertions. */ + cc++; + continue; + } + + if (*cc != OP_BRA && *cc != OP_CBRA) + break; + + end = cc + GET(cc, 1); + if (*end != OP_KET || PRIVATE_DATA(end) != 0) + return FALSE; + if (*cc == OP_CBRA) + { + if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) + return FALSE; + cc += IMM2_SIZE; + } + cc += 1 + LINK_SIZE; + } + +if (is_accelerated_repeat(cc)) + { + common->fast_forward_bc_ptr = cc; + common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start; + *private_data_start += sizeof(sljit_sw); + return TRUE; + } +return FALSE; +} + +static SLJIT_INLINE void detect_fast_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth) +{ + PCRE2_SPTR next_alt; + + SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA); + + if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) + return; + + next_alt = bracketend(cc) - (1 + LINK_SIZE); + if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0) + return; + + do + { + next_alt = cc + GET(cc, 1); + + cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? 
IMM2_SIZE : 0); + + while (TRUE) + { + switch(*cc) + { + case OP_SOD: + case OP_SOM: + case OP_SET_SOM: + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + case OP_EODN: + case OP_EOD: + case OP_CIRC: + case OP_CIRCM: + case OP_DOLL: + case OP_DOLLM: + /* Zero width assertions. */ + cc++; + continue; + } + break; + } + + if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA)) + detect_fast_fail(common, cc, private_data_start, depth - 1); + + if (is_accelerated_repeat(cc)) + { + common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start; + + if (common->fast_fail_start_ptr == 0) + common->fast_fail_start_ptr = *private_data_start; + + *private_data_start += sizeof(sljit_sw); + common->fast_fail_end_ptr = *private_data_start; + + if (*private_data_start > SLJIT_MAX_LOCAL_SIZE) + return; + } + + cc = next_alt; + } + while (*cc == OP_ALT); +} + static int get_class_iterator_size(PCRE2_SPTR cc) { -sljit_ui min; -sljit_ui max; +sljit_u32 min; +sljit_u32 max; switch(*cc) { case OP_CRSTAR: @@ -961,7 +1158,7 @@ PCRE2_SPTR next_end; PCRE2_SPTR max_end; PCRE2_UCHAR type; sljit_sw length = end - begin; -sljit_si min, max, i; +sljit_s32 min, max, i; /* Detect fixed iterations first. 
*/ if (end[-(1 + LINK_SIZE)] != OP_KET) @@ -1097,6 +1294,7 @@ PCRE2_SPTR alternative; PCRE2_SPTR end = NULL; int private_data_ptr = *private_data_start; int space, size, bracketlen; +BOOL repeat_check = TRUE; while (cc < ccend) { @@ -1106,7 +1304,8 @@ while (cc < ccend) if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE) break; - if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND) + if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)) + { if (detect_repeat(common, cc)) { /* These brackets are converted to repeats, so no global @@ -1114,6 +1313,8 @@ while (cc < ccend) if (cc >= end) end = bracketend(cc); } + } + repeat_check = TRUE; switch(*cc) { @@ -1169,6 +1370,13 @@ while (cc < ccend) bracketlen = 1 + LINK_SIZE + IMM2_SIZE; break; + case OP_BRAZERO: + case OP_BRAMINZERO: + case OP_BRAPOSZERO: + repeat_check = FALSE; + size = 1; + break; + CASE_ITERATOR_PRIVATE_DATA_1 space = 1; size = -2; @@ -1208,14 +1416,14 @@ while (cc < ccend) case OP_CLASS: case OP_NCLASS: - size = 1 + 32 / sizeof(PCRE2_UCHAR); space = get_class_iterator_size(cc + size); + size = 1 + 32 / sizeof(PCRE2_UCHAR); break; #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 case OP_XCLASS: - size = GET(cc, 1); space = get_class_iterator_size(cc + size); + size = GET(cc, 1); break; #endif @@ -1354,6 +1562,13 @@ while (cc < ccend) cc += 1 + LINK_SIZE + IMM2_SIZE; break; + case OP_THEN: + stack_restore = TRUE; + if (common->control_head_ptr != 0) + *needs_control_head = TRUE; + cc ++; + break; + default: stack_restore = TRUE; /* Fall through. 
*/ @@ -2008,7 +2223,7 @@ if (save) SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty))); } -static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_ub *current_offset) +static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset) { PCRE2_SPTR end = bracketend(cc); BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT; @@ -2146,6 +2361,7 @@ add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0)); static SLJIT_INLINE void free_stack(compiler_common *common, int size) { DEFINE_COMPILER; + SLJIT_ASSERT(size > 0); OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw)); } @@ -2174,7 +2390,7 @@ static SLJIT_INLINE void reset_ovector(compiler_common *common, int length) { DEFINE_COMPILER; struct sljit_label *loop; -int i; +sljit_s32 i; /* At this point we can freely use all temporary registers. */ SLJIT_ASSERT(length > 1); @@ -2196,6 +2412,18 @@ else } } +static SLJIT_INLINE void reset_fast_fail(compiler_common *common) +{ +DEFINE_COMPILER; +sljit_s32 i; + +SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr); + +OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw)) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0); +} + static SLJIT_INLINE void do_reset_match(compiler_common *common, int length) { DEFINE_COMPILER; @@ -2249,6 +2477,7 @@ while (current != NULL) SLJIT_ASSERT_STOP(); break; } + SLJIT_ASSERT(current > (sljit_sw*)current[-1]); current = (sljit_sw*)current[-1]; } return -1; @@ -2267,7 +2496,7 @@ OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr); if (common->mark_ptr != 0) OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); -OP1(SLJIT_MOV_UI, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, 
oveccount)); +OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0); if (common->mark_ptr != 0) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0); @@ -2286,7 +2515,7 @@ OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT); #endif SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8); if (sizeof(PCRE2_SIZE) == 4) - OP1(SLJIT_MOVU_UI, SLJIT_MEM1(SLJIT_R2), sizeof(PCRE2_SIZE), SLJIT_S1, 0); + OP1(SLJIT_MOVU_U32, SLJIT_MEM1(SLJIT_R2), sizeof(PCRE2_SIZE), SLJIT_S1, 0); else OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R2), sizeof(PCRE2_SIZE), SLJIT_S1, 0); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1); @@ -2312,7 +2541,7 @@ else static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit) { DEFINE_COMPILER; -sljit_si mov_opcode; +sljit_s32 mov_opcode; SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2); SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0 @@ -2328,7 +2557,7 @@ OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0); OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, match_data)); -mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_UI : SLJIT_MOV; +mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV; OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0); #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 @@ -2565,7 +2794,7 @@ else JUMPHERE(jump); } -static void peek_char(compiler_common *common, sljit_ui max) +static void peek_char(compiler_common *common, sljit_u32 max) { /* Reads the character into TMP1, keeps STR_PTR. Does not check STR_END. TMP2 Destroyed. 
*/ @@ -2610,12 +2839,12 @@ if (common->utf) #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 -static BOOL is_char7_bitset(const sljit_ub *bitset, BOOL nclass) +static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass) { /* Tells whether the character codes below 128 are enough to determine a match. */ -const sljit_ub value = nclass ? 0xff : 0; -const sljit_ub *end = bitset + 32; +const sljit_u8 value = nclass ? 0xff : 0; +const sljit_u8 *end = bitset + 32; bitset += 16; do @@ -2640,12 +2869,12 @@ SLJIT_ASSERT(common->utf); OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); +OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); if (full_read) { jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0); - OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); JUMPHERE(jump); } @@ -2653,7 +2882,7 @@ if (full_read) #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */ -static void read_char_range(compiler_common *common, sljit_ui min, sljit_ui max, BOOL update_str_ptr) +static void read_char_range(compiler_common *common, sljit_u32 min, sljit_u32 max, BOOL update_str_ptr) { /* Reads the precise value of a character into TMP1, if the character is between min and max (c >= min && c <= max). 
Otherwise it returns with a value @@ -2684,7 +2913,7 @@ if (common->utf) { OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0); if (update_str_ptr) - OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); @@ -2708,7 +2937,7 @@ if (common->utf) { OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0); if (update_str_ptr) - OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); @@ -2728,7 +2957,7 @@ if (common->utf) add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL)); else if (max < 128) { - OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); } else @@ -2737,7 +2966,7 @@ if (common->utf) if (!update_str_ptr) OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); else - OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); @@ -2807,7 +3036,7 @@ if (common->utf) { /* This can be an extra read in some situations, but hopefully it is needed in most cases. 
*/ - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0); if (!update_str_ptr) { @@ -2819,7 +3048,7 @@ if (common->utf) OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); JUMPHERE(jump2); } else @@ -2834,7 +3063,7 @@ if (common->utf) OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255); #endif -OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); +OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); #if PCRE2_CODE_UNIT_WIDTH != 8 JUMPHERE(jump); #endif @@ -3026,7 +3255,7 @@ compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0); -OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); +OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); JUMPHERE(compare); @@ -3035,7 +3264,7 @@ sljit_emit_fast_return(compiler, RETURN_ADDR, 0); /* We only have types for characters less than 256. 
*/ JUMPHERE(jump); -OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0); +OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); @@ -3057,27 +3286,27 @@ SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8); sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); -OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1)); +OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1)); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2)); -OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); +OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); -OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3); +OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); } #endif /* SUPPORT_UNICODE */ -static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline) +static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, sljit_u32 overall_options) { DEFINE_COMPILER; struct sljit_label *mainloop; struct sljit_label *newlinelabel = NULL; struct sljit_jump *start; struct sljit_jump *end = NULL; -struct sljit_jump *nl = NULL; +struct sljit_jump *end2 = NULL; #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 struct sljit_jump *singlechar; #endif @@ -3085,14 +3314,16 @@ jump_list *newline = NULL; BOOL newlinecheck = FALSE; BOOL readuchar = FALSE; -if (!(hascrorlf || firstline) && 
(common->nltype == NLTYPE_ANY || - common->nltype == NLTYPE_ANYCRLF || common->newline > 255)) +if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0) + && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255)) newlinecheck = TRUE; -if (firstline) +SLJIT_ASSERT(common->forced_quit_label == NULL); + +if ((overall_options & PCRE2_FIRSTLINE) != 0) { /* Search for the end of the first line. */ - SLJIT_ASSERT(common->first_line_end != 0); + SLJIT_ASSERT(common->match_end_ptr != 0); OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); if (common->nltype == NLTYPE_FIXED && common->newline > 255) @@ -3105,24 +3336,49 @@ if (firstline) CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop); CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop); JUMPHERE(end); - OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); } else { end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); mainloop = LABEL(); /* Continual stores does not cause data dependency. */ - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0); read_char_range(common, common->nlmin, common->nlmax, TRUE); check_newlinechar(common, common->nltype, &newline, TRUE); CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop); JUMPHERE(end); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0); set_jumps(newline, LABEL()); } OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); } +else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0) + { + /* Check whether offset limit is set and valid. 
*/ + SLJIT_ASSERT(common->match_end_ptr != 0); + + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit)); + OP1(SLJIT_MOV, TMP2, 0, STR_END, 0); + end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET); + OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); +#if PCRE2_CODE_UNIT_WIDTH == 16 + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); +#elif PCRE2_CODE_UNIT_WIDTH == 32 + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2); +#endif + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); + end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0); + OP1(SLJIT_MOV, TMP2, 0, STR_END, 0); + JUMPHERE(end2); + OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH); + add_jump(compiler, &common->forced_quit, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0)); + JUMPHERE(end); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0); + } start = JUMP(SLJIT_JUMP); @@ -3138,7 +3394,7 @@ if (newlinecheck) OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); #endif OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); - nl = JUMP(SLJIT_JUMP); + end2 = JUMP(SLJIT_JUMP); } mainloop = LABEL(); @@ -3161,7 +3417,7 @@ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); if (common->utf) { singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); JUMPHERE(singlechar); } @@ -3183,51 +3439,52 @@ JUMPHERE(start); if (newlinecheck) { JUMPHERE(end); - JUMPHERE(nl); + JUMPHERE(end2); } return mainloop; } #define MAX_N_CHARS 16 -#define MAX_N_BYTES 8 +#define MAX_DIFF_CHARS 6 -static SLJIT_INLINE void add_prefix_byte(sljit_ub byte, sljit_ub *bytes) +static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, PCRE2_UCHAR 
*chars) { -sljit_ub len = bytes[0]; -int i; +PCRE2_UCHAR i, len; +len = chars[0]; if (len == 255) return; if (len == 0) { - bytes[0] = 1; - bytes[1] = byte; + chars[0] = 1; + chars[1] = chr; return; } for (i = len; i > 0; i--) - if (bytes[i] == byte) + if (chars[i] == chr) return; -if (len >= MAX_N_BYTES - 1) +if (len >= MAX_DIFF_CHARS - 1) { - bytes[0] = 255; + chars[0] = 255; return; } len++; -bytes[len] = byte; -bytes[0] = len; +chars[len] = chr; +chars[0] = len; } -static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, sljit_ui *chars, sljit_ub *bytes, int max_chars) +static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *chars, int max_chars, sljit_u32 *rec_count) { /* Recursive function, which scans prefix literals. */ -BOOL last, any, caseless; +BOOL last, any, class, caseless; int len, repeat, len_save, consumed = 0; -sljit_ui chr, mask; +sljit_u32 chr; /* Any unicode character. */ +sljit_u8 *bytes, *bytes_end, byte; PCRE2_SPTR alternative, cc_save, oc; #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 PCRE2_UCHAR othercase[8]; @@ -3240,9 +3497,15 @@ PCRE2_UCHAR othercase[1]; repeat = 1; while (TRUE) { + if (*rec_count == 0) + return 0; + (*rec_count)--; + last = TRUE; any = FALSE; + class = FALSE; caseless = FALSE; + switch (*cc) { case OP_CHARI: @@ -3304,7 +3567,7 @@ while (TRUE) #ifdef SUPPORT_UNICODE if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc); #endif - max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars); + max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count); if (max_chars == 0) return consumed; last = FALSE; @@ -3327,7 +3590,7 @@ while (TRUE) alternative = cc + GET(cc, 1); while (*alternative == OP_ALT) { - max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars); + max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count); if (max_chars == 0) return consumed; alternative += GET(alternative, 1); @@ 
-3340,18 +3603,17 @@ while (TRUE) case OP_CLASS: #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 - if (common->utf && !is_char7_bitset((const sljit_ub *)(cc + 1), FALSE)) return consumed; + if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE)) + return consumed; #endif - any = TRUE; - cc += 1 + 32 / sizeof(PCRE2_UCHAR); + class = TRUE; break; case OP_NCLASS: #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 if (common->utf) return consumed; #endif - any = TRUE; - cc += 1 + 32 / sizeof(PCRE2_UCHAR); + class = TRUE; break; #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 @@ -3366,7 +3628,7 @@ while (TRUE) case OP_DIGIT: #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 - if (common->utf && !is_char7_bitset((const sljit_ub *)common->ctypes - cbit_length + cbit_digit, FALSE)) + if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE)) return consumed; #endif any = TRUE; @@ -3375,7 +3637,7 @@ while (TRUE) case OP_WHITESPACE: #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 - if (common->utf && !is_char7_bitset((const sljit_ub *)common->ctypes - cbit_length + cbit_space, FALSE)) + if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE)) return consumed; #endif any = TRUE; @@ -3384,7 +3646,7 @@ while (TRUE) case OP_WORDCHAR: #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 - if (common->utf && !is_char7_bitset((const sljit_ub *)common->ctypes - cbit_length + cbit_word, FALSE)) + if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE)) return consumed; #endif any = TRUE; @@ -3439,27 +3701,14 @@ while (TRUE) if (any) { -#if PCRE2_CODE_UNIT_WIDTH == 8 - mask = 0xff; -#elif PCRE2_CODE_UNIT_WIDTH == 16 - mask = 0xffff; -#elif PCRE2_CODE_UNIT_WIDTH == 32 - mask = 0xffffffff; -#else - SLJIT_ASSERT_STOP(); -#endif - do { - chars[0] = mask; - chars[1] = mask; - bytes[0] 
= 255; + chars[0] = 255; consumed++; if (--max_chars == 0) return consumed; - chars += 2; - bytes += MAX_N_BYTES; + chars += MAX_DIFF_CHARS; } while (--repeat > 0); @@ -3467,6 +3716,103 @@ while (TRUE) continue; } + if (class) + { + bytes = (sljit_u8*) (cc + 1); + cc += 1 + 32 / sizeof(PCRE2_UCHAR); + + switch (*cc) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPOSSTAR: + case OP_CRQUERY: + case OP_CRMINQUERY: + case OP_CRPOSQUERY: + max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count); + if (max_chars == 0) + return consumed; + break; + + default: + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRPOSPLUS: + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + case OP_CRPOSRANGE: + repeat = GET2(cc, 1); + if (repeat <= 0) + return consumed; + break; + } + + do + { + if (bytes[31] & 0x80) + chars[0] = 255; + else if (chars[0] != 255) + { + bytes_end = bytes + 32; + chr = 0; + do + { + byte = *bytes++; + SLJIT_ASSERT((chr & 0x7) == 0); + if (byte == 0) + chr += 8; + else + { + do + { + if ((byte & 0x1) != 0) + add_prefix_char(chr, chars); + byte >>= 1; + chr++; + } + while (byte != 0); + chr = (chr + 7) & ~7; + } + } + while (chars[0] != 255 && bytes < bytes_end); + bytes = bytes_end - 32; + } + + consumed++; + if (--max_chars == 0) + return consumed; + chars += MAX_DIFF_CHARS; + } + while (--repeat > 0); + + switch (*cc) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPOSSTAR: + return consumed; + + case OP_CRQUERY: + case OP_CRMINQUERY: + case OP_CRPOSQUERY: + cc++; + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + case OP_CRPOSRANGE: + if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE)) + return consumed; + cc += 1 + 2 * IMM2_SIZE; + break; + } + + repeat = 1; + continue; + } + len = 1; #ifdef SUPPORT_UNICODE if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc); @@ -3502,43 +3848,16 @@ while (TRUE) do { chr = *cc; -#if PCRE2_CODE_UNIT_WIDTH == 32 - if (SLJIT_UNLIKELY(chr == NOTACHAR)) - return consumed; -#endif - 
add_prefix_byte((sljit_ub)chr, bytes); + add_prefix_char(*cc, chars); - mask = 0; if (caseless) - { - add_prefix_byte((sljit_ub)*oc, bytes); - mask = *cc ^ *oc; - chr |= mask; - } - -#if PCRE2_CODE_UNIT_WIDTH == 32 - if (chars[0] == NOTACHAR && chars[1] == 0) -#else - if (chars[0] == NOTACHAR) -#endif - { - chars[0] = chr; - chars[1] = mask; - } - else - { - mask |= chars[0] ^ chr; - chr |= mask; - chars[0] = chr; - chars[1] |= mask; - } + add_prefix_char(*oc, chars); len--; consumed++; if (--max_chars == 0) return consumed; - chars += 2; - bytes += MAX_N_BYTES; + chars += MAX_DIFF_CHARS; cc++; oc++; } @@ -3557,161 +3876,576 @@ while (TRUE) } } -static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline) +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + +static sljit_s32 character_to_int32(PCRE2_UCHAR chr) +{ +sljit_s32 value = (sljit_s32)chr; +#if PCRE2_CODE_UNIT_WIDTH == 8 +#define SSE2_COMPARE_TYPE_INDEX 0 +return (value << 24) | (value << 16) | (value << 8) | value; +#elif PCRE2_CODE_UNIT_WIDTH == 16 +#define SSE2_COMPARE_TYPE_INDEX 1 +return (value << 16) | value; +#elif PCRE2_CODE_UNIT_WIDTH == 32 +#define SSE2_COMPARE_TYPE_INDEX 2 +return value; +#else +#error "Unsupported unit width" +#endif +} + +static SLJIT_INLINE void fast_forward_first_char2_sse2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2) +{ +DEFINE_COMPILER; +struct sljit_label *start; +struct sljit_jump *quit[3]; +struct sljit_jump *nomatch; +sljit_u8 instruction[8]; +sljit_s32 tmp1_ind = sljit_get_register_index(TMP1); +sljit_s32 tmp2_ind = sljit_get_register_index(TMP2); +sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR); +BOOL load_twice = FALSE; +PCRE2_UCHAR bit; + +bit = char1 ^ char2; +if (!is_powerof2(bit)) + bit = 0; + +if ((char1 != char2) && bit == 0) + load_twice = TRUE; + +quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + +/* First part (unaligned start) */ + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 
character_to_int32(char1 | bit)); + +SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1); + +/* MOVD xmm, r/m32 */ +instruction[0] = 0x66; +instruction[1] = 0x0f; +instruction[2] = 0x6e; +instruction[3] = 0xc0 | (2 << 3) | tmp1_ind; +sljit_emit_op_custom(compiler, instruction, 4); + +if (char1 != char2) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2)); + + /* MOVD xmm, r/m32 */ + instruction[3] = 0xc0 | (3 << 3) | tmp1_ind; + sljit_emit_op_custom(compiler, instruction, 4); + } + +/* PSHUFD xmm1, xmm2/m128, imm8 */ +instruction[2] = 0x70; +instruction[3] = 0xc0 | (2 << 3) | 2; +instruction[4] = 0; +sljit_emit_op_custom(compiler, instruction, 5); + +if (char1 != char2) + { + /* PSHUFD xmm1, xmm2/m128, imm8 */ + instruction[3] = 0xc0 | (3 << 3) | 3; + instruction[4] = 0; + sljit_emit_op_custom(compiler, instruction, 5); + } + +OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 0xf); +OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf); + +/* MOVDQA xmm1, xmm2/m128 */ +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + +if (str_ptr_ind < 8) + { + instruction[2] = 0x6f; + instruction[3] = (0 << 3) | str_ptr_ind; + sljit_emit_op_custom(compiler, instruction, 4); + + if (load_twice) + { + instruction[3] = (1 << 3) | str_ptr_ind; + sljit_emit_op_custom(compiler, instruction, 4); + } + } +else + { + instruction[1] = 0x41; + instruction[2] = 0x0f; + instruction[3] = 0x6f; + instruction[4] = (0 << 3) | (str_ptr_ind & 0x7); + sljit_emit_op_custom(compiler, instruction, 5); + + if (load_twice) + { + instruction[4] = (1 << 3) | str_ptr_ind; + sljit_emit_op_custom(compiler, instruction, 5); + } + instruction[1] = 0x0f; + } + +#else + +instruction[2] = 0x6f; +instruction[3] = (0 << 3) | str_ptr_ind; +sljit_emit_op_custom(compiler, instruction, 4); + +if (load_twice) + { + instruction[3] = (1 << 3) | str_ptr_ind; + sljit_emit_op_custom(compiler, instruction, 4); + } + +#endif + +if (bit != 0) + { + /* POR xmm1, xmm2/m128 */ + instruction[2] = 
0xeb; + instruction[3] = 0xc0 | (0 << 3) | 3; + sljit_emit_op_custom(compiler, instruction, 4); + } + +/* PCMPEQB/W/D xmm1, xmm2/m128 */ +instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX; +instruction[3] = 0xc0 | (0 << 3) | 2; +sljit_emit_op_custom(compiler, instruction, 4); + +if (load_twice) + { + instruction[3] = 0xc0 | (1 << 3) | 3; + sljit_emit_op_custom(compiler, instruction, 4); + } + +/* PMOVMSKB reg, xmm */ +instruction[2] = 0xd7; +instruction[3] = 0xc0 | (tmp1_ind << 3) | 0; +sljit_emit_op_custom(compiler, instruction, 4); + +if (load_twice) + { + OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0); + instruction[3] = 0xc0 | (tmp2_ind << 3) | 1; + sljit_emit_op_custom(compiler, instruction, 4); + + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0); + } + +OP2(SLJIT_ASHR, TMP1, 0, TMP1, 0, TMP2, 0); + +/* BSF r32, r/m32 */ +instruction[0] = 0x0f; +instruction[1] = 0xbc; +instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind; +sljit_emit_op_custom(compiler, instruction, 3); + +nomatch = JUMP(SLJIT_ZERO); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); +quit[1] = JUMP(SLJIT_JUMP); + +JUMPHERE(nomatch); + +start = LABEL(); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); +quit[2] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + +/* Second part (aligned) */ + +instruction[0] = 0x66; +instruction[1] = 0x0f; + +/* MOVDQA xmm1, xmm2/m128 */ +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + +if (str_ptr_ind < 8) + { + instruction[2] = 0x6f; + instruction[3] = (0 << 3) | str_ptr_ind; + sljit_emit_op_custom(compiler, instruction, 4); + + if (load_twice) + { + instruction[3] = (1 << 3) | str_ptr_ind; + sljit_emit_op_custom(compiler, instruction, 4); + } + } +else + { + instruction[1] = 0x41; + instruction[2] = 0x0f; + instruction[3] = 0x6f; + instruction[4] = (0 << 3) | (str_ptr_ind & 0x7); + sljit_emit_op_custom(compiler, instruction, 5); + + if (load_twice) + { + 
instruction[4] = (1 << 3) | str_ptr_ind; + sljit_emit_op_custom(compiler, instruction, 5); + } + instruction[1] = 0x0f; + } + +#else + +instruction[2] = 0x6f; +instruction[3] = (0 << 3) | str_ptr_ind; +sljit_emit_op_custom(compiler, instruction, 4); + +if (load_twice) + { + instruction[3] = (1 << 3) | str_ptr_ind; + sljit_emit_op_custom(compiler, instruction, 4); + } + +#endif + +if (bit != 0) + { + /* POR xmm1, xmm2/m128 */ + instruction[2] = 0xeb; + instruction[3] = 0xc0 | (0 << 3) | 3; + sljit_emit_op_custom(compiler, instruction, 4); + } + +/* PCMPEQB/W/D xmm1, xmm2/m128 */ +instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX; +instruction[3] = 0xc0 | (0 << 3) | 2; +sljit_emit_op_custom(compiler, instruction, 4); + +if (load_twice) + { + instruction[3] = 0xc0 | (1 << 3) | 3; + sljit_emit_op_custom(compiler, instruction, 4); + } + +/* PMOVMSKB reg, xmm */ +instruction[2] = 0xd7; +instruction[3] = 0xc0 | (tmp1_ind << 3) | 0; +sljit_emit_op_custom(compiler, instruction, 4); + +if (load_twice) + { + instruction[3] = 0xc0 | (tmp2_ind << 3) | 1; + sljit_emit_op_custom(compiler, instruction, 4); + + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + } + +/* BSF r32, r/m32 */ +instruction[0] = 0x0f; +instruction[1] = 0xbc; +instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind; +sljit_emit_op_custom(compiler, instruction, 3); + +JUMPTO(SLJIT_ZERO, start); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + +start = LABEL(); +SET_LABEL(quit[0], start); +SET_LABEL(quit[1], start); +SET_LABEL(quit[2], start); +} + +#undef SSE2_COMPARE_TYPE_INDEX + +#endif + +static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset) { DEFINE_COMPILER; struct sljit_label *start; struct sljit_jump *quit; -sljit_ui chars[MAX_N_CHARS * 2]; -sljit_ub bytes[MAX_N_CHARS * MAX_N_BYTES]; -sljit_ub ones[MAX_N_CHARS]; -int offsets[3]; -sljit_ui mask; -sljit_ub *byte_set, *byte_set_end; -int i, max, from; -int range_right = -1, range_len = 3 - 1; 
-sljit_ub *update_table = NULL; -BOOL in_range; +struct sljit_jump *found; +PCRE2_UCHAR mask; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +struct sljit_label *utf_start = NULL; +struct sljit_jump *utf_quit = NULL; +#endif +BOOL has_match_end = (common->match_end_ptr != 0); -for (i = 0; i < MAX_N_CHARS; i++) +if (offset > 0) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset)); + +if (has_match_end) { - chars[i << 1] = NOTACHAR; - chars[(i << 1) + 1] = 0; - bytes[i * MAX_N_BYTES] = 0; - } + OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); -max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS); - -if (max <= 1) - return FALSE; - -for (i = 0; i < max; i++) - { - mask = chars[(i << 1) + 1]; - ones[i] = ones_in_half_byte[mask & 0xf]; - mask >>= 4; - while (mask != 0) + OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, SLJIT_IMM, IN_UCHARS(offset + 1)); +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + if (sljit_x86_is_cmov_available()) { - ones[i] += ones_in_half_byte[mask & 0xf]; - mask >>= 4; + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0); + sljit_x86_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0); + } +#endif + { + quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP3, 0); + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); + JUMPHERE(quit); } } +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +if (common->utf && offset > 0) + utf_start = LABEL(); +#endif + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + +/* SSE2 accelerated first character search. */ + +if (sljit_x86_is_sse2_available()) + { + fast_forward_first_char2_sse2(common, char1, char2); + + SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0); + if (common->mode == PCRE2_JIT_COMPLETE) + { + /* In complete mode, we don't need to run a match when STR_PTR == STR_END. 
*/ + SLJIT_ASSERT(common->forced_quit_label == NULL); + OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH); + add_jump(compiler, &common->forced_quit, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf && offset > 0) + { + SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE); + + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset)); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +#if PCRE2_CODE_UNIT_WIDTH == 8 + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0); + CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start); +#elif PCRE2_CODE_UNIT_WIDTH == 16 + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); + CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start); +#else +#error "Unknown code width" +#endif + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + } +#endif + + if (offset > 0) + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset)); + } + else if (sljit_x86_is_cmov_available()) + { + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0); + sljit_x86_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, has_match_end ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_match_end ? common->match_end_ptr : 0); + } + else + { + quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); + OP1(SLJIT_MOV, STR_PTR, 0, has_match_end ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_match_end ? 
common->match_end_ptr : 0); + JUMPHERE(quit); + } + + if (has_match_end) + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); + return; + } + +#endif + +quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + +start = LABEL(); +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + +if (char1 == char2) + found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1); +else + { + mask = char1 ^ char2; + if (is_powerof2(mask)) + { + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask); + found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask); + } + else + { + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char1); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char2); + OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL); + found = JUMP(SLJIT_NOT_ZERO); + } + } + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, start); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +if (common->utf && offset > 0) + utf_quit = JUMP(SLJIT_JUMP); +#endif + +JUMPHERE(found); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +if (common->utf && offset > 0) + { + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset)); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +#if PCRE2_CODE_UNIT_WIDTH == 8 + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0); + CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start); +#elif PCRE2_CODE_UNIT_WIDTH == 16 + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); + CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start); +#else +#error "Unknown code width" +#endif + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + JUMPHERE(utf_quit); + } +#endif + +JUMPHERE(quit); + +if (has_match_end) + { + quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); + if (offset > 0) + 
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset)); + JUMPHERE(quit); + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); + } + +if (offset > 0) + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset)); +} + +static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common) +{ +DEFINE_COMPILER; +struct sljit_label *start; +struct sljit_jump *quit; +struct sljit_jump *match; +/* bytes[0] represent the number of characters between 0 +and MAX_N_BYTES - 1, 255 represents any character. */ +PCRE2_UCHAR chars[MAX_N_CHARS * MAX_DIFF_CHARS]; +sljit_s32 offset; +PCRE2_UCHAR mask; +PCRE2_UCHAR *char_set, *char_set_end; +int i, max, from; +int range_right = -1, range_len; +sljit_u8 *update_table = NULL; +BOOL in_range; +sljit_u32 rec_count; + +for (i = 0; i < MAX_N_CHARS; i++) + chars[i * MAX_DIFF_CHARS] = 0; + +rec_count = 10000; +max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count); + +if (max < 1) + return FALSE; + in_range = FALSE; -from = 0; /* Prevent compiler "uninitialized" warning */ +/* Prevent compiler "uninitialized" warning */ +from = 0; +range_len = 4 /* minimum length */ - 1; for (i = 0; i <= max; i++) { - if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4)) + if (in_range && (i - from) > range_len && (chars[(i - 1) * MAX_DIFF_CHARS] < 255)) { range_len = i - from; range_right = i - 1; } - if (i < max && bytes[i * MAX_N_BYTES] < 255) + if (i < max && chars[i * MAX_DIFF_CHARS] < 255) { + SLJIT_ASSERT(chars[i * MAX_DIFF_CHARS] > 0); if (!in_range) { in_range = TRUE; from = i; } } - else if (in_range) + else in_range = FALSE; } if (range_right >= 0) { - update_table = (sljit_ub *)allocate_read_only_data(common, 256); + update_table = (sljit_u8 *)allocate_read_only_data(common, 256); if (update_table == NULL) return TRUE; memset(update_table, IN_UCHARS(range_len), 256); for (i = 0; i < range_len; i++) { - byte_set = bytes + ((range_right - i) * MAX_N_BYTES); - SLJIT_ASSERT(byte_set[0] > 0 
&& byte_set[0] < 255); - byte_set_end = byte_set + byte_set[0]; - byte_set++; - while (byte_set <= byte_set_end) + char_set = chars + ((range_right - i) * MAX_DIFF_CHARS); + SLJIT_ASSERT(char_set[0] > 0 && char_set[0] < 255); + char_set_end = char_set + char_set[0]; + char_set++; + while (char_set <= char_set_end) { - if (update_table[*byte_set] > IN_UCHARS(i)) - update_table[*byte_set] = IN_UCHARS(i); - byte_set++; + if (update_table[(*char_set) & 0xff] > IN_UCHARS(i)) + update_table[(*char_set) & 0xff] = IN_UCHARS(i); + char_set++; } } } -offsets[0] = -1; -offsets[1] = -1; -offsets[2] = -1; +offset = -1; /* Scan forward. */ for (i = 0; i < max; i++) - if (ones[i] <= 2) { - offsets[0] = i; - break; - } - -if (offsets[0] < 0 && range_right < 0) - return FALSE; - -if (offsets[0] >= 0) { - /* Scan backward. */ - for (i = max - 1; i > offsets[0]; i--) - if (ones[i] <= 2 && i != range_right) - { - offsets[1] = i; - break; - } - - /* This case is handled better by fast_forward_first_char. */ - if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0) - return FALSE; - - /* We only search for a middle character if there is no range check. */ - if (offsets[1] >= 0 && range_right == -1) + if (offset == -1) { - /* Scan from middle. 
*/ - for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++) - if (ones[i] <= 2) + if (chars[i * MAX_DIFF_CHARS] <= 2) + offset = i; + } + else if (chars[offset * MAX_DIFF_CHARS] == 2 && chars[i * MAX_DIFF_CHARS] <= 2) + { + if (chars[i * MAX_DIFF_CHARS] == 1) + offset = i; + else + { + mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2]; + if (!is_powerof2(mask)) { - offsets[2] = i; - break; + mask = chars[i * MAX_DIFF_CHARS + 1] ^ chars[i * MAX_DIFF_CHARS + 2]; + if (is_powerof2(mask)) + offset = i; } - - if (offsets[2] == -1) - { - for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--) - if (ones[i] <= 2) - { - offsets[2] = i; - break; - } } } - - SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1])); - SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2])); - - chars[0] = chars[offsets[0] << 1]; - chars[1] = chars[(offsets[0] << 1) + 1]; - if (offsets[2] >= 0) - { - chars[2] = chars[offsets[2] << 1]; - chars[3] = chars[(offsets[2] << 1) + 1]; - } - if (offsets[1] >= 0) - { - chars[4] = chars[offsets[1] << 1]; - chars[5] = chars[(offsets[1] << 1) + 1]; - } } +if (range_right < 0) + { + if (offset < 0) + return FALSE; + SLJIT_ASSERT(chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2); + /* Works regardless the value is 1 or 2. 
*/ + mask = chars[offset * MAX_DIFF_CHARS + chars[offset * MAX_DIFF_CHARS]]; + fast_forward_first_char2(common, chars[offset * MAX_DIFF_CHARS + 1], mask, offset); + return TRUE; + } + +if (range_right == offset) + offset = -1; + +SLJIT_ASSERT(offset == -1 || (chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2)); + max -= 1; -if (firstline) +SLJIT_ASSERT(max > 0); +if (common->match_end_ptr != 0) { - SLJIT_ASSERT(common->first_line_end != 0); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max)); quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0); @@ -3721,68 +4455,86 @@ if (firstline) else OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max)); +SLJIT_ASSERT(range_right >= 0); + #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) -if (range_right >= 0) - OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table); +OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table); #endif start = LABEL(); quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); -SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0); - -if (range_right >= 0) - { #if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right)); +OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right)); #else - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1); +OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1); #endif #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0); +OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0); #else - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table); +OP1(SLJIT_MOV_U8, TMP1, 
0, SLJIT_MEM1(TMP1), (sljit_sw)update_table); #endif - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); - CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start); - } +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); +CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start); -if (offsets[0] >= 0) +if (offset >= 0) { - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0])); - if (offsets[1] >= 0) - OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1])); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset)); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - if (chars[1] != 0) - OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]); - CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start); - if (offsets[2] >= 0) - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1)); - - if (offsets[1] >= 0) + if (chars[offset * MAX_DIFF_CHARS] == 1) + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1], start); + else { - if (chars[5] != 0) - OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]); - CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start); + mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2]; + if (is_powerof2(mask)) + { + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1] | mask, start); + } + else + { + match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1]); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 2], start); + JUMPHERE(match); + } } - - if (offsets[2] >= 0) - { - if (chars[3] != 0) - OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]); - CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start); - } - OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); } +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +if (common->utf && offset != 0) + { + if (offset < 0) + { + 
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + } + else + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); +#if PCRE2_CODE_UNIT_WIDTH == 8 + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0); + CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, start); +#elif PCRE2_CODE_UNIT_WIDTH == 16 + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); + CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, start); +#else +#error "Unknown code width" +#endif + if (offset < 0) + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + } +#endif + +if (offset >= 0) + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + JUMPHERE(quit); -if (firstline) +if (common->match_end_ptr != 0) { if (range_right >= 0) - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); if (range_right >= 0) { @@ -3797,26 +4549,10 @@ return TRUE; } #undef MAX_N_CHARS -#undef MAX_N_BYTES -static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, PCRE2_UCHAR first_char, BOOL caseless, BOOL firstline) +static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, PCRE2_UCHAR first_char, BOOL caseless) { -DEFINE_COMPILER; -struct sljit_label *start; -struct sljit_jump *quit; -struct sljit_jump *found; -PCRE2_UCHAR oc, bit; - -if (firstline) - { - SLJIT_ASSERT(common->first_line_end != 0); - OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); - OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end); - } - -start = LABEL(); -quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); -OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); +PCRE2_UCHAR oc; oc = first_char; if (caseless) @@ -3827,36 +4563,11 @@ if (caseless) oc = UCD_OTHERCASE(first_char); #endif } -if (first_char == oc) - found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, first_char); -else - { - bit = first_char 
^ oc; - if (is_powerof2(bit)) - { - OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit); - found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit); - } - else - { - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc); - OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL); - found = JUMP(SLJIT_NOT_ZERO); - } - } -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -JUMPTO(SLJIT_JUMP, start); -JUMPHERE(found); -JUMPHERE(quit); - -if (firstline) - OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); +fast_forward_first_char2(common, first_char, oc, 0); } -static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline) +static SLJIT_INLINE void fast_forward_newline(compiler_common *common) { DEFINE_COMPILER; struct sljit_label *loop; @@ -3867,11 +4578,10 @@ struct sljit_jump *foundcr = NULL; struct sljit_jump *notfoundnl; jump_list *newline = NULL; -if (firstline) +if (common->match_end_ptr != 0) { - SLJIT_ASSERT(common->first_line_end != 0); OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); - OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end); + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); } if (common->nltype == NLTYPE_FIXED && common->newline > 255) @@ -3902,7 +4612,7 @@ if (common->nltype == NLTYPE_FIXED && common->newline > 255) JUMPHERE(firstchar); JUMPHERE(lastchar); - if (firstline) + if (common->match_end_ptr != 0) OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); return; } @@ -3940,13 +4650,13 @@ if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF) JUMPHERE(lastchar); JUMPHERE(firstchar); -if (firstline) +if (common->match_end_ptr != 0) OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); } -static BOOL check_class_ranges(compiler_common *common, const sljit_ub *bits, BOOL nclass, BOOL invert, jump_list **backtracks); +static BOOL 
check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks); -static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, const sljit_ub *start_bits, BOOL firstline) +static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, const sljit_u8 *start_bits) { DEFINE_COMPILER; struct sljit_label *start; @@ -3957,11 +4667,10 @@ jump_list *matches = NULL; struct sljit_jump *jump; #endif -if (firstline) +if (common->match_end_ptr != 0) { - SLJIT_ASSERT(common->first_line_end != 0); OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0); - OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end); + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); } start = LABEL(); @@ -3981,7 +4690,7 @@ if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, #endif OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits); OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); found = JUMP(SLJIT_NOT_ZERO); @@ -3997,7 +4706,7 @@ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); if (common->utf) { CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); } #elif PCRE2_CODE_UNIT_WIDTH == 16 @@ -4019,7 +4728,7 @@ if (matches != NULL) set_jumps(matches, LABEL()); JUMPHERE(quit); -if (firstline) +if (common->match_end_ptr != 0) OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0); } @@ -4032,7 +4741,7 @@ struct sljit_jump *alreadyfound; struct sljit_jump *found; struct sljit_jump *foundoc = NULL; struct sljit_jump 
*notfound; -sljit_ui oc, bit; +sljit_u32 oc, bit; SLJIT_ASSERT(common->req_char_ptr != 0); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr); @@ -4169,7 +4878,7 @@ else if (common->utf) jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes); OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0); @@ -4214,7 +4923,7 @@ else if (common->utf) jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); #endif - OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes); + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes); OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); #if PCRE2_CODE_UNIT_WIDTH != 8 @@ -4230,12 +4939,12 @@ OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOC sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0); } -static BOOL check_class_ranges(compiler_common *common, const sljit_ub *bits, BOOL nclass, BOOL invert, jump_list **backtracks) +static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks) { /* May destroy TMP1. 
*/ DEFINE_COMPILER; int ranges[MAX_RANGE_SIZE]; -sljit_ub bit, cbit, all; +sljit_u8 bit, cbit, all; int i, byte, length = 0; bit = bits[0] & 0x1; @@ -4328,8 +5037,10 @@ switch(length) case 4: if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2]) && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2] + && (ranges[1] & (ranges[2] - ranges[0])) == 0 && is_powerof2(ranges[2] - ranges[0])) { + SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]); if (ranges[2] + 1 != ranges[3]) { @@ -4528,12 +5239,12 @@ OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); #if PCRE2_CODE_UNIT_WIDTH != 8 jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255); #endif -OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0); +OP1(SLJIT_MOV_U8, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0); #if PCRE2_CODE_UNIT_WIDTH != 8 JUMPHERE(jump); jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255); #endif -OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0); +OP1(SLJIT_MOV_U8, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0); #if PCRE2_CODE_UNIT_WIDTH != 8 JUMPHERE(jump); #endif @@ -4558,11 +5269,11 @@ sljit_emit_fast_return(compiler, RETURN_ADDR, 0); static PCRE2_SPTR SLJIT_CALL do_utf_caselesscmp(PCRE2_SPTR src1, jit_arguments *args, PCRE2_SPTR end1) { /* This function would be ineffective to do in JIT level. 
*/ -sljit_ui c1, c2; +sljit_u32 c1, c2; PCRE2_SPTR src2 = args->startchar_ptr; PCRE2_SPTR end2 = args->end; const ucd_record *ur; -const sljit_ui *pp; +const sljit_u32 *pp; while (src1 < end1) { @@ -4586,8 +5297,6 @@ return src2; #endif /* SUPPORT_UNICODE */ -static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr); - static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc, compare_context *context, jump_list **backtracks) { @@ -4624,16 +5333,16 @@ if (context->sourcereg == -1) #if PCRE2_CODE_UNIT_WIDTH == 8 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED if (context->length >= 4) - OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); + OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); else if (context->length >= 2) - OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); + OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); else #endif - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); #elif PCRE2_CODE_UNIT_WIDTH == 16 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED if (context->length >= 4) - OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); + OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); else #endif OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); @@ -4675,12 +5384,12 @@ do #endif { if (context->length >= 4) - OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); + OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); else if (context->length >= 2) - OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); + OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); #if PCRE2_CODE_UNIT_WIDTH == 8 else if (context->length >= 1) - 
OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); + OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1; @@ -4763,6 +5472,8 @@ return cc; } \ charoffset = (value); +static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr); + static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks) { DEFINE_COMPILER; @@ -4780,7 +5491,7 @@ BOOL utf = common->utf; BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE; BOOL charsaved = FALSE; int typereg = TMP1; -const sljit_ui *other_cases; +const sljit_u32 *other_cases; sljit_uw typeoffset; #endif @@ -4788,6 +5499,7 @@ sljit_uw typeoffset; cc++; ccbegin = cc; compares = 0; + if (cc[-1] & XCL_MAP) { min = 0; @@ -4845,9 +5557,8 @@ while (*cc != XCL_END) /* Any either accepts everything or ignored. 
*/ if (cc[-1] == XCL_PROP) { - if (list != backtracks) - compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE); - else + compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE); + if (list == backtracks) add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); return; } @@ -4898,11 +5609,11 @@ if ((cc[-1] & XCL_HASPROP) == 0) if ((cc[-1] & XCL_MAP) != 0) { jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); - if (!check_class_ranges(common, (const sljit_ub *)cc, (((const sljit_ub *)cc)[31] & 0x80) != 0, TRUE, &found)) + if (!check_class_ranges(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found)) { OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO)); @@ -4925,21 +5636,25 @@ else if ((cc[-1] & XCL_MAP) != 0) #ifdef SUPPORT_UNICODE charsaved = TRUE; #endif - if (!check_class_ranges(common, (const sljit_ub *)cc, FALSE, TRUE, list)) + if (!check_class_ranges(common, (const sljit_u8 *)cc, FALSE, TRUE, list)) { #if PCRE2_CODE_UNIT_WIDTH == 8 - SLJIT_ASSERT(common->utf); + jump = NULL; + if (common->utf) #endif - jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); + jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO)); - JUMPHERE(jump); +#if PCRE2_CODE_UNIT_WIDTH == 8 + if (common->utf) +#endif + JUMPHERE(jump); } 
OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); @@ -4953,18 +5668,18 @@ if (needstype || needsscript) OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); - OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1)); + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1)); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2)); - OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); + OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); /* Before anything else, we deal with scripts. */ if (needsscript) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3); ccbegin = cc; @@ -5011,12 +5726,12 @@ if (needstype || needsscript) if (!needschar) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3); } else { OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); - OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); + OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); typereg = RETURN_ADDR; } } @@ -5280,7 +5995,7 @@ while (*cc != XCL_END) OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL); SET_CHAR_OFFSET(0); - OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff); + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f); OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL); SET_TYPE_OFFSET(ucp_Pc); @@ -5412,47 +6127,9 @@ 
switch(type) check_partial(common, FALSE); return cc; - case OP_CIRC: - OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); - add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0)); - OP2(SLJIT_IAND | SLJIT_SET_E, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); - add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); - return cc; - - case OP_CIRCM: - OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); - jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0); - OP2(SLJIT_IAND | SLJIT_SET_E, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); - add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); - jump[0] = JUMP(SLJIT_JUMP); - JUMPHERE(jump[1]); - - if (!common->alt_circumflex) - add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); - - if (common->nltype == NLTYPE_FIXED && common->newline > 255) - { - OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); - add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0)); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); - OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); - } - else - { - skip_char_back(common); - read_char_range(common, common->nlmin, common->nlmax, TRUE); - check_newlinechar(common, common->nltype, backtracks, FALSE); - } - JUMPHERE(jump[0]); - return cc; - case OP_DOLL: OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); - OP2(SLJIT_IAND | SLJIT_SET_E, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); + OP2(SLJIT_AND32 | SLJIT_SET_E, SLJIT_UNUSED, 
0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); if (!common->endonly) @@ -5467,7 +6144,7 @@ switch(type) case OP_DOLLM: jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); - OP2(SLJIT_IAND | SLJIT_SET_E, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); + OP2(SLJIT_AND32 | SLJIT_SET_E, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); check_partial(common, FALSE); jump[0] = JUMP(SLJIT_JUMP); @@ -5501,6 +6178,44 @@ switch(type) JUMPHERE(jump[0]); return cc; + case OP_CIRC: + OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0)); + OP2(SLJIT_AND32 | SLJIT_SET_E, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); + return cc; + + case OP_CIRCM: + OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); + jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0); + OP2(SLJIT_AND32 | SLJIT_SET_E, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); + jump[0] = JUMP(SLJIT_JUMP); + JUMPHERE(jump[1]); + + if (!common->alt_circumflex) + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + + if (common->nltype == NLTYPE_FIXED && common->newline > 255) + { + OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0)); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); + 
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); + } + else + { + skip_char_back(common); + read_char_range(common, common->nlmin, common->nlmax, TRUE); + check_newlinechar(common, common->nltype, backtracks, FALSE); + } + JUMPHERE(jump[0]); + return cc; + case OP_REVERSE: length = GET(cc, 0); if (length == 0) @@ -5552,7 +6267,7 @@ switch(type) if (check_str_ptr) detect_partial_match(common, backtracks); #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 - if (common->utf && is_char7_bitset((const sljit_ub*)common->ctypes - cbit_length + cbit_digit, FALSE)) + if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE)) read_char7_type(common, type == OP_NOT_DIGIT); else #endif @@ -5567,7 +6282,7 @@ switch(type) if (check_str_ptr) detect_partial_match(common, backtracks); #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 - if (common->utf && is_char7_bitset((const sljit_ub*)common->ctypes - cbit_length + cbit_space, FALSE)) + if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE)) read_char7_type(common, type == OP_NOT_WHITESPACE); else #endif @@ -5581,7 +6296,7 @@ switch(type) if (check_str_ptr) detect_partial_match(common, backtracks); #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 - if (common->utf && is_char7_bitset((const sljit_ub*)common->ctypes - cbit_length + cbit_word, FALSE)) + if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE)) read_char7_type(common, type == OP_NOT_WORDCHAR); else #endif @@ -5623,7 +6338,7 @@ switch(type) #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 #if PCRE2_CODE_UNIT_WIDTH == 8 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); - 
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); #elif PCRE2_CODE_UNIT_WIDTH == 16 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800); @@ -5710,7 +6425,7 @@ switch(type) OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop)); /* Optimize register allocation: use a real register. */ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0); - OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3); + OP1(SLJIT_MOV_U8, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3); label = LABEL(); jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); @@ -5718,10 +6433,10 @@ switch(type) read_char(common); add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop)); - OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3); + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3); OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2); - OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable)); + OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable)); OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0); OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); @@ -5805,7 +6520,7 @@ switch(type) c = *cc; if (c < 128) { - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); if (type == OP_NOT || !char_has_othercase(common, cc)) add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c)); else @@ -5861,13 +6576,13 @@ switch(type) detect_partial_match(common, backtracks); #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 - bit = (common->utf && is_char7_bitset((const sljit_ub *)cc, type == OP_NCLASS)) ? 
127 : 255; + bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255; read_char_range(common, 0, bit, type == OP_NCLASS); #else read_char_range(common, 0, 255, type == OP_NCLASS); #endif - if (check_class_ranges(common, (const sljit_ub *)cc, type == OP_NCLASS, FALSE, backtracks)) + if (check_class_ranges(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks)) return cc + 32 / sizeof(PCRE2_UCHAR); #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 @@ -5892,7 +6607,7 @@ switch(type) OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); add_jump(compiler, backtracks, JUMP(SLJIT_ZERO)); @@ -6419,8 +7134,8 @@ static int SLJIT_CALL do_callout(struct jit_arguments *arguments, pcre2_callout_ { PCRE2_SPTR begin = arguments->begin; PCRE2_SIZE *ovector = arguments->match_data->ovector; -uint32_t oveccount = arguments->oveccount; -uint32_t i; +sljit_u32 oveccount = arguments->oveccount; +sljit_u32 i; if (arguments->callout == NULL) return 0; @@ -6461,7 +7176,7 @@ static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *com { DEFINE_COMPILER; backtrack_common *backtrack; -sljit_si mov_opcode; +sljit_s32 mov_opcode; unsigned int callout_length = (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE); sljit_sw value1; @@ -6476,8 +7191,8 @@ SLJIT_ASSERT(common->capture_last_ptr != 0); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); value1 = (*cc == OP_CALLOUT) ? 
cc[1 + 2 * LINK_SIZE] : 0; -OP1(SLJIT_MOV_UI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1); -OP1(SLJIT_MOV_UI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0); +OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1); +OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0); /* These pointer sized fields temporarly stores internal variables. */ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); @@ -6486,7 +7201,7 @@ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0); if (common->mark_ptr != 0) OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr)); -mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_UI : SLJIT_MOV; +mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV; OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1)); OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE)); @@ -6513,7 +7228,7 @@ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0); OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE); GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START); sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout)); -OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0); +OP1(SLJIT_MOV_S32, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw)); @@ -6542,6 +7257,10 @@ while (TRUE) case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY: + case OP_CIRC: + case OP_CIRCM: + case OP_DOLL: + case OP_DOLLM: case OP_CALLOUT: case OP_ALT: cc += PRIV(OP_lengths)[*cc]; @@ -7835,6 +8554,10 @@ while (*cc != OP_KETRPOS) OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); } + /* Even if the match is empty, we need to reset the control 
head. */ + if (needs_control_head) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack)); + if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS) add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0)); @@ -7862,6 +8585,10 @@ while (*cc != OP_KETRPOS) OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0); } + /* Even if the match is empty, we need to reset the control head. */ + if (needs_control_head) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack)); + if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS) add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0)); @@ -7874,9 +8601,6 @@ while (*cc != OP_KETRPOS) } } - if (needs_control_head) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack)); - JUMPTO(SLJIT_JUMP, loop); flush_stubs(common); @@ -7930,7 +8654,7 @@ count_match(common); return cc + 1 + LINK_SIZE; } -static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_ui *max, sljit_ui *exact, PCRE2_SPTR *end) +static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end) { int class_len; @@ -8081,7 +8805,9 @@ DEFINE_COMPILER; backtrack_common *backtrack; PCRE2_UCHAR opcode; PCRE2_UCHAR type; -sljit_ui max = 0, exact; +sljit_u32 max = 0, exact; +BOOL fast_fail; +sljit_s32 fast_str_ptr; BOOL charpos_enabled; PCRE2_UCHAR charpos_char; unsigned int charpos_othercasebit; @@ -8098,6 +8824,19 @@ int tmp_base, tmp_offset; PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL); +fast_str_ptr = PRIVATE_DATA(cc + 1); +fast_fail = TRUE; + +SLJIT_ASSERT(common->fast_forward_bc_ptr == NULL || fast_str_ptr == 0 || cc == common->fast_forward_bc_ptr); + +if (cc == 
common->fast_forward_bc_ptr) + fast_fail = FALSE; +else if (common->fast_fail_start_ptr == 0) + fast_str_ptr = 0; + +SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || fast_str_ptr == 0 + || (fast_str_ptr >= common->fast_fail_start_ptr && fast_str_ptr <= common->fast_fail_end_ptr)); + cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end); if (type != OP_EXTUNI) @@ -8111,9 +8850,13 @@ else tmp_offset = POSSESSIVE0; } +if (fast_fail && fast_str_ptr != 0) + add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), fast_str_ptr)); + /* Handle fixed part first. */ if (exact > 1) { + SLJIT_ASSERT(fast_str_ptr == 0); if (common->mode == PCRE2_JIT_COMPLETE #ifdef SUPPORT_UNICODE && !common->utf @@ -8144,9 +8887,12 @@ switch(opcode) { case OP_STAR: case OP_UPTO: + SLJIT_ASSERT(fast_str_ptr == 0 || opcode == OP_STAR); + if (type == OP_ANYNL || type == OP_EXTUNI) { SLJIT_ASSERT(private_data_ptr == 0); + SLJIT_ASSERT(fast_str_ptr == 0); allocate_stack(common, 2); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); @@ -8201,9 +8947,7 @@ switch(opcode) #elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 SLJIT_ASSERT((charpos_othercasebit >> 9) == 0); if ((charpos_othercasebit & 0x100) != 0) - { charpos_othercasebit = (charpos_othercasebit & 0xff) << 8; - } #endif if (charpos_othercasebit != 0) charpos_char |= charpos_othercasebit; @@ -8228,6 +8972,8 @@ switch(opcode) add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO)); } compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE); + if (fast_str_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); JUMPHERE(jump); detect_partial_match(common, &backtrack->topbacktracks); @@ -8249,6 +8995,8 @@ switch(opcode) /* Search the last instance of charpos_char. 
*/ label = LABEL(); compile_char1_matchingpath(common, type, cc, &no_match, FALSE); + if (fast_str_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); detect_partial_match(common, &no_match); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); if (charpos_othercasebit != 0) @@ -8304,6 +9052,8 @@ switch(opcode) set_jumps(no_match, LABEL()); OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + if (fast_str_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); } #endif else @@ -8331,6 +9081,8 @@ switch(opcode) OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); set_jumps(no_match, LABEL()); OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + if (fast_str_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); } } BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); @@ -8341,9 +9093,12 @@ switch(opcode) allocate_stack(common, 1); OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); + if (fast_str_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); break; case OP_MINUPTO: + SLJIT_ASSERT(fast_str_ptr == 0); if (private_data_ptr == 0) allocate_stack(common, 2); OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); @@ -8353,6 +9108,7 @@ switch(opcode) case OP_QUERY: case OP_MINQUERY: + SLJIT_ASSERT(fast_str_ptr == 0); if (private_data_ptr == 0) allocate_stack(common, 1); OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); @@ -8375,6 +9131,8 @@ switch(opcode) JUMPTO(SLJIT_JUMP, label); set_jumps(no_match, LABEL()); OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset); + if (fast_str_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); break; } #endif @@ -8385,9 +9143,12 @@ switch(opcode) set_jumps(no_char1_match, LABEL()); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); set_jumps(no_match, LABEL()); + if (fast_str_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); break; case 
OP_POSUPTO: + SLJIT_ASSERT(fast_str_ptr == 0); #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 if (common->utf) { @@ -8416,6 +9177,7 @@ switch(opcode) break; case OP_POSQUERY: + SLJIT_ASSERT(fast_str_ptr == 0); OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); compile_char1_matchingpath(common, type, cc, &no_match, TRUE); OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); @@ -8460,7 +9222,7 @@ if (common->accept_label == NULL) else CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label); OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); -OP1(SLJIT_MOV_UI, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options)); +OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options)); OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY); add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO)); OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART); @@ -8587,10 +9349,10 @@ while (cc < ccend) case OP_WORD_BOUNDARY: case OP_EODN: case OP_EOD: - case OP_CIRC: - case OP_CIRCM: case OP_DOLL: case OP_DOLLM: + case OP_CIRC: + case OP_CIRCM: case OP_REVERSE: cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? 
&parent->top->nextbacktracks : &parent->topbacktracks); break; @@ -8774,8 +9536,7 @@ while (cc < ccend) OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0); } BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL(); - if (cc[1] > OP_ASSERTBACK_NOT) - count_match(common); + count_match(common); break; case OP_ONCE: @@ -8893,7 +9654,7 @@ DEFINE_COMPILER; PCRE2_SPTR cc = current->cc; PCRE2_UCHAR opcode; PCRE2_UCHAR type; -sljit_ui max = 0, exact; +sljit_u32 max = 0, exact; struct sljit_label *label = NULL; struct sljit_jump *jump = NULL; jump_list *jumplist = NULL; @@ -9012,7 +9773,7 @@ switch(opcode) break; } - set_jumps(current->topbacktracks, LABEL()); +set_jumps(current->topbacktracks, LABEL()); } static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current) @@ -9944,7 +10705,7 @@ static SLJIT_INLINE void compile_recurse(compiler_common *common) DEFINE_COMPILER; PCRE2_SPTR cc = common->start + common->currententry->start; PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 
0 : IMM2_SIZE); -PCRE2_SPTR ccend = bracketend(cc); +PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE); BOOL needs_control_head; int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head); int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head); @@ -9967,6 +10728,7 @@ common->currententry->entry = LABEL(); set_jumps(common->currententry->calls, common->currententry->entry); sljit_emit_fast_enter(compiler, TMP2, 0); +count_match(common); allocate_stack(common, private_data_size + framesize + alternativesize); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0); copy_private_data(common, ccbegin, ccend, TRUE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head); @@ -10066,14 +10828,14 @@ sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0); #undef COMPILE_BACKTRACKINGPATH #undef CURRENT_AS -static int jit_compile(pcre2_code *code, uint32_t mode) +static int jit_compile(pcre2_code *code, sljit_u32 mode) { pcre2_real_code *re = (pcre2_real_code *)code; struct sljit_compiler *compiler; backtrack_common rootbacktrack; compiler_common common_data; compiler_common *common = &common_data; -const sljit_ub *tables = re->tables; +const sljit_u8 *tables = re->tables; void *allocator_data = &re->memctl; int private_data_size; PCRE2_SPTR ccend; @@ -10170,7 +10932,7 @@ ccend = bracketend(common->start); /* Calculate the local space size on the stack. 
*/ common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw); -common->optimized_cbracket = (sljit_ub *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data); +common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data); if (!common->optimized_cbracket) return PCRE2_ERROR_NOMEMORY; #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1 @@ -10206,9 +10968,9 @@ if (mode != PCRE2_JIT_COMPLETE) common->ovector_start += sizeof(sljit_sw); } } -if ((re->overall_options & PCRE2_FIRSTLINE) != 0) +if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0) { - common->first_line_end = common->ovector_start; + common->match_end_ptr = common->ovector_start; common->ovector_start += sizeof(sljit_sw); } #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD @@ -10241,16 +11003,24 @@ SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0)); common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw); total_length = ccend - common->start; -common->private_data_ptrs = (sljit_si *)SLJIT_MALLOC(total_length * (sizeof(sljit_si) + (common->has_then ? 1 : 0)), allocator_data); +common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 
1 : 0)), allocator_data); if (!common->private_data_ptrs) { SLJIT_FREE(common->optimized_cbracket, allocator_data); return PCRE2_ERROR_NOMEMORY; } -memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_si)); +memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32)); private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw); set_private_data_ptrs(common, &private_data_size, ccend); +if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) + { + if (!detect_fast_forward_skip(common, &private_data_size) && !common->has_skip_in_assert_back) + detect_fast_fail(common, common->start, &private_data_size, 4); + } + +SLJIT_ASSERT(common->fast_fail_start_ptr <= common->fast_fail_end_ptr); + if (private_data_size > SLJIT_MAX_LOCAL_SIZE) { SLJIT_FREE(common->private_data_ptrs, allocator_data); @@ -10260,7 +11030,7 @@ if (private_data_size > SLJIT_MAX_LOCAL_SIZE) if (common->has_then) { - common->then_offsets = (sljit_ub *)(common->private_data_ptrs + total_length); + common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length); memset(common->then_offsets, 0, total_length); set_then_offsets(common, common->start, NULL); } @@ -10287,11 +11057,15 @@ OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack)); -OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match)); +OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match)); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base)); OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit)); +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); OP1(SLJIT_MOV, 
SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0); +if (common->fast_fail_start_ptr < common->fast_fail_end_ptr) + reset_fast_fail(common); + if (mode == PCRE2_JIT_PARTIAL_SOFT) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1); if (common->mark_ptr != 0) @@ -10302,19 +11076,19 @@ if (common->control_head_ptr != 0) /* Main part of the matching */ if ((re->overall_options & PCRE2_ANCHORED) == 0) { - mainloop_label = mainloop_entry(common, (re->flags & PCRE2_HASCRORLF) != 0, (re->overall_options & PCRE2_FIRSTLINE) != 0); + mainloop_label = mainloop_entry(common, (re->flags & PCRE2_HASCRORLF) != 0, re->overall_options); continue_match_label = LABEL(); /* Forward search if possible. */ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) { - if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common, (re->overall_options & PCRE2_FIRSTLINE) != 0)) + if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common)) ; else if ((re->flags & PCRE2_FIRSTSET) != 0) - fast_forward_first_char(common, (PCRE2_UCHAR)(re->first_codeunit), (re->flags & PCRE2_FIRSTCASELESS) != 0, (re->overall_options & PCRE2_FIRSTLINE) != 0); + fast_forward_first_char(common, (PCRE2_UCHAR)(re->first_codeunit), (re->flags & PCRE2_FIRSTCASELESS) != 0); else if ((re->flags & PCRE2_STARTLINE) != 0) - fast_forward_newline(common, (re->overall_options & PCRE2_FIRSTLINE) != 0); + fast_forward_newline(common); else if ((re->flags & PCRE2_FIRSTMAPSET) != 0) - fast_forward_start_bits(common, re->start_bitmap, (re->overall_options & PCRE2_FIRSTLINE) != 0); + fast_forward_start_bits(common, re->start_bitmap); } } else @@ -10335,6 +11109,8 @@ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0); OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH); if (common->capture_last_ptr != 0) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0); +if (common->fast_forward_bc_ptr != NULL) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 
PRIVATE_DATA(common->fast_forward_bc_ptr + 1), STR_PTR, 0); if (common->start_ptr != OVECTOR(0)) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0); @@ -10413,29 +11189,34 @@ if (mode == PCRE2_JIT_PARTIAL_SOFT) } /* Check we have remaining characters. */ -if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_FIRSTLINE) != 0) +if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0) { - SLJIT_ASSERT(common->first_line_end != 0); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); } -OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr); +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), + (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1)) : common->start_ptr); if ((re->overall_options & PCRE2_ANCHORED) == 0) { if (common->ff_newline_shortcut != NULL) { + /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */ if ((re->overall_options & PCRE2_FIRSTLINE) == 0) - CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut); - /* There cannot be more newlines here. */ + { + if (common->match_end_ptr != 0) + { + OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); + OP1(SLJIT_MOV, STR_END, 0, TMP1, 0); + CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut); + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); + } + else + CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut); + } } else - { - if ((re->overall_options & PCRE2_FIRSTLINE) == 0) - CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop_label); - else - CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, mainloop_label); - } + CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label); } /* No more remaining characters. 
*/ @@ -10454,7 +11235,7 @@ if (common->might_be_empty) { JUMPHERE(empty_match); OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV_UI, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options)); + OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options)); OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY); JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label); OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART); @@ -10464,6 +11245,9 @@ if (common->might_be_empty) JUMPTO(SLJIT_JUMP, empty_match_backtrack_label); } +common->fast_forward_bc_ptr = NULL; +common->fast_fail_start_ptr = 0; +common->fast_fail_end_ptr = 0; common->currententry = common->entries; common->local_exit = TRUE; quit_label = common->quit_label; diff --git a/pcre2/src/pcre2_jit_match.c b/pcre2/src/pcre2_jit_match.c index d8d941e46..a323971ff 100644 --- a/pcre2/src/pcre2_jit_match.c +++ b/pcre2/src/pcre2_jit_match.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2014 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -46,7 +46,7 @@ POSSIBILITY OF SUCH DAMAGE. 
static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, jit_function executable_func) { -sljit_ub local_space[MACHINE_STACK_SIZE]; +sljit_u8 local_space[MACHINE_STACK_SIZE]; struct sljit_stack local_stack; local_stack.top = (sljit_sw)&local_space; @@ -129,10 +129,12 @@ arguments.match_data = match_data; arguments.startchar_ptr = subject; arguments.mark_ptr = NULL; arguments.options = options; + if (mcontext != NULL) { arguments.callout = mcontext->callout; arguments.callout_data = mcontext->callout_data; + arguments.offset_limit = mcontext->offset_limit; arguments.limit_match = (mcontext->match_limit < re->limit_match)? mcontext->match_limit : re->limit_match; if (mcontext->jit_callback != NULL) @@ -144,6 +146,7 @@ else { arguments.callout = NULL; arguments.callout_data = NULL; + arguments.offset_limit = PCRE2_UNSET; arguments.limit_match = (MATCH_LIMIT < re->limit_match)? MATCH_LIMIT : re->limit_match; jit_stack = NULL; diff --git a/pcre2/src/pcre2_jit_misc.c b/pcre2/src/pcre2_jit_misc.c index f5b51286e..efdb05580 100644 --- a/pcre2/src/pcre2_jit_misc.c +++ b/pcre2/src/pcre2_jit_misc.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2014 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without diff --git a/pcre2/src/pcre2_jit_test.c b/pcre2/src/pcre2_jit_test.c index b076c67d1..705ba181e 100644 --- a/pcre2/src/pcre2_jit_test.c +++ b/pcre2/src/pcre2_jit_test.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2014 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -140,7 +140,6 @@ int main(void) #define F_DIFF 0x080000 #define F_FORCECONV 0x100000 #define F_PROPERTY 0x200000 -#define F_STUDY 0x400000 struct regression_test_case { int compile_options; @@ -188,6 +187,7 @@ static struct regression_test_case regression_test_cases[] = { { CMUP, A, 0, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" }, { CMUP, A, 0, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" }, { CMUP, A, 0, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" }, + { M, A, 0, 0, "[3-57-9]", "5" }, /* Assertions. */ { MU, A, 0, 0, "\\b[^A]", "A_B#" }, @@ -247,13 +247,17 @@ static struct regression_test_case regression_test_cases[] = { { M, A, 0, 0, "a\\z", "aaa" }, { M, A, 0, 0 | F_NOMATCH, "a\\z", "aab" }, - /* Brackets. */ + /* Brackets and alternatives. */ { MU, A, 0, 0, "(ab|bb|cd)", "bacde" }, { MU, A, 0, 0, "(?:ab|a)(bc|c)", "ababc" }, { MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" }, { CMU, A, 0, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" }, { MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" }, { MU, A, 0, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" }, + { MU, A, 0, 0, "\xc7\x82|\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" }, + { MU, A, 0, 0, "=\xc7\x82|#\xc6\x82", "\xf1\x83\x82\x82=\xc7\x82\xc7\x83" }, + { MU, A, 0, 0, "\xc7\x82\xc7\x83|\xc6\x82\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" }, + { MU, A, 0, 0, "\xc6\x82\xc6\x82|\xc7\x83\xc7\x83|\xc8\x84\xc8\x84", "\xf1\x83\x82\x82\xc8\x84\xc8\x84" }, /* Greedy and non-greedy ? operators. 
*/ { MU, A, 0, 0, "(?:a)?a", "laab" }, @@ -323,6 +327,14 @@ static struct regression_test_case regression_test_cases[] = { { CMU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" }, { MU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" }, { MU, A, 0, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" }, + { MU, A, 0, 0, "\\d+123", "987654321,01234" }, + { MU, A, 0, 0, "abcd*|\\w+xy", "aaaaa,abxyz" }, + { MU, A, 0, 0, "(?:abc|((?:amc|\\b\\w*xy)))", "aaaaa,abxyz" }, + { MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.abcd#."}, + { MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.mbcd#."}, + { MU, A, 0, 0, ".[ab]*.", "xx" }, + { MU, A, 0, 0, ".[ab]*a", "xxa" }, + { MU, A, 0, 0, ".[ab]?.", "xx" }, /* Bracket repeats with limit. */ { MU, A, 0, 0, "(?:(ab){2}){5}M", "abababababababababababM" }, @@ -679,6 +691,8 @@ static struct regression_test_case regression_test_cases[] = { { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 1, ".", "\r\n" }, { PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_LF, 0, 0 | F_NOMATCH, "ab.", "ab" }, { MU | PCRE2_FIRSTLINE, A, 0, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" }, + { PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_ANY, 0, 0, "....a", "012\n0a" }, + { MU | PCRE2_FIRSTLINE, A, 0, 0, "[aC]", "a" }, /* Recurse. 
*/ { MU, A, 0, 0, "(a)(?1)", "aa" }, @@ -765,11 +779,11 @@ static struct regression_test_case regression_test_cases[] = { { MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" }, { MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" }, { MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" }, - { MU, A, 0, 0 | F_NOMATCH | F_STUDY, "(a(*:aa)){0}(?:b(?1)b)+", "ba" }, + { MU, A, 0, 0 | F_NOMATCH, "(a(*:aa)){0}(?:b(?1)b)+", "ba" }, { MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" }, { MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" }, - { MU, A, 0, 0 | F_NOMATCH | F_STUDY, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" }, - { MU, A, 0, 0 | F_NOMATCH | F_STUDY, "(*:mark)m", "a" }, + { MU, A, 0, 0 | F_NOMATCH, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" }, + { MU, A, 0, 0 | F_NOMATCH, "(*:mark)m", "a" }, /* (*COMMIT) verb. */ { MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" }, @@ -813,6 +827,9 @@ static struct regression_test_case regression_test_cases[] = { /* (*SKIP) verb. */ { MU, A, 0, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" }, + { MU, A, 0, 0, "(\\w+(*SKIP)#)", "abcd,xyz#," }, + { MU, A, 0, 0, "\\w+(*SKIP)#|mm", "abcd,xyz#," }, + { MU, A, 0, 0 | F_NOMATCH, "b+(?<=(*SKIP)#c)|b+", "#bbb" }, /* (*THEN) verb. 
*/ { MU, A, 0, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" }, @@ -1516,10 +1533,10 @@ static int regression_tests(void) is_successful = 0; } #endif -#if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_16 - if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector16_1[i] || ovector16_1[i] != ovector16_2[i]) { - printf("\n16 and 16 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n", - i, ovector16_1[i], ovector16_2[i], ovector16_1[i], ovector16_2[i], +#if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32 + if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector32_1[i] || ovector16_1[i] != ovector32_2[i]) { + printf("\n16 and 32 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n", + i, ovector16_1[i], ovector16_2[i], ovector32_1[i], ovector32_2[i], total, current->pattern, current->input); is_successful = 0; } diff --git a/pcre2/src/pcre2_maketables.c b/pcre2/src/pcre2_maketables.c index ca68bca2a..2c7ae84d8 100644 --- a/pcre2/src/pcre2_maketables.c +++ b/pcre2/src/pcre2_maketables.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2014 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without diff --git a/pcre2/src/pcre2_match.c b/pcre2/src/pcre2_match.c index d3d5c1dfa..78a9bacbc 100644 --- a/pcre2/src/pcre2_match.c +++ b/pcre2/src/pcre2_match.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2014 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -55,7 +55,7 @@ POSSIBILITY OF SUCH DAMAGE. #define PUBLIC_MATCH_OPTIONS \ (PCRE2_ANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \ PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \ - PCRE2_PARTIAL_SOFT) + PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT) #define PUBLIC_JIT_MATCH_OPTIONS \ (PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\ @@ -142,14 +142,14 @@ Returns: = 0 sucessful match; number of code units matched is set */ static int -match_ref(PCRE2_SIZE offset, PCRE2_SIZE offset_top, register PCRE2_SPTR eptr, +match_ref(PCRE2_SIZE offset, PCRE2_SIZE offset_top, PCRE2_SPTR eptr, match_block *mb, BOOL caseless, PCRE2_SIZE *lengthptr) { #if defined SUPPORT_UNICODE BOOL utf = (mb->poptions & PCRE2_UTF) != 0; #endif -register PCRE2_SPTR p; +PCRE2_SPTR p; PCRE2_SIZE length; PCRE2_SPTR eptr_start = eptr; @@ -194,7 +194,7 @@ if (caseless) GETCHARINC(c, eptr); GETCHARINC(d, p); ur = GET_UCD(d); - if (c != d && c != d + ur->other_case) + if (c != d && c != (uint32_t)((int)d + ur->other_case)) { const uint32_t *pp = PRIV(ucd_caseless_sets) + ur->caseset; for (;;) @@ -211,7 +211,7 @@ if (caseless) /* Not in UTF mode */ { - while (length-- > 0) + for (; length > 0; length--) { uint32_t cc, cp; if (eptr >= mb->end_subject) return 1; /* Partial match */ @@ -226,11 +226,11 @@ if (caseless) } /* In the caseful case, we can just compare the code units, whether or not we -are in UT mode. */ +are in UTF mode. 
*/ else { - while (length-- > 0) + for (; length > 0; length--) { if (eptr >= mb->end_subject) return 1; /* Partial match */ if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1; /*No match */ @@ -296,7 +296,6 @@ enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10, argument of RMATCH isn't actually used in this definition. */ #ifndef HEAP_MATCH_RECURSE -#define REGISTER register #define RMATCH(ra,rb,rc,rd,re,rw) \ rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1) #define RRETURN(ra) return ra @@ -306,8 +305,6 @@ argument of RMATCH isn't actually used in this definition. */ the "rd" argument of RMATCH isn't actually used in this definition. It's the mb argument of match(), which never changes. */ -#define REGISTER - #define RMATCH(ra,rb,rc,rd,re,rw)\ {\ heapframe *newframe = frame->Xnextframe;\ @@ -425,7 +422,7 @@ to save the ovector while calling match() to process the pattern recursion. */ op_recurse_ovecsave(). */ static int -match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart, +match(PCRE2_SPTR eptr, PCRE2_SPTR ecode, PCRE2_SPTR mstart, PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb, uint32_t rdepth); @@ -465,14 +462,14 @@ Returns: a match() return code */ static int -#ifdef __GNUC__ +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) __attribute__ ((noinline)) #endif -op_recurse_ovecsave(REGISTER PCRE2_SPTR eptr, PCRE2_SPTR callpat, +op_recurse_ovecsave(PCRE2_SPTR eptr, PCRE2_SPTR callpat, PCRE2_SPTR mstart, PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb, uint32_t rdepth) { -register int rrc; +int rrc; BOOL cbegroup = *callpat >= OP_SBRA; recursion_info *new_recursive = mb->recursive; PCRE2_SIZE ovecsave[OP_RECURSE_STACK_SAVE_MAX]; @@ -576,20 +573,19 @@ Returns: MATCH_MATCH if matched ) these values are >= 0 */ static int -match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart, +match(PCRE2_SPTR eptr, PCRE2_SPTR ecode, PCRE2_SPTR mstart, PCRE2_SIZE offset_top, match_block *mb, eptrblock 
*eptrb, uint32_t rdepth) { /* These variables do not need to be preserved over recursion in this function, so they can be ordinary variables in all cases. Mark some of them with "register" because they are used a lot in loops. */ -register int rrc; /* Returns from recursive calls */ -register int i; /* Used for loops not involving calls to RMATCH() */ -register uint32_t c; /* Character values not kept over RMATCH() calls */ -register BOOL utf; /* Local copy of UTF flag for speed */ +int rrc; /* Returns from recursive calls */ +int i; /* Used for loops not involving calls to RMATCH() */ +uint32_t c; /* Character values not kept over RMATCH() calls */ +BOOL utf; /* Local copy of UTF flag for speed */ BOOL minimize, possessive; /* Quantifier options */ -BOOL caseless; int condcode; /* When recursion is not being used, all "local" variables that have to be @@ -727,6 +723,7 @@ still need to be preserved over recursive calls of match(). These macros define the alternative names that are used. */ #define allow_zero cur_is_word +#define caseless cur_is_word #define cbegroup condition #define code_offset codelink #define condassert condition @@ -1319,7 +1316,7 @@ for (;;) { pcre2_callout_block cb; cb.version = 1; - cb.capture_top = offset_top/2; + cb.capture_top = (uint32_t)offset_top/2; cb.capture_last = mb->capture_last & CAPLMASK; cb.offset_vector = mb->ovector; cb.mark = mb->nomatch_mark; @@ -1503,8 +1500,8 @@ for (;;) if (offset >= offset_top) { - register PCRE2_SIZE *iptr = mb->ovector + offset_top; - register PCRE2_SIZE *iend = mb->ovector + offset; + PCRE2_SIZE *iptr = mb->ovector + offset_top; + PCRE2_SIZE *iend = mb->ovector + offset; while (iptr < iend) *iptr++ = PCRE2_UNSET; offset_top = offset + 2; } @@ -1704,14 +1701,14 @@ for (;;) back a number of characters, not bytes. 
*/ case OP_REVERSE: + i = GET(ecode, 1); #ifdef SUPPORT_UNICODE if (utf) { - i = GET(ecode, 1); while (i-- > 0) { + if (eptr <= mb->start_subject) RRETURN(MATCH_NOMATCH); eptr--; - if (eptr < mb->start_subject) RRETURN(MATCH_NOMATCH); BACKCHAR(eptr); } } @@ -1721,8 +1718,8 @@ for (;;) /* No UTF-8 support, or not in UTF-8 mode: count is byte count */ { - eptr -= GET(ecode, 1); - if (eptr < mb->start_subject) RRETURN(MATCH_NOMATCH); + if (i > eptr - mb->start_subject) RRETURN(MATCH_NOMATCH); + eptr -= i; } /* Save the earliest consulted character, then skip to next op code */ @@ -1746,7 +1743,7 @@ for (;;) pcre2_callout_block cb; cb.version = 1; cb.callout_number = ecode[LINK_SIZE + 1]; - cb.capture_top = offset_top/2; + cb.capture_top = (uint32_t)offset_top/2; cb.capture_last = mb->capture_last & CAPLMASK; cb.offset_vector = mb->ovector; cb.mark = mb->nomatch_mark; @@ -2052,8 +2049,8 @@ for (;;) if (offset > offset_top) { - register PCRE2_SIZE *iptr = mb->ovector + offset_top; - register PCRE2_SIZE *iend = mb->ovector + offset; + PCRE2_SIZE *iptr = mb->ovector + offset_top; + PCRE2_SIZE *iend = mb->ovector + offset; while (iptr < iend) *iptr++ = PCRE2_UNSET; } @@ -2382,7 +2379,7 @@ for (;;) case OP_ANY: if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); if (mb->partial != 0 && - eptr + 1 >= mb->end_subject && + eptr == mb->end_subject - 1 && NLBLOCK->nltype == NLTYPE_FIXED && NLBLOCK->nllen == 2 && UCHAR21TEST(eptr) == NLBLOCK->nl[0]) @@ -2408,8 +2405,9 @@ for (;;) ecode++; break; - /* Match a single byte, even in UTF-8 mode. This opcode really does match - any byte, even newline, independent of the setting of PCRE2_DOTALL. */ + /* Match a single code unit, even in UTF-8 mode. This opcode really does + match any code unit, even newline. (It really should be called ANYCODEUNIT, + of course - the byte name is from pre-16 bit days.) 
*/ case OP_ANYBYTE: if (eptr >= mb->end_subject) /* DO NOT merge the eptr++ here; it must */ @@ -2848,9 +2846,7 @@ for (;;) continue; } - /* First, ensure the minimum number of matches are present. We get back - the length of the reference string explicitly rather than passing the - address of eptr, so that eptr can be a register variable. */ + /* First, ensure the minimum number of matches are present. */ for (i = 1; i <= min; i++) { @@ -3342,7 +3338,10 @@ for (;;) CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */ RRETURN(MATCH_NOMATCH); } - while (length-- > 0) if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH); + for (; length > 0; length--) + { + if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH); + } } else #endif @@ -3758,7 +3757,7 @@ for (;;) #ifdef SUPPORT_UNICODE if (utf) { - register uint32_t ch, och; + uint32_t ch, och; ecode++; GETCHARINC(ch, ecode); @@ -3780,7 +3779,7 @@ for (;;) else #endif /* SUPPORT_UNICODE */ { - register uint32_t ch = ecode[1]; + uint32_t ch = ecode[1]; c = *eptr++; if (ch == c || (op == OP_NOTI && TABLE_GET(ch, mb->fcc, ch) == c)) RRETURN(MATCH_NOMATCH); @@ -3886,7 +3885,7 @@ for (;;) #ifdef SUPPORT_UNICODE if (utf) { - register uint32_t d; + uint32_t d; for (i = 1; i <= min; i++) { if (eptr >= mb->end_subject) @@ -3921,7 +3920,7 @@ for (;;) #ifdef SUPPORT_UNICODE if (utf) { - register uint32_t d; + uint32_t d; for (fi = min;; fi++) { RMATCH(eptr, ecode, offset_top, mb, eptrb, RM28); @@ -3966,7 +3965,7 @@ for (;;) #ifdef SUPPORT_UNICODE if (utf) { - register uint32_t d; + uint32_t d; for (i = min; i < max; i++) { int len = 1; @@ -4027,7 +4026,7 @@ for (;;) #ifdef SUPPORT_UNICODE if (utf) { - register uint32_t d; + uint32_t d; for (i = 1; i <= min; i++) { if (eptr >= mb->end_subject) @@ -4061,7 +4060,7 @@ for (;;) #ifdef SUPPORT_UNICODE if (utf) { - register uint32_t d; + uint32_t d; for (fi = min;; fi++) { RMATCH(eptr, ecode, offset_top, mb, eptrb, RM32); @@ -4105,7 +4104,7 @@ for (;;) #ifdef SUPPORT_UNICODE if (utf) { - 
register uint32_t d; + uint32_t d; for (i = min; i < max; i++) { int len = 1; @@ -6459,6 +6458,7 @@ PCRE2_UCHAR first_cu2 = 0; PCRE2_UCHAR req_cu = 0; PCRE2_UCHAR req_cu2 = 0; +PCRE2_SPTR bumpalong_limit; PCRE2_SPTR end_subject; PCRE2_SPTR start_match = subject + start_offset; PCRE2_SPTR req_cu_ptr = start_match - 1; @@ -6482,6 +6482,7 @@ mb->match_frames_base = &frame_zero; subject string. */ if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject); +end_subject = subject + length; /* Plausibility checks */ @@ -6513,7 +6514,7 @@ occur. */ #define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET) #define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART) -options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO)); +options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1))); #undef FF #undef OO @@ -6533,21 +6534,66 @@ mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 : /* Check a UTF string for validity if required. For 8-bit and 16-bit strings, we must also check that a starting offset does not point into the middle of a -multiunit character. */ +multiunit character. We check only the portion of the subject that is going to +be inspected during matching - from the offset minus the maximum back reference +to the given length. This saves time when a small part of a large subject is +being matched by the use of a starting offset. Note that the maximum lookbehind +is a number of characters, not code units. 
*/ #ifdef SUPPORT_UNICODE if (utf && (options & PCRE2_NO_UTF_CHECK) == 0) { - match_data->rc = PRIV(valid_utf)(subject, length, &(match_data->startchar)); - if (match_data->rc != 0) return match_data->rc; + PCRE2_SPTR check_subject = start_match; /* start_match includes offset */ + + if (start_offset > 0) + { #if PCRE2_CODE_UNIT_WIDTH != 32 - if (start_offset > 0 && start_offset < length && - NOT_FIRSTCHAR(subject[start_offset])) - return PCRE2_ERROR_BADUTFOFFSET; + unsigned int i; + if (start_match < end_subject && NOT_FIRSTCU(*start_match)) + return PCRE2_ERROR_BADUTFOFFSET; + for (i = re->max_lookbehind; i > 0 && check_subject > subject; i--) + { + check_subject--; + while (check_subject > subject && +#if PCRE2_CODE_UNIT_WIDTH == 8 + (*check_subject & 0xc0) == 0x80) +#else /* 16-bit */ + (*check_subject & 0xfc00) == 0xdc00) +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + check_subject--; + } +#else + /* In the 32-bit library, one code unit equals one character. However, + we cannot just subtract the lookbehind and then compare pointers, because + a very large lookbehind could create an invalid pointer. */ + + if (start_offset >= re->max_lookbehind) + check_subject -= re->max_lookbehind; + else + check_subject = subject; #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ + } + + /* Validate the relevant portion of the subject. After an error, adjust the + offset to be an absolute offset in the whole string. */ + + match_data->rc = PRIV(valid_utf)(check_subject, + length - (check_subject - subject), &(match_data->startchar)); + if (match_data->rc != 0) + { + match_data->startchar += check_subject - subject; + return match_data->rc; + } } #endif /* SUPPORT_UNICODE */ +/* It is an error to set an offset limit without setting the flag at compile +time. 
*/ + +if (mcontext->offset_limit != PCRE2_UNSET && + (re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0) + return PCRE2_ERROR_BADOFFSETLIMIT; + /* If the pattern was successfully studied with JIT support, run the JIT executable instead of the rest of this function. Most options must be set at compile time for the JIT code to be usable. Fallback to the normal code path if @@ -6568,30 +6614,21 @@ if (re->executable_jit != NULL && (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0) anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0; firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0; startline = (re->flags & PCRE2_STARTLINE) != 0; +bumpalong_limit = (mcontext->offset_limit == PCRE2_UNSET)? + end_subject : subject + mcontext->offset_limit; /* Fill in the fields in the match block. */ -if (mcontext == NULL) - { - mb->callout = NULL; - mb->memctl = re->memctl; +mb->callout = mcontext->callout; +mb->callout_data = mcontext->callout_data; +mb->memctl = mcontext->memctl; #ifdef HEAP_MATCH_RECURSE - mb->stack_memctl = re->memctl; +mb->stack_memctl = mcontext->stack_memctl; #endif - } -else - { - mb->callout = mcontext->callout; - mb->callout_data = mcontext->callout_data; - mb->memctl = mcontext->memctl; -#ifdef HEAP_MATCH_RECURSE - mb->stack_memctl = mcontext->stack_memctl; -#endif - } mb->start_subject = subject; mb->start_offset = start_offset; -mb->end_subject = end_subject = mb->start_subject + length; +mb->end_subject = end_subject; mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0; mb->moptions = options; /* Match options */ @@ -6689,8 +6726,8 @@ in case they inspect these fields. 
*/ if (ocount > 0) { - register PCRE2_SIZE *iptr = mb->ovector + ocount; - register PCRE2_SIZE *iend = iptr - re->top_bracket; + PCRE2_SIZE *iptr = mb->ovector + ocount; + PCRE2_SIZE *iend = iptr - re->top_bracket; if (iend < mb->ovector + 2) iend = mb->ovector + 2; while (--iptr >= iend) *iptr = PCRE2_UNSET; mb->ovector[0] = mb->ovector[1] = PCRE2_UNSET; @@ -6783,7 +6820,8 @@ for(;;) end_subject = t; } - /* Advance to a unique first code unit if there is one. */ + /* Advance to a unique first code unit if there is one. In 8-bit mode, the + use of memchr() gives a big speed up. */ if (has_first_cu) { @@ -6793,8 +6831,15 @@ for(;;) (smc = UCHAR21TEST(start_match)) != first_cu && smc != first_cu2) start_match++; else + { +#if PCRE2_CODE_UNIT_WIDTH != 8 while (start_match < end_subject && UCHAR21TEST(start_match) != first_cu) start_match++; +#else + start_match = memchr(start_match, first_cu, end_subject - start_match); + if (start_match == NULL) start_match = end_subject; +#endif + } } /* Or to just after a linebreak for a multiline match */ @@ -6838,7 +6883,7 @@ for(;;) { while (start_match < end_subject) { - register uint32_t c = UCHAR21TEST(start_match); + uint32_t c = UCHAR21TEST(start_match); #if PCRE2_CODE_UNIT_WIDTH != 8 if (c > 255) c = 255; #endif @@ -6882,7 +6927,7 @@ for(;;) if (has_req_cu && end_subject - start_match < REQ_CU_MAX) { - register PCRE2_SPTR p = start_match + (has_first_cu? 1:0); + PCRE2_SPTR p = start_match + (has_first_cu? 1:0); /* We don't need to repeat the search if we haven't yet reached the place we found it at last time. */ @@ -6893,7 +6938,7 @@ for(;;) { while (p < end_subject) { - register uint32_t pp = UCHAR21INCTEST(p); + uint32_t pp = UCHAR21INCTEST(p); if (pp == req_cu || pp == req_cu2) { p--; break; } } } @@ -6926,6 +6971,14 @@ for(;;) /* ------------ End of start of match optimizations ------------ */ + /* Give no match if we have passed the bumpalong limit. 
*/ + + if (start_match > bumpalong_limit) + { + rc = MATCH_NOMATCH; + break; + } + /* OK, we can now run the match. If "hitend" is set afterwards, remember the first starting point for which a partial match was found. */ @@ -7044,7 +7097,7 @@ for(;;) (2) The pattern is anchored or the match was failed by (*COMMIT); -(3) We are past the end of the subject; +(3) We are past the end of the subject or the bumpalong limit; (4) PCRE2_FIRSTLINE is set and we have failed to match at a newline, because this option requests that a match occur at or before the first newline in @@ -7104,7 +7157,7 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT) too many to fit into the ovector. */ match_data->rc = ((mb->capture_last & OVFLBIT) != 0)? - 0 : mb->end_offset_top/2; + 0 : (int)mb->end_offset_top/2; /* If there is space in the offset vector, set any pairs that follow the highest-numbered captured string but are less than the number of capturing @@ -7118,7 +7171,7 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT) if (mb->end_offset_top/2 <= re->top_bracket) { - register PCRE2_SIZE *iptr, *iend; + PCRE2_SIZE *iptr, *iend; int resetcount = re->top_bracket + 1; if (resetcount > match_data->oveccount) resetcount = match_data->oveccount; iptr = match_data->ovector + mb->end_offset_top; diff --git a/pcre2/src/pcre2_match_data.c b/pcre2/src/pcre2_match_data.c index 1f2fb1536..85ac99834 100644 --- a/pcre2/src/pcre2_match_data.c +++ b/pcre2/src/pcre2_match_data.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2014 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without diff --git a/pcre2/src/pcre2_newline.c b/pcre2/src/pcre2_newline.c index 7f482f245..6e9366db9 100644 --- a/pcre2/src/pcre2_newline.c +++ b/pcre2/src/pcre2_newline.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2014 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without diff --git a/pcre2/src/pcre2_ord2utf.c b/pcre2/src/pcre2_ord2utf.c index d268e94ee..140373099 100644 --- a/pcre2/src/pcre2_ord2utf.c +++ b/pcre2/src/pcre2_ord2utf.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2014 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -83,7 +83,7 @@ PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer) /* Convert to UTF-8 */ #if PCRE2_CODE_UNIT_WIDTH == 8 -register int i, j; +int i, j; for (i = 0; i < PRIV(utf8_table1_size); i++) if ((int)cvalue <= PRIV(utf8_table1)[i]) break; buffer += i; diff --git a/pcre2/src/pcre2_pattern_info.c b/pcre2/src/pcre2_pattern_info.c index a0e734c9b..5b32a905b 100644 --- a/pcre2/src/pcre2_pattern_info.c +++ b/pcre2/src/pcre2_pattern_info.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2014 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -77,6 +77,7 @@ if (where == NULL) /* Requests field length */ case PCRE2_INFO_CAPTURECOUNT: case PCRE2_INFO_FIRSTCODETYPE: case PCRE2_INFO_FIRSTCODEUNIT: + case PCRE2_INFO_HASBACKSLASHC: case PCRE2_INFO_HASCRORLF: case PCRE2_INFO_JCHANGED: case PCRE2_INFO_LASTCODETYPE: @@ -151,6 +152,10 @@ switch(what) &(re->start_bitmap[0]) : NULL; break; + case PCRE2_INFO_HASBACKSLASHC: + *((uint32_t *)where) = (re->flags & PCRE2_HASBKC) != 0; + break; + case PCRE2_INFO_HASCRORLF: *((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0; break; diff --git a/pcre2/src/pcre2_printint.c b/pcre2/src/pcre2_printint.c index 2cd01ab63..620749764 100644 --- a/pcre2/src/pcre2_printint.c +++ b/pcre2/src/pcre2_printint.c @@ -7,7 +7,7 @@ and semantics are as close as possible 
to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2014 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -58,12 +58,13 @@ static const char *OP_names[] = { OP_NAME_LIST }; /* The functions and tables herein must all have mode-dependent names. */ -#define OP_lengths PCRE2_SUFFIX(OP_lengths_) -#define get_ucpname PCRE2_SUFFIX(get_ucpname_) -#define pcre2_printint PCRE2_SUFFIX(pcre2_printint_) -#define print_char PCRE2_SUFFIX(print_char_) -#define print_custring PCRE2_SUFFIX(print_custring_) -#define print_prop PCRE2_SUFFIX(print_prop_) +#define OP_lengths PCRE2_SUFFIX(OP_lengths_) +#define get_ucpname PCRE2_SUFFIX(get_ucpname_) +#define pcre2_printint PCRE2_SUFFIX(pcre2_printint_) +#define print_char PCRE2_SUFFIX(print_char_) +#define print_custring PCRE2_SUFFIX(print_custring_) +#define print_custring_bylen PCRE2_SUFFIX(print_custring_bylen_) +#define print_prop PCRE2_SUFFIX(print_prop_) /* Table of sizes for the fixed-length opcodes. It's defined in a macro so that the definition is next to the definition of the opcodes in pcre2_internal.h. @@ -188,12 +189,14 @@ return 0; * Print string as a list of code units * *************************************************/ -/* This takes no account of UTF as it always prints each individual code unit. -The string is zero-terminated. +/* These take no account of UTF as they always print each individual code unit. +The string is zero-terminated for print_custring(); the length is given for +print_custring_bylen(). 
Arguments: f file to write to ptr point to the string + len length for print_custring_bylen() Returns: nothing */ @@ -203,7 +206,17 @@ print_custring(FILE *f, PCRE2_SPTR ptr) { while (*ptr != '\0') { - register uint32_t c = *ptr++; + uint32_t c = *ptr++; + if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c); + } +} + +static void +print_custring_bylen(FILE *f, PCRE2_SPTR ptr, PCRE2_UCHAR len) +{ +for (; len > 0; len--) + { + uint32_t c = *ptr++; if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c); } } @@ -603,7 +616,7 @@ for(;;) c = code[1 + 4*LINK_SIZE]; fprintf(f, " %s %c", OP_names[*code], c); extra = GET(code, 1 + 2*LINK_SIZE); - print_custring(f, code + 2 + 4*LINK_SIZE); + print_custring_bylen(f, code + 2 + 4*LINK_SIZE, extra - 3 - 4*LINK_SIZE); for (i = 0; PRIV(callout_start_delims)[i] != 0; i++) if (c == PRIV(callout_start_delims)[i]) { @@ -791,7 +804,7 @@ for(;;) case OP_SKIP_ARG: case OP_THEN_ARG: fprintf(f, " %s ", OP_names[*code]); - print_custring(f, code + 2); + print_custring_bylen(f, code + 2, code[1]); extra += code[1]; break; diff --git a/pcre2/src/pcre2_serialize.c b/pcre2/src/pcre2_serialize.c index 828b9461e..0af26d8fc 100644 --- a/pcre2/src/pcre2_serialize.c +++ b/pcre2/src/pcre2_serialize.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2015 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -104,7 +104,7 @@ for (i = 0; i < number_of_codes; i++) return PCRE2_ERROR_MIXEDTABLES; total_size += re->blocksize; } - + /* Initialize the byte stream. 
*/ bytes = memctl->malloc(total_size + sizeof(pcre2_memctl), memctl->memory_data); if (bytes == NULL) return PCRE2_ERROR_NOMEMORY; @@ -158,6 +158,7 @@ int32_t i, j; if (data == NULL || codes == NULL) return PCRE2_ERROR_NULL; if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA; +if (data->number_of_codes <= 0) return PCRE2_ERROR_BADSERIALIZEDDATA; if (data->magic != SERIALIZED_DATA_MAGIC) return PCRE2_ERROR_BADMAGIC; if (data->version != SERIALIZED_DATA_VERSION) return PCRE2_ERROR_BADMODE; if (data->config != SERIALIZED_DATA_CONFIG) return PCRE2_ERROR_BADMODE; @@ -167,7 +168,7 @@ if (number_of_codes > data->number_of_codes) src_bytes = bytes + sizeof(pcre2_serialized_data); -/* Decode tables. The reference count for the tables is stored immediately +/* Decode tables. The reference count for the tables is stored immediately following them. */ tables = memctl->malloc(tables_length + sizeof(PCRE2_SIZE), memctl->memory_data); @@ -179,8 +180,8 @@ src_bytes += tables_length; /* Decode the byte stream. We must not try to read the size from the compiled code block in the stream, because it might be unaligned, which causes errors on -hardware such as Sparc-64 that doesn't like unaligned memory accesses. The type -of the blocksize field is given its own name to ensure that it is the same here +hardware such as Sparc-64 that doesn't like unaligned memory accesses. The type +of the blocksize field is given its own name to ensure that it is the same here as in the block. */ for (i = 0; i < number_of_codes; i++) @@ -188,10 +189,12 @@ for (i = 0; i < number_of_codes; i++) CODE_BLOCKSIZE_TYPE blocksize; memcpy(&blocksize, src_bytes + offsetof(pcre2_real_code, blocksize), sizeof(CODE_BLOCKSIZE_TYPE)); + if (blocksize <= sizeof(pcre2_real_code)) + return PCRE2_ERROR_BADSERIALIZEDDATA; /* The allocator provided by gcontext replaces the original one. 
*/ - - dst_re = (pcre2_real_code *)PRIV(memctl_malloc)(blocksize, + + dst_re = (pcre2_real_code *)PRIV(memctl_malloc)(blocksize, (pcre2_memctl *)gcontext); if (dst_re == NULL) { @@ -205,12 +208,16 @@ for (i = 0; i < number_of_codes; i++) } /* The new allocator must be preserved. */ - + memcpy(((uint8_t *)dst_re) + sizeof(pcre2_memctl), src_bytes + sizeof(pcre2_memctl), blocksize - sizeof(pcre2_memctl)); + if (dst_re->magic_number != MAGIC_NUMBER || + dst_re->name_entry_size > MAX_NAME_SIZE + IMM2_SIZE + 1 || + dst_re->name_count > MAX_NAME_COUNT) + return PCRE2_ERROR_BADSERIALIZEDDATA; /* At the moment only one table is supported. */ - + dst_re->tables = tables; dst_re->executable_jit = NULL; dst_re->flags |= PCRE2_DEREF_TABLES; @@ -252,7 +259,7 @@ if (bytes != NULL) { pcre2_memctl *memctl = (pcre2_memctl *)(bytes - sizeof(pcre2_memctl)); memctl->free(memctl, memctl->memory_data); - } + } } /* End of pcre2_serialize.c */ diff --git a/pcre2/src/pcre2_string_utils.c b/pcre2/src/pcre2_string_utils.c index 888620e19..2a1f28262 100644 --- a/pcre2/src/pcre2_string_utils.c +++ b/pcre2/src/pcre2_string_utils.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2014 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -121,7 +121,7 @@ int PRIV(strncmp)(PCRE2_SPTR str1, PCRE2_SPTR str2, size_t len) { PCRE2_UCHAR c1, c2; -while (len-- > 0) +for (; len > 0; len--) { c1 = *str1++; c2 = *str2++; @@ -150,7 +150,7 @@ int PRIV(strncmp_c8)(PCRE2_SPTR str1, const char *str2, size_t len) { PCRE2_UCHAR c1, c2; -while (len-- > 0) +for (; len > 0; len--) { c1 = *str1++; c2 = *str2++; diff --git a/pcre2/src/pcre2_study.c b/pcre2/src/pcre2_study.c index 25d7e5140..5a4d520c0 100644 --- a/pcre2/src/pcre2_study.c +++ b/pcre2/src/pcre2_study.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2014 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -50,6 +50,10 @@ collecting data (e.g. minimum matching length). */ #include "pcre2_internal.h" +/* The maximum remembered capturing brackets minimum. */ + +#define MAX_CACHE_BACKREF 128 + /* Set a bit in the starting code unit bit map. */ #define SET_BIT(c) re->start_bitmap[(c)/8] |= (1 << ((c)&7)) @@ -59,15 +63,23 @@ collecting data (e.g. minimum matching length). */ enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN }; - /************************************************* * Find the minimum subject length for a group * *************************************************/ /* Scan a parenthesized group and compute the minimum length of subject that is needed to match it. 
This is a lower bound; it does not mean there is a -string of that length that matches. In UTF8 mode, the result is in characters -rather than bytes. +string of that length that matches. In UTF mode, the result is in characters +rather than code units. The field in a compiled pattern for storing the minimum +length is 16-bits long (on the grounds that anything longer than that is +pathological), so we give up when we reach that amount. This also means that +integer overflow for really crazy patterns cannot happen. + +Backreference minimum lengths are cached to speed up multiple references. This +function is called only when the highest back reference in the pattern is less +than or equal to MAX_CACHE_BACKREF, which is one less than the size of the +caching vector. The zeroth element contains the number of the highest set +value. Arguments: re compiled pattern block @@ -75,35 +87,58 @@ Arguments: startcode pointer to start of the whole pattern's code utf UTF flag recurses chain of recurse_check to catch mutual recursion + countptr pointer to call count (to catch over complexity) + backref_cache vector for caching back references. 
Returns: the minimum length -1 \C in UTF-8 mode or (*ACCEPT) + or pattern too complicated + or back reference to duplicate name/number -2 internal error (missing capturing bracket) -3 internal error (opcode not listed) */ static int find_minlength(const pcre2_real_code *re, PCRE2_SPTR code, - PCRE2_SPTR startcode, BOOL utf, recurse_check *recurses) + PCRE2_SPTR startcode, BOOL utf, recurse_check *recurses, int *countptr, + int *backref_cache) { int length = -1; +int prev_cap_recno = -1; +int prev_cap_d = 0; +int prev_recurse_recno = -1; +int prev_recurse_d = 0; +uint32_t once_fudge = 0; BOOL had_recurse = FALSE; +BOOL dupcapused = (re->flags & PCRE2_DUPCAPUSED) != 0; recurse_check this_recurse; -register int branchlength = 0; -register PCRE2_UCHAR *cc = (PCRE2_UCHAR *)code + 1 + LINK_SIZE; +int branchlength = 0; +PCRE2_UCHAR *cc = (PCRE2_UCHAR *)code + 1 + LINK_SIZE; -if (*code == OP_CBRA || *code == OP_SCBRA || - *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE; +/* If this is a "could be empty" group, its minimum length is 0. */ -/* Scan along the opcodes for this branch. If we get to the end of the -branch, check the length against that of the other branches. */ +if (*code >= OP_SBRA && *code <= OP_SCOND) return 0; + +/* Skip over capturing bracket number */ + +if (*code == OP_CBRA || *code == OP_CBRAPOS) cc += IMM2_SIZE; + +/* A large and/or complex regex can take too long to process. */ + +if ((*countptr)++ > 1000) return -1; + +/* Scan along the opcodes for this branch. If we get to the end of the branch, +check the length against that of the other branches. If the accumulated length +passes 16-bits, stop. */ for (;;) { - int d, min; + int d, min, recno; PCRE2_UCHAR *cs, *ce; - register PCRE2_UCHAR op = *cc; + PCRE2_UCHAR op = *cc; + + if (branchlength >= UINT16_MAX) return UINT16_MAX; switch (op) { @@ -112,7 +147,8 @@ for (;;) /* If there is only one branch in a condition, the implied branch has zero length, so we don't add anything. 
This covers the DEFINE "condition" - automatically. */ + automatically. If there are two branches we can treat it the same as any + other non-capturing subpattern. */ cs = cc + GET(cc, 1); if (*cs != OP_ALT) @@ -120,23 +156,54 @@ for (;;) cc = cs + 1 + LINK_SIZE; break; } + goto PROCESS_NON_CAPTURE; - /* Otherwise we can fall through and treat it the same as any other - subpattern. */ + /* There's a special case of OP_ONCE, when it is wrapped round an + OP_RECURSE. We'd like to process the latter at this level so that + remembering the value works for repeated cases. So we do nothing, but + set a fudge value to skip over the OP_KET after the recurse. */ + + case OP_ONCE: + if (cc[1+LINK_SIZE] == OP_RECURSE && cc[2*(1+LINK_SIZE)] == OP_KET) + { + once_fudge = 1 + LINK_SIZE; + cc += 1 + LINK_SIZE; + break; + } + /* Fall through */ + + case OP_ONCE_NC: + case OP_BRA: + case OP_SBRA: + case OP_BRAPOS: + case OP_SBRAPOS: + PROCESS_NON_CAPTURE: + d = find_minlength(re, cc, startcode, utf, recurses, countptr, + backref_cache); + if (d < 0) return d; + branchlength += d; + do cc += GET(cc, 1); while (*cc == OP_ALT); + cc += 1 + LINK_SIZE; + break; + + /* To save time for repeated capturing subpatterns, we remember the + length of the previous one. Unfortunately we can't do the same for + the unnumbered ones above. Nor can we do this if (?| is present in the + pattern because captures with the same number are not then identical. 
*/ case OP_CBRA: case OP_SCBRA: - case OP_BRA: - case OP_SBRA: case OP_CBRAPOS: case OP_SCBRAPOS: - case OP_BRAPOS: - case OP_SBRAPOS: - case OP_ONCE: - case OP_ONCE_NC: - d = find_minlength(re, cc, startcode, utf, recurses); - if (d < 0) return d; - branchlength += d; + recno = (int)GET2(cc, 1+LINK_SIZE); + if (dupcapused || recno != prev_cap_recno) + { + prev_cap_recno = recno; + prev_cap_d = find_minlength(re, cc, startcode, utf, recurses, countptr, + backref_cache); + if (prev_cap_d < 0) return prev_cap_d; + } + branchlength += prev_cap_d; do cc += GET(cc, 1); while (*cc == OP_ALT); cc += 1 + LINK_SIZE; break; @@ -388,8 +455,12 @@ for (;;) matches an empty string (by default it causes a matching failure), so in that case we must set the minimum length to zero. */ - case OP_DNREF: /* Duplicate named pattern back reference */ + /* Duplicate named pattern back reference. We cannot reliably find a length + for this if duplicate numbers are present in the pattern. */ + + case OP_DNREF: case OP_DNREFI: + if (dupcapused) return -1; if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0) { int count = GET2(cc, 1+IMM2_SIZE); @@ -399,18 +470,80 @@ for (;;) d = INT_MAX; - /* Scan all groups with the same name */ + /* Scan all groups with the same name; find the shortest. 
*/ while (count-- > 0) { - ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0)); + int dd, i; + recno = GET2(slot, 0); + + if (recno <= backref_cache[0] && backref_cache[recno] >= 0) + dd = backref_cache[recno]; + else + { + ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno); + if (cs == NULL) return -2; + do ce += GET(ce, 1); while (*ce == OP_ALT); + if (cc > cs && cc < ce) /* Simple recursion */ + { + dd = 0; + had_recurse = TRUE; + } + else + { + recurse_check *r = recurses; + for (r = recurses; r != NULL; r = r->prev) + if (r->group == cs) break; + if (r != NULL) /* Mutual recursion */ + { + dd = 0; + had_recurse = TRUE; + } + else + { + this_recurse.prev = recurses; + this_recurse.group = cs; + dd = find_minlength(re, cs, startcode, utf, &this_recurse, + countptr, backref_cache); + if (dd < 0) return dd; + } + } + + backref_cache[recno] = dd; + for (i = backref_cache[0] + 1; i < recno; i++) backref_cache[i] = -1; + backref_cache[0] = recno; + } + + if (dd < d) d = dd; + if (d <= 0) break; /* No point looking at any more */ + slot += re->name_entry_size; + } + } + else d = 0; + cc += 1 + 2*IMM2_SIZE; + goto REPEAT_BACK_REFERENCE; + + /* Single back reference. We cannot find a length for this if duplicate + numbers are present in the pattern. 
*/ + + case OP_REF: + case OP_REFI: + if (dupcapused) return -1; + recno = GET2(cc, 1); + if (recno <= backref_cache[0] && backref_cache[recno] >= 0) + d = backref_cache[recno]; + else + { + int i; + if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0) + { + ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno); if (cs == NULL) return -2; do ce += GET(ce, 1); while (*ce == OP_ALT); if (cc > cs && cc < ce) /* Simple recursion */ { d = 0; had_recurse = TRUE; - break; } else { @@ -420,54 +553,24 @@ for (;;) { d = 0; had_recurse = TRUE; - break; } else { - int dd; this_recurse.prev = recurses; this_recurse.group = cs; - dd = find_minlength(re, cs, startcode, utf, &this_recurse); - if (dd < d) d = dd; + d = find_minlength(re, cs, startcode, utf, &this_recurse, countptr, + backref_cache); + if (d < 0) return d; } } - slot += re->name_entry_size; } - } - else d = 0; - cc += 1 + 2*IMM2_SIZE; - goto REPEAT_BACK_REFERENCE; + else d = 0; - case OP_REF: /* Single back reference */ - case OP_REFI: - if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0) - { - ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1)); - if (cs == NULL) return -2; - do ce += GET(ce, 1); while (*ce == OP_ALT); - if (cc > cs && cc < ce) /* Simple recursion */ - { - d = 0; - had_recurse = TRUE; - } - else - { - recurse_check *r = recurses; - for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break; - if (r != NULL) /* Mutual recursion */ - { - d = 0; - had_recurse = TRUE; - } - else - { - this_recurse.prev = recurses; - this_recurse.group = cs; - d = find_minlength(re, cs, startcode, utf, &this_recurse); - } - } + backref_cache[recno] = d; + for (i = backref_cache[0] + 1; i < recno; i++) backref_cache[i] = -1; + backref_cache[0] = recno; } - else d = 0; + cc += 1 + IMM2_SIZE; /* Handle repeated back references */ @@ -504,28 +607,51 @@ for (;;) break; } - branchlength += min * d; + /* Take care not to overflow: (1) min and d are ints, so check that 
their + product is not greater than INT_MAX. (2) branchlength is limited to + UINT16_MAX (checked at the top of the loop). */ + + if ((d > 0 && (INT_MAX/d) < min) || UINT16_MAX - branchlength < min*d) + branchlength = UINT16_MAX; + else branchlength += min * d; break; + /* Recursion always refers to the first occurrence of a subpattern with a + given number. Therefore, we can always make use of caching, even when the + pattern contains multiple subpatterns with the same number. */ + case OP_RECURSE: cs = ce = (PCRE2_UCHAR *)startcode + GET(cc, 1); - do ce += GET(ce, 1); while (*ce == OP_ALT); - if (cc > cs && cc < ce) /* Simple recursion */ - had_recurse = TRUE; + recno = GET2(cs, 1+LINK_SIZE); + if (recno == prev_recurse_recno) + { + branchlength += prev_recurse_d; + } else { - recurse_check *r = recurses; - for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break; - if (r != NULL) /* Mutual recursion */ + do ce += GET(ce, 1); while (*ce == OP_ALT); + if (cc > cs && cc < ce) /* Simple recursion */ had_recurse = TRUE; else { - this_recurse.prev = recurses; - this_recurse.group = cs; - branchlength += find_minlength(re, cs, startcode, utf, &this_recurse); + recurse_check *r = recurses; + for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break; + if (r != NULL) /* Mutual recursion */ + had_recurse = TRUE; + else + { + this_recurse.prev = recurses; + this_recurse.group = cs; + prev_recurse_d = find_minlength(re, cs, startcode, utf, &this_recurse, + countptr, backref_cache); + if (prev_recurse_d < 0) return prev_recurse_d; + prev_recurse_recno = recno; + branchlength += prev_recurse_d; + } } } - cc += 1 + LINK_SIZE; + cc += 1 + LINK_SIZE + once_fudge; + once_fudge = 0; break; /* Anything else does not or need not match a character. 
We can get the @@ -708,7 +834,7 @@ Returns: nothing static void set_type_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit) { -register uint32_t c; +uint32_t c; for (c = 0; c < table_limit; c++) re->start_bitmap[c] |= re->tables[c+cbits_offset+cbit_type]; #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 @@ -749,7 +875,7 @@ Returns: nothing static void set_nottype_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit) { -register uint32_t c; +uint32_t c; for (c = 0; c < table_limit; c++) re->start_bitmap[c] |= ~(re->tables[c+cbits_offset+cbit_type]); #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 @@ -789,7 +915,7 @@ Returns: SSB_FAIL => Failed to find any starting code units static int set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf) { -register uint32_t c; +uint32_t c; int yield = SSB_DONE; #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 @@ -1368,7 +1494,7 @@ do for (c = 0; c < 16; c++) re->start_bitmap[c] |= classmap[c]; for (c = 128; c < 256; c++) { - if ((classmap[c/8] && (1 << (c&7))) != 0) + if ((classmap[c/8] & (1 << (c&7))) != 0) { int d = (c >> 6) | 0xc0; /* Set bit for this starter */ re->start_bitmap[d/8] |= (1 << (d&7)); /* and then skip on to the */ @@ -1441,6 +1567,7 @@ int PRIV(study)(pcre2_real_code *re) { int min; +int count = 0; PCRE2_UCHAR *code; BOOL utf = (re->overall_options & PCRE2_UTF) != 0; @@ -1461,22 +1588,35 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0 && if (rc == SSB_DONE) re->flags |= PCRE2_FIRSTMAPSET; } -/* Find the minimum length of subject string. */ +/* Find the minimum length of subject string. If the pattern can match an empty +string, the minimum length is already known. If there are more back references +than the size of the vector we are going to cache them in, do nothing. A +pattern that complicated will probably take a long time to analyze and may in +any case turn out to be too complicated. 
Note that back reference minima are +held as 16-bit numbers. */ -switch(min = find_minlength(re, code, code, utf, NULL)) +if ((re->flags & PCRE2_MATCH_EMPTY) == 0 && + re->top_backref <= MAX_CACHE_BACKREF) { - case -1: /* \C in UTF mode or (*ACCEPT) */ - break; /* Leave minlength unchanged (will be zero) */ + int backref_cache[MAX_CACHE_BACKREF+1]; + backref_cache[0] = 0; /* Highest one that is set */ + min = find_minlength(re, code, code, utf, NULL, &count, backref_cache); + switch(min) + { + case -1: /* \C in UTF mode or (*ACCEPT) or over-complex regex */ + break; /* Leave minlength unchanged (will be zero) */ - case -2: - return 2; /* missing capturing bracket */ + case -2: + return 2; /* missing capturing bracket */ - case -3: - return 3; /* unrecognized opcode */ + case -3: + return 3; /* unrecognized opcode */ - default: - re->minlength = min; - break; + default: + if (min > UINT16_MAX) min = UINT16_MAX; + re->minlength = min; + break; + } } return 0; diff --git a/pcre2/src/pcre2_substitute.c b/pcre2/src/pcre2_substitute.c index ec00ebb86..8da951fc6 100644 --- a/pcre2/src/pcre2_substitute.c +++ b/pcre2/src/pcre2_substitute.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2014 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -45,6 +45,123 @@ POSSIBILITY OF SUCH DAMAGE. 
#include "pcre2_internal.h" +#define PTR_STACK_SIZE 20 + +#define SUBSTITUTE_OPTIONS \ + (PCRE2_SUBSTITUTE_EXTENDED|PCRE2_SUBSTITUTE_GLOBAL| \ + PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_UNKNOWN_UNSET| \ + PCRE2_SUBSTITUTE_UNSET_EMPTY) + + + +/************************************************* +* Find end of substitute text * +*************************************************/ + +/* In extended mode, we recognize ${name:+set text:unset text} and similar +constructions. This requires the identification of unescaped : and } +characters. This function scans for such. It must deal with nested ${ +constructions. The pointer to the text is updated, either to the required end +character, or to where an error was detected. + +Arguments: + code points to the compiled expression (for options) + ptrptr points to the pointer to the start of the text (updated) + ptrend end of the whole string + last TRUE if the last expected string (only } recognized) + +Returns: 0 on success + negative error code on failure +*/ + +static int +find_text_end(const pcre2_code *code, PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, + BOOL last) +{ +int rc = 0; +uint32_t nestlevel = 0; +BOOL literal = FALSE; +PCRE2_SPTR ptr = *ptrptr; + +for (; ptr < ptrend; ptr++) + { + if (literal) + { + if (ptr[0] == CHAR_BACKSLASH && ptr < ptrend - 1 && ptr[1] == CHAR_E) + { + literal = FALSE; + ptr += 1; + } + } + + else if (*ptr == CHAR_RIGHT_CURLY_BRACKET) + { + if (nestlevel == 0) goto EXIT; + nestlevel--; + } + + else if (*ptr == CHAR_COLON && !last && nestlevel == 0) goto EXIT; + + else if (*ptr == CHAR_DOLLAR_SIGN) + { + if (ptr < ptrend - 1 && ptr[1] == CHAR_LEFT_CURLY_BRACKET) + { + nestlevel++; + ptr += 1; + } + } + + else if (*ptr == CHAR_BACKSLASH) + { + int erc; + int errorcode; + uint32_t ch; + + if (ptr < ptrend - 1) switch (ptr[1]) + { + case CHAR_L: + case CHAR_l: + case CHAR_U: + case CHAR_u: + ptr += 1; + continue; + } + + ptr += 1; /* Must point after \ */ + erc = PRIV(check_escape)(&ptr, 
ptrend, &ch, &errorcode, + code->overall_options, FALSE, NULL); + ptr -= 1; /* Back to last code unit of escape */ + if (errorcode != 0) + { + rc = errorcode; + goto EXIT; + } + + switch(erc) + { + case 0: /* Data character */ + case ESC_E: /* Isolated \E is ignored */ + break; + + case ESC_Q: + literal = TRUE; + break; + + default: + rc = PCRE2_ERROR_BADREPESCAPE; + goto EXIT; + } + } + } + +rc = PCRE2_ERROR_REPMISSINGBRACE; /* Terminator not found */ + +EXIT: +*ptrptr = ptr; +return rc; +} + + /************************************************* * Match and substitute * @@ -72,6 +189,30 @@ Returns: >= 0 number of substitutions made PCRE2_ERROR_BADREPLACEMENT means invalid use of $ */ +/* This macro checks for space in the buffer before copying into it. On +overflow, either give an error immediately, or keep on, accumulating the +length. */ + +#define CHECKMEMCPY(from,length) \ + if (!overflowed && lengthleft < length) \ + { \ + if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \ + overflowed = TRUE; \ + extra_needed = length - lengthleft; \ + } \ + else if (overflowed) \ + { \ + extra_needed += length; \ + } \ + else \ + { \ + memcpy(buffer + buff_offset, from, CU2BYTES(length)); \ + buff_offset += length; \ + lengthleft -= length; \ + } + +/* Here's the function */ + PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data, @@ -80,13 +221,28 @@ pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, { int rc; int subs; +int forcecase = 0; +int forcecasereset = 0; uint32_t ovector_count; uint32_t goptions = 0; +uint32_t suboptions; BOOL match_data_created = FALSE; -BOOL global = FALSE; -PCRE2_SIZE buff_offset, lengthleft, fraglength; +BOOL literal = FALSE; +BOOL overflowed = FALSE; +#ifdef SUPPORT_UNICODE +BOOL utf = (code->overall_options & PCRE2_UTF) != 0; +#endif +PCRE2_UCHAR temp[6]; 
+PCRE2_SPTR ptr; +PCRE2_SPTR repend; +PCRE2_SIZE extra_needed = 0; +PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength; PCRE2_SIZE *ovector; +buff_offset = 0; +lengthleft = buff_length = *blength; +*blength = PCRE2_UNSET; + /* Partial matching is not valid. */ if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0) @@ -106,11 +262,16 @@ if (match_data == NULL) ovector = pcre2_get_ovector_pointer(match_data); ovector_count = pcre2_get_ovector_count(match_data); +/* Find lengths of zero-terminated strings and the end of the replacement. */ + +if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject); +if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement); +repend = replacement + rlength; + /* Check UTF replacement string if necessary. */ #ifdef SUPPORT_UNICODE -if ((code->overall_options & PCRE2_UTF) != 0 && - (options & PCRE2_NO_UTF_CHECK) == 0) +if (utf && (options & PCRE2_NO_UTF_CHECK) == 0) { rc = PRIV(valid_utf)(replacement, rlength, &(match_data->rightchar)); if (rc != 0) @@ -121,37 +282,36 @@ if ((code->overall_options & PCRE2_UTF) != 0 && } #endif /* SUPPORT_UNICODE */ -/* Notice the global option and remove it from the options that are passed to -pcre2_match(). */ +/* Save the substitute options and remove them from the match options. */ -if ((options & PCRE2_SUBSTITUTE_GLOBAL) != 0) - { - options &= ~PCRE2_SUBSTITUTE_GLOBAL; - global = TRUE; - } - -/* Find lengths of zero-terminated strings. 
*/ - -if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject); -if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement); +suboptions = options & SUBSTITUTE_OPTIONS; +options &= ~SUBSTITUTE_OPTIONS; /* Copy up to the start offset */ -if (start_offset > *blength) goto NOROOM; -memcpy(buffer, subject, start_offset * (PCRE2_CODE_UNIT_WIDTH/8)); -buff_offset = start_offset; -lengthleft = *blength - start_offset; +if (start_offset > length) + { + match_data->leftchar = 0; + rc = PCRE2_ERROR_BADOFFSET; + goto EXIT; + } +CHECKMEMCPY(subject, start_offset); /* Loop for global substituting. */ subs = 0; do { - PCRE2_SIZE i; + PCRE2_SPTR ptrstack[PTR_STACK_SIZE]; + uint32_t ptrstackptr = 0; rc = pcre2_match(code, subject, length, start_offset, options|goptions, match_data, mcontext); +#ifdef SUPPORT_UNICODE + if (utf) options |= PCRE2_NO_UTF_CHECK; /* Only need to check once */ +#endif + /* Any error other than no match returns the error code. No match when not doing the special after-empty-match global rematch, or when at the end of the subject, breaks the global loop. Otherwise, advance the starting point by one @@ -164,8 +324,22 @@ do if (rc != PCRE2_ERROR_NOMATCH) goto EXIT; if (goptions == 0 || start_offset >= length) break; + /* Advance by one code point. Then, if CRLF is a valid newline sequence and + we have advanced into the middle of it, advance one more code point. In + other words, do not start in the middle of CRLF, even if CR and LF on their + own are valid newlines. */ + save_start = start_offset++; - if ((code->overall_options & PCRE2_UTF) != 0) + if (subject[start_offset-1] == CHAR_CR && + code->newline_convention != PCRE2_NEWLINE_CR && + code->newline_convention != PCRE2_NEWLINE_LF && + start_offset < length && + subject[start_offset] == CHAR_LF) + start_offset++; + + /* Otherwise, in UTF mode, advance past any secondary code points. 
*/ + + else if ((code->overall_options & PCRE2_UTF) != 0) { #if PCRE2_CODE_UNIT_WIDTH == 8 while (start_offset < length && (subject[start_offset] & 0xc0) == 0x80) @@ -177,60 +351,138 @@ do #endif } - fraglength = start_offset - save_start; - if (lengthleft < fraglength) goto NOROOM; - memcpy(buffer + buff_offset, subject + save_start, - fraglength*(PCRE2_CODE_UNIT_WIDTH/8)); - buff_offset += fraglength; - lengthleft -= fraglength; + /* Copy what we have advanced past, reset the special global options, and + continue to the next match. */ + fraglength = start_offset - save_start; + CHECKMEMCPY(subject + save_start, fraglength); goptions = 0; continue; } - /* Handle a successful match. */ + /* Handle a successful match. Matches that use \K to end before they start + are not supported. */ + if (ovector[1] < ovector[0]) + { + rc = PCRE2_ERROR_BADSUBSPATTERN; + goto EXIT; + } + + /* Count substitutions with a paranoid check for integer overflow; surely no + real call to this function would ever hit this! */ + + if (subs == INT_MAX) + { + rc = PCRE2_ERROR_TOOMANYREPLACE; + goto EXIT; + } subs++; + + /* Copy the text leading up to the match. */ + if (rc == 0) rc = ovector_count; fraglength = ovector[0] - start_offset; - if (fraglength >= lengthleft) goto NOROOM; - memcpy(buffer + buff_offset, subject + start_offset, - fraglength*(PCRE2_CODE_UNIT_WIDTH/8)); - buff_offset += fraglength; - lengthleft -= fraglength; + CHECKMEMCPY(subject + start_offset, fraglength); - for (i = 0; i < rlength; i++) + /* Process the replacement string. Literal mode is set by \Q, but only in + extended mode when backslashes are being interpreted. In extended mode we + must handle nested substrings that are to be reprocessed. */ + + ptr = replacement; + for (;;) { - if (replacement[i] == CHAR_DOLLAR_SIGN) + uint32_t ch; + unsigned int chlen; + + /* If at the end of a nested substring, pop the stack. 
*/ + + if (ptr >= repend) + { + if (ptrstackptr <= 0) break; /* End of replacement string */ + repend = ptrstack[--ptrstackptr]; + ptr = ptrstack[--ptrstackptr]; + continue; + } + + /* Handle the next character */ + + if (literal) + { + if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E) + { + literal = FALSE; + ptr += 2; + continue; + } + goto LOADLITERAL; + } + + /* Not in literal mode. */ + + if (*ptr == CHAR_DOLLAR_SIGN) { int group, n; + uint32_t special = 0; BOOL inparens; + BOOL star; PCRE2_SIZE sublength; + PCRE2_SPTR text1_start = NULL; + PCRE2_SPTR text1_end = NULL; + PCRE2_SPTR text2_start = NULL; + PCRE2_SPTR text2_end = NULL; PCRE2_UCHAR next; PCRE2_UCHAR name[33]; - if (++i == rlength) goto BAD; - if ((next = replacement[i]) == CHAR_DOLLAR_SIGN) goto LITERAL; + if (++ptr >= repend) goto BAD; + if ((next = *ptr) == CHAR_DOLLAR_SIGN) goto LOADLITERAL; group = -1; n = 0; inparens = FALSE; + star = FALSE; if (next == CHAR_LEFT_CURLY_BRACKET) { - if (++i == rlength) goto BAD; - next = replacement[i]; + if (++ptr >= repend) goto BAD; + next = *ptr; inparens = TRUE; } - if (next >= CHAR_0 && next <= CHAR_9) + if (next == CHAR_ASTERISK) + { + if (++ptr >= repend) goto BAD; + next = *ptr; + star = TRUE; + } + + if (!star && next >= CHAR_0 && next <= CHAR_9) { group = next - CHAR_0; - while (++i < rlength) + while (++ptr < repend) { - next = replacement[i]; + next = *ptr; if (next < CHAR_0 || next > CHAR_9) break; group = group * 10 + next - CHAR_0; + + /* A check for a number greater than the hightest captured group + is sufficient here; no need for a separate overflow check. If unknown + groups are to be treated as unset, just skip over any remaining + digits and carry on. 
*/ + + if (group > code->top_bracket) + { + if ((suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0) + { + while (++ptr < repend && *ptr >= CHAR_0 && *ptr <= CHAR_9); + break; + } + else + { + rc = PCRE2_ERROR_NOSUBSTRING; + goto PTREXIT; + } + } } } else @@ -240,43 +492,312 @@ do { name[n++] = next; if (n > 32) goto BAD; - if (i == rlength) break; - next = replacement[++i]; + if (++ptr >= repend) break; + next = *ptr; } if (n == 0) goto BAD; name[n] = 0; } + /* In extended mode we recognize ${name:+set text:unset text} and + ${name:-default text}. */ + if (inparens) { - if (i == rlength || next != CHAR_RIGHT_CURLY_BRACKET) goto BAD; + if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 && + !star && ptr < repend - 2 && next == CHAR_COLON) + { + special = *(++ptr); + if (special != CHAR_PLUS && special != CHAR_MINUS) + { + rc = PCRE2_ERROR_BADSUBSTITUTION; + goto PTREXIT; + } + + text1_start = ++ptr; + rc = find_text_end(code, &ptr, repend, special == CHAR_MINUS); + if (rc != 0) goto PTREXIT; + text1_end = ptr; + + if (special == CHAR_PLUS && *ptr == CHAR_COLON) + { + text2_start = ++ptr; + rc = find_text_end(code, &ptr, repend, TRUE); + if (rc != 0) goto PTREXIT; + text2_end = ptr; + } + } + + else + { + if (ptr >= repend || *ptr != CHAR_RIGHT_CURLY_BRACKET) + { + rc = PCRE2_ERROR_REPMISSINGBRACE; + goto PTREXIT; + } + } + + ptr++; } - else i--; /* Last code unit of name/number */ - /* Have found a syntactically correct group number or name. */ + /* Have found a syntactically correct group number or name, or *name. + Only *MARK is currently recognized. */ + + if (star) + { + if (PRIV(strcmp_c8)(name, STRING_MARK) == 0) + { + PCRE2_SPTR mark = pcre2_get_mark(match_data); + if (mark != NULL) + { + PCRE2_SPTR mark_start = mark; + while (*mark != 0) mark++; + fraglength = mark - mark_start; + CHECKMEMCPY(mark_start, fraglength); + } + } + else goto BAD; + } + + /* Substitute the contents of a group. 
We don't use substring_copy + functions any more, in order to support case forcing. */ - sublength = lengthleft; - if (group < 0) - rc = pcre2_substring_copy_byname(match_data, name, - buffer + buff_offset, &sublength); else - rc = pcre2_substring_copy_bynumber(match_data, group, - buffer + buff_offset, &sublength); + { + PCRE2_SPTR subptr, subptrend; - if (rc < 0) goto EXIT; - buff_offset += sublength; - lengthleft -= sublength; + /* Find a number for a named group. In case there are duplicate names, + search for the first one that is set. If the name is not found when + PCRE2_SUBSTITUTE_UNKNOWN_EMPTY is set, set the group number to a + non-existent group. */ + + if (group < 0) + { + PCRE2_SPTR first, last, entry; + rc = pcre2_substring_nametable_scan(code, name, &first, &last); + if (rc == PCRE2_ERROR_NOSUBSTRING && + (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0) + { + group = code->top_bracket + 1; + } + else + { + if (rc < 0) goto PTREXIT; + for (entry = first; entry <= last; entry += rc) + { + uint32_t ng = GET2(entry, 0); + if (ng < ovector_count) + { + if (group < 0) group = ng; /* First in ovector */ + if (ovector[ng*2] != PCRE2_UNSET) + { + group = ng; /* First that is set */ + break; + } + } + } + + /* If group is still negative, it means we did not find a group + that is in the ovector. Just set the first group. */ + + if (group < 0) group = GET2(first, 0); + } + } + + /* We now have a group that is identified by number. Find the length of + the captured string. If a group in a non-special substitution is unset + when PCRE2_SUBSTITUTE_UNSET_EMPTY is set, substitute nothing. 
*/ + + rc = pcre2_substring_length_bynumber(match_data, group, &sublength); + if (rc < 0) + { + if (rc == PCRE2_ERROR_NOSUBSTRING && + (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0) + { + rc = PCRE2_ERROR_UNSET; + } + if (rc != PCRE2_ERROR_UNSET) goto PTREXIT; /* Non-unset errors */ + if (special == 0) /* Plain substitution */ + { + if ((suboptions & PCRE2_SUBSTITUTE_UNSET_EMPTY) != 0) continue; + goto PTREXIT; /* Else error */ + } + } + + /* If special is '+' we have a 'set' and possibly an 'unset' text, + both of which are reprocessed when used. If special is '-' we have a + default text for when the group is unset; it must be reprocessed. */ + + if (special != 0) + { + if (special == CHAR_MINUS) + { + if (rc == 0) goto LITERAL_SUBSTITUTE; + text2_start = text1_start; + text2_end = text1_end; + } + + if (ptrstackptr >= PTR_STACK_SIZE) goto BAD; + ptrstack[ptrstackptr++] = ptr; + ptrstack[ptrstackptr++] = repend; + + if (rc == 0) + { + ptr = text1_start; + repend = text1_end; + } + else + { + ptr = text2_start; + repend = text2_end; + } + continue; + } + + /* Otherwise we have a literal substitution of a group's contents. */ + + LITERAL_SUBSTITUTE: + subptr = subject + ovector[group*2]; + subptrend = subject + ovector[group*2 + 1]; + + /* Substitute a literal string, possibly forcing alphabetic case. */ + + while (subptr < subptrend) + { + GETCHARINCTEST(ch, subptr); + if (forcecase != 0) + { +#ifdef SUPPORT_UNICODE + if (utf) + { + uint32_t type = UCD_CHARTYPE(ch); + if (PRIV(ucp_gentype)[type] == ucp_L && + type != ((forcecase > 0)? ucp_Lu : ucp_Ll)) + ch = UCD_OTHERCASE(ch); + } + else +#endif + { + if (((code->tables + cbits_offset + + ((forcecase > 0)? 
cbit_upper:cbit_lower) + )[ch/8] & (1 << (ch%8))) == 0) + ch = (code->tables + fcc_offset)[ch]; + } + forcecase = forcecasereset; + } + +#ifdef SUPPORT_UNICODE + if (utf) chlen = PRIV(ord2utf)(ch, temp); else +#endif + { + temp[0] = ch; + chlen = 1; + } + CHECKMEMCPY(temp, chlen); + } + } } - /* Handle a literal code unit */ + /* Handle an escape sequence in extended mode. We can use check_escape() + to process \Q, \E, \c, \o, \x and \ followed by non-alphanumerics, but + the case-forcing escapes are not supported in pcre2_compile() so must be + recognized here. */ - else + else if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 && + *ptr == CHAR_BACKSLASH) { - LITERAL: - if (lengthleft-- < 1) goto NOROOM; - buffer[buff_offset++] = replacement[i]; + int errorcode; + + if (ptr < repend - 1) switch (ptr[1]) + { + case CHAR_L: + forcecase = forcecasereset = -1; + ptr += 2; + continue; + + case CHAR_l: + forcecase = -1; + forcecasereset = 0; + ptr += 2; + continue; + + case CHAR_U: + forcecase = forcecasereset = 1; + ptr += 2; + continue; + + case CHAR_u: + forcecase = 1; + forcecasereset = 0; + ptr += 2; + continue; + + default: + break; + } + + ptr++; /* Point after \ */ + rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode, + code->overall_options, FALSE, NULL); + if (errorcode != 0) goto BADESCAPE; + + switch(rc) + { + case ESC_E: + forcecase = forcecasereset = 0; + continue; + + case ESC_Q: + literal = TRUE; + continue; + + case 0: /* Data character */ + goto LITERAL; + + default: + goto BADESCAPE; + } } - } + + /* Handle a literal code unit */ + + else + { + LOADLITERAL: + GETCHARINCTEST(ch, ptr); /* Get character value, increment pointer */ + + LITERAL: + if (forcecase != 0) + { +#ifdef SUPPORT_UNICODE + if (utf) + { + uint32_t type = UCD_CHARTYPE(ch); + if (PRIV(ucp_gentype)[type] == ucp_L && + type != ((forcecase > 0)? ucp_Lu : ucp_Ll)) + ch = UCD_OTHERCASE(ch); + } + else +#endif + { + if (((code->tables + cbits_offset + + ((forcecase > 0)? 
cbit_upper:cbit_lower) + )[ch/8] & (1 << (ch%8))) == 0) + ch = (code->tables + fcc_offset)[ch]; + } + forcecase = forcecasereset; + } + +#ifdef SUPPORT_UNICODE + if (utf) chlen = PRIV(ord2utf)(ch, temp); else +#endif + { + temp[0] = ch; + chlen = 1; + } + CHECKMEMCPY(temp, chlen); + } /* End handling a literal code unit */ + } /* End of loop for scanning the replacement. */ /* The replacement has been copied to the output. Update the start offset to point to the rest of the subject string. If we matched an empty string, @@ -285,18 +806,33 @@ do start_offset = ovector[1]; goptions = (ovector[0] != ovector[1])? 0 : PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART; - } while (global); /* Repeat "do" loop */ + } while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0); /* Repeat "do" loop */ -/* Copy the rest of the subject and return the number of substitutions. */ +/* Copy the rest of the subject. */ -rc = subs; fraglength = length - start_offset; -if (fraglength + 1 > lengthleft) goto NOROOM; -memcpy(buffer + buff_offset, subject + start_offset, - fraglength*(PCRE2_CODE_UNIT_WIDTH/8)); -buff_offset += fraglength; -buffer[buff_offset] = 0; -*blength = buff_offset; +CHECKMEMCPY(subject + start_offset, fraglength); +temp[0] = 0; +CHECKMEMCPY(temp , 1); + +/* If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set, +and matching has carried on after a full buffer, in order to compute the length +needed. Otherwise, an overflow generates an immediate error return. */ + +if (overflowed) + { + rc = PCRE2_ERROR_NOMEMORY; + *blength = buff_length + extra_needed; + } + +/* After a successful execution, return the number of substitutions and set the +length of buffer used, excluding the trailing zero. 
*/ + +else + { + rc = subs; + *blength = buff_offset - 1; + } EXIT: if (match_data_created) pcre2_match_data_free(match_data); @@ -309,6 +845,13 @@ goto EXIT; BAD: rc = PCRE2_ERROR_BADREPLACEMENT; +goto PTREXIT; + +BADESCAPE: +rc = PCRE2_ERROR_BADREPESCAPE; + +PTREXIT: +*blength = (PCRE2_SIZE)(ptr - replacement); goto EXIT; } diff --git a/pcre2/src/pcre2_substring.c b/pcre2/src/pcre2_substring.c index eb72ad7d0..f6d7c3972 100644 --- a/pcre2/src/pcre2_substring.c +++ b/pcre2/src/pcre2_substring.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2014 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -240,8 +240,11 @@ Returns: nothing PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION pcre2_substring_free(PCRE2_UCHAR *string) { -pcre2_memctl *memctl = (pcre2_memctl *)((char *)string - sizeof(pcre2_memctl)); -memctl->free(memctl, memctl->memory_data); +if (string != NULL) + { + pcre2_memctl *memctl = (pcre2_memctl *)((char *)string - sizeof(pcre2_memctl)); + memctl->free(memctl, memctl->memory_data); + } } @@ -436,8 +439,11 @@ Returns: nothing PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION pcre2_substring_list_free(PCRE2_SPTR *list) { -pcre2_memctl *memctl = (pcre2_memctl *)((char *)list - sizeof(pcre2_memctl)); -memctl->free(memctl, memctl->memory_data); +if (list != NULL) + { + pcre2_memctl *memctl = (pcre2_memctl *)((char *)list - sizeof(pcre2_memctl)); + memctl->free(memctl, memctl->memory_data); + } } diff --git a/pcre2/src/pcre2_tables.c b/pcre2/src/pcre2_tables.c index 17e4537d4..b945ed7a7 100644 --- a/pcre2/src/pcre2_tables.c +++ b/pcre2/src/pcre2_tables.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2014 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -227,6 +227,8 @@ version. Like all other character and string literals that are compared against the regular expression pattern, we must use STR_ macros instead of literal strings to make sure that UTF-8 support works on EBCDIC platforms. */ +#define STRING_Ahom0 STR_A STR_h STR_o STR_m "\0" +#define STRING_Anatolian_Hieroglyphs0 STR_A STR_n STR_a STR_t STR_o STR_l STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0" #define STRING_Any0 STR_A STR_n STR_y "\0" #define STRING_Arabic0 STR_A STR_r STR_a STR_b STR_i STR_c "\0" #define STRING_Armenian0 STR_A STR_r STR_m STR_e STR_n STR_i STR_a STR_n "\0" @@ -274,6 +276,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */ #define STRING_Han0 STR_H STR_a STR_n "\0" #define STRING_Hangul0 STR_H STR_a STR_n STR_g STR_u STR_l "\0" #define STRING_Hanunoo0 STR_H STR_a STR_n STR_u STR_n STR_o STR_o "\0" +#define STRING_Hatran0 STR_H STR_a STR_t STR_r STR_a STR_n "\0" #define STRING_Hebrew0 STR_H STR_e STR_b STR_r STR_e STR_w "\0" #define STRING_Hiragana0 STR_H STR_i STR_r STR_a STR_g STR_a STR_n STR_a "\0" #define STRING_Imperial_Aramaic0 STR_I STR_m STR_p STR_e STR_r STR_i STR_a STR_l STR_UNDERSCORE STR_A STR_r STR_a STR_m STR_a STR_i STR_c "\0" @@ -321,6 +324,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. 
*/ #define STRING_Modi0 STR_M STR_o STR_d STR_i "\0" #define STRING_Mongolian0 STR_M STR_o STR_n STR_g STR_o STR_l STR_i STR_a STR_n "\0" #define STRING_Mro0 STR_M STR_r STR_o "\0" +#define STRING_Multani0 STR_M STR_u STR_l STR_t STR_a STR_n STR_i "\0" #define STRING_Myanmar0 STR_M STR_y STR_a STR_n STR_m STR_a STR_r "\0" #define STRING_N0 STR_N "\0" #define STRING_Nabataean0 STR_N STR_a STR_b STR_a STR_t STR_a STR_e STR_a STR_n "\0" @@ -331,6 +335,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */ #define STRING_No0 STR_N STR_o "\0" #define STRING_Ogham0 STR_O STR_g STR_h STR_a STR_m "\0" #define STRING_Ol_Chiki0 STR_O STR_l STR_UNDERSCORE STR_C STR_h STR_i STR_k STR_i "\0" +#define STRING_Old_Hungarian0 STR_O STR_l STR_d STR_UNDERSCORE STR_H STR_u STR_n STR_g STR_a STR_r STR_i STR_a STR_n "\0" #define STRING_Old_Italic0 STR_O STR_l STR_d STR_UNDERSCORE STR_I STR_t STR_a STR_l STR_i STR_c "\0" #define STRING_Old_North_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_N STR_o STR_r STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0" #define STRING_Old_Permic0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_m STR_i STR_c "\0" @@ -362,6 +367,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */ #define STRING_Sharada0 STR_S STR_h STR_a STR_r STR_a STR_d STR_a "\0" #define STRING_Shavian0 STR_S STR_h STR_a STR_v STR_i STR_a STR_n "\0" #define STRING_Siddham0 STR_S STR_i STR_d STR_d STR_h STR_a STR_m "\0" +#define STRING_SignWriting0 STR_S STR_i STR_g STR_n STR_W STR_r STR_i STR_t STR_i STR_n STR_g "\0" #define STRING_Sinhala0 STR_S STR_i STR_n STR_h STR_a STR_l STR_a "\0" #define STRING_Sk0 STR_S STR_k "\0" #define STRING_Sm0 STR_S STR_m "\0" @@ -398,6 +404,8 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. 
*/ #define STRING_Zs0 STR_Z STR_s "\0" const char PRIV(utt_names)[] = + STRING_Ahom0 + STRING_Anatolian_Hieroglyphs0 STRING_Any0 STRING_Arabic0 STRING_Armenian0 @@ -445,6 +453,7 @@ const char PRIV(utt_names)[] = STRING_Han0 STRING_Hangul0 STRING_Hanunoo0 + STRING_Hatran0 STRING_Hebrew0 STRING_Hiragana0 STRING_Imperial_Aramaic0 @@ -492,6 +501,7 @@ const char PRIV(utt_names)[] = STRING_Modi0 STRING_Mongolian0 STRING_Mro0 + STRING_Multani0 STRING_Myanmar0 STRING_N0 STRING_Nabataean0 @@ -502,6 +512,7 @@ const char PRIV(utt_names)[] = STRING_No0 STRING_Ogham0 STRING_Ol_Chiki0 + STRING_Old_Hungarian0 STRING_Old_Italic0 STRING_Old_North_Arabian0 STRING_Old_Permic0 @@ -533,6 +544,7 @@ const char PRIV(utt_names)[] = STRING_Sharada0 STRING_Shavian0 STRING_Siddham0 + STRING_SignWriting0 STRING_Sinhala0 STRING_Sk0 STRING_Sm0 @@ -569,175 +581,181 @@ const char PRIV(utt_names)[] = STRING_Zs0; const ucp_type_table PRIV(utt)[] = { - { 0, PT_ANY, 0 }, - { 4, PT_SC, ucp_Arabic }, - { 11, PT_SC, ucp_Armenian }, - { 20, PT_SC, ucp_Avestan }, - { 28, PT_SC, ucp_Balinese }, - { 37, PT_SC, ucp_Bamum }, - { 43, PT_SC, ucp_Bassa_Vah }, - { 53, PT_SC, ucp_Batak }, - { 59, PT_SC, ucp_Bengali }, - { 67, PT_SC, ucp_Bopomofo }, - { 76, PT_SC, ucp_Brahmi }, - { 83, PT_SC, ucp_Braille }, - { 91, PT_SC, ucp_Buginese }, - { 100, PT_SC, ucp_Buhid }, - { 106, PT_GC, ucp_C }, - { 108, PT_SC, ucp_Canadian_Aboriginal }, - { 128, PT_SC, ucp_Carian }, - { 135, PT_SC, ucp_Caucasian_Albanian }, - { 154, PT_PC, ucp_Cc }, - { 157, PT_PC, ucp_Cf }, - { 160, PT_SC, ucp_Chakma }, - { 167, PT_SC, ucp_Cham }, - { 172, PT_SC, ucp_Cherokee }, - { 181, PT_PC, ucp_Cn }, - { 184, PT_PC, ucp_Co }, - { 187, PT_SC, ucp_Common }, - { 194, PT_SC, ucp_Coptic }, - { 201, PT_PC, ucp_Cs }, - { 204, PT_SC, ucp_Cuneiform }, - { 214, PT_SC, ucp_Cypriot }, - { 222, PT_SC, ucp_Cyrillic }, - { 231, PT_SC, ucp_Deseret }, - { 239, PT_SC, ucp_Devanagari }, - { 250, PT_SC, ucp_Duployan }, - { 259, PT_SC, ucp_Egyptian_Hieroglyphs }, - { 
280, PT_SC, ucp_Elbasan }, - { 288, PT_SC, ucp_Ethiopic }, - { 297, PT_SC, ucp_Georgian }, - { 306, PT_SC, ucp_Glagolitic }, - { 317, PT_SC, ucp_Gothic }, - { 324, PT_SC, ucp_Grantha }, - { 332, PT_SC, ucp_Greek }, - { 338, PT_SC, ucp_Gujarati }, - { 347, PT_SC, ucp_Gurmukhi }, - { 356, PT_SC, ucp_Han }, - { 360, PT_SC, ucp_Hangul }, - { 367, PT_SC, ucp_Hanunoo }, - { 375, PT_SC, ucp_Hebrew }, - { 382, PT_SC, ucp_Hiragana }, - { 391, PT_SC, ucp_Imperial_Aramaic }, - { 408, PT_SC, ucp_Inherited }, - { 418, PT_SC, ucp_Inscriptional_Pahlavi }, - { 440, PT_SC, ucp_Inscriptional_Parthian }, - { 463, PT_SC, ucp_Javanese }, - { 472, PT_SC, ucp_Kaithi }, - { 479, PT_SC, ucp_Kannada }, - { 487, PT_SC, ucp_Katakana }, - { 496, PT_SC, ucp_Kayah_Li }, - { 505, PT_SC, ucp_Kharoshthi }, - { 516, PT_SC, ucp_Khmer }, - { 522, PT_SC, ucp_Khojki }, - { 529, PT_SC, ucp_Khudawadi }, - { 539, PT_GC, ucp_L }, - { 541, PT_LAMP, 0 }, - { 544, PT_SC, ucp_Lao }, - { 548, PT_SC, ucp_Latin }, - { 554, PT_SC, ucp_Lepcha }, - { 561, PT_SC, ucp_Limbu }, - { 567, PT_SC, ucp_Linear_A }, - { 576, PT_SC, ucp_Linear_B }, - { 585, PT_SC, ucp_Lisu }, - { 590, PT_PC, ucp_Ll }, - { 593, PT_PC, ucp_Lm }, - { 596, PT_PC, ucp_Lo }, - { 599, PT_PC, ucp_Lt }, - { 602, PT_PC, ucp_Lu }, - { 605, PT_SC, ucp_Lycian }, - { 612, PT_SC, ucp_Lydian }, - { 619, PT_GC, ucp_M }, - { 621, PT_SC, ucp_Mahajani }, - { 630, PT_SC, ucp_Malayalam }, - { 640, PT_SC, ucp_Mandaic }, - { 648, PT_SC, ucp_Manichaean }, - { 659, PT_PC, ucp_Mc }, - { 662, PT_PC, ucp_Me }, - { 665, PT_SC, ucp_Meetei_Mayek }, - { 678, PT_SC, ucp_Mende_Kikakui }, - { 692, PT_SC, ucp_Meroitic_Cursive }, - { 709, PT_SC, ucp_Meroitic_Hieroglyphs }, - { 730, PT_SC, ucp_Miao }, - { 735, PT_PC, ucp_Mn }, - { 738, PT_SC, ucp_Modi }, - { 743, PT_SC, ucp_Mongolian }, - { 753, PT_SC, ucp_Mro }, - { 757, PT_SC, ucp_Myanmar }, - { 765, PT_GC, ucp_N }, - { 767, PT_SC, ucp_Nabataean }, - { 777, PT_PC, ucp_Nd }, - { 780, PT_SC, ucp_New_Tai_Lue }, - { 792, PT_SC, 
ucp_Nko }, - { 796, PT_PC, ucp_Nl }, - { 799, PT_PC, ucp_No }, - { 802, PT_SC, ucp_Ogham }, - { 808, PT_SC, ucp_Ol_Chiki }, - { 817, PT_SC, ucp_Old_Italic }, - { 828, PT_SC, ucp_Old_North_Arabian }, - { 846, PT_SC, ucp_Old_Permic }, - { 857, PT_SC, ucp_Old_Persian }, - { 869, PT_SC, ucp_Old_South_Arabian }, - { 887, PT_SC, ucp_Old_Turkic }, - { 898, PT_SC, ucp_Oriya }, - { 904, PT_SC, ucp_Osmanya }, - { 912, PT_GC, ucp_P }, - { 914, PT_SC, ucp_Pahawh_Hmong }, - { 927, PT_SC, ucp_Palmyrene }, - { 937, PT_SC, ucp_Pau_Cin_Hau }, - { 949, PT_PC, ucp_Pc }, - { 952, PT_PC, ucp_Pd }, - { 955, PT_PC, ucp_Pe }, - { 958, PT_PC, ucp_Pf }, - { 961, PT_SC, ucp_Phags_Pa }, - { 970, PT_SC, ucp_Phoenician }, - { 981, PT_PC, ucp_Pi }, - { 984, PT_PC, ucp_Po }, - { 987, PT_PC, ucp_Ps }, - { 990, PT_SC, ucp_Psalter_Pahlavi }, - { 1006, PT_SC, ucp_Rejang }, - { 1013, PT_SC, ucp_Runic }, - { 1019, PT_GC, ucp_S }, - { 1021, PT_SC, ucp_Samaritan }, - { 1031, PT_SC, ucp_Saurashtra }, - { 1042, PT_PC, ucp_Sc }, - { 1045, PT_SC, ucp_Sharada }, - { 1053, PT_SC, ucp_Shavian }, - { 1061, PT_SC, ucp_Siddham }, - { 1069, PT_SC, ucp_Sinhala }, - { 1077, PT_PC, ucp_Sk }, - { 1080, PT_PC, ucp_Sm }, - { 1083, PT_PC, ucp_So }, - { 1086, PT_SC, ucp_Sora_Sompeng }, - { 1099, PT_SC, ucp_Sundanese }, - { 1109, PT_SC, ucp_Syloti_Nagri }, - { 1122, PT_SC, ucp_Syriac }, - { 1129, PT_SC, ucp_Tagalog }, - { 1137, PT_SC, ucp_Tagbanwa }, - { 1146, PT_SC, ucp_Tai_Le }, - { 1153, PT_SC, ucp_Tai_Tham }, - { 1162, PT_SC, ucp_Tai_Viet }, - { 1171, PT_SC, ucp_Takri }, - { 1177, PT_SC, ucp_Tamil }, - { 1183, PT_SC, ucp_Telugu }, - { 1190, PT_SC, ucp_Thaana }, - { 1197, PT_SC, ucp_Thai }, - { 1202, PT_SC, ucp_Tibetan }, - { 1210, PT_SC, ucp_Tifinagh }, - { 1219, PT_SC, ucp_Tirhuta }, - { 1227, PT_SC, ucp_Ugaritic }, - { 1236, PT_SC, ucp_Vai }, - { 1240, PT_SC, ucp_Warang_Citi }, - { 1252, PT_ALNUM, 0 }, - { 1256, PT_PXSPACE, 0 }, - { 1260, PT_SPACE, 0 }, - { 1264, PT_UCNC, 0 }, - { 1268, PT_WORD, 0 }, - { 1272, PT_SC, 
ucp_Yi }, - { 1275, PT_GC, ucp_Z }, - { 1277, PT_PC, ucp_Zl }, - { 1280, PT_PC, ucp_Zp }, - { 1283, PT_PC, ucp_Zs } + { 0, PT_SC, ucp_Ahom }, + { 5, PT_SC, ucp_Anatolian_Hieroglyphs }, + { 27, PT_ANY, 0 }, + { 31, PT_SC, ucp_Arabic }, + { 38, PT_SC, ucp_Armenian }, + { 47, PT_SC, ucp_Avestan }, + { 55, PT_SC, ucp_Balinese }, + { 64, PT_SC, ucp_Bamum }, + { 70, PT_SC, ucp_Bassa_Vah }, + { 80, PT_SC, ucp_Batak }, + { 86, PT_SC, ucp_Bengali }, + { 94, PT_SC, ucp_Bopomofo }, + { 103, PT_SC, ucp_Brahmi }, + { 110, PT_SC, ucp_Braille }, + { 118, PT_SC, ucp_Buginese }, + { 127, PT_SC, ucp_Buhid }, + { 133, PT_GC, ucp_C }, + { 135, PT_SC, ucp_Canadian_Aboriginal }, + { 155, PT_SC, ucp_Carian }, + { 162, PT_SC, ucp_Caucasian_Albanian }, + { 181, PT_PC, ucp_Cc }, + { 184, PT_PC, ucp_Cf }, + { 187, PT_SC, ucp_Chakma }, + { 194, PT_SC, ucp_Cham }, + { 199, PT_SC, ucp_Cherokee }, + { 208, PT_PC, ucp_Cn }, + { 211, PT_PC, ucp_Co }, + { 214, PT_SC, ucp_Common }, + { 221, PT_SC, ucp_Coptic }, + { 228, PT_PC, ucp_Cs }, + { 231, PT_SC, ucp_Cuneiform }, + { 241, PT_SC, ucp_Cypriot }, + { 249, PT_SC, ucp_Cyrillic }, + { 258, PT_SC, ucp_Deseret }, + { 266, PT_SC, ucp_Devanagari }, + { 277, PT_SC, ucp_Duployan }, + { 286, PT_SC, ucp_Egyptian_Hieroglyphs }, + { 307, PT_SC, ucp_Elbasan }, + { 315, PT_SC, ucp_Ethiopic }, + { 324, PT_SC, ucp_Georgian }, + { 333, PT_SC, ucp_Glagolitic }, + { 344, PT_SC, ucp_Gothic }, + { 351, PT_SC, ucp_Grantha }, + { 359, PT_SC, ucp_Greek }, + { 365, PT_SC, ucp_Gujarati }, + { 374, PT_SC, ucp_Gurmukhi }, + { 383, PT_SC, ucp_Han }, + { 387, PT_SC, ucp_Hangul }, + { 394, PT_SC, ucp_Hanunoo }, + { 402, PT_SC, ucp_Hatran }, + { 409, PT_SC, ucp_Hebrew }, + { 416, PT_SC, ucp_Hiragana }, + { 425, PT_SC, ucp_Imperial_Aramaic }, + { 442, PT_SC, ucp_Inherited }, + { 452, PT_SC, ucp_Inscriptional_Pahlavi }, + { 474, PT_SC, ucp_Inscriptional_Parthian }, + { 497, PT_SC, ucp_Javanese }, + { 506, PT_SC, ucp_Kaithi }, + { 513, PT_SC, ucp_Kannada }, + { 521, PT_SC, 
ucp_Katakana }, + { 530, PT_SC, ucp_Kayah_Li }, + { 539, PT_SC, ucp_Kharoshthi }, + { 550, PT_SC, ucp_Khmer }, + { 556, PT_SC, ucp_Khojki }, + { 563, PT_SC, ucp_Khudawadi }, + { 573, PT_GC, ucp_L }, + { 575, PT_LAMP, 0 }, + { 578, PT_SC, ucp_Lao }, + { 582, PT_SC, ucp_Latin }, + { 588, PT_SC, ucp_Lepcha }, + { 595, PT_SC, ucp_Limbu }, + { 601, PT_SC, ucp_Linear_A }, + { 610, PT_SC, ucp_Linear_B }, + { 619, PT_SC, ucp_Lisu }, + { 624, PT_PC, ucp_Ll }, + { 627, PT_PC, ucp_Lm }, + { 630, PT_PC, ucp_Lo }, + { 633, PT_PC, ucp_Lt }, + { 636, PT_PC, ucp_Lu }, + { 639, PT_SC, ucp_Lycian }, + { 646, PT_SC, ucp_Lydian }, + { 653, PT_GC, ucp_M }, + { 655, PT_SC, ucp_Mahajani }, + { 664, PT_SC, ucp_Malayalam }, + { 674, PT_SC, ucp_Mandaic }, + { 682, PT_SC, ucp_Manichaean }, + { 693, PT_PC, ucp_Mc }, + { 696, PT_PC, ucp_Me }, + { 699, PT_SC, ucp_Meetei_Mayek }, + { 712, PT_SC, ucp_Mende_Kikakui }, + { 726, PT_SC, ucp_Meroitic_Cursive }, + { 743, PT_SC, ucp_Meroitic_Hieroglyphs }, + { 764, PT_SC, ucp_Miao }, + { 769, PT_PC, ucp_Mn }, + { 772, PT_SC, ucp_Modi }, + { 777, PT_SC, ucp_Mongolian }, + { 787, PT_SC, ucp_Mro }, + { 791, PT_SC, ucp_Multani }, + { 799, PT_SC, ucp_Myanmar }, + { 807, PT_GC, ucp_N }, + { 809, PT_SC, ucp_Nabataean }, + { 819, PT_PC, ucp_Nd }, + { 822, PT_SC, ucp_New_Tai_Lue }, + { 834, PT_SC, ucp_Nko }, + { 838, PT_PC, ucp_Nl }, + { 841, PT_PC, ucp_No }, + { 844, PT_SC, ucp_Ogham }, + { 850, PT_SC, ucp_Ol_Chiki }, + { 859, PT_SC, ucp_Old_Hungarian }, + { 873, PT_SC, ucp_Old_Italic }, + { 884, PT_SC, ucp_Old_North_Arabian }, + { 902, PT_SC, ucp_Old_Permic }, + { 913, PT_SC, ucp_Old_Persian }, + { 925, PT_SC, ucp_Old_South_Arabian }, + { 943, PT_SC, ucp_Old_Turkic }, + { 954, PT_SC, ucp_Oriya }, + { 960, PT_SC, ucp_Osmanya }, + { 968, PT_GC, ucp_P }, + { 970, PT_SC, ucp_Pahawh_Hmong }, + { 983, PT_SC, ucp_Palmyrene }, + { 993, PT_SC, ucp_Pau_Cin_Hau }, + { 1005, PT_PC, ucp_Pc }, + { 1008, PT_PC, ucp_Pd }, + { 1011, PT_PC, ucp_Pe }, + { 1014, PT_PC, ucp_Pf }, 
+ { 1017, PT_SC, ucp_Phags_Pa }, + { 1026, PT_SC, ucp_Phoenician }, + { 1037, PT_PC, ucp_Pi }, + { 1040, PT_PC, ucp_Po }, + { 1043, PT_PC, ucp_Ps }, + { 1046, PT_SC, ucp_Psalter_Pahlavi }, + { 1062, PT_SC, ucp_Rejang }, + { 1069, PT_SC, ucp_Runic }, + { 1075, PT_GC, ucp_S }, + { 1077, PT_SC, ucp_Samaritan }, + { 1087, PT_SC, ucp_Saurashtra }, + { 1098, PT_PC, ucp_Sc }, + { 1101, PT_SC, ucp_Sharada }, + { 1109, PT_SC, ucp_Shavian }, + { 1117, PT_SC, ucp_Siddham }, + { 1125, PT_SC, ucp_SignWriting }, + { 1137, PT_SC, ucp_Sinhala }, + { 1145, PT_PC, ucp_Sk }, + { 1148, PT_PC, ucp_Sm }, + { 1151, PT_PC, ucp_So }, + { 1154, PT_SC, ucp_Sora_Sompeng }, + { 1167, PT_SC, ucp_Sundanese }, + { 1177, PT_SC, ucp_Syloti_Nagri }, + { 1190, PT_SC, ucp_Syriac }, + { 1197, PT_SC, ucp_Tagalog }, + { 1205, PT_SC, ucp_Tagbanwa }, + { 1214, PT_SC, ucp_Tai_Le }, + { 1221, PT_SC, ucp_Tai_Tham }, + { 1230, PT_SC, ucp_Tai_Viet }, + { 1239, PT_SC, ucp_Takri }, + { 1245, PT_SC, ucp_Tamil }, + { 1251, PT_SC, ucp_Telugu }, + { 1258, PT_SC, ucp_Thaana }, + { 1265, PT_SC, ucp_Thai }, + { 1270, PT_SC, ucp_Tibetan }, + { 1278, PT_SC, ucp_Tifinagh }, + { 1287, PT_SC, ucp_Tirhuta }, + { 1295, PT_SC, ucp_Ugaritic }, + { 1304, PT_SC, ucp_Vai }, + { 1308, PT_SC, ucp_Warang_Citi }, + { 1320, PT_ALNUM, 0 }, + { 1324, PT_PXSPACE, 0 }, + { 1328, PT_SPACE, 0 }, + { 1332, PT_UCNC, 0 }, + { 1336, PT_WORD, 0 }, + { 1340, PT_SC, ucp_Yi }, + { 1343, PT_GC, ucp_Z }, + { 1345, PT_PC, ucp_Zl }, + { 1348, PT_PC, ucp_Zp }, + { 1351, PT_PC, ucp_Zs } }; const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table); diff --git a/pcre2/src/pcre2_ucd.c b/pcre2/src/pcre2_ucd.c index 7199cbda7..116f537b3 100644 --- a/pcre2/src/pcre2_ucd.c +++ b/pcre2/src/pcre2_ucd.c @@ -20,7 +20,7 @@ needed. */ /* Unicode character database. */ /* This file was autogenerated by the MultiStage2.py script. */ -/* Total size: 72576 bytes, block size: 128. */ +/* Total size: 75072 bytes, block size: 128. 
*/ /* The tables herein are needed only when UCP support is built, and in PCRE2 that happens automatically with UTF support. @@ -39,7 +39,7 @@ const uint16_t PRIV(ucd_stage2)[] = {0}; const uint32_t PRIV(ucd_caseless_sets)[] = {0}; #else -const char *PRIV(unicode_version) = "7.0.0"; +const char *PRIV(unicode_version) = "8.0.0"; /* When recompiling tables with a new Unicode version, please check the types in this structure definition from pcre2_internal.h (the actual @@ -82,7 +82,7 @@ const uint32_t PRIV(ucd_caseless_sets)[] = { #ifndef PCRE2_PCRE2TEST -const ucd_record PRIV(ucd_records)[] = { /* 5760 bytes, record size 8 */ +const ucd_record PRIV(ucd_records)[] = { /* 5952 bytes, record size 8 */ { 9, 0, 2, 0, 0, }, /* 0 */ { 9, 0, 1, 0, 0, }, /* 1 */ { 9, 0, 0, 0, 0, }, /* 2 */ @@ -188,621 +188,645 @@ const ucd_record PRIV(ucd_records)[] = { /* 5760 bytes, record size 8 */ { 33, 5, 12, 0, -217, }, /* 102 */ { 33, 5, 12, 0, -71, }, /* 103 */ { 33, 5, 12, 0, -219, }, /* 104 */ - { 33, 5, 12, 0, 42258, }, /* 105 */ - { 33, 6, 12, 0, 0, }, /* 106 */ - { 9, 6, 12, 0, 0, }, /* 107 */ - { 3, 24, 12, 0, 0, }, /* 108 */ - { 27, 12, 3, 0, 0, }, /* 109 */ - { 27, 12, 3, 21, 116, }, /* 110 */ - { 19, 9, 12, 0, 1, }, /* 111 */ - { 19, 5, 12, 0, -1, }, /* 112 */ - { 19, 24, 12, 0, 0, }, /* 113 */ - { 9, 2, 12, 0, 0, }, /* 114 */ - { 19, 6, 12, 0, 0, }, /* 115 */ - { 19, 5, 12, 0, 130, }, /* 116 */ - { 19, 9, 12, 0, 116, }, /* 117 */ - { 19, 9, 12, 0, 38, }, /* 118 */ - { 19, 9, 12, 0, 37, }, /* 119 */ - { 19, 9, 12, 0, 64, }, /* 120 */ - { 19, 9, 12, 0, 63, }, /* 121 */ - { 19, 5, 12, 0, 0, }, /* 122 */ - { 19, 9, 12, 0, 32, }, /* 123 */ - { 19, 9, 12, 34, 32, }, /* 124 */ - { 19, 9, 12, 59, 32, }, /* 125 */ - { 19, 9, 12, 38, 32, }, /* 126 */ - { 19, 9, 12, 21, 32, }, /* 127 */ - { 19, 9, 12, 51, 32, }, /* 128 */ - { 19, 9, 12, 26, 32, }, /* 129 */ - { 19, 9, 12, 47, 32, }, /* 130 */ - { 19, 9, 12, 55, 32, }, /* 131 */ - { 19, 9, 12, 30, 32, }, /* 132 */ - { 19, 9, 12, 43, 32, 
}, /* 133 */ - { 19, 9, 12, 67, 32, }, /* 134 */ - { 19, 5, 12, 0, -38, }, /* 135 */ - { 19, 5, 12, 0, -37, }, /* 136 */ - { 19, 5, 12, 0, -32, }, /* 137 */ - { 19, 5, 12, 34, -32, }, /* 138 */ - { 19, 5, 12, 59, -32, }, /* 139 */ - { 19, 5, 12, 38, -32, }, /* 140 */ - { 19, 5, 12, 21, -116, }, /* 141 */ - { 19, 5, 12, 51, -32, }, /* 142 */ - { 19, 5, 12, 26, -775, }, /* 143 */ - { 19, 5, 12, 47, -32, }, /* 144 */ - { 19, 5, 12, 55, -32, }, /* 145 */ - { 19, 5, 12, 30, 1, }, /* 146 */ - { 19, 5, 12, 30, -32, }, /* 147 */ - { 19, 5, 12, 43, -32, }, /* 148 */ - { 19, 5, 12, 67, -32, }, /* 149 */ - { 19, 5, 12, 0, -64, }, /* 150 */ - { 19, 5, 12, 0, -63, }, /* 151 */ - { 19, 9, 12, 0, 8, }, /* 152 */ - { 19, 5, 12, 34, -30, }, /* 153 */ - { 19, 5, 12, 38, -25, }, /* 154 */ - { 19, 9, 12, 0, 0, }, /* 155 */ - { 19, 5, 12, 43, -15, }, /* 156 */ - { 19, 5, 12, 47, -22, }, /* 157 */ - { 19, 5, 12, 0, -8, }, /* 158 */ - { 10, 9, 12, 0, 1, }, /* 159 */ - { 10, 5, 12, 0, -1, }, /* 160 */ - { 19, 5, 12, 51, -54, }, /* 161 */ - { 19, 5, 12, 55, -48, }, /* 162 */ - { 19, 5, 12, 0, 7, }, /* 163 */ - { 19, 5, 12, 0, -116, }, /* 164 */ - { 19, 9, 12, 38, -60, }, /* 165 */ - { 19, 5, 12, 59, -64, }, /* 166 */ - { 19, 25, 12, 0, 0, }, /* 167 */ - { 19, 9, 12, 0, -7, }, /* 168 */ - { 19, 9, 12, 0, -130, }, /* 169 */ - { 12, 9, 12, 0, 80, }, /* 170 */ - { 12, 9, 12, 0, 32, }, /* 171 */ - { 12, 5, 12, 0, -32, }, /* 172 */ - { 12, 5, 12, 0, -80, }, /* 173 */ - { 12, 9, 12, 0, 1, }, /* 174 */ - { 12, 5, 12, 0, -1, }, /* 175 */ - { 12, 26, 12, 0, 0, }, /* 176 */ - { 12, 12, 3, 0, 0, }, /* 177 */ - { 12, 11, 3, 0, 0, }, /* 178 */ - { 12, 9, 12, 0, 15, }, /* 179 */ - { 12, 5, 12, 0, -15, }, /* 180 */ - { 1, 9, 12, 0, 48, }, /* 181 */ - { 1, 6, 12, 0, 0, }, /* 182 */ - { 1, 21, 12, 0, 0, }, /* 183 */ - { 1, 5, 12, 0, -48, }, /* 184 */ - { 1, 5, 12, 0, 0, }, /* 185 */ - { 1, 17, 12, 0, 0, }, /* 186 */ - { 1, 26, 12, 0, 0, }, /* 187 */ - { 1, 23, 12, 0, 0, }, /* 188 */ - { 25, 12, 3, 0, 0, }, 
/* 189 */ - { 25, 17, 12, 0, 0, }, /* 190 */ - { 25, 21, 12, 0, 0, }, /* 191 */ - { 25, 7, 12, 0, 0, }, /* 192 */ - { 0, 1, 2, 0, 0, }, /* 193 */ - { 0, 25, 12, 0, 0, }, /* 194 */ - { 0, 21, 12, 0, 0, }, /* 195 */ - { 0, 23, 12, 0, 0, }, /* 196 */ - { 0, 26, 12, 0, 0, }, /* 197 */ - { 0, 12, 3, 0, 0, }, /* 198 */ - { 0, 7, 12, 0, 0, }, /* 199 */ - { 0, 6, 12, 0, 0, }, /* 200 */ + { 33, 5, 12, 0, 42261, }, /* 105 */ + { 33, 5, 12, 0, 42258, }, /* 106 */ + { 33, 6, 12, 0, 0, }, /* 107 */ + { 9, 6, 12, 0, 0, }, /* 108 */ + { 3, 24, 12, 0, 0, }, /* 109 */ + { 27, 12, 3, 0, 0, }, /* 110 */ + { 27, 12, 3, 21, 116, }, /* 111 */ + { 19, 9, 12, 0, 1, }, /* 112 */ + { 19, 5, 12, 0, -1, }, /* 113 */ + { 19, 24, 12, 0, 0, }, /* 114 */ + { 9, 2, 12, 0, 0, }, /* 115 */ + { 19, 6, 12, 0, 0, }, /* 116 */ + { 19, 5, 12, 0, 130, }, /* 117 */ + { 19, 9, 12, 0, 116, }, /* 118 */ + { 19, 9, 12, 0, 38, }, /* 119 */ + { 19, 9, 12, 0, 37, }, /* 120 */ + { 19, 9, 12, 0, 64, }, /* 121 */ + { 19, 9, 12, 0, 63, }, /* 122 */ + { 19, 5, 12, 0, 0, }, /* 123 */ + { 19, 9, 12, 0, 32, }, /* 124 */ + { 19, 9, 12, 34, 32, }, /* 125 */ + { 19, 9, 12, 59, 32, }, /* 126 */ + { 19, 9, 12, 38, 32, }, /* 127 */ + { 19, 9, 12, 21, 32, }, /* 128 */ + { 19, 9, 12, 51, 32, }, /* 129 */ + { 19, 9, 12, 26, 32, }, /* 130 */ + { 19, 9, 12, 47, 32, }, /* 131 */ + { 19, 9, 12, 55, 32, }, /* 132 */ + { 19, 9, 12, 30, 32, }, /* 133 */ + { 19, 9, 12, 43, 32, }, /* 134 */ + { 19, 9, 12, 67, 32, }, /* 135 */ + { 19, 5, 12, 0, -38, }, /* 136 */ + { 19, 5, 12, 0, -37, }, /* 137 */ + { 19, 5, 12, 0, -32, }, /* 138 */ + { 19, 5, 12, 34, -32, }, /* 139 */ + { 19, 5, 12, 59, -32, }, /* 140 */ + { 19, 5, 12, 38, -32, }, /* 141 */ + { 19, 5, 12, 21, -116, }, /* 142 */ + { 19, 5, 12, 51, -32, }, /* 143 */ + { 19, 5, 12, 26, -775, }, /* 144 */ + { 19, 5, 12, 47, -32, }, /* 145 */ + { 19, 5, 12, 55, -32, }, /* 146 */ + { 19, 5, 12, 30, 1, }, /* 147 */ + { 19, 5, 12, 30, -32, }, /* 148 */ + { 19, 5, 12, 43, -32, }, /* 149 */ + { 19, 
5, 12, 67, -32, }, /* 150 */ + { 19, 5, 12, 0, -64, }, /* 151 */ + { 19, 5, 12, 0, -63, }, /* 152 */ + { 19, 9, 12, 0, 8, }, /* 153 */ + { 19, 5, 12, 34, -30, }, /* 154 */ + { 19, 5, 12, 38, -25, }, /* 155 */ + { 19, 9, 12, 0, 0, }, /* 156 */ + { 19, 5, 12, 43, -15, }, /* 157 */ + { 19, 5, 12, 47, -22, }, /* 158 */ + { 19, 5, 12, 0, -8, }, /* 159 */ + { 10, 9, 12, 0, 1, }, /* 160 */ + { 10, 5, 12, 0, -1, }, /* 161 */ + { 19, 5, 12, 51, -54, }, /* 162 */ + { 19, 5, 12, 55, -48, }, /* 163 */ + { 19, 5, 12, 0, 7, }, /* 164 */ + { 19, 5, 12, 0, -116, }, /* 165 */ + { 19, 9, 12, 38, -60, }, /* 166 */ + { 19, 5, 12, 59, -64, }, /* 167 */ + { 19, 25, 12, 0, 0, }, /* 168 */ + { 19, 9, 12, 0, -7, }, /* 169 */ + { 19, 9, 12, 0, -130, }, /* 170 */ + { 12, 9, 12, 0, 80, }, /* 171 */ + { 12, 9, 12, 0, 32, }, /* 172 */ + { 12, 5, 12, 0, -32, }, /* 173 */ + { 12, 5, 12, 0, -80, }, /* 174 */ + { 12, 9, 12, 0, 1, }, /* 175 */ + { 12, 5, 12, 0, -1, }, /* 176 */ + { 12, 26, 12, 0, 0, }, /* 177 */ + { 12, 12, 3, 0, 0, }, /* 178 */ + { 12, 11, 3, 0, 0, }, /* 179 */ + { 12, 9, 12, 0, 15, }, /* 180 */ + { 12, 5, 12, 0, -15, }, /* 181 */ + { 1, 9, 12, 0, 48, }, /* 182 */ + { 1, 6, 12, 0, 0, }, /* 183 */ + { 1, 21, 12, 0, 0, }, /* 184 */ + { 1, 5, 12, 0, -48, }, /* 185 */ + { 1, 5, 12, 0, 0, }, /* 186 */ + { 1, 17, 12, 0, 0, }, /* 187 */ + { 1, 26, 12, 0, 0, }, /* 188 */ + { 1, 23, 12, 0, 0, }, /* 189 */ + { 25, 12, 3, 0, 0, }, /* 190 */ + { 25, 17, 12, 0, 0, }, /* 191 */ + { 25, 21, 12, 0, 0, }, /* 192 */ + { 25, 7, 12, 0, 0, }, /* 193 */ + { 0, 1, 2, 0, 0, }, /* 194 */ + { 0, 25, 12, 0, 0, }, /* 195 */ + { 0, 21, 12, 0, 0, }, /* 196 */ + { 0, 23, 12, 0, 0, }, /* 197 */ + { 0, 26, 12, 0, 0, }, /* 198 */ + { 0, 12, 3, 0, 0, }, /* 199 */ + { 0, 7, 12, 0, 0, }, /* 200 */ { 0, 13, 12, 0, 0, }, /* 201 */ - { 49, 21, 12, 0, 0, }, /* 202 */ - { 49, 1, 2, 0, 0, }, /* 203 */ - { 49, 7, 12, 0, 0, }, /* 204 */ - { 49, 12, 3, 0, 0, }, /* 205 */ - { 55, 7, 12, 0, 0, }, /* 206 */ - { 55, 12, 3, 0, 0, 
}, /* 207 */ - { 63, 13, 12, 0, 0, }, /* 208 */ - { 63, 7, 12, 0, 0, }, /* 209 */ - { 63, 12, 3, 0, 0, }, /* 210 */ - { 63, 6, 12, 0, 0, }, /* 211 */ - { 63, 26, 12, 0, 0, }, /* 212 */ - { 63, 21, 12, 0, 0, }, /* 213 */ - { 89, 7, 12, 0, 0, }, /* 214 */ - { 89, 12, 3, 0, 0, }, /* 215 */ - { 89, 6, 12, 0, 0, }, /* 216 */ - { 89, 21, 12, 0, 0, }, /* 217 */ - { 94, 7, 12, 0, 0, }, /* 218 */ - { 94, 12, 3, 0, 0, }, /* 219 */ - { 94, 21, 12, 0, 0, }, /* 220 */ - { 14, 12, 3, 0, 0, }, /* 221 */ - { 14, 10, 5, 0, 0, }, /* 222 */ - { 14, 7, 12, 0, 0, }, /* 223 */ - { 14, 13, 12, 0, 0, }, /* 224 */ - { 14, 21, 12, 0, 0, }, /* 225 */ - { 14, 6, 12, 0, 0, }, /* 226 */ - { 2, 7, 12, 0, 0, }, /* 227 */ - { 2, 12, 3, 0, 0, }, /* 228 */ - { 2, 10, 5, 0, 0, }, /* 229 */ - { 2, 10, 3, 0, 0, }, /* 230 */ - { 2, 13, 12, 0, 0, }, /* 231 */ - { 2, 23, 12, 0, 0, }, /* 232 */ - { 2, 15, 12, 0, 0, }, /* 233 */ - { 2, 26, 12, 0, 0, }, /* 234 */ - { 21, 12, 3, 0, 0, }, /* 235 */ - { 21, 10, 5, 0, 0, }, /* 236 */ - { 21, 7, 12, 0, 0, }, /* 237 */ - { 21, 13, 12, 0, 0, }, /* 238 */ - { 20, 12, 3, 0, 0, }, /* 239 */ - { 20, 10, 5, 0, 0, }, /* 240 */ - { 20, 7, 12, 0, 0, }, /* 241 */ - { 20, 13, 12, 0, 0, }, /* 242 */ - { 20, 21, 12, 0, 0, }, /* 243 */ - { 20, 23, 12, 0, 0, }, /* 244 */ - { 43, 12, 3, 0, 0, }, /* 245 */ - { 43, 10, 5, 0, 0, }, /* 246 */ - { 43, 7, 12, 0, 0, }, /* 247 */ - { 43, 10, 3, 0, 0, }, /* 248 */ - { 43, 13, 12, 0, 0, }, /* 249 */ - { 43, 26, 12, 0, 0, }, /* 250 */ - { 43, 15, 12, 0, 0, }, /* 251 */ - { 53, 12, 3, 0, 0, }, /* 252 */ - { 53, 7, 12, 0, 0, }, /* 253 */ - { 53, 10, 3, 0, 0, }, /* 254 */ - { 53, 10, 5, 0, 0, }, /* 255 */ - { 53, 13, 12, 0, 0, }, /* 256 */ - { 53, 15, 12, 0, 0, }, /* 257 */ - { 53, 26, 12, 0, 0, }, /* 258 */ - { 53, 23, 12, 0, 0, }, /* 259 */ - { 54, 12, 3, 0, 0, }, /* 260 */ - { 54, 10, 5, 0, 0, }, /* 261 */ - { 54, 7, 12, 0, 0, }, /* 262 */ - { 54, 13, 12, 0, 0, }, /* 263 */ - { 54, 15, 12, 0, 0, }, /* 264 */ - { 54, 26, 12, 0, 0, }, /* 265 
*/ - { 28, 12, 3, 0, 0, }, /* 266 */ - { 28, 10, 5, 0, 0, }, /* 267 */ - { 28, 7, 12, 0, 0, }, /* 268 */ - { 28, 10, 3, 0, 0, }, /* 269 */ - { 28, 13, 12, 0, 0, }, /* 270 */ - { 36, 12, 3, 0, 0, }, /* 271 */ - { 36, 10, 5, 0, 0, }, /* 272 */ - { 36, 7, 12, 0, 0, }, /* 273 */ - { 36, 10, 3, 0, 0, }, /* 274 */ - { 36, 13, 12, 0, 0, }, /* 275 */ - { 36, 15, 12, 0, 0, }, /* 276 */ - { 36, 26, 12, 0, 0, }, /* 277 */ - { 47, 10, 5, 0, 0, }, /* 278 */ - { 47, 7, 12, 0, 0, }, /* 279 */ - { 47, 12, 3, 0, 0, }, /* 280 */ - { 47, 10, 3, 0, 0, }, /* 281 */ - { 47, 13, 12, 0, 0, }, /* 282 */ - { 47, 21, 12, 0, 0, }, /* 283 */ - { 56, 7, 12, 0, 0, }, /* 284 */ - { 56, 12, 3, 0, 0, }, /* 285 */ - { 56, 7, 5, 0, 0, }, /* 286 */ - { 56, 6, 12, 0, 0, }, /* 287 */ - { 56, 21, 12, 0, 0, }, /* 288 */ - { 56, 13, 12, 0, 0, }, /* 289 */ - { 32, 7, 12, 0, 0, }, /* 290 */ - { 32, 12, 3, 0, 0, }, /* 291 */ - { 32, 7, 5, 0, 0, }, /* 292 */ - { 32, 6, 12, 0, 0, }, /* 293 */ - { 32, 13, 12, 0, 0, }, /* 294 */ - { 57, 7, 12, 0, 0, }, /* 295 */ - { 57, 26, 12, 0, 0, }, /* 296 */ - { 57, 21, 12, 0, 0, }, /* 297 */ - { 57, 12, 3, 0, 0, }, /* 298 */ - { 57, 13, 12, 0, 0, }, /* 299 */ - { 57, 15, 12, 0, 0, }, /* 300 */ - { 57, 22, 12, 0, 0, }, /* 301 */ - { 57, 18, 12, 0, 0, }, /* 302 */ - { 57, 10, 5, 0, 0, }, /* 303 */ - { 38, 7, 12, 0, 0, }, /* 304 */ - { 38, 10, 12, 0, 0, }, /* 305 */ - { 38, 12, 3, 0, 0, }, /* 306 */ - { 38, 10, 5, 0, 0, }, /* 307 */ - { 38, 13, 12, 0, 0, }, /* 308 */ - { 38, 21, 12, 0, 0, }, /* 309 */ - { 38, 26, 12, 0, 0, }, /* 310 */ - { 16, 9, 12, 0, 7264, }, /* 311 */ - { 16, 7, 12, 0, 0, }, /* 312 */ - { 16, 6, 12, 0, 0, }, /* 313 */ - { 23, 7, 6, 0, 0, }, /* 314 */ - { 23, 7, 7, 0, 0, }, /* 315 */ - { 23, 7, 8, 0, 0, }, /* 316 */ - { 15, 7, 12, 0, 0, }, /* 317 */ - { 15, 12, 3, 0, 0, }, /* 318 */ - { 15, 21, 12, 0, 0, }, /* 319 */ - { 15, 15, 12, 0, 0, }, /* 320 */ - { 15, 26, 12, 0, 0, }, /* 321 */ - { 8, 7, 12, 0, 0, }, /* 322 */ - { 7, 17, 12, 0, 0, }, /* 323 */ - { 
7, 7, 12, 0, 0, }, /* 324 */ - { 7, 21, 12, 0, 0, }, /* 325 */ - { 40, 29, 12, 0, 0, }, /* 326 */ - { 40, 7, 12, 0, 0, }, /* 327 */ - { 40, 22, 12, 0, 0, }, /* 328 */ - { 40, 18, 12, 0, 0, }, /* 329 */ - { 45, 7, 12, 0, 0, }, /* 330 */ - { 45, 14, 12, 0, 0, }, /* 331 */ - { 50, 7, 12, 0, 0, }, /* 332 */ - { 50, 12, 3, 0, 0, }, /* 333 */ - { 24, 7, 12, 0, 0, }, /* 334 */ - { 24, 12, 3, 0, 0, }, /* 335 */ - { 6, 7, 12, 0, 0, }, /* 336 */ - { 6, 12, 3, 0, 0, }, /* 337 */ - { 51, 7, 12, 0, 0, }, /* 338 */ - { 51, 12, 3, 0, 0, }, /* 339 */ - { 31, 7, 12, 0, 0, }, /* 340 */ - { 31, 12, 3, 0, 0, }, /* 341 */ - { 31, 10, 5, 0, 0, }, /* 342 */ - { 31, 21, 12, 0, 0, }, /* 343 */ - { 31, 6, 12, 0, 0, }, /* 344 */ - { 31, 23, 12, 0, 0, }, /* 345 */ - { 31, 13, 12, 0, 0, }, /* 346 */ - { 31, 15, 12, 0, 0, }, /* 347 */ - { 37, 21, 12, 0, 0, }, /* 348 */ - { 37, 17, 12, 0, 0, }, /* 349 */ - { 37, 12, 3, 0, 0, }, /* 350 */ - { 37, 1, 2, 0, 0, }, /* 351 */ - { 37, 13, 12, 0, 0, }, /* 352 */ - { 37, 7, 12, 0, 0, }, /* 353 */ - { 37, 6, 12, 0, 0, }, /* 354 */ - { 34, 7, 12, 0, 0, }, /* 355 */ - { 34, 12, 3, 0, 0, }, /* 356 */ - { 34, 10, 5, 0, 0, }, /* 357 */ - { 34, 26, 12, 0, 0, }, /* 358 */ - { 34, 21, 12, 0, 0, }, /* 359 */ - { 34, 13, 12, 0, 0, }, /* 360 */ - { 52, 7, 12, 0, 0, }, /* 361 */ - { 39, 7, 12, 0, 0, }, /* 362 */ - { 39, 10, 12, 0, 0, }, /* 363 */ - { 39, 10, 5, 0, 0, }, /* 364 */ - { 39, 13, 12, 0, 0, }, /* 365 */ - { 39, 15, 12, 0, 0, }, /* 366 */ - { 39, 26, 12, 0, 0, }, /* 367 */ - { 31, 26, 12, 0, 0, }, /* 368 */ - { 5, 7, 12, 0, 0, }, /* 369 */ - { 5, 12, 3, 0, 0, }, /* 370 */ - { 5, 10, 5, 0, 0, }, /* 371 */ - { 5, 21, 12, 0, 0, }, /* 372 */ - { 90, 7, 12, 0, 0, }, /* 373 */ - { 90, 10, 5, 0, 0, }, /* 374 */ - { 90, 12, 3, 0, 0, }, /* 375 */ - { 90, 10, 12, 0, 0, }, /* 376 */ - { 90, 13, 12, 0, 0, }, /* 377 */ - { 90, 21, 12, 0, 0, }, /* 378 */ - { 90, 6, 12, 0, 0, }, /* 379 */ - { 27, 11, 3, 0, 0, }, /* 380 */ - { 61, 12, 3, 0, 0, }, /* 381 */ - { 61, 10, 5, 
0, 0, }, /* 382 */ - { 61, 7, 12, 0, 0, }, /* 383 */ - { 61, 13, 12, 0, 0, }, /* 384 */ - { 61, 21, 12, 0, 0, }, /* 385 */ - { 61, 26, 12, 0, 0, }, /* 386 */ - { 75, 12, 3, 0, 0, }, /* 387 */ - { 75, 10, 5, 0, 0, }, /* 388 */ - { 75, 7, 12, 0, 0, }, /* 389 */ - { 75, 13, 12, 0, 0, }, /* 390 */ - { 92, 7, 12, 0, 0, }, /* 391 */ - { 92, 12, 3, 0, 0, }, /* 392 */ - { 92, 10, 5, 0, 0, }, /* 393 */ - { 92, 21, 12, 0, 0, }, /* 394 */ - { 69, 7, 12, 0, 0, }, /* 395 */ - { 69, 10, 5, 0, 0, }, /* 396 */ - { 69, 12, 3, 0, 0, }, /* 397 */ - { 69, 21, 12, 0, 0, }, /* 398 */ - { 69, 13, 12, 0, 0, }, /* 399 */ - { 72, 13, 12, 0, 0, }, /* 400 */ - { 72, 7, 12, 0, 0, }, /* 401 */ - { 72, 6, 12, 0, 0, }, /* 402 */ - { 72, 21, 12, 0, 0, }, /* 403 */ - { 75, 21, 12, 0, 0, }, /* 404 */ - { 9, 10, 5, 0, 0, }, /* 405 */ - { 9, 7, 12, 0, 0, }, /* 406 */ - { 12, 5, 12, 0, 0, }, /* 407 */ - { 12, 6, 12, 0, 0, }, /* 408 */ - { 33, 5, 12, 0, 35332, }, /* 409 */ - { 33, 5, 12, 0, 3814, }, /* 410 */ - { 33, 9, 12, 63, 1, }, /* 411 */ - { 33, 5, 12, 63, -1, }, /* 412 */ - { 33, 5, 12, 63, -58, }, /* 413 */ - { 33, 9, 12, 0, -7615, }, /* 414 */ - { 19, 5, 12, 0, 8, }, /* 415 */ - { 19, 9, 12, 0, -8, }, /* 416 */ - { 19, 5, 12, 0, 74, }, /* 417 */ - { 19, 5, 12, 0, 86, }, /* 418 */ - { 19, 5, 12, 0, 100, }, /* 419 */ - { 19, 5, 12, 0, 128, }, /* 420 */ - { 19, 5, 12, 0, 112, }, /* 421 */ - { 19, 5, 12, 0, 126, }, /* 422 */ - { 19, 8, 12, 0, -8, }, /* 423 */ - { 19, 5, 12, 0, 9, }, /* 424 */ - { 19, 9, 12, 0, -74, }, /* 425 */ - { 19, 8, 12, 0, -9, }, /* 426 */ - { 19, 5, 12, 21, -7173, }, /* 427 */ - { 19, 9, 12, 0, -86, }, /* 428 */ - { 19, 9, 12, 0, -100, }, /* 429 */ - { 19, 9, 12, 0, -112, }, /* 430 */ - { 19, 9, 12, 0, -128, }, /* 431 */ - { 19, 9, 12, 0, -126, }, /* 432 */ - { 27, 1, 3, 0, 0, }, /* 433 */ - { 9, 27, 2, 0, 0, }, /* 434 */ - { 9, 28, 2, 0, 0, }, /* 435 */ - { 9, 2, 2, 0, 0, }, /* 436 */ - { 9, 9, 12, 0, 0, }, /* 437 */ - { 9, 5, 12, 0, 0, }, /* 438 */ - { 19, 9, 12, 67, 
-7517, }, /* 439 */ - { 33, 9, 12, 71, -8383, }, /* 440 */ - { 33, 9, 12, 75, -8262, }, /* 441 */ - { 33, 9, 12, 0, 28, }, /* 442 */ - { 33, 5, 12, 0, -28, }, /* 443 */ - { 33, 14, 12, 0, 16, }, /* 444 */ - { 33, 14, 12, 0, -16, }, /* 445 */ - { 33, 14, 12, 0, 0, }, /* 446 */ - { 9, 26, 12, 0, 26, }, /* 447 */ - { 9, 26, 12, 0, -26, }, /* 448 */ - { 4, 26, 12, 0, 0, }, /* 449 */ - { 17, 9, 12, 0, 48, }, /* 450 */ - { 17, 5, 12, 0, -48, }, /* 451 */ - { 33, 9, 12, 0, -10743, }, /* 452 */ - { 33, 9, 12, 0, -3814, }, /* 453 */ - { 33, 9, 12, 0, -10727, }, /* 454 */ - { 33, 5, 12, 0, -10795, }, /* 455 */ - { 33, 5, 12, 0, -10792, }, /* 456 */ - { 33, 9, 12, 0, -10780, }, /* 457 */ - { 33, 9, 12, 0, -10749, }, /* 458 */ - { 33, 9, 12, 0, -10783, }, /* 459 */ - { 33, 9, 12, 0, -10782, }, /* 460 */ - { 33, 9, 12, 0, -10815, }, /* 461 */ - { 10, 5, 12, 0, 0, }, /* 462 */ - { 10, 26, 12, 0, 0, }, /* 463 */ - { 10, 12, 3, 0, 0, }, /* 464 */ - { 10, 21, 12, 0, 0, }, /* 465 */ - { 10, 15, 12, 0, 0, }, /* 466 */ - { 16, 5, 12, 0, -7264, }, /* 467 */ - { 58, 7, 12, 0, 0, }, /* 468 */ - { 58, 6, 12, 0, 0, }, /* 469 */ - { 58, 21, 12, 0, 0, }, /* 470 */ - { 58, 12, 3, 0, 0, }, /* 471 */ - { 22, 26, 12, 0, 0, }, /* 472 */ - { 22, 6, 12, 0, 0, }, /* 473 */ - { 22, 14, 12, 0, 0, }, /* 474 */ - { 23, 10, 3, 0, 0, }, /* 475 */ - { 26, 7, 12, 0, 0, }, /* 476 */ - { 26, 6, 12, 0, 0, }, /* 477 */ - { 29, 7, 12, 0, 0, }, /* 478 */ - { 29, 6, 12, 0, 0, }, /* 479 */ - { 3, 7, 12, 0, 0, }, /* 480 */ - { 23, 7, 12, 0, 0, }, /* 481 */ - { 23, 26, 12, 0, 0, }, /* 482 */ - { 29, 26, 12, 0, 0, }, /* 483 */ - { 22, 7, 12, 0, 0, }, /* 484 */ - { 60, 7, 12, 0, 0, }, /* 485 */ - { 60, 6, 12, 0, 0, }, /* 486 */ - { 60, 26, 12, 0, 0, }, /* 487 */ - { 85, 7, 12, 0, 0, }, /* 488 */ - { 85, 6, 12, 0, 0, }, /* 489 */ - { 85, 21, 12, 0, 0, }, /* 490 */ - { 76, 7, 12, 0, 0, }, /* 491 */ - { 76, 6, 12, 0, 0, }, /* 492 */ - { 76, 21, 12, 0, 0, }, /* 493 */ - { 76, 13, 12, 0, 0, }, /* 494 */ - { 12, 7, 12, 0, 0, 
}, /* 495 */ - { 12, 21, 12, 0, 0, }, /* 496 */ - { 78, 7, 12, 0, 0, }, /* 497 */ - { 78, 14, 12, 0, 0, }, /* 498 */ - { 78, 12, 3, 0, 0, }, /* 499 */ - { 78, 21, 12, 0, 0, }, /* 500 */ - { 33, 9, 12, 0, -35332, }, /* 501 */ - { 33, 9, 12, 0, -42280, }, /* 502 */ - { 33, 9, 12, 0, -42308, }, /* 503 */ - { 33, 9, 12, 0, -42319, }, /* 504 */ - { 33, 9, 12, 0, -42315, }, /* 505 */ - { 33, 9, 12, 0, -42305, }, /* 506 */ - { 33, 9, 12, 0, -42258, }, /* 507 */ - { 33, 9, 12, 0, -42282, }, /* 508 */ - { 48, 7, 12, 0, 0, }, /* 509 */ - { 48, 12, 3, 0, 0, }, /* 510 */ - { 48, 10, 5, 0, 0, }, /* 511 */ - { 48, 26, 12, 0, 0, }, /* 512 */ - { 64, 7, 12, 0, 0, }, /* 513 */ - { 64, 21, 12, 0, 0, }, /* 514 */ - { 74, 10, 5, 0, 0, }, /* 515 */ - { 74, 7, 12, 0, 0, }, /* 516 */ - { 74, 12, 3, 0, 0, }, /* 517 */ - { 74, 21, 12, 0, 0, }, /* 518 */ - { 74, 13, 12, 0, 0, }, /* 519 */ - { 68, 13, 12, 0, 0, }, /* 520 */ - { 68, 7, 12, 0, 0, }, /* 521 */ - { 68, 12, 3, 0, 0, }, /* 522 */ - { 68, 21, 12, 0, 0, }, /* 523 */ - { 73, 7, 12, 0, 0, }, /* 524 */ - { 73, 12, 3, 0, 0, }, /* 525 */ - { 73, 10, 5, 0, 0, }, /* 526 */ - { 73, 21, 12, 0, 0, }, /* 527 */ - { 83, 12, 3, 0, 0, }, /* 528 */ - { 83, 10, 5, 0, 0, }, /* 529 */ - { 83, 7, 12, 0, 0, }, /* 530 */ - { 83, 21, 12, 0, 0, }, /* 531 */ - { 83, 13, 12, 0, 0, }, /* 532 */ - { 38, 6, 12, 0, 0, }, /* 533 */ - { 67, 7, 12, 0, 0, }, /* 534 */ - { 67, 12, 3, 0, 0, }, /* 535 */ - { 67, 10, 5, 0, 0, }, /* 536 */ - { 67, 13, 12, 0, 0, }, /* 537 */ - { 67, 21, 12, 0, 0, }, /* 538 */ - { 91, 7, 12, 0, 0, }, /* 539 */ - { 91, 12, 3, 0, 0, }, /* 540 */ - { 91, 6, 12, 0, 0, }, /* 541 */ - { 91, 21, 12, 0, 0, }, /* 542 */ - { 86, 7, 12, 0, 0, }, /* 543 */ - { 86, 10, 5, 0, 0, }, /* 544 */ - { 86, 12, 3, 0, 0, }, /* 545 */ - { 86, 21, 12, 0, 0, }, /* 546 */ - { 86, 6, 12, 0, 0, }, /* 547 */ - { 86, 13, 12, 0, 0, }, /* 548 */ - { 23, 7, 9, 0, 0, }, /* 549 */ - { 23, 7, 10, 0, 0, }, /* 550 */ - { 9, 4, 2, 0, 0, }, /* 551 */ - { 9, 3, 12, 0, 0, }, /* 
552 */ - { 25, 25, 12, 0, 0, }, /* 553 */ - { 0, 24, 12, 0, 0, }, /* 554 */ - { 9, 6, 3, 0, 0, }, /* 555 */ - { 35, 7, 12, 0, 0, }, /* 556 */ - { 19, 14, 12, 0, 0, }, /* 557 */ - { 19, 15, 12, 0, 0, }, /* 558 */ - { 19, 26, 12, 0, 0, }, /* 559 */ - { 70, 7, 12, 0, 0, }, /* 560 */ - { 66, 7, 12, 0, 0, }, /* 561 */ - { 41, 7, 12, 0, 0, }, /* 562 */ - { 41, 15, 12, 0, 0, }, /* 563 */ - { 18, 7, 12, 0, 0, }, /* 564 */ - { 18, 14, 12, 0, 0, }, /* 565 */ - { 117, 7, 12, 0, 0, }, /* 566 */ - { 117, 12, 3, 0, 0, }, /* 567 */ - { 59, 7, 12, 0, 0, }, /* 568 */ - { 59, 21, 12, 0, 0, }, /* 569 */ - { 42, 7, 12, 0, 0, }, /* 570 */ - { 42, 21, 12, 0, 0, }, /* 571 */ - { 42, 14, 12, 0, 0, }, /* 572 */ - { 13, 9, 12, 0, 40, }, /* 573 */ - { 13, 5, 12, 0, -40, }, /* 574 */ - { 46, 7, 12, 0, 0, }, /* 575 */ - { 44, 7, 12, 0, 0, }, /* 576 */ - { 44, 13, 12, 0, 0, }, /* 577 */ - { 105, 7, 12, 0, 0, }, /* 578 */ - { 103, 7, 12, 0, 0, }, /* 579 */ - { 103, 21, 12, 0, 0, }, /* 580 */ - { 109, 7, 12, 0, 0, }, /* 581 */ - { 11, 7, 12, 0, 0, }, /* 582 */ - { 80, 7, 12, 0, 0, }, /* 583 */ - { 80, 21, 12, 0, 0, }, /* 584 */ - { 80, 15, 12, 0, 0, }, /* 585 */ - { 119, 7, 12, 0, 0, }, /* 586 */ - { 119, 26, 12, 0, 0, }, /* 587 */ - { 119, 15, 12, 0, 0, }, /* 588 */ - { 115, 7, 12, 0, 0, }, /* 589 */ - { 115, 15, 12, 0, 0, }, /* 590 */ - { 65, 7, 12, 0, 0, }, /* 591 */ - { 65, 15, 12, 0, 0, }, /* 592 */ - { 65, 21, 12, 0, 0, }, /* 593 */ - { 71, 7, 12, 0, 0, }, /* 594 */ - { 71, 21, 12, 0, 0, }, /* 595 */ - { 97, 7, 12, 0, 0, }, /* 596 */ - { 96, 7, 12, 0, 0, }, /* 597 */ - { 30, 7, 12, 0, 0, }, /* 598 */ - { 30, 12, 3, 0, 0, }, /* 599 */ - { 30, 15, 12, 0, 0, }, /* 600 */ - { 30, 21, 12, 0, 0, }, /* 601 */ - { 87, 7, 12, 0, 0, }, /* 602 */ - { 87, 15, 12, 0, 0, }, /* 603 */ - { 87, 21, 12, 0, 0, }, /* 604 */ - { 116, 7, 12, 0, 0, }, /* 605 */ - { 116, 15, 12, 0, 0, }, /* 606 */ - { 111, 7, 12, 0, 0, }, /* 607 */ - { 111, 26, 12, 0, 0, }, /* 608 */ - { 111, 12, 3, 0, 0, }, /* 609 */ - { 111, 15, 
12, 0, 0, }, /* 610 */ - { 111, 21, 12, 0, 0, }, /* 611 */ - { 77, 7, 12, 0, 0, }, /* 612 */ - { 77, 21, 12, 0, 0, }, /* 613 */ - { 82, 7, 12, 0, 0, }, /* 614 */ - { 82, 15, 12, 0, 0, }, /* 615 */ - { 81, 7, 12, 0, 0, }, /* 616 */ - { 81, 15, 12, 0, 0, }, /* 617 */ - { 120, 7, 12, 0, 0, }, /* 618 */ - { 120, 21, 12, 0, 0, }, /* 619 */ - { 120, 15, 12, 0, 0, }, /* 620 */ - { 88, 7, 12, 0, 0, }, /* 621 */ - { 0, 15, 12, 0, 0, }, /* 622 */ - { 93, 10, 5, 0, 0, }, /* 623 */ - { 93, 12, 3, 0, 0, }, /* 624 */ - { 93, 7, 12, 0, 0, }, /* 625 */ - { 93, 21, 12, 0, 0, }, /* 626 */ - { 93, 15, 12, 0, 0, }, /* 627 */ - { 93, 13, 12, 0, 0, }, /* 628 */ - { 84, 12, 3, 0, 0, }, /* 629 */ - { 84, 10, 5, 0, 0, }, /* 630 */ - { 84, 7, 12, 0, 0, }, /* 631 */ - { 84, 21, 12, 0, 0, }, /* 632 */ - { 84, 1, 2, 0, 0, }, /* 633 */ - { 100, 7, 12, 0, 0, }, /* 634 */ - { 100, 13, 12, 0, 0, }, /* 635 */ - { 95, 12, 3, 0, 0, }, /* 636 */ - { 95, 7, 12, 0, 0, }, /* 637 */ - { 95, 10, 5, 0, 0, }, /* 638 */ - { 95, 13, 12, 0, 0, }, /* 639 */ - { 95, 21, 12, 0, 0, }, /* 640 */ - { 110, 7, 12, 0, 0, }, /* 641 */ - { 110, 12, 3, 0, 0, }, /* 642 */ - { 110, 21, 12, 0, 0, }, /* 643 */ - { 99, 12, 3, 0, 0, }, /* 644 */ - { 99, 10, 5, 0, 0, }, /* 645 */ - { 99, 7, 12, 0, 0, }, /* 646 */ - { 99, 21, 12, 0, 0, }, /* 647 */ - { 99, 13, 12, 0, 0, }, /* 648 */ - { 47, 15, 12, 0, 0, }, /* 649 */ - { 107, 7, 12, 0, 0, }, /* 650 */ - { 107, 10, 5, 0, 0, }, /* 651 */ - { 107, 12, 3, 0, 0, }, /* 652 */ - { 107, 21, 12, 0, 0, }, /* 653 */ - { 108, 7, 12, 0, 0, }, /* 654 */ - { 108, 12, 3, 0, 0, }, /* 655 */ - { 108, 10, 5, 0, 0, }, /* 656 */ - { 108, 13, 12, 0, 0, }, /* 657 */ - { 106, 12, 3, 0, 0, }, /* 658 */ - { 106, 10, 5, 0, 0, }, /* 659 */ - { 106, 7, 12, 0, 0, }, /* 660 */ - { 106, 10, 3, 0, 0, }, /* 661 */ - { 123, 7, 12, 0, 0, }, /* 662 */ - { 123, 10, 3, 0, 0, }, /* 663 */ - { 123, 10, 5, 0, 0, }, /* 664 */ - { 123, 12, 3, 0, 0, }, /* 665 */ - { 123, 21, 12, 0, 0, }, /* 666 */ - { 123, 13, 12, 0, 0, }, 
/* 667 */ - { 122, 7, 12, 0, 0, }, /* 668 */ - { 122, 10, 3, 0, 0, }, /* 669 */ - { 122, 10, 5, 0, 0, }, /* 670 */ - { 122, 12, 3, 0, 0, }, /* 671 */ - { 122, 21, 12, 0, 0, }, /* 672 */ - { 113, 7, 12, 0, 0, }, /* 673 */ - { 113, 10, 5, 0, 0, }, /* 674 */ - { 113, 12, 3, 0, 0, }, /* 675 */ - { 113, 21, 12, 0, 0, }, /* 676 */ - { 113, 13, 12, 0, 0, }, /* 677 */ - { 101, 7, 12, 0, 0, }, /* 678 */ - { 101, 12, 3, 0, 0, }, /* 679 */ - { 101, 10, 5, 0, 0, }, /* 680 */ - { 101, 13, 12, 0, 0, }, /* 681 */ - { 124, 9, 12, 0, 32, }, /* 682 */ - { 124, 5, 12, 0, -32, }, /* 683 */ - { 124, 13, 12, 0, 0, }, /* 684 */ - { 124, 15, 12, 0, 0, }, /* 685 */ - { 124, 7, 12, 0, 0, }, /* 686 */ - { 121, 7, 12, 0, 0, }, /* 687 */ - { 62, 7, 12, 0, 0, }, /* 688 */ - { 62, 14, 12, 0, 0, }, /* 689 */ - { 62, 21, 12, 0, 0, }, /* 690 */ - { 79, 7, 12, 0, 0, }, /* 691 */ - { 114, 7, 12, 0, 0, }, /* 692 */ - { 114, 13, 12, 0, 0, }, /* 693 */ - { 114, 21, 12, 0, 0, }, /* 694 */ - { 102, 7, 12, 0, 0, }, /* 695 */ - { 102, 12, 3, 0, 0, }, /* 696 */ - { 102, 21, 12, 0, 0, }, /* 697 */ - { 118, 7, 12, 0, 0, }, /* 698 */ - { 118, 12, 3, 0, 0, }, /* 699 */ - { 118, 21, 12, 0, 0, }, /* 700 */ - { 118, 26, 12, 0, 0, }, /* 701 */ - { 118, 6, 12, 0, 0, }, /* 702 */ - { 118, 13, 12, 0, 0, }, /* 703 */ - { 118, 15, 12, 0, 0, }, /* 704 */ - { 98, 7, 12, 0, 0, }, /* 705 */ - { 98, 10, 5, 0, 0, }, /* 706 */ - { 98, 12, 3, 0, 0, }, /* 707 */ - { 98, 6, 12, 0, 0, }, /* 708 */ - { 104, 7, 12, 0, 0, }, /* 709 */ - { 104, 26, 12, 0, 0, }, /* 710 */ - { 104, 12, 3, 0, 0, }, /* 711 */ - { 104, 21, 12, 0, 0, }, /* 712 */ - { 9, 10, 3, 0, 0, }, /* 713 */ - { 19, 12, 3, 0, 0, }, /* 714 */ - { 112, 7, 12, 0, 0, }, /* 715 */ - { 112, 15, 12, 0, 0, }, /* 716 */ - { 112, 12, 3, 0, 0, }, /* 717 */ - { 9, 26, 11, 0, 0, }, /* 718 */ - { 26, 26, 12, 0, 0, }, /* 719 */ + { 0, 6, 12, 0, 0, }, /* 202 */ + { 49, 21, 12, 0, 0, }, /* 203 */ + { 49, 1, 2, 0, 0, }, /* 204 */ + { 49, 7, 12, 0, 0, }, /* 205 */ + { 49, 12, 3, 0, 0, }, 
/* 206 */ + { 55, 7, 12, 0, 0, }, /* 207 */ + { 55, 12, 3, 0, 0, }, /* 208 */ + { 63, 13, 12, 0, 0, }, /* 209 */ + { 63, 7, 12, 0, 0, }, /* 210 */ + { 63, 12, 3, 0, 0, }, /* 211 */ + { 63, 6, 12, 0, 0, }, /* 212 */ + { 63, 26, 12, 0, 0, }, /* 213 */ + { 63, 21, 12, 0, 0, }, /* 214 */ + { 89, 7, 12, 0, 0, }, /* 215 */ + { 89, 12, 3, 0, 0, }, /* 216 */ + { 89, 6, 12, 0, 0, }, /* 217 */ + { 89, 21, 12, 0, 0, }, /* 218 */ + { 94, 7, 12, 0, 0, }, /* 219 */ + { 94, 12, 3, 0, 0, }, /* 220 */ + { 94, 21, 12, 0, 0, }, /* 221 */ + { 14, 12, 3, 0, 0, }, /* 222 */ + { 14, 10, 5, 0, 0, }, /* 223 */ + { 14, 7, 12, 0, 0, }, /* 224 */ + { 14, 13, 12, 0, 0, }, /* 225 */ + { 14, 21, 12, 0, 0, }, /* 226 */ + { 14, 6, 12, 0, 0, }, /* 227 */ + { 2, 7, 12, 0, 0, }, /* 228 */ + { 2, 12, 3, 0, 0, }, /* 229 */ + { 2, 10, 5, 0, 0, }, /* 230 */ + { 2, 10, 3, 0, 0, }, /* 231 */ + { 2, 13, 12, 0, 0, }, /* 232 */ + { 2, 23, 12, 0, 0, }, /* 233 */ + { 2, 15, 12, 0, 0, }, /* 234 */ + { 2, 26, 12, 0, 0, }, /* 235 */ + { 21, 12, 3, 0, 0, }, /* 236 */ + { 21, 10, 5, 0, 0, }, /* 237 */ + { 21, 7, 12, 0, 0, }, /* 238 */ + { 21, 13, 12, 0, 0, }, /* 239 */ + { 20, 12, 3, 0, 0, }, /* 240 */ + { 20, 10, 5, 0, 0, }, /* 241 */ + { 20, 7, 12, 0, 0, }, /* 242 */ + { 20, 13, 12, 0, 0, }, /* 243 */ + { 20, 21, 12, 0, 0, }, /* 244 */ + { 20, 23, 12, 0, 0, }, /* 245 */ + { 43, 12, 3, 0, 0, }, /* 246 */ + { 43, 10, 5, 0, 0, }, /* 247 */ + { 43, 7, 12, 0, 0, }, /* 248 */ + { 43, 10, 3, 0, 0, }, /* 249 */ + { 43, 13, 12, 0, 0, }, /* 250 */ + { 43, 26, 12, 0, 0, }, /* 251 */ + { 43, 15, 12, 0, 0, }, /* 252 */ + { 53, 12, 3, 0, 0, }, /* 253 */ + { 53, 7, 12, 0, 0, }, /* 254 */ + { 53, 10, 3, 0, 0, }, /* 255 */ + { 53, 10, 5, 0, 0, }, /* 256 */ + { 53, 13, 12, 0, 0, }, /* 257 */ + { 53, 15, 12, 0, 0, }, /* 258 */ + { 53, 26, 12, 0, 0, }, /* 259 */ + { 53, 23, 12, 0, 0, }, /* 260 */ + { 54, 12, 3, 0, 0, }, /* 261 */ + { 54, 10, 5, 0, 0, }, /* 262 */ + { 54, 7, 12, 0, 0, }, /* 263 */ + { 54, 13, 12, 0, 0, }, /* 264 */ + 
{ 54, 15, 12, 0, 0, }, /* 265 */ + { 54, 26, 12, 0, 0, }, /* 266 */ + { 28, 12, 3, 0, 0, }, /* 267 */ + { 28, 10, 5, 0, 0, }, /* 268 */ + { 28, 7, 12, 0, 0, }, /* 269 */ + { 28, 10, 3, 0, 0, }, /* 270 */ + { 28, 13, 12, 0, 0, }, /* 271 */ + { 36, 12, 3, 0, 0, }, /* 272 */ + { 36, 10, 5, 0, 0, }, /* 273 */ + { 36, 7, 12, 0, 0, }, /* 274 */ + { 36, 10, 3, 0, 0, }, /* 275 */ + { 36, 13, 12, 0, 0, }, /* 276 */ + { 36, 15, 12, 0, 0, }, /* 277 */ + { 36, 26, 12, 0, 0, }, /* 278 */ + { 47, 10, 5, 0, 0, }, /* 279 */ + { 47, 7, 12, 0, 0, }, /* 280 */ + { 47, 12, 3, 0, 0, }, /* 281 */ + { 47, 10, 3, 0, 0, }, /* 282 */ + { 47, 13, 12, 0, 0, }, /* 283 */ + { 47, 21, 12, 0, 0, }, /* 284 */ + { 56, 7, 12, 0, 0, }, /* 285 */ + { 56, 12, 3, 0, 0, }, /* 286 */ + { 56, 7, 5, 0, 0, }, /* 287 */ + { 56, 6, 12, 0, 0, }, /* 288 */ + { 56, 21, 12, 0, 0, }, /* 289 */ + { 56, 13, 12, 0, 0, }, /* 290 */ + { 32, 7, 12, 0, 0, }, /* 291 */ + { 32, 12, 3, 0, 0, }, /* 292 */ + { 32, 7, 5, 0, 0, }, /* 293 */ + { 32, 6, 12, 0, 0, }, /* 294 */ + { 32, 13, 12, 0, 0, }, /* 295 */ + { 57, 7, 12, 0, 0, }, /* 296 */ + { 57, 26, 12, 0, 0, }, /* 297 */ + { 57, 21, 12, 0, 0, }, /* 298 */ + { 57, 12, 3, 0, 0, }, /* 299 */ + { 57, 13, 12, 0, 0, }, /* 300 */ + { 57, 15, 12, 0, 0, }, /* 301 */ + { 57, 22, 12, 0, 0, }, /* 302 */ + { 57, 18, 12, 0, 0, }, /* 303 */ + { 57, 10, 5, 0, 0, }, /* 304 */ + { 38, 7, 12, 0, 0, }, /* 305 */ + { 38, 10, 12, 0, 0, }, /* 306 */ + { 38, 12, 3, 0, 0, }, /* 307 */ + { 38, 10, 5, 0, 0, }, /* 308 */ + { 38, 13, 12, 0, 0, }, /* 309 */ + { 38, 21, 12, 0, 0, }, /* 310 */ + { 38, 26, 12, 0, 0, }, /* 311 */ + { 16, 9, 12, 0, 7264, }, /* 312 */ + { 16, 7, 12, 0, 0, }, /* 313 */ + { 16, 6, 12, 0, 0, }, /* 314 */ + { 23, 7, 6, 0, 0, }, /* 315 */ + { 23, 7, 7, 0, 0, }, /* 316 */ + { 23, 7, 8, 0, 0, }, /* 317 */ + { 15, 7, 12, 0, 0, }, /* 318 */ + { 15, 12, 3, 0, 0, }, /* 319 */ + { 15, 21, 12, 0, 0, }, /* 320 */ + { 15, 15, 12, 0, 0, }, /* 321 */ + { 15, 26, 12, 0, 0, }, /* 322 */ + { 8, 
9, 12, 0, 38864, }, /* 323 */ + { 8, 9, 12, 0, 8, }, /* 324 */ + { 8, 5, 12, 0, -8, }, /* 325 */ + { 7, 17, 12, 0, 0, }, /* 326 */ + { 7, 7, 12, 0, 0, }, /* 327 */ + { 7, 21, 12, 0, 0, }, /* 328 */ + { 40, 29, 12, 0, 0, }, /* 329 */ + { 40, 7, 12, 0, 0, }, /* 330 */ + { 40, 22, 12, 0, 0, }, /* 331 */ + { 40, 18, 12, 0, 0, }, /* 332 */ + { 45, 7, 12, 0, 0, }, /* 333 */ + { 45, 14, 12, 0, 0, }, /* 334 */ + { 50, 7, 12, 0, 0, }, /* 335 */ + { 50, 12, 3, 0, 0, }, /* 336 */ + { 24, 7, 12, 0, 0, }, /* 337 */ + { 24, 12, 3, 0, 0, }, /* 338 */ + { 6, 7, 12, 0, 0, }, /* 339 */ + { 6, 12, 3, 0, 0, }, /* 340 */ + { 51, 7, 12, 0, 0, }, /* 341 */ + { 51, 12, 3, 0, 0, }, /* 342 */ + { 31, 7, 12, 0, 0, }, /* 343 */ + { 31, 12, 3, 0, 0, }, /* 344 */ + { 31, 10, 5, 0, 0, }, /* 345 */ + { 31, 21, 12, 0, 0, }, /* 346 */ + { 31, 6, 12, 0, 0, }, /* 347 */ + { 31, 23, 12, 0, 0, }, /* 348 */ + { 31, 13, 12, 0, 0, }, /* 349 */ + { 31, 15, 12, 0, 0, }, /* 350 */ + { 37, 21, 12, 0, 0, }, /* 351 */ + { 37, 17, 12, 0, 0, }, /* 352 */ + { 37, 12, 3, 0, 0, }, /* 353 */ + { 37, 1, 2, 0, 0, }, /* 354 */ + { 37, 13, 12, 0, 0, }, /* 355 */ + { 37, 7, 12, 0, 0, }, /* 356 */ + { 37, 6, 12, 0, 0, }, /* 357 */ + { 34, 7, 12, 0, 0, }, /* 358 */ + { 34, 12, 3, 0, 0, }, /* 359 */ + { 34, 10, 5, 0, 0, }, /* 360 */ + { 34, 26, 12, 0, 0, }, /* 361 */ + { 34, 21, 12, 0, 0, }, /* 362 */ + { 34, 13, 12, 0, 0, }, /* 363 */ + { 52, 7, 12, 0, 0, }, /* 364 */ + { 39, 7, 12, 0, 0, }, /* 365 */ + { 39, 13, 12, 0, 0, }, /* 366 */ + { 39, 15, 12, 0, 0, }, /* 367 */ + { 39, 26, 12, 0, 0, }, /* 368 */ + { 31, 26, 12, 0, 0, }, /* 369 */ + { 5, 7, 12, 0, 0, }, /* 370 */ + { 5, 12, 3, 0, 0, }, /* 371 */ + { 5, 10, 5, 0, 0, }, /* 372 */ + { 5, 21, 12, 0, 0, }, /* 373 */ + { 90, 7, 12, 0, 0, }, /* 374 */ + { 90, 10, 5, 0, 0, }, /* 375 */ + { 90, 12, 3, 0, 0, }, /* 376 */ + { 90, 10, 12, 0, 0, }, /* 377 */ + { 90, 13, 12, 0, 0, }, /* 378 */ + { 90, 21, 12, 0, 0, }, /* 379 */ + { 90, 6, 12, 0, 0, }, /* 380 */ + { 27, 11, 3, 0, 
0, }, /* 381 */ + { 61, 12, 3, 0, 0, }, /* 382 */ + { 61, 10, 5, 0, 0, }, /* 383 */ + { 61, 7, 12, 0, 0, }, /* 384 */ + { 61, 13, 12, 0, 0, }, /* 385 */ + { 61, 21, 12, 0, 0, }, /* 386 */ + { 61, 26, 12, 0, 0, }, /* 387 */ + { 75, 12, 3, 0, 0, }, /* 388 */ + { 75, 10, 5, 0, 0, }, /* 389 */ + { 75, 7, 12, 0, 0, }, /* 390 */ + { 75, 13, 12, 0, 0, }, /* 391 */ + { 92, 7, 12, 0, 0, }, /* 392 */ + { 92, 12, 3, 0, 0, }, /* 393 */ + { 92, 10, 5, 0, 0, }, /* 394 */ + { 92, 21, 12, 0, 0, }, /* 395 */ + { 69, 7, 12, 0, 0, }, /* 396 */ + { 69, 10, 5, 0, 0, }, /* 397 */ + { 69, 12, 3, 0, 0, }, /* 398 */ + { 69, 21, 12, 0, 0, }, /* 399 */ + { 69, 13, 12, 0, 0, }, /* 400 */ + { 72, 13, 12, 0, 0, }, /* 401 */ + { 72, 7, 12, 0, 0, }, /* 402 */ + { 72, 6, 12, 0, 0, }, /* 403 */ + { 72, 21, 12, 0, 0, }, /* 404 */ + { 75, 21, 12, 0, 0, }, /* 405 */ + { 9, 10, 5, 0, 0, }, /* 406 */ + { 9, 7, 12, 0, 0, }, /* 407 */ + { 12, 5, 12, 0, 0, }, /* 408 */ + { 12, 6, 12, 0, 0, }, /* 409 */ + { 33, 5, 12, 0, 35332, }, /* 410 */ + { 33, 5, 12, 0, 3814, }, /* 411 */ + { 33, 9, 12, 63, 1, }, /* 412 */ + { 33, 5, 12, 63, -1, }, /* 413 */ + { 33, 5, 12, 63, -58, }, /* 414 */ + { 33, 9, 12, 0, -7615, }, /* 415 */ + { 19, 5, 12, 0, 8, }, /* 416 */ + { 19, 9, 12, 0, -8, }, /* 417 */ + { 19, 5, 12, 0, 74, }, /* 418 */ + { 19, 5, 12, 0, 86, }, /* 419 */ + { 19, 5, 12, 0, 100, }, /* 420 */ + { 19, 5, 12, 0, 128, }, /* 421 */ + { 19, 5, 12, 0, 112, }, /* 422 */ + { 19, 5, 12, 0, 126, }, /* 423 */ + { 19, 8, 12, 0, -8, }, /* 424 */ + { 19, 5, 12, 0, 9, }, /* 425 */ + { 19, 9, 12, 0, -74, }, /* 426 */ + { 19, 8, 12, 0, -9, }, /* 427 */ + { 19, 5, 12, 21, -7173, }, /* 428 */ + { 19, 9, 12, 0, -86, }, /* 429 */ + { 19, 9, 12, 0, -100, }, /* 430 */ + { 19, 9, 12, 0, -112, }, /* 431 */ + { 19, 9, 12, 0, -128, }, /* 432 */ + { 19, 9, 12, 0, -126, }, /* 433 */ + { 27, 1, 3, 0, 0, }, /* 434 */ + { 9, 27, 2, 0, 0, }, /* 435 */ + { 9, 28, 2, 0, 0, }, /* 436 */ + { 9, 2, 2, 0, 0, }, /* 437 */ + { 9, 9, 12, 0, 0, }, /* 
438 */ + { 9, 5, 12, 0, 0, }, /* 439 */ + { 19, 9, 12, 67, -7517, }, /* 440 */ + { 33, 9, 12, 71, -8383, }, /* 441 */ + { 33, 9, 12, 75, -8262, }, /* 442 */ + { 33, 9, 12, 0, 28, }, /* 443 */ + { 33, 5, 12, 0, -28, }, /* 444 */ + { 33, 14, 12, 0, 16, }, /* 445 */ + { 33, 14, 12, 0, -16, }, /* 446 */ + { 33, 14, 12, 0, 0, }, /* 447 */ + { 9, 26, 12, 0, 26, }, /* 448 */ + { 9, 26, 12, 0, -26, }, /* 449 */ + { 4, 26, 12, 0, 0, }, /* 450 */ + { 17, 9, 12, 0, 48, }, /* 451 */ + { 17, 5, 12, 0, -48, }, /* 452 */ + { 33, 9, 12, 0, -10743, }, /* 453 */ + { 33, 9, 12, 0, -3814, }, /* 454 */ + { 33, 9, 12, 0, -10727, }, /* 455 */ + { 33, 5, 12, 0, -10795, }, /* 456 */ + { 33, 5, 12, 0, -10792, }, /* 457 */ + { 33, 9, 12, 0, -10780, }, /* 458 */ + { 33, 9, 12, 0, -10749, }, /* 459 */ + { 33, 9, 12, 0, -10783, }, /* 460 */ + { 33, 9, 12, 0, -10782, }, /* 461 */ + { 33, 9, 12, 0, -10815, }, /* 462 */ + { 10, 5, 12, 0, 0, }, /* 463 */ + { 10, 26, 12, 0, 0, }, /* 464 */ + { 10, 12, 3, 0, 0, }, /* 465 */ + { 10, 21, 12, 0, 0, }, /* 466 */ + { 10, 15, 12, 0, 0, }, /* 467 */ + { 16, 5, 12, 0, -7264, }, /* 468 */ + { 58, 7, 12, 0, 0, }, /* 469 */ + { 58, 6, 12, 0, 0, }, /* 470 */ + { 58, 21, 12, 0, 0, }, /* 471 */ + { 58, 12, 3, 0, 0, }, /* 472 */ + { 22, 26, 12, 0, 0, }, /* 473 */ + { 22, 6, 12, 0, 0, }, /* 474 */ + { 22, 14, 12, 0, 0, }, /* 475 */ + { 23, 10, 3, 0, 0, }, /* 476 */ + { 26, 7, 12, 0, 0, }, /* 477 */ + { 26, 6, 12, 0, 0, }, /* 478 */ + { 29, 7, 12, 0, 0, }, /* 479 */ + { 29, 6, 12, 0, 0, }, /* 480 */ + { 3, 7, 12, 0, 0, }, /* 481 */ + { 23, 7, 12, 0, 0, }, /* 482 */ + { 23, 26, 12, 0, 0, }, /* 483 */ + { 29, 26, 12, 0, 0, }, /* 484 */ + { 22, 7, 12, 0, 0, }, /* 485 */ + { 60, 7, 12, 0, 0, }, /* 486 */ + { 60, 6, 12, 0, 0, }, /* 487 */ + { 60, 26, 12, 0, 0, }, /* 488 */ + { 85, 7, 12, 0, 0, }, /* 489 */ + { 85, 6, 12, 0, 0, }, /* 490 */ + { 85, 21, 12, 0, 0, }, /* 491 */ + { 76, 7, 12, 0, 0, }, /* 492 */ + { 76, 6, 12, 0, 0, }, /* 493 */ + { 76, 21, 12, 0, 0, }, /* 494 
*/ + { 76, 13, 12, 0, 0, }, /* 495 */ + { 12, 7, 12, 0, 0, }, /* 496 */ + { 12, 21, 12, 0, 0, }, /* 497 */ + { 78, 7, 12, 0, 0, }, /* 498 */ + { 78, 14, 12, 0, 0, }, /* 499 */ + { 78, 12, 3, 0, 0, }, /* 500 */ + { 78, 21, 12, 0, 0, }, /* 501 */ + { 33, 9, 12, 0, -35332, }, /* 502 */ + { 33, 9, 12, 0, -42280, }, /* 503 */ + { 33, 9, 12, 0, -42308, }, /* 504 */ + { 33, 9, 12, 0, -42319, }, /* 505 */ + { 33, 9, 12, 0, -42315, }, /* 506 */ + { 33, 9, 12, 0, -42305, }, /* 507 */ + { 33, 9, 12, 0, -42258, }, /* 508 */ + { 33, 9, 12, 0, -42282, }, /* 509 */ + { 33, 9, 12, 0, -42261, }, /* 510 */ + { 33, 9, 12, 0, 928, }, /* 511 */ + { 48, 7, 12, 0, 0, }, /* 512 */ + { 48, 12, 3, 0, 0, }, /* 513 */ + { 48, 10, 5, 0, 0, }, /* 514 */ + { 48, 26, 12, 0, 0, }, /* 515 */ + { 64, 7, 12, 0, 0, }, /* 516 */ + { 64, 21, 12, 0, 0, }, /* 517 */ + { 74, 10, 5, 0, 0, }, /* 518 */ + { 74, 7, 12, 0, 0, }, /* 519 */ + { 74, 12, 3, 0, 0, }, /* 520 */ + { 74, 21, 12, 0, 0, }, /* 521 */ + { 74, 13, 12, 0, 0, }, /* 522 */ + { 68, 13, 12, 0, 0, }, /* 523 */ + { 68, 7, 12, 0, 0, }, /* 524 */ + { 68, 12, 3, 0, 0, }, /* 525 */ + { 68, 21, 12, 0, 0, }, /* 526 */ + { 73, 7, 12, 0, 0, }, /* 527 */ + { 73, 12, 3, 0, 0, }, /* 528 */ + { 73, 10, 5, 0, 0, }, /* 529 */ + { 73, 21, 12, 0, 0, }, /* 530 */ + { 83, 12, 3, 0, 0, }, /* 531 */ + { 83, 10, 5, 0, 0, }, /* 532 */ + { 83, 7, 12, 0, 0, }, /* 533 */ + { 83, 21, 12, 0, 0, }, /* 534 */ + { 83, 13, 12, 0, 0, }, /* 535 */ + { 38, 6, 12, 0, 0, }, /* 536 */ + { 67, 7, 12, 0, 0, }, /* 537 */ + { 67, 12, 3, 0, 0, }, /* 538 */ + { 67, 10, 5, 0, 0, }, /* 539 */ + { 67, 13, 12, 0, 0, }, /* 540 */ + { 67, 21, 12, 0, 0, }, /* 541 */ + { 91, 7, 12, 0, 0, }, /* 542 */ + { 91, 12, 3, 0, 0, }, /* 543 */ + { 91, 6, 12, 0, 0, }, /* 544 */ + { 91, 21, 12, 0, 0, }, /* 545 */ + { 86, 7, 12, 0, 0, }, /* 546 */ + { 86, 10, 5, 0, 0, }, /* 547 */ + { 86, 12, 3, 0, 0, }, /* 548 */ + { 86, 21, 12, 0, 0, }, /* 549 */ + { 86, 6, 12, 0, 0, }, /* 550 */ + { 33, 5, 12, 0, -928, }, 
/* 551 */ + { 8, 5, 12, 0, -38864, }, /* 552 */ + { 86, 13, 12, 0, 0, }, /* 553 */ + { 23, 7, 9, 0, 0, }, /* 554 */ + { 23, 7, 10, 0, 0, }, /* 555 */ + { 9, 4, 2, 0, 0, }, /* 556 */ + { 9, 3, 12, 0, 0, }, /* 557 */ + { 25, 25, 12, 0, 0, }, /* 558 */ + { 0, 24, 12, 0, 0, }, /* 559 */ + { 9, 6, 3, 0, 0, }, /* 560 */ + { 35, 7, 12, 0, 0, }, /* 561 */ + { 19, 14, 12, 0, 0, }, /* 562 */ + { 19, 15, 12, 0, 0, }, /* 563 */ + { 19, 26, 12, 0, 0, }, /* 564 */ + { 70, 7, 12, 0, 0, }, /* 565 */ + { 66, 7, 12, 0, 0, }, /* 566 */ + { 41, 7, 12, 0, 0, }, /* 567 */ + { 41, 15, 12, 0, 0, }, /* 568 */ + { 18, 7, 12, 0, 0, }, /* 569 */ + { 18, 14, 12, 0, 0, }, /* 570 */ + { 117, 7, 12, 0, 0, }, /* 571 */ + { 117, 12, 3, 0, 0, }, /* 572 */ + { 59, 7, 12, 0, 0, }, /* 573 */ + { 59, 21, 12, 0, 0, }, /* 574 */ + { 42, 7, 12, 0, 0, }, /* 575 */ + { 42, 21, 12, 0, 0, }, /* 576 */ + { 42, 14, 12, 0, 0, }, /* 577 */ + { 13, 9, 12, 0, 40, }, /* 578 */ + { 13, 5, 12, 0, -40, }, /* 579 */ + { 46, 7, 12, 0, 0, }, /* 580 */ + { 44, 7, 12, 0, 0, }, /* 581 */ + { 44, 13, 12, 0, 0, }, /* 582 */ + { 105, 7, 12, 0, 0, }, /* 583 */ + { 103, 7, 12, 0, 0, }, /* 584 */ + { 103, 21, 12, 0, 0, }, /* 585 */ + { 109, 7, 12, 0, 0, }, /* 586 */ + { 11, 7, 12, 0, 0, }, /* 587 */ + { 80, 7, 12, 0, 0, }, /* 588 */ + { 80, 21, 12, 0, 0, }, /* 589 */ + { 80, 15, 12, 0, 0, }, /* 590 */ + { 119, 7, 12, 0, 0, }, /* 591 */ + { 119, 26, 12, 0, 0, }, /* 592 */ + { 119, 15, 12, 0, 0, }, /* 593 */ + { 115, 7, 12, 0, 0, }, /* 594 */ + { 115, 15, 12, 0, 0, }, /* 595 */ + { 127, 7, 12, 0, 0, }, /* 596 */ + { 127, 15, 12, 0, 0, }, /* 597 */ + { 65, 7, 12, 0, 0, }, /* 598 */ + { 65, 15, 12, 0, 0, }, /* 599 */ + { 65, 21, 12, 0, 0, }, /* 600 */ + { 71, 7, 12, 0, 0, }, /* 601 */ + { 71, 21, 12, 0, 0, }, /* 602 */ + { 97, 7, 12, 0, 0, }, /* 603 */ + { 96, 7, 12, 0, 0, }, /* 604 */ + { 96, 15, 12, 0, 0, }, /* 605 */ + { 30, 7, 12, 0, 0, }, /* 606 */ + { 30, 12, 3, 0, 0, }, /* 607 */ + { 30, 15, 12, 0, 0, }, /* 608 */ + { 30, 21, 
12, 0, 0, }, /* 609 */ + { 87, 7, 12, 0, 0, }, /* 610 */ + { 87, 15, 12, 0, 0, }, /* 611 */ + { 87, 21, 12, 0, 0, }, /* 612 */ + { 116, 7, 12, 0, 0, }, /* 613 */ + { 116, 15, 12, 0, 0, }, /* 614 */ + { 111, 7, 12, 0, 0, }, /* 615 */ + { 111, 26, 12, 0, 0, }, /* 616 */ + { 111, 12, 3, 0, 0, }, /* 617 */ + { 111, 15, 12, 0, 0, }, /* 618 */ + { 111, 21, 12, 0, 0, }, /* 619 */ + { 77, 7, 12, 0, 0, }, /* 620 */ + { 77, 21, 12, 0, 0, }, /* 621 */ + { 82, 7, 12, 0, 0, }, /* 622 */ + { 82, 15, 12, 0, 0, }, /* 623 */ + { 81, 7, 12, 0, 0, }, /* 624 */ + { 81, 15, 12, 0, 0, }, /* 625 */ + { 120, 7, 12, 0, 0, }, /* 626 */ + { 120, 21, 12, 0, 0, }, /* 627 */ + { 120, 15, 12, 0, 0, }, /* 628 */ + { 88, 7, 12, 0, 0, }, /* 629 */ + { 129, 9, 12, 0, 64, }, /* 630 */ + { 129, 5, 12, 0, -64, }, /* 631 */ + { 129, 15, 12, 0, 0, }, /* 632 */ + { 0, 15, 12, 0, 0, }, /* 633 */ + { 93, 10, 5, 0, 0, }, /* 634 */ + { 93, 12, 3, 0, 0, }, /* 635 */ + { 93, 7, 12, 0, 0, }, /* 636 */ + { 93, 21, 12, 0, 0, }, /* 637 */ + { 93, 15, 12, 0, 0, }, /* 638 */ + { 93, 13, 12, 0, 0, }, /* 639 */ + { 84, 12, 3, 0, 0, }, /* 640 */ + { 84, 10, 5, 0, 0, }, /* 641 */ + { 84, 7, 12, 0, 0, }, /* 642 */ + { 84, 21, 12, 0, 0, }, /* 643 */ + { 84, 1, 2, 0, 0, }, /* 644 */ + { 100, 7, 12, 0, 0, }, /* 645 */ + { 100, 13, 12, 0, 0, }, /* 646 */ + { 95, 12, 3, 0, 0, }, /* 647 */ + { 95, 7, 12, 0, 0, }, /* 648 */ + { 95, 10, 5, 0, 0, }, /* 649 */ + { 95, 13, 12, 0, 0, }, /* 650 */ + { 95, 21, 12, 0, 0, }, /* 651 */ + { 110, 7, 12, 0, 0, }, /* 652 */ + { 110, 12, 3, 0, 0, }, /* 653 */ + { 110, 21, 12, 0, 0, }, /* 654 */ + { 99, 12, 3, 0, 0, }, /* 655 */ + { 99, 10, 5, 0, 0, }, /* 656 */ + { 99, 7, 12, 0, 0, }, /* 657 */ + { 99, 21, 12, 0, 0, }, /* 658 */ + { 99, 13, 12, 0, 0, }, /* 659 */ + { 47, 15, 12, 0, 0, }, /* 660 */ + { 107, 7, 12, 0, 0, }, /* 661 */ + { 107, 10, 5, 0, 0, }, /* 662 */ + { 107, 12, 3, 0, 0, }, /* 663 */ + { 107, 21, 12, 0, 0, }, /* 664 */ + { 128, 7, 12, 0, 0, }, /* 665 */ + { 128, 21, 12, 0, 0, 
}, /* 666 */ + { 108, 7, 12, 0, 0, }, /* 667 */ + { 108, 12, 3, 0, 0, }, /* 668 */ + { 108, 10, 5, 0, 0, }, /* 669 */ + { 108, 13, 12, 0, 0, }, /* 670 */ + { 106, 12, 3, 0, 0, }, /* 671 */ + { 106, 10, 5, 0, 0, }, /* 672 */ + { 106, 7, 12, 0, 0, }, /* 673 */ + { 106, 10, 3, 0, 0, }, /* 674 */ + { 123, 7, 12, 0, 0, }, /* 675 */ + { 123, 10, 3, 0, 0, }, /* 676 */ + { 123, 10, 5, 0, 0, }, /* 677 */ + { 123, 12, 3, 0, 0, }, /* 678 */ + { 123, 21, 12, 0, 0, }, /* 679 */ + { 123, 13, 12, 0, 0, }, /* 680 */ + { 122, 7, 12, 0, 0, }, /* 681 */ + { 122, 10, 3, 0, 0, }, /* 682 */ + { 122, 10, 5, 0, 0, }, /* 683 */ + { 122, 12, 3, 0, 0, }, /* 684 */ + { 122, 21, 12, 0, 0, }, /* 685 */ + { 113, 7, 12, 0, 0, }, /* 686 */ + { 113, 10, 5, 0, 0, }, /* 687 */ + { 113, 12, 3, 0, 0, }, /* 688 */ + { 113, 21, 12, 0, 0, }, /* 689 */ + { 113, 13, 12, 0, 0, }, /* 690 */ + { 101, 7, 12, 0, 0, }, /* 691 */ + { 101, 12, 3, 0, 0, }, /* 692 */ + { 101, 10, 5, 0, 0, }, /* 693 */ + { 101, 13, 12, 0, 0, }, /* 694 */ + { 125, 7, 12, 0, 0, }, /* 695 */ + { 125, 12, 3, 0, 0, }, /* 696 */ + { 125, 10, 5, 0, 0, }, /* 697 */ + { 125, 13, 12, 0, 0, }, /* 698 */ + { 125, 15, 12, 0, 0, }, /* 699 */ + { 125, 21, 12, 0, 0, }, /* 700 */ + { 125, 26, 12, 0, 0, }, /* 701 */ + { 124, 9, 12, 0, 32, }, /* 702 */ + { 124, 5, 12, 0, -32, }, /* 703 */ + { 124, 13, 12, 0, 0, }, /* 704 */ + { 124, 15, 12, 0, 0, }, /* 705 */ + { 124, 7, 12, 0, 0, }, /* 706 */ + { 121, 7, 12, 0, 0, }, /* 707 */ + { 62, 7, 12, 0, 0, }, /* 708 */ + { 62, 14, 12, 0, 0, }, /* 709 */ + { 62, 21, 12, 0, 0, }, /* 710 */ + { 79, 7, 12, 0, 0, }, /* 711 */ + { 126, 7, 12, 0, 0, }, /* 712 */ + { 114, 7, 12, 0, 0, }, /* 713 */ + { 114, 13, 12, 0, 0, }, /* 714 */ + { 114, 21, 12, 0, 0, }, /* 715 */ + { 102, 7, 12, 0, 0, }, /* 716 */ + { 102, 12, 3, 0, 0, }, /* 717 */ + { 102, 21, 12, 0, 0, }, /* 718 */ + { 118, 7, 12, 0, 0, }, /* 719 */ + { 118, 12, 3, 0, 0, }, /* 720 */ + { 118, 21, 12, 0, 0, }, /* 721 */ + { 118, 26, 12, 0, 0, }, /* 722 */ + { 
118, 6, 12, 0, 0, }, /* 723 */ + { 118, 13, 12, 0, 0, }, /* 724 */ + { 118, 15, 12, 0, 0, }, /* 725 */ + { 98, 7, 12, 0, 0, }, /* 726 */ + { 98, 10, 5, 0, 0, }, /* 727 */ + { 98, 12, 3, 0, 0, }, /* 728 */ + { 98, 6, 12, 0, 0, }, /* 729 */ + { 104, 7, 12, 0, 0, }, /* 730 */ + { 104, 26, 12, 0, 0, }, /* 731 */ + { 104, 12, 3, 0, 0, }, /* 732 */ + { 104, 21, 12, 0, 0, }, /* 733 */ + { 9, 10, 3, 0, 0, }, /* 734 */ + { 19, 12, 3, 0, 0, }, /* 735 */ + { 130, 26, 12, 0, 0, }, /* 736 */ + { 130, 12, 3, 0, 0, }, /* 737 */ + { 130, 21, 12, 0, 0, }, /* 738 */ + { 112, 7, 12, 0, 0, }, /* 739 */ + { 112, 15, 12, 0, 0, }, /* 740 */ + { 112, 12, 3, 0, 0, }, /* 741 */ + { 9, 26, 11, 0, 0, }, /* 742 */ + { 26, 26, 12, 0, 0, }, /* 743 */ }; const uint8_t PRIV(ucd_stage1)[] = { /* 8704 bytes */ @@ -839,19 +863,19 @@ const uint8_t PRIV(ucd_stage1)[] = { /* 8704 bytes */ 123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+F000 */ 123,123, 95, 95,124,125,126,127,128,128,129,130,131,132,133,134, /* U+F800 */ 135,136,137,138,139,140,141,142,143,144,145,139,146,146,147,139, /* U+10000 */ -148,149,150,151,152,153,154,155,156,139,139,139,157,139,139,139, /* U+10800 */ -158,159,160,161,162,163,164,139,139,165,139,166,167,168,139,139, /* U+11000 */ -139,169,139,139,139,170,139,139,139,139,139,139,139,139,139,139, /* U+11800 */ -171,171,171,171,171,171,171,172,173,139,139,139,139,139,139,139, /* U+12000 */ +148,149,150,151,152,153,154,155,156,157,139,139,158,139,139,139, /* U+10800 */ +159,160,161,162,163,164,165,139,139,166,139,167,168,169,170,139, /* U+11000 */ +139,171,139,139,139,172,139,139,139,139,139,139,139,139,139,139, /* U+11800 */ +173,173,173,173,173,173,173,174,175,173,176,139,139,139,139,139, /* U+12000 */ 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+12800 */ -174,174,174,174,174,174,174,174,175,139,139,139,139,139,139,139, /* U+13000 */ +177,177,177,177,177,177,177,177,178,139,139,139,139,139,139,139, /* U+13000 */ 
139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+13800 */ -139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+14000 */ +139,139,139,139,139,139,139,139,179,179,179,179,180,139,139,139, /* U+14000 */ 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+14800 */ 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+15000 */ 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+15800 */ 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+16000 */ -176,176,176,176,177,178,179,180,139,139,139,139,139,139,181,182, /* U+16800 */ +181,181,181,181,182,183,184,185,139,139,139,139,139,139,186,187, /* U+16800 */ 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+17000 */ 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+17800 */ 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+18000 */ @@ -860,16 +884,16 @@ const uint8_t PRIV(ucd_stage1)[] = { /* 8704 bytes */ 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+19800 */ 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+1A000 */ 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+1A800 */ -183,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+1B000 */ -139,139,139,139,139,139,139,139,184,185,139,139,139,139,139,139, /* U+1B800 */ +188,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+1B000 */ +139,139,139,139,139,139,139,139,189,190,139,139,139,139,139,139, /* U+1B800 */ 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+1C000 */ 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+1C800 */ - 71,186,187,188,189,139,190,139,191,192,193,194,195,196,197,198, /* U+1D000 */ -139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+1D800 */ + 71,191,192,193,194,139,195,139,196,197,198,199,200,201,202,203, /* U+1D000 */ 
+204,204,204,204,205,206,139,139,139,139,139,139,139,139,139,139, /* U+1D800 */ 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+1E000 */ -199,200,139,139,139,139,139,139,139,139,139,139,201,202,139,139, /* U+1E800 */ -203,204,205,206,207,139,208,209, 71,210,211,212,213,214,215,216, /* U+1F000 */ -217,218,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+1F800 */ +207,208,139,139,139,139,139,139,139,139,139,139,209,210,139,139, /* U+1E800 */ +211,212,213,214,215,139, 71,216, 71, 71,217,218, 71,219,220,221, /* U+1F000 */ +222,223,224,225,139,139,139,139,139,139,139,139,139,139,139,139, /* U+1F800 */ 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+20000 */ 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+20800 */ 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+21000 */ @@ -890,18 +914,18 @@ const uint8_t PRIV(ucd_stage1)[] = { /* 8704 bytes */ 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+28800 */ 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+29000 */ 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+29800 */ - 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95,219, 95, 95, /* U+2A000 */ + 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95,226, 95, 95, /* U+2A000 */ 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+2A800 */ - 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95,220, 95, /* U+2B000 */ -221,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+2B800 */ -139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+2C000 */ -139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+2C800 */ + 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95,227, 95, /* U+2B000 */ +228, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+2B800 */ + 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+2C000 */ + 95, 95, 95, 95, 
95, 95, 95, 95, 95, 95, 95, 95, 95,229,139,139, /* U+2C800 */ 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+2D000 */ 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+2D800 */ 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+2E000 */ 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+2E800 */ 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+2F000 */ - 95, 95, 95, 95,221,139,139,139,139,139,139,139,139,139,139,139, /* U+2F800 */ + 95, 95, 95, 95,230,139,139,139,139,139,139,139,139,139,139,139, /* U+2F800 */ 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+30000 */ 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+30800 */ 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+31000 */ @@ -1254,8 +1278,8 @@ const uint8_t PRIV(ucd_stage1)[] = { /* 8704 bytes */ 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+DE800 */ 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+DF000 */ 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+DF800 */ -222,223,224,225,223,223,223,223,223,223,223,223,223,223,223,223, /* U+E0000 */ -223,223,223,223,223,223,223,223,223,223,223,223,223,223,223,223, /* U+E0800 */ +231,232,233,234,232,232,232,232,232,232,232,232,232,232,232,232, /* U+E0000 */ +232,232,232,232,232,232,232,232,232,232,232,232,232,232,232,232, /* U+E0800 */ 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+E1000 */ 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+E1800 */ 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+E2000 */ @@ -1317,7 +1341,7 @@ const uint8_t PRIV(ucd_stage1)[] = { /* 8704 bytes */ 123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+FE000 */ 123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+FE800 */ 
123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+FF000 */ -123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,226, /* U+FF800 */ +123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,235, /* U+FF800 */ 123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+100000 */ 123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+100800 */ 123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+101000 */ @@ -1349,10 +1373,10 @@ const uint8_t PRIV(ucd_stage1)[] = { /* 8704 bytes */ 123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+10E000 */ 123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+10E800 */ 123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+10F000 */ -123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,226, /* U+10F800 */ +123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,235, /* U+10F800 */ }; -const uint16_t PRIV(ucd_stage2)[] = { /* 58112 bytes, block = 128 */ +const uint16_t PRIV(ucd_stage2)[] = { /* 60416 bytes, block = 128 */ /* block 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -1405,533 +1429,533 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 58112 bytes, block = 128 */ /* block 5 */ 99, 33, 33, 99, 33, 33, 33,100, 99,101,102,102,103, 33, 33, 33, - 33, 33,104, 33, 20, 33, 33, 33, 33, 33, 33, 33, 33, 33,105, 33, + 33, 33,104, 33, 20, 33, 33, 33, 33, 33, 33, 33, 33,105,106, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, -106,106,106,106,106,106,106,106,106,107,107,107,107,107,107,107, -107,107, 14, 14, 14, 14,107,107,107,107,107,107,107,107,107,107, -107,107, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, -106,106,106,106,106, 14, 14, 14, 14, 14,108,108,107, 14,107, 14, +107,107,107,107,107,107,107,107,107,108,108,108,108,108,108,108, +108,108, 14, 14, 14, 14,108,108,108,108,108,108,108,108,108,108, +108,108, 14, 14, 14, 
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +107,107,107,107,107, 14, 14, 14, 14, 14,109,109,108, 14,108, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, /* block 6 */ -109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109, -109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109, -109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109, -109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109, -109,109,109,109,109,110,109,109,109,109,109,109,109,109,109,109, -109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109, -109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109, -111,112,111,112,107,113,111,112,114,114,115,116,116,116, 4,117, +110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, +110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, +110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, +110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, +110,110,110,110,110,111,110,110,110,110,110,110,110,110,110,110, +110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, +110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, +112,113,112,113,108,114,112,113,115,115,116,117,117,117, 4,118, /* block 7 */ -114,114,114,114,113, 14,118, 4,119,119,119,114,120,114,121,121, -122,123,124,123,123,125,123,123,126,127,128,123,129,123,123,123, -130,131,114,132,123,123,133,123,123,134,123,123,135,136,136,136, -122,137,138,137,137,139,137,137,140,141,142,137,143,137,137,137, -144,145,146,147,137,137,148,137,137,149,137,137,150,151,151,152, -153,154,155,155,155,156,157,158,111,112,111,112,111,112,111,112, -111,112,159,160,159,160,159,160,159,160,159,160,159,160,159,160, -161,162,163,164,165,166,167,111,112,168,111,112,122,169,169,169, +115,115,115,115,114, 14,119, 4,120,120,120,115,121,115,122,122, +123,124,125,124,124,126,124,124,127,128,129,124,130,124,124,124, +131,132,115,133,124,124,134,124,124,135,124,124,136,137,137,137, 
+123,138,139,138,138,140,138,138,141,142,143,138,144,138,138,138, +145,146,147,148,138,138,149,138,138,150,138,138,151,152,152,153, +154,155,156,156,156,157,158,159,112,113,112,113,112,113,112,113, +112,113,160,161,160,161,160,161,160,161,160,161,160,161,160,161, +162,163,164,165,166,167,168,112,113,169,112,113,123,170,170,170, /* block 8 */ -170,170,170,170,170,170,170,170,170,170,170,170,170,170,170,170, -171,171,171,171,171,171,171,171,171,171,171,171,171,171,171,171, 171,171,171,171,171,171,171,171,171,171,171,171,171,171,171,171, 172,172,172,172,172,172,172,172,172,172,172,172,172,172,172,172, 172,172,172,172,172,172,172,172,172,172,172,172,172,172,172,172, 173,173,173,173,173,173,173,173,173,173,173,173,173,173,173,173, -174,175,174,175,174,175,174,175,174,175,174,175,174,175,174,175, -174,175,174,175,174,175,174,175,174,175,174,175,174,175,174,175, +173,173,173,173,173,173,173,173,173,173,173,173,173,173,173,173, +174,174,174,174,174,174,174,174,174,174,174,174,174,174,174,174, +175,176,175,176,175,176,175,176,175,176,175,176,175,176,175,176, +175,176,175,176,175,176,175,176,175,176,175,176,175,176,175,176, /* block 9 */ -174,175,176,177,177,109,109,177,178,178,174,175,174,175,174,175, -174,175,174,175,174,175,174,175,174,175,174,175,174,175,174,175, -174,175,174,175,174,175,174,175,174,175,174,175,174,175,174,175, -174,175,174,175,174,175,174,175,174,175,174,175,174,175,174,175, -179,174,175,174,175,174,175,174,175,174,175,174,175,174,175,180, -174,175,174,175,174,175,174,175,174,175,174,175,174,175,174,175, -174,175,174,175,174,175,174,175,174,175,174,175,174,175,174,175, -174,175,174,175,174,175,174,175,174,175,174,175,174,175,174,175, +175,176,177,178,178,110,110,178,179,179,175,176,175,176,175,176, +175,176,175,176,175,176,175,176,175,176,175,176,175,176,175,176, +175,176,175,176,175,176,175,176,175,176,175,176,175,176,175,176, +175,176,175,176,175,176,175,176,175,176,175,176,175,176,175,176, 
+180,175,176,175,176,175,176,175,176,175,176,175,176,175,176,181, +175,176,175,176,175,176,175,176,175,176,175,176,175,176,175,176, +175,176,175,176,175,176,175,176,175,176,175,176,175,176,175,176, +175,176,175,176,175,176,175,176,175,176,175,176,175,176,175,176, /* block 10 */ -174,175,174,175,174,175,174,175,174,175,174,175,174,175,174,175, -174,175,174,175,174,175,174,175,174,175,174,175,174,175,174,175, -174,175,174,175,174,175,174,175,174,175,174,175,174,175,174,175, -114,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181, -181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181, -181,181,181,181,181,181,181,114,114,182,183,183,183,183,183,183, -114,184,184,184,184,184,184,184,184,184,184,184,184,184,184,184, -184,184,184,184,184,184,184,184,184,184,184,184,184,184,184,184, +175,176,175,176,175,176,175,176,175,176,175,176,175,176,175,176, +175,176,175,176,175,176,175,176,175,176,175,176,175,176,175,176, +175,176,175,176,175,176,175,176,175,176,175,176,175,176,175,176, +115,182,182,182,182,182,182,182,182,182,182,182,182,182,182,182, +182,182,182,182,182,182,182,182,182,182,182,182,182,182,182,182, +182,182,182,182,182,182,182,115,115,183,184,184,184,184,184,184, +115,185,185,185,185,185,185,185,185,185,185,185,185,185,185,185, +185,185,185,185,185,185,185,185,185,185,185,185,185,185,185,185, /* block 11 */ -184,184,184,184,184,184,184,185,114, 4,186,114,114,187,187,188, -114,189,189,189,189,189,189,189,189,189,189,189,189,189,189,189, -189,189,189,189,189,189,189,189,189,189,189,189,189,189,189,189, -189,189,189,189,189,189,189,189,189,189,189,189,189,189,190,189, -191,189,189,191,189,189,191,189,114,114,114,114,114,114,114,114, -192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192, -192,192,192,192,192,192,192,192,192,192,192,114,114,114,114,114, -192,192,192,191,191,114,114,114,114,114,114,114,114,114,114,114, +185,185,185,185,185,185,185,186,115, 4,187,115,115,188,188,189, 
+115,190,190,190,190,190,190,190,190,190,190,190,190,190,190,190, +190,190,190,190,190,190,190,190,190,190,190,190,190,190,190,190, +190,190,190,190,190,190,190,190,190,190,190,190,190,190,191,190, +192,190,190,192,190,190,192,190,115,115,115,115,115,115,115,115, +193,193,193,193,193,193,193,193,193,193,193,193,193,193,193,193, +193,193,193,193,193,193,193,193,193,193,193,115,115,115,115,115, +193,193,193,192,192,115,115,115,115,115,115,115,115,115,115,115, /* block 12 */ -193,193,193,193,193, 22,194,194,194,195,195,196, 4,195,197,197, -198,198,198,198,198,198,198,198,198,198,198, 4, 22,114,195, 4, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -107,199,199,199,199,199,199,199,199,199,199,109,109,109,109,109, -109,109,109,109,109,109,198,198,198,198,198,198,198,198,198,198, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,195,195,195,195,199,199, -109,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, +194,194,194,194,194, 22,195,195,195,196,196,197, 4,196,198,198, +199,199,199,199,199,199,199,199,199,199,199, 4, 22,115,196, 4, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +108,200,200,200,200,200,200,200,200,200,200,110,110,110,110,110, +110,110,110,110,110,110,199,199,199,199,199,199,199,199,199,199, +201,201,201,201,201,201,201,201,201,201,196,196,196,196,200,200, +110,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, /* block 13 */ -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,195,199,198,198,198,198,198,198,198, 22,197,198, 
-198,198,198,198,198,200,200,198,198,197,198,198,198,198,199,199, -201,201,201,201,201,201,201,201,201,201,199,199,199,197,197,199, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,196,200,199,199,199,199,199,199,199, 22,198,199, +199,199,199,199,199,202,202,199,199,198,199,199,199,199,200,200, +201,201,201,201,201,201,201,201,201,201,200,200,200,198,198,200, /* block 14 */ -202,202,202,202,202,202,202,202,202,202,202,202,202,202,114,203, -204,205,204,204,204,204,204,204,204,204,204,204,204,204,204,204, -204,204,204,204,204,204,204,204,204,204,204,204,204,204,204,204, +203,203,203,203,203,203,203,203,203,203,203,203,203,203,115,204, +205,206,205,205,205,205,205,205,205,205,205,205,205,205,205,205, 205,205,205,205,205,205,205,205,205,205,205,205,205,205,205,205, -205,205,205,205,205,205,205,205,205,205,205,114,114,204,204,204, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, +206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206, +206,206,206,206,206,206,206,206,206,206,206,115,115,205,205,205, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, /* block 15 */ -206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206, -206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206, -206,206,206,206,206,206,207,207,207,207,207,207,207,207,207,207, -207,206,114,114,114,114,114,114,114,114,114,114,114,114,114,114, 
-208,208,208,208,208,208,208,208,208,208,209,209,209,209,209,209, -209,209,209,209,209,209,209,209,209,209,209,209,209,209,209,209, -209,209,209,209,209,209,209,209,209,209,209,210,210,210,210,210, -210,210,210,210,211,211,212,213,213,213,211,114,114,114,114,114, +207,207,207,207,207,207,207,207,207,207,207,207,207,207,207,207, +207,207,207,207,207,207,207,207,207,207,207,207,207,207,207,207, +207,207,207,207,207,207,208,208,208,208,208,208,208,208,208,208, +208,207,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +209,209,209,209,209,209,209,209,209,209,210,210,210,210,210,210, +210,210,210,210,210,210,210,210,210,210,210,210,210,210,210,210, +210,210,210,210,210,210,210,210,210,210,210,211,211,211,211,211, +211,211,211,211,212,212,213,214,214,214,212,115,115,115,115,115, /* block 16 */ -214,214,214,214,214,214,214,214,214,214,214,214,214,214,214,214, -214,214,214,214,214,214,215,215,215,215,216,215,215,215,215,215, -215,215,215,215,216,215,215,215,216,215,215,215,215,215,114,114, -217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,114, -218,218,218,218,218,218,218,218,218,218,218,218,218,218,218,218, -218,218,218,218,218,218,218,218,218,219,219,219,114,114,220,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +215,215,215,215,215,215,215,215,215,215,215,215,215,215,215,215, +215,215,215,215,215,215,216,216,216,216,217,216,216,216,216,216, +216,216,216,216,217,216,216,216,217,216,216,216,216,216,115,115, +218,218,218,218,218,218,218,218,218,218,218,218,218,218,218,115, +219,219,219,219,219,219,219,219,219,219,219,219,219,219,219,219, +219,219,219,219,219,219,219,219,219,220,220,220,115,115,221,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 17 */ -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, 
-114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,199,199,199,199,199,199,199,199,199,199,199,199,199, 199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,198,198,198,198,198,198,198,198,198,198,198,198, -198,198,198,198,198,198,198,198,198,198,198,198,198,198,198,198, /* block 18 */ -221,221,221,222,223,223,223,223,223,223,223,223,223,223,223,223, -223,223,223,223,223,223,223,223,223,223,223,223,223,223,223,223, -223,223,223,223,223,223,223,223,223,223,223,223,223,223,223,223, -223,223,223,223,223,223,223,223,223,223,221,222,221,223,222,222, -222,221,221,221,221,221,221,221,221,222,222,222,222,221,222,222, -223,109,109,221,221,221,221,221,223,223,223,223,223,223,223,223, -223,223,221,221, 4, 4,224,224,224,224,224,224,224,224,224,224, -225,226,223,223,223,223,223,223,223,223,223,223,223,223,223,223, +222,222,222,223,224,224,224,224,224,224,224,224,224,224,224,224, +224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, +224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, +224,224,224,224,224,224,224,224,224,224,222,223,222,224,223,223, +223,222,222,222,222,222,222,222,222,223,223,223,223,222,223,223, +224,110,110,222,222,222,222,222,224,224,224,224,224,224,224,224, +224,224,222,222, 4, 4,225,225,225,225,225,225,225,225,225,225, +226,227,224,224,224,224,224,224,224,224,224,224,224,224,224,224, /* block 
19 */ -227,228,229,229,114,227,227,227,227,227,227,227,227,114,114,227, -227,114,114,227,227,227,227,227,227,227,227,227,227,227,227,227, -227,227,227,227,227,227,227,227,227,114,227,227,227,227,227,227, -227,114,227,114,114,114,227,227,227,227,114,114,228,227,230,229, -229,228,228,228,228,114,114,229,229,114,114,229,229,228,227,114, -114,114,114,114,114,114,114,230,114,114,114,114,227,227,114,227, -227,227,228,228,114,114,231,231,231,231,231,231,231,231,231,231, -227,227,232,232,233,233,233,233,233,233,234,232,114,114,114,114, +228,229,230,230,115,228,228,228,228,228,228,228,228,115,115,228, +228,115,115,228,228,228,228,228,228,228,228,228,228,228,228,228, +228,228,228,228,228,228,228,228,228,115,228,228,228,228,228,228, +228,115,228,115,115,115,228,228,228,228,115,115,229,228,231,230, +230,229,229,229,229,115,115,230,230,115,115,230,230,229,228,115, +115,115,115,115,115,115,115,231,115,115,115,115,228,228,115,228, +228,228,229,229,115,115,232,232,232,232,232,232,232,232,232,232, +228,228,233,233,234,234,234,234,234,234,235,233,115,115,115,115, /* block 20 */ -114,235,235,236,114,237,237,237,237,237,237,114,114,114,114,237, -237,114,114,237,237,237,237,237,237,237,237,237,237,237,237,237, -237,237,237,237,237,237,237,237,237,114,237,237,237,237,237,237, -237,114,237,237,114,237,237,114,237,237,114,114,235,114,236,236, -236,235,235,114,114,114,114,235,235,114,114,235,235,235,114,114, -114,235,114,114,114,114,114,114,114,237,237,237,237,114,237,114, -114,114,114,114,114,114,238,238,238,238,238,238,238,238,238,238, -235,235,237,237,237,235,114,114,114,114,114,114,114,114,114,114, +115,236,236,237,115,238,238,238,238,238,238,115,115,115,115,238, +238,115,115,238,238,238,238,238,238,238,238,238,238,238,238,238, +238,238,238,238,238,238,238,238,238,115,238,238,238,238,238,238, +238,115,238,238,115,238,238,115,238,238,115,115,236,115,237,237, +237,236,236,115,115,115,115,236,236,115,115,236,236,236,115,115, 
+115,236,115,115,115,115,115,115,115,238,238,238,238,115,238,115, +115,115,115,115,115,115,239,239,239,239,239,239,239,239,239,239, +236,236,238,238,238,236,115,115,115,115,115,115,115,115,115,115, /* block 21 */ -114,239,239,240,114,241,241,241,241,241,241,241,241,241,114,241, -241,241,114,241,241,241,241,241,241,241,241,241,241,241,241,241, -241,241,241,241,241,241,241,241,241,114,241,241,241,241,241,241, -241,114,241,241,114,241,241,241,241,241,114,114,239,241,240,240, -240,239,239,239,239,239,114,239,239,240,114,240,240,239,114,114, -241,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -241,241,239,239,114,114,242,242,242,242,242,242,242,242,242,242, -243,244,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +115,240,240,241,115,242,242,242,242,242,242,242,242,242,115,242, +242,242,115,242,242,242,242,242,242,242,242,242,242,242,242,242, +242,242,242,242,242,242,242,242,242,115,242,242,242,242,242,242, +242,115,242,242,115,242,242,242,242,242,115,115,240,242,241,241, +241,240,240,240,240,240,115,240,240,241,115,241,241,240,115,115, +242,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +242,242,240,240,115,115,243,243,243,243,243,243,243,243,243,243, +244,245,115,115,115,115,115,115,115,242,115,115,115,115,115,115, /* block 22 */ -114,245,246,246,114,247,247,247,247,247,247,247,247,114,114,247, -247,114,114,247,247,247,247,247,247,247,247,247,247,247,247,247, -247,247,247,247,247,247,247,247,247,114,247,247,247,247,247,247, -247,114,247,247,114,247,247,247,247,247,114,114,245,247,248,245, -246,245,245,245,245,114,114,246,246,114,114,246,246,245,114,114, -114,114,114,114,114,114,245,248,114,114,114,114,247,247,114,247, -247,247,245,245,114,114,249,249,249,249,249,249,249,249,249,249, -250,247,251,251,251,251,251,251,114,114,114,114,114,114,114,114, +115,246,247,247,115,248,248,248,248,248,248,248,248,115,115,248, +248,115,115,248,248,248,248,248,248,248,248,248,248,248,248,248, 
+248,248,248,248,248,248,248,248,248,115,248,248,248,248,248,248, +248,115,248,248,115,248,248,248,248,248,115,115,246,248,249,246, +247,246,246,246,246,115,115,247,247,115,115,247,247,246,115,115, +115,115,115,115,115,115,246,249,115,115,115,115,248,248,115,248, +248,248,246,246,115,115,250,250,250,250,250,250,250,250,250,250, +251,248,252,252,252,252,252,252,115,115,115,115,115,115,115,115, /* block 23 */ -114,114,252,253,114,253,253,253,253,253,253,114,114,114,253,253, -253,114,253,253,253,253,114,114,114,253,253,114,253,114,253,253, -114,114,114,253,253,114,114,114,253,253,253,114,114,114,253,253, -253,253,253,253,253,253,253,253,253,253,114,114,114,114,254,255, -252,255,255,114,114,114,255,255,255,114,255,255,255,252,114,114, -253,114,114,114,114,114,114,254,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,256,256,256,256,256,256,256,256,256,256, -257,257,257,258,258,258,258,258,258,259,258,114,114,114,114,114, +115,115,253,254,115,254,254,254,254,254,254,115,115,115,254,254, +254,115,254,254,254,254,115,115,115,254,254,115,254,115,254,254, +115,115,115,254,254,115,115,115,254,254,254,115,115,115,254,254, +254,254,254,254,254,254,254,254,254,254,115,115,115,115,255,256, +253,256,256,115,115,115,256,256,256,115,256,256,256,253,115,115, +254,115,115,115,115,115,115,255,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,257,257,257,257,257,257,257,257,257,257, +258,258,258,259,259,259,259,259,259,260,259,115,115,115,115,115, /* block 24 */ -260,261,261,261,114,262,262,262,262,262,262,262,262,114,262,262, -262,114,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,114,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,114,114,114,262,260,260, -260,261,261,261,261,114,260,260,260,114,260,260,260,260,114,114, -114,114,114,114,114,260,260,114,262,262,114,114,114,114,114,114, -262,262,260,260,114,114,263,263,263,263,263,263,263,263,263,263, 
-114,114,114,114,114,114,114,114,264,264,264,264,264,264,264,265, +261,262,262,262,115,263,263,263,263,263,263,263,263,115,263,263, +263,115,263,263,263,263,263,263,263,263,263,263,263,263,263,263, +263,263,263,263,263,263,263,263,263,115,263,263,263,263,263,263, +263,263,263,263,263,263,263,263,263,263,115,115,115,263,261,261, +261,262,262,262,262,115,261,261,261,115,261,261,261,261,115,115, +115,115,115,115,115,261,261,115,263,263,263,115,115,115,115,115, +263,263,261,261,115,115,264,264,264,264,264,264,264,264,264,264, +115,115,115,115,115,115,115,115,265,265,265,265,265,265,265,266, /* block 25 */ -114,266,267,267,114,268,268,268,268,268,268,268,268,114,268,268, -268,114,268,268,268,268,268,268,268,268,268,268,268,268,268,268, -268,268,268,268,268,268,268,268,268,114,268,268,268,268,268,268, -268,268,268,268,114,268,268,268,268,268,114,114,266,268,267,266, -267,267,269,267,267,114,266,267,267,114,267,267,266,266,114,114, -114,114,114,114,114,269,269,114,114,114,114,114,114,114,268,114, -268,268,266,266,114,114,270,270,270,270,270,270,270,270,270,270, -114,268,268,114,114,114,114,114,114,114,114,114,114,114,114,114, +115,267,268,268,115,269,269,269,269,269,269,269,269,115,269,269, +269,115,269,269,269,269,269,269,269,269,269,269,269,269,269,269, +269,269,269,269,269,269,269,269,269,115,269,269,269,269,269,269, +269,269,269,269,115,269,269,269,269,269,115,115,267,269,268,267, +268,268,270,268,268,115,267,268,268,115,268,268,267,267,115,115, +115,115,115,115,115,270,270,115,115,115,115,115,115,115,269,115, +269,269,267,267,115,115,271,271,271,271,271,271,271,271,271,271, +115,269,269,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 26 */ -114,271,272,272,114,273,273,273,273,273,273,273,273,114,273,273, -273,114,273,273,273,273,273,273,273,273,273,273,273,273,273,273, -273,273,273,273,273,273,273,273,273,273,273,273,273,273,273,273, -273,273,273,273,273,273,273,273,273,273,273,114,114,273,274,272, 
-272,271,271,271,271,114,272,272,272,114,272,272,272,271,273,114, -114,114,114,114,114,114,114,274,114,114,114,114,114,114,114,114, -273,273,271,271,114,114,275,275,275,275,275,275,275,275,275,275, -276,276,276,276,276,276,114,114,114,277,273,273,273,273,273,273, +115,272,273,273,115,274,274,274,274,274,274,274,274,115,274,274, +274,115,274,274,274,274,274,274,274,274,274,274,274,274,274,274, +274,274,274,274,274,274,274,274,274,274,274,274,274,274,274,274, +274,274,274,274,274,274,274,274,274,274,274,115,115,274,275,273, +273,272,272,272,272,115,273,273,273,115,273,273,273,272,274,115, +115,115,115,115,115,115,115,275,115,115,115,115,115,115,115,274, +274,274,272,272,115,115,276,276,276,276,276,276,276,276,276,276, +277,277,277,277,277,277,115,115,115,278,274,274,274,274,274,274, /* block 27 */ -114,114,278,278,114,279,279,279,279,279,279,279,279,279,279,279, -279,279,279,279,279,279,279,114,114,114,279,279,279,279,279,279, -279,279,279,279,279,279,279,279,279,279,279,279,279,279,279,279, -279,279,114,279,279,279,279,279,279,279,279,279,114,279,114,114, -279,279,279,279,279,279,279,114,114,114,280,114,114,114,114,281, -278,278,280,280,280,114,280,114,278,278,278,278,278,278,278,281, -114,114,114,114,114,114,282,282,282,282,282,282,282,282,282,282, -114,114,278,278,283,114,114,114,114,114,114,114,114,114,114,114, +115,115,279,279,115,280,280,280,280,280,280,280,280,280,280,280, +280,280,280,280,280,280,280,115,115,115,280,280,280,280,280,280, +280,280,280,280,280,280,280,280,280,280,280,280,280,280,280,280, +280,280,115,280,280,280,280,280,280,280,280,280,115,280,115,115, +280,280,280,280,280,280,280,115,115,115,281,115,115,115,115,282, +279,279,281,281,281,115,281,115,279,279,279,279,279,279,279,282, +115,115,115,115,115,115,283,283,283,283,283,283,283,283,283,283, +115,115,279,279,284,115,115,115,115,115,115,115,115,115,115,115, /* block 28 */ -114,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284, 
-284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284, -284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284, -284,285,284,286,285,285,285,285,285,285,285,114,114,114,114, 5, -284,284,284,284,284,284,287,285,285,285,285,285,285,285,285,288, -289,289,289,289,289,289,289,289,289,289,288,288,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +115,285,285,285,285,285,285,285,285,285,285,285,285,285,285,285, +285,285,285,285,285,285,285,285,285,285,285,285,285,285,285,285, +285,285,285,285,285,285,285,285,285,285,285,285,285,285,285,285, +285,286,285,287,286,286,286,286,286,286,286,115,115,115,115, 5, +285,285,285,285,285,285,288,286,286,286,286,286,286,286,286,289, +290,290,290,290,290,290,290,290,290,290,289,289,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 29 */ -114,290,290,114,290,114,114,290,290,114,290,114,114,290,114,114, -114,114,114,114,290,290,290,290,114,290,290,290,290,290,290,290, -114,290,290,290,114,290,114,290,114,114,290,290,114,290,290,290, -290,291,290,292,291,291,291,291,291,291,114,291,291,290,114,114, -290,290,290,290,290,114,293,114,291,291,291,291,291,291,114,114, -294,294,294,294,294,294,294,294,294,294,114,114,290,290,290,290, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +115,291,291,115,291,115,115,291,291,115,291,115,115,291,115,115, +115,115,115,115,291,291,291,291,115,291,291,291,291,291,291,291, +115,291,291,291,115,291,115,291,115,115,291,291,115,291,291,291, +291,292,291,293,292,292,292,292,292,292,115,292,292,291,115,115, +291,291,291,291,291,115,294,115,292,292,292,292,292,292,115,115, +295,295,295,295,295,295,295,295,295,295,115,115,291,291,291,291, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, 
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 30 */ -295,296,296,296,297,297,297,297,297,297,297,297,297,297,297,297, -297,297,297,296,297,296,296,296,298,298,296,296,296,296,296,296, -299,299,299,299,299,299,299,299,299,299,300,300,300,300,300,300, -300,300,300,300,296,298,296,298,296,298,301,302,301,302,303,303, -295,295,295,295,295,295,295,295,114,295,295,295,295,295,295,295, -295,295,295,295,295,295,295,295,295,295,295,295,295,295,295,295, -295,295,295,295,295,295,295,295,295,295,295,295,295,114,114,114, -114,298,298,298,298,298,298,298,298,298,298,298,298,298,298,303, +296,297,297,297,298,298,298,298,298,298,298,298,298,298,298,298, +298,298,298,297,298,297,297,297,299,299,297,297,297,297,297,297, +300,300,300,300,300,300,300,300,300,300,301,301,301,301,301,301, +301,301,301,301,297,299,297,299,297,299,302,303,302,303,304,304, +296,296,296,296,296,296,296,296,115,296,296,296,296,296,296,296, +296,296,296,296,296,296,296,296,296,296,296,296,296,296,296,296, +296,296,296,296,296,296,296,296,296,296,296,296,296,115,115,115, +115,299,299,299,299,299,299,299,299,299,299,299,299,299,299,304, /* block 31 */ -298,298,298,298,298,297,298,298,295,295,295,295,295,298,298,298, -298,298,298,298,298,298,298,298,114,298,298,298,298,298,298,298, -298,298,298,298,298,298,298,298,298,298,298,298,298,298,298,298, -298,298,298,298,298,298,298,298,298,298,298,298,298,114,296,296, -296,296,296,296,296,296,298,296,296,296,296,296,296,114,296,296, -297,297,297,297,297, 19, 19, 19, 19,297,297,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +299,299,299,299,299,298,299,299,296,296,296,296,296,299,299,299, +299,299,299,299,299,299,299,299,115,299,299,299,299,299,299,299, +299,299,299,299,299,299,299,299,299,299,299,299,299,299,299,299, +299,299,299,299,299,299,299,299,299,299,299,299,299,115,297,297, 
+297,297,297,297,297,297,299,297,297,297,297,297,297,115,297,297, +298,298,298,298,298, 19, 19, 19, 19,298,298,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 32 */ -304,304,304,304,304,304,304,304,304,304,304,304,304,304,304,304, -304,304,304,304,304,304,304,304,304,304,304,304,304,304,304,304, -304,304,304,304,304,304,304,304,304,304,304,305,305,306,306,306, -306,307,306,306,306,306,306,306,305,306,306,307,307,306,306,304, -308,308,308,308,308,308,308,308,308,308,309,309,309,309,309,309, -304,304,304,304,304,304,307,307,306,306,304,304,304,304,306,306, -306,304,305,305,305,304,304,305,305,305,305,305,305,305,304,304, -304,306,306,306,306,304,304,304,304,304,304,304,304,304,304,304, +305,305,305,305,305,305,305,305,305,305,305,305,305,305,305,305, +305,305,305,305,305,305,305,305,305,305,305,305,305,305,305,305, +305,305,305,305,305,305,305,305,305,305,305,306,306,307,307,307, +307,308,307,307,307,307,307,307,306,307,307,308,308,307,307,305, +309,309,309,309,309,309,309,309,309,309,310,310,310,310,310,310, +305,305,305,305,305,305,308,308,307,307,305,305,305,305,307,307, +307,305,306,306,306,305,305,306,306,306,306,306,306,306,305,305, +305,307,307,307,307,305,305,305,305,305,305,305,305,305,305,305, /* block 33 */ -304,304,306,305,307,306,306,305,305,305,305,305,305,306,304,305, -308,308,308,308,308,308,308,308,308,308,305,305,305,306,310,310, -311,311,311,311,311,311,311,311,311,311,311,311,311,311,311,311, -311,311,311,311,311,311,311,311,311,311,311,311,311,311,311,311, -311,311,311,311,311,311,114,311,114,114,114,114,114,311,114,114, +305,305,307,306,308,307,307,306,306,306,306,306,306,307,305,306, +309,309,309,309,309,309,309,309,309,309,306,306,306,307,311,311, 312,312,312,312,312,312,312,312,312,312,312,312,312,312,312,312, 312,312,312,312,312,312,312,312,312,312,312,312,312,312,312,312, -312,312,312,312,312,312,312,312,312,312,312, 
4,313,312,312,312, +312,312,312,312,312,312,115,312,115,115,115,115,115,312,115,115, +313,313,313,313,313,313,313,313,313,313,313,313,313,313,313,313, +313,313,313,313,313,313,313,313,313,313,313,313,313,313,313,313, +313,313,313,313,313,313,313,313,313,313,313, 4,314,313,313,313, /* block 34 */ -314,314,314,314,314,314,314,314,314,314,314,314,314,314,314,314, -314,314,314,314,314,314,314,314,314,314,314,314,314,314,314,314, -314,314,314,314,314,314,314,314,314,314,314,314,314,314,314,314, -314,314,314,314,314,314,314,314,314,314,314,314,314,314,314,314, -314,314,314,314,314,314,314,314,314,314,314,314,314,314,314,314, -314,314,314,314,314,314,314,314,314,314,314,314,314,314,314,314, 315,315,315,315,315,315,315,315,315,315,315,315,315,315,315,315, 315,315,315,315,315,315,315,315,315,315,315,315,315,315,315,315, +315,315,315,315,315,315,315,315,315,315,315,315,315,315,315,315, +315,315,315,315,315,315,315,315,315,315,315,315,315,315,315,315, +315,315,315,315,315,315,315,315,315,315,315,315,315,315,315,315, +315,315,315,315,315,315,315,315,315,315,315,315,315,315,315,315, +316,316,316,316,316,316,316,316,316,316,316,316,316,316,316,316, +316,316,316,316,316,316,316,316,316,316,316,316,316,316,316,316, /* block 35 */ -315,315,315,315,315,315,315,315,315,315,315,315,315,315,315,315, -315,315,315,315,315,315,315,315,315,315,315,315,315,315,315,315, -315,315,315,315,315,315,315,315,316,316,316,316,316,316,316,316, -316,316,316,316,316,316,316,316,316,316,316,316,316,316,316,316, -316,316,316,316,316,316,316,316,316,316,316,316,316,316,316,316, -316,316,316,316,316,316,316,316,316,316,316,316,316,316,316,316, 316,316,316,316,316,316,316,316,316,316,316,316,316,316,316,316, 316,316,316,316,316,316,316,316,316,316,316,316,316,316,316,316, +316,316,316,316,316,316,316,316,317,317,317,317,317,317,317,317, +317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317, +317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317, 
+317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317, +317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317, +317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317, /* block 36 */ -317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317, -317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317, -317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317, -317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317, -317,317,317,317,317,317,317,317,317,114,317,317,317,317,114,114, -317,317,317,317,317,317,317,114,317,114,317,317,317,317,114,114, -317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317, -317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317, +318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318, +318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318, +318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318, +318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318, +318,318,318,318,318,318,318,318,318,115,318,318,318,318,115,115, +318,318,318,318,318,318,318,115,318,115,318,318,318,318,115,115, +318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318, +318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318, /* block 37 */ -317,317,317,317,317,317,317,317,317,114,317,317,317,317,114,114, -317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317, -317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317, -317,114,317,317,317,317,114,114,317,317,317,317,317,317,317,114, -317,114,317,317,317,317,114,114,317,317,317,317,317,317,317,317, -317,317,317,317,317,317,317,114,317,317,317,317,317,317,317,317, -317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317, -317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317, +318,318,318,318,318,318,318,318,318,115,318,318,318,318,115,115, +318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318, 
+318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318, +318,115,318,318,318,318,115,115,318,318,318,318,318,318,318,115, +318,115,318,318,318,318,115,115,318,318,318,318,318,318,318,318, +318,318,318,318,318,318,318,115,318,318,318,318,318,318,318,318, +318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318, +318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318, /* block 38 */ -317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317, -317,114,317,317,317,317,114,114,317,317,317,317,317,317,317,317, -317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317, -317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317, -317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317, -317,317,317,317,317,317,317,317,317,317,317,114,114,318,318,318, -319,319,319,319,319,319,319,319,319,320,320,320,320,320,320,320, -320,320,320,320,320,320,320,320,320,320,320,320,320,114,114,114, +318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318, +318,115,318,318,318,318,115,115,318,318,318,318,318,318,318,318, +318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318, +318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318, +318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318, +318,318,318,318,318,318,318,318,318,318,318,115,115,319,319,319, +320,320,320,320,320,320,320,320,320,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,115,115,115, /* block 39 */ -317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317, -321,321,321,321,321,321,321,321,321,321,114,114,114,114,114,114, -322,322,322,322,322,322,322,322,322,322,322,322,322,322,322,322, -322,322,322,322,322,322,322,322,322,322,322,322,322,322,322,322, -322,322,322,322,322,322,322,322,322,322,322,322,322,322,322,322, -322,322,322,322,322,322,322,322,322,322,322,322,322,322,322,322, -322,322,322,322,322,322,322,322,322,322,322,322,322,322,322,322, 
-322,322,322,322,322,114,114,114,114,114,114,114,114,114,114,114, +318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318, +322,322,322,322,322,322,322,322,322,322,115,115,115,115,115,115, +323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,323, +323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,323, +323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,323, +323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,323, +323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,323, +324,324,324,324,324,324,115,115,325,325,325,325,325,325,115,115, /* block 40 */ -323,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, -324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, -324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, -324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, -324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, -324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, -324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, -324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, +326,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327, +327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327, +327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327, +327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327, +327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327, +327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327, +327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327, +327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327, /* block 41 */ -324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, -324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, -324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, -324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, 
-324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, -324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, -324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, -324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, +327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327, +327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327, +327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327, +327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327, +327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327, +327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327, +327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327, +327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327, /* block 42 */ -324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, -324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, -324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, -324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, -324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, -324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, -324,324,324,324,324,324,324,324,324,324,324,324,324,325,325,324, -324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, +327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327, +327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327, +327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327, +327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327, +327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327, +327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327, +327,327,327,327,327,327,327,327,327,327,327,327,327,328,328,327, +327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327, /* block 43 */ -326,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327, 
-327,327,327,327,327,327,327,327,327,327,327,328,329,114,114,114, -330,330,330,330,330,330,330,330,330,330,330,330,330,330,330,330, -330,330,330,330,330,330,330,330,330,330,330,330,330,330,330,330, -330,330,330,330,330,330,330,330,330,330,330,330,330,330,330,330, -330,330,330,330,330,330,330,330,330,330,330,330,330,330,330,330, -330,330,330,330,330,330,330,330,330,330,330, 4, 4, 4,331,331, -331,330,330,330,330,330,330,330,330,114,114,114,114,114,114,114, +329,330,330,330,330,330,330,330,330,330,330,330,330,330,330,330, +330,330,330,330,330,330,330,330,330,330,330,331,332,115,115,115, +333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,333, +333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,333, +333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,333, +333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,333, +333,333,333,333,333,333,333,333,333,333,333, 4, 4, 4,334,334, +334,333,333,333,333,333,333,333,333,115,115,115,115,115,115,115, /* block 44 */ -332,332,332,332,332,332,332,332,332,332,332,332,332,114,332,332, -332,332,333,333,333,114,114,114,114,114,114,114,114,114,114,114, -334,334,334,334,334,334,334,334,334,334,334,334,334,334,334,334, -334,334,335,335,335, 4, 4,114,114,114,114,114,114,114,114,114, -336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336, -336,336,337,337,114,114,114,114,114,114,114,114,114,114,114,114, -338,338,338,338,338,338,338,338,338,338,338,338,338,114,338,338, -338,114,339,339,114,114,114,114,114,114,114,114,114,114,114,114, +335,335,335,335,335,335,335,335,335,335,335,335,335,115,335,335, +335,335,336,336,336,115,115,115,115,115,115,115,115,115,115,115, +337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,337, +337,337,338,338,338, 4, 4,115,115,115,115,115,115,115,115,115, +339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,339, +339,339,340,340,115,115,115,115,115,115,115,115,115,115,115,115, +341,341,341,341,341,341,341,341,341,341,341,341,341,115,341,341, 
+341,115,342,342,115,115,115,115,115,115,115,115,115,115,115,115, /* block 45 */ -340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340, -340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340, -340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340, -340,340,340,340,341,341,342,341,341,341,341,341,341,341,342,342, -342,342,342,342,342,342,341,342,342,341,341,341,341,341,341,341, -341,341,341,341,343,343,343,344,343,343,343,345,340,341,114,114, -346,346,346,346,346,346,346,346,346,346,114,114,114,114,114,114, -347,347,347,347,347,347,347,347,347,347,114,114,114,114,114,114, +343,343,343,343,343,343,343,343,343,343,343,343,343,343,343,343, +343,343,343,343,343,343,343,343,343,343,343,343,343,343,343,343, +343,343,343,343,343,343,343,343,343,343,343,343,343,343,343,343, +343,343,343,343,344,344,345,344,344,344,344,344,344,344,345,345, +345,345,345,345,345,345,344,345,345,344,344,344,344,344,344,344, +344,344,344,344,346,346,346,347,346,346,346,348,343,344,115,115, +349,349,349,349,349,349,349,349,349,349,115,115,115,115,115,115, +350,350,350,350,350,350,350,350,350,350,115,115,115,115,115,115, /* block 46 */ -348,348, 4, 4,348, 4,349,348,348,348,348,350,350,350,351,114, -352,352,352,352,352,352,352,352,352,352,114,114,114,114,114,114, -353,353,353,353,353,353,353,353,353,353,353,353,353,353,353,353, -353,353,353,353,353,353,353,353,353,353,353,353,353,353,353,353, -353,353,353,354,353,353,353,353,353,353,353,353,353,353,353,353, -353,353,353,353,353,353,353,353,353,353,353,353,353,353,353,353, -353,353,353,353,353,353,353,353,353,353,353,353,353,353,353,353, -353,353,353,353,353,353,353,353,114,114,114,114,114,114,114,114, +351,351, 4, 4,351, 4,352,351,351,351,351,353,353,353,354,115, +355,355,355,355,355,355,355,355,355,355,115,115,115,115,115,115, +356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356, +356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356, 
+356,356,356,357,356,356,356,356,356,356,356,356,356,356,356,356, +356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356, +356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356, +356,356,356,356,356,356,356,356,115,115,115,115,115,115,115,115, /* block 47 */ -353,353,353,353,353,353,353,353,353,353,353,353,353,353,353,353, -353,353,353,353,353,353,353,353,353,353,353,353,353,353,353,353, -353,353,353,353,353,353,353,353,353,350,353,114,114,114,114,114, -324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, -324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, -324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, -324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, -324,324,324,324,324,324,114,114,114,114,114,114,114,114,114,114, +356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356, +356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356, +356,356,356,356,356,356,356,356,356,353,356,115,115,115,115,115, +327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327, +327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327, +327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327, +327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327, +327,327,327,327,327,327,115,115,115,115,115,115,115,115,115,115, /* block 48 */ -355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,355, -355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,114, -356,356,356,357,357,357,357,356,356,357,357,357,114,114,114,114, -357,357,356,357,357,357,357,357,357,356,356,356,114,114,114,114, -358,114,114,114,359,359,360,360,360,360,360,360,360,360,360,360, -361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,361, -361,361,361,361,361,361,361,361,361,361,361,361,361,361,114,114, -361,361,361,361,361,114,114,114,114,114,114,114,114,114,114,114, +358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358, 
+358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,115, +359,359,359,360,360,360,360,359,359,360,360,360,115,115,115,115, +360,360,359,360,360,360,360,360,360,359,359,359,115,115,115,115, +361,115,115,115,362,362,363,363,363,363,363,363,363,363,363,363, +364,364,364,364,364,364,364,364,364,364,364,364,364,364,364,364, +364,364,364,364,364,364,364,364,364,364,364,364,364,364,115,115, +364,364,364,364,364,115,115,115,115,115,115,115,115,115,115,115, /* block 49 */ -362,362,362,362,362,362,362,362,362,362,362,362,362,362,362,362, -362,362,362,362,362,362,362,362,362,362,362,362,362,362,362,362, -362,362,362,362,362,362,362,362,362,362,362,362,114,114,114,114, -363,363,363,363,363,364,364,364,363,363,364,363,363,363,363,363, -363,362,362,362,362,362,362,362,363,363,114,114,114,114,114,114, -365,365,365,365,365,365,365,365,365,365,366,114,114,114,367,367, -368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368, -368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368, +365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,365, +365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,365, +365,365,365,365,365,365,365,365,365,365,365,365,115,115,115,115, +365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,365, +365,365,365,365,365,365,365,365,365,365,115,115,115,115,115,115, +366,366,366,366,366,366,366,366,366,366,367,115,115,115,368,368, +369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369, +369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369, /* block 50 */ -369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369, -369,369,369,369,369,369,369,370,370,371,371,370,114,114,372,372, -373,373,373,373,373,373,373,373,373,373,373,373,373,373,373,373, -373,373,373,373,373,373,373,373,373,373,373,373,373,373,373,373, -373,373,373,373,373,373,373,373,373,373,373,373,373,373,373,373, -373,373,373,373,373,374,375,374,375,375,375,375,375,375,375,114, 
-375,376,375,376,376,375,375,375,375,375,375,375,375,374,374,374, -374,374,374,375,375,375,375,375,375,375,375,375,375,114,114,375, +370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370, +370,370,370,370,370,370,370,371,371,372,372,371,115,115,373,373, +374,374,374,374,374,374,374,374,374,374,374,374,374,374,374,374, +374,374,374,374,374,374,374,374,374,374,374,374,374,374,374,374, +374,374,374,374,374,374,374,374,374,374,374,374,374,374,374,374, +374,374,374,374,374,375,376,375,376,376,376,376,376,376,376,115, +376,377,376,377,377,376,376,376,376,376,376,376,376,375,375,375, +375,375,375,376,376,376,376,376,376,376,376,376,376,115,115,376, /* block 51 */ -377,377,377,377,377,377,377,377,377,377,114,114,114,114,114,114, -377,377,377,377,377,377,377,377,377,377,114,114,114,114,114,114, -378,378,378,378,378,378,378,379,378,378,378,378,378,378,114,114, -109,109,109,109,109,109,109,109,109,109,109,109,109,109,380,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +378,378,378,378,378,378,378,378,378,378,115,115,115,115,115,115, +378,378,378,378,378,378,378,378,378,378,115,115,115,115,115,115, +379,379,379,379,379,379,379,380,379,379,379,379,379,379,115,115, +110,110,110,110,110,110,110,110,110,110,110,110,110,110,381,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 52 */ -381,381,381,381,382,383,383,383,383,383,383,383,383,383,383,383, -383,383,383,383,383,383,383,383,383,383,383,383,383,383,383,383, -383,383,383,383,383,383,383,383,383,383,383,383,383,383,383,383, 
-383,383,383,383,381,382,381,381,381,381,381,382,381,382,382,382, -382,382,381,382,382,383,383,383,383,383,383,383,114,114,114,114, -384,384,384,384,384,384,384,384,384,384,385,385,385,385,385,385, -385,386,386,386,386,386,386,386,386,386,386,381,381,381,381,381, -381,381,381,381,386,386,386,386,386,386,386,386,386,114,114,114, +382,382,382,382,383,384,384,384,384,384,384,384,384,384,384,384, +384,384,384,384,384,384,384,384,384,384,384,384,384,384,384,384, +384,384,384,384,384,384,384,384,384,384,384,384,384,384,384,384, +384,384,384,384,382,383,382,382,382,382,382,383,382,383,383,383, +383,383,382,383,383,384,384,384,384,384,384,384,115,115,115,115, +385,385,385,385,385,385,385,385,385,385,386,386,386,386,386,386, +386,387,387,387,387,387,387,387,387,387,387,382,382,382,382,382, +382,382,382,382,387,387,387,387,387,387,387,387,387,115,115,115, /* block 53 */ -387,387,388,389,389,389,389,389,389,389,389,389,389,389,389,389, -389,389,389,389,389,389,389,389,389,389,389,389,389,389,389,389, -389,388,387,387,387,387,388,388,387,387,388,387,387,387,389,389, -390,390,390,390,390,390,390,390,390,390,389,389,389,389,389,389, -391,391,391,391,391,391,391,391,391,391,391,391,391,391,391,391, -391,391,391,391,391,391,391,391,391,391,391,391,391,391,391,391, -391,391,391,391,391,391,392,393,392,392,393,393,393,392,393,392, -392,392,393,393,114,114,114,114,114,114,114,114,394,394,394,394, +388,388,389,390,390,390,390,390,390,390,390,390,390,390,390,390, +390,390,390,390,390,390,390,390,390,390,390,390,390,390,390,390, +390,389,388,388,388,388,389,389,388,388,389,388,388,388,390,390, +391,391,391,391,391,391,391,391,391,391,390,390,390,390,390,390, +392,392,392,392,392,392,392,392,392,392,392,392,392,392,392,392, +392,392,392,392,392,392,392,392,392,392,392,392,392,392,392,392, +392,392,392,392,392,392,393,394,393,393,394,394,394,393,394,393, +393,393,394,394,115,115,115,115,115,115,115,115,395,395,395,395, /* block 54 */ 
-395,395,395,395,395,395,395,395,395,395,395,395,395,395,395,395, -395,395,395,395,395,395,395,395,395,395,395,395,395,395,395,395, -395,395,395,395,396,396,396,396,396,396,396,396,397,397,397,397, -397,397,397,397,396,396,397,397,114,114,114,398,398,398,398,398, -399,399,399,399,399,399,399,399,399,399,114,114,114,395,395,395, -400,400,400,400,400,400,400,400,400,400,401,401,401,401,401,401, -401,401,401,401,401,401,401,401,401,401,401,401,401,401,401,401, -401,401,401,401,401,401,401,401,402,402,402,402,402,402,403,403, +396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396, +396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396, +396,396,396,396,397,397,397,397,397,397,397,397,398,398,398,398, +398,398,398,398,397,397,398,398,115,115,115,399,399,399,399,399, +400,400,400,400,400,400,400,400,400,400,115,115,115,396,396,396, +401,401,401,401,401,401,401,401,401,401,402,402,402,402,402,402, +402,402,402,402,402,402,402,402,402,402,402,402,402,402,402,402, +402,402,402,402,402,402,402,402,403,403,403,403,403,403,404,404, /* block 55 */ -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -404,404,404,404,404,404,404,404,114,114,114,114,114,114,114,114, -109,109,109, 4,109,109,109,109,109,109,109,109,109,109,109,109, -109,405,109,109,109,109,109,109,109,406,406,406,406,109,406,406, -406,406,405,405,109,406,406,114,109,109,114,114,114,114,114,114, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +405,405,405,405,405,405,405,405,115,115,115,115,115,115,115,115, +110,110,110, 4,110,110,110,110,110,110,110,110,110,110,110,110, 
+110,406,110,110,110,110,110,110,110,407,407,407,407,110,407,407, +407,407,406,406,110,407,407,115,110,110,115,115,115,115,115,115, /* block 56 */ 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33,122,122,122,122,122,407,106,106,106,106, -106,106,106,106,106,106,106,106,106,106,106,106,106,106,106,106, -106,106,106,106,106,106,106,106,106,106,106,106,106,106,106,106, -106,106,106,106,106,106,106,106,106,106,106,106,106,115,115,115, -115,115,106,106,106,106,115,115,115,115,115, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33,408,409, 33, 33, 33,410, 33, 33, + 33, 33, 33, 33, 33, 33,123,123,123,123,123,408,107,107,107,107, +107,107,107,107,107,107,107,107,107,107,107,107,107,107,107,107, +107,107,107,107,107,107,107,107,107,107,107,107,107,107,107,107, +107,107,107,107,107,107,107,107,107,107,107,107,107,116,116,116, +116,116,107,107,107,107,116,116,116,116,116, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33,409,410, 33, 33, 33,411, 33, 33, /* block 57 */ 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,106,106,106,106,106, -106,106,106,106,106,106,106,106,106,106,106,106,106,106,106,106, -106,106,106,106,106,106,106,106,106,106,106,106,106,106,106,115, -109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109, -109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109, -109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109, -109,109,109,109,109,109,114,114,114,114,114,114,109,109,109,109, + 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,107,107,107,107,107, +107,107,107,107,107,107,107,107,107,107,107,107,107,107,107,107, +107,107,107,107,107,107,107,107,107,107,107,107,107,107,107,116, +110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, +110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, 
+110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, +110,110,110,110,110,110,115,115,115,115,115,115,110,110,110,110, /* block 58 */ 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, @@ -1940,12 +1964,12 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 58112 bytes, block = 128 */ 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, -411,412, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, +412,413, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, /* block 59 */ 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, - 30, 31, 30, 31, 30, 31, 33, 33, 33, 33, 33,413, 33, 33,414, 33, + 30, 31, 30, 31, 30, 31, 33, 33, 33, 33, 33,414, 33, 33,415, 33, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, @@ -1954,57 +1978,57 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 58112 bytes, block = 128 */ 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, /* block 60 */ -415,415,415,415,415,415,415,415,416,416,416,416,416,416,416,416, -415,415,415,415,415,415,114,114,416,416,416,416,416,416,114,114, -415,415,415,415,415,415,415,415,416,416,416,416,416,416,416,416, -415,415,415,415,415,415,415,415,416,416,416,416,416,416,416,416, -415,415,415,415,415,415,114,114,416,416,416,416,416,416,114,114, -122,415,122,415,122,415,122,415,114,416,114,416,114,416,114,416, -415,415,415,415,415,415,415,415,416,416,416,416,416,416,416,416, -417,417,418,418,418,418,419,419,420,420,421,421,422,422,114,114, +416,416,416,416,416,416,416,416,417,417,417,417,417,417,417,417, +416,416,416,416,416,416,115,115,417,417,417,417,417,417,115,115, +416,416,416,416,416,416,416,416,417,417,417,417,417,417,417,417, 
+416,416,416,416,416,416,416,416,417,417,417,417,417,417,417,417, +416,416,416,416,416,416,115,115,417,417,417,417,417,417,115,115, +123,416,123,416,123,416,123,416,115,417,115,417,115,417,115,417, +416,416,416,416,416,416,416,416,417,417,417,417,417,417,417,417, +418,418,419,419,419,419,420,420,421,421,422,422,423,423,115,115, /* block 61 */ -415,415,415,415,415,415,415,415,423,423,423,423,423,423,423,423, -415,415,415,415,415,415,415,415,423,423,423,423,423,423,423,423, -415,415,415,415,415,415,415,415,423,423,423,423,423,423,423,423, -415,415,122,424,122,114,122,122,416,416,425,425,426,113,427,113, -113,113,122,424,122,114,122,122,428,428,428,428,426,113,113,113, -415,415,122,122,114,114,122,122,416,416,429,429,114,113,113,113, -415,415,122,122,122,163,122,122,416,416,430,430,168,113,113,113, -114,114,122,424,122,114,122,122,431,431,432,432,426,113,113,114, +416,416,416,416,416,416,416,416,424,424,424,424,424,424,424,424, +416,416,416,416,416,416,416,416,424,424,424,424,424,424,424,424, +416,416,416,416,416,416,416,416,424,424,424,424,424,424,424,424, +416,416,123,425,123,115,123,123,417,417,426,426,427,114,428,114, +114,114,123,425,123,115,123,123,429,429,429,429,427,114,114,114, +416,416,123,123,115,115,123,123,417,417,430,430,115,114,114,114, +416,416,123,123,123,164,123,123,417,417,431,431,169,114,114,114, +115,115,123,425,123,115,123,123,432,432,433,433,427,114,114,115, /* block 62 */ - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 22,433,433, 22, 22, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 22,434,434, 22, 22, 9, 9, 9, 9, 9, 9, 4, 4, 21, 25, 6, 21, 21, 25, 6, 21, - 4, 4, 4, 4, 4, 4, 4, 4,434,435, 22, 22, 22, 22, 22, 3, + 4, 4, 4, 4, 4, 4, 4, 4,435,436, 22, 22, 22, 22, 22, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 21, 25, 4, 4, 4, 4, 15, 15, 4, 4, 4, 8, 6, 7, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 15, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, - 22, 22, 22, 22, 22,436, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 23,106,114,114, 23, 23, 23, 23, 23, 23, 8, 8, 8, 6, 7,106, + 22, 22, 22, 22, 22,437, 22, 
22, 22, 22, 22, 22, 22, 22, 22, 22, + 23,107,115,115, 23, 23, 23, 23, 23, 23, 8, 8, 8, 6, 7,107, /* block 63 */ - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 8, 8, 8, 6, 7,114, -106,106,106,106,106,106,106,106,106,106,106,106,106,114,114,114, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 8, 8, 8, 6, 7,115, +107,107,107,107,107,107,107,107,107,107,107,107,107,115,115,115, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -109,109,109,109,109,109,109,109,109,109,109,109,109,380,380,380, -380,109,380,380,380,109,109,109,109,109,109,109,109,109,109,109, -109,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +110,110,110,110,110,110,110,110,110,110,110,110,110,381,381,381, +381,110,381,381,381,110,110,110,110,110,110,110,110,110,110,110, +110,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 64 */ - 19, 19,437, 19, 19, 19, 19,437, 19, 19,438,437,437,437,438,438, -437,437,437,438, 19,437, 19, 19, 8,437,437,437,437,437, 19, 19, - 19, 19, 19, 19,437, 19,439, 19,437, 19,440,441,437,437, 19,438, -437,437,442,437,438,406,406,406,406,438, 19, 19,438,438,437,437, - 8, 8, 8, 8, 8,437,438,438,438,438, 19, 8, 19, 19,443, 19, + 19, 19,438, 19, 19, 19, 19,438, 19, 19,439,438,438,438,439,439, +438,438,438,439, 19,438, 19, 19, 8,438,438,438,438,438, 19, 19, + 19, 19, 19, 19,438, 19,440, 19,438, 19,441,442,438,438, 19,439, +438,438,443,438,439,407,407,407,407,439, 19, 19,439,439,438,438, + 8, 8, 8, 8, 8,438,439,439,439,439, 19, 8, 19, 19,444, 19, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, -444,444,444,444,444,444,444,444,444,444,444,444,444,444,444,444, 445,445,445,445,445,445,445,445,445,445,445,445,445,445,445,445, +446,446,446,446,446,446,446,446,446,446,446,446,446,446,446,446, /* block 65 */ -446,446,446, 30, 
31,446,446,446,446, 23,114,114,114,114,114,114, +447,447,447, 30, 31,447,447,447,447, 23, 19, 19,115,115,115,115, 8, 8, 8, 8, 8, 19, 19, 19, 19, 19, 8, 8, 19, 19, 19, 19, 8, 19, 19, 8, 19, 19, 8, 19, 19, 19, 19, 19, 19, 19, 8, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, @@ -2041,15 +2065,15 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 58112 bytes, block = 128 */ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 8, 8, 8, 8, 8, 8, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,114,114,114,114,114, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115, /* block 69 */ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, + 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, @@ -2057,10 +2081,10 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 58112 bytes, block = 128 */ 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19,447,447,447,447,447,447,447,447,447,447, -447,447,447,447,447,447,447,447,447,447,447,447,447,447,447,447, + 19, 19, 19, 19, 19, 19,448,448,448,448,448,448,448,448,448,448, 
448,448,448,448,448,448,448,448,448,448,448,448,448,448,448,448, -448,448,448,448,448,448,448,448,448,448, 23, 23, 23, 23, 23, 23, +449,449,449,449,449,449,449,449,449,449,449,449,449,449,449,449, +449,449,449,449,449,449,449,449,449,449, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, /* block 71 */ @@ -2114,14 +2138,14 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 58112 bytes, block = 128 */ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, /* block 76 */ -449,449,449,449,449,449,449,449,449,449,449,449,449,449,449,449, -449,449,449,449,449,449,449,449,449,449,449,449,449,449,449,449, -449,449,449,449,449,449,449,449,449,449,449,449,449,449,449,449, -449,449,449,449,449,449,449,449,449,449,449,449,449,449,449,449, -449,449,449,449,449,449,449,449,449,449,449,449,449,449,449,449, -449,449,449,449,449,449,449,449,449,449,449,449,449,449,449,449, -449,449,449,449,449,449,449,449,449,449,449,449,449,449,449,449, -449,449,449,449,449,449,449,449,449,449,449,449,449,449,449,449, +450,450,450,450,450,450,450,450,450,450,450,450,450,450,450,450, +450,450,450,450,450,450,450,450,450,450,450,450,450,450,450,450, +450,450,450,450,450,450,450,450,450,450,450,450,450,450,450,450, +450,450,450,450,450,450,450,450,450,450,450,450,450,450,450,450, +450,450,450,450,450,450,450,450,450,450,450,450,450,450,450,450, +450,450,450,450,450,450,450,450,450,450,450,450,450,450,450,450, +450,450,450,450,450,450,450,450,450,450,450,450,450,450,450,450, +450,450,450,450,450,450,450,450,450,450,450,450,450,450,450,450, /* block 77 */ 8, 8, 8, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, @@ -2141,147 +2165,147 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 58112 bytes, block = 128 */ 8, 8, 8, 8, 8, 19, 19, 8, 8, 8, 8, 8, 8, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19,114,114, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19,115,115, 19, 19, 19, 
19, 19, 19, 19, 19, 19, 19, /* block 79 */ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19,114,114, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19,115,115, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,114,114,114, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19,114, 19, 19, 19, 19, 19, 19, - 19, 19,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19,115, 19, 19, 19, 19, 19, 19, + 19, 19,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115, 19, 19, 19, 19, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 80 */ -450,450,450,450,450,450,450,450,450,450,450,450,450,450,450,450, -450,450,450,450,450,450,450,450,450,450,450,450,450,450,450,450, -450,450,450,450,450,450,450,450,450,450,450,450,450,450,450,114, 451,451,451,451,451,451,451,451,451,451,451,451,451,451,451,451, 451,451,451,451,451,451,451,451,451,451,451,451,451,451,451,451, -451,451,451,451,451,451,451,451,451,451,451,451,451,451,451,114, - 30, 31,452,453,454,455,456, 30, 31, 30, 31, 30, 31,457,458,459, -460, 33, 30, 31, 33, 30, 31, 33, 33, 33, 33, 33,106,106,461,461, +451,451,451,451,451,451,451,451,451,451,451,451,451,451,451,115, +452,452,452,452,452,452,452,452,452,452,452,452,452,452,452,452, +452,452,452,452,452,452,452,452,452,452,452,452,452,452,452,452, +452,452,452,452,452,452,452,452,452,452,452,452,452,452,452,115, + 30, 31,453,454,455,456,457, 30, 31, 30, 31, 30, 31,458,459,460, +461, 33, 30, 31, 33, 30, 31, 33, 33, 33, 33, 33,107,107,462,462, /* block 81 */ -159,160,159,160,159,160,159,160,159,160,159,160,159,160,159,160, 
-159,160,159,160,159,160,159,160,159,160,159,160,159,160,159,160, -159,160,159,160,159,160,159,160,159,160,159,160,159,160,159,160, -159,160,159,160,159,160,159,160,159,160,159,160,159,160,159,160, -159,160,159,160,159,160,159,160,159,160,159,160,159,160,159,160, -159,160,159,160,159,160,159,160,159,160,159,160,159,160,159,160, -159,160,159,160,462,463,463,463,463,463,463,159,160,159,160,464, -464,464,159,160,114,114,114,114,114,465,465,465,465,466,465,465, +160,161,160,161,160,161,160,161,160,161,160,161,160,161,160,161, +160,161,160,161,160,161,160,161,160,161,160,161,160,161,160,161, +160,161,160,161,160,161,160,161,160,161,160,161,160,161,160,161, +160,161,160,161,160,161,160,161,160,161,160,161,160,161,160,161, +160,161,160,161,160,161,160,161,160,161,160,161,160,161,160,161, +160,161,160,161,160,161,160,161,160,161,160,161,160,161,160,161, +160,161,160,161,463,464,464,464,464,464,464,160,161,160,161,465, +465,465,160,161,115,115,115,115,115,466,466,466,466,467,466,466, /* block 82 */ -467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,467, -467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,467, -467,467,467,467,467,467,114,467,114,114,114,114,114,467,114,114, 468,468,468,468,468,468,468,468,468,468,468,468,468,468,468,468, 468,468,468,468,468,468,468,468,468,468,468,468,468,468,468,468, -468,468,468,468,468,468,468,468,468,468,468,468,468,468,468,468, -468,468,468,468,468,468,468,468,114,114,114,114,114,114,114,469, -470,114,114,114,114,114,114,114,114,114,114,114,114,114,114,471, +468,468,468,468,468,468,115,468,115,115,115,115,115,468,115,115, +469,469,469,469,469,469,469,469,469,469,469,469,469,469,469,469, +469,469,469,469,469,469,469,469,469,469,469,469,469,469,469,469, +469,469,469,469,469,469,469,469,469,469,469,469,469,469,469,469, +469,469,469,469,469,469,469,469,115,115,115,115,115,115,115,470, +471,115,115,115,115,115,115,115,115,115,115,115,115,115,115,472, /* block 83 */ 
-317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317, -317,317,317,317,317,317,317,114,114,114,114,114,114,114,114,114, -317,317,317,317,317,317,317,114,317,317,317,317,317,317,317,114, -317,317,317,317,317,317,317,114,317,317,317,317,317,317,317,114, -317,317,317,317,317,317,317,114,317,317,317,317,317,317,317,114, -317,317,317,317,317,317,317,114,317,317,317,317,317,317,317,114, -177,177,177,177,177,177,177,177,177,177,177,177,177,177,177,177, -177,177,177,177,177,177,177,177,177,177,177,177,177,177,177,177, +318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318, +318,318,318,318,318,318,318,115,115,115,115,115,115,115,115,115, +318,318,318,318,318,318,318,115,318,318,318,318,318,318,318,115, +318,318,318,318,318,318,318,115,318,318,318,318,318,318,318,115, +318,318,318,318,318,318,318,115,318,318,318,318,318,318,318,115, +318,318,318,318,318,318,318,115,318,318,318,318,318,318,318,115, +178,178,178,178,178,178,178,178,178,178,178,178,178,178,178,178, +178,178,178,178,178,178,178,178,178,178,178,178,178,178,178,178, /* block 84 */ 4, 4, 21, 25, 21, 25, 4, 4, 4, 21, 25, 4, 21, 25, 4, 4, 4, 4, 4, 4, 4, 4, 4, 9, 4, 4, 9, 4, 21, 25, 4, 4, - 21, 25, 6, 7, 6, 7, 6, 7, 6, 7, 4, 4, 4, 4, 4,107, + 21, 25, 6, 7, 6, 7, 6, 7, 6, 7, 4, 4, 4, 4, 4,108, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 9, 9, 4, 4, 4, 4, - 9, 4, 6,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, + 9, 4, 6,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 85 */ -472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472, -472,472,472,472,472,472,472,472,472,472,114,472,472,472,472,472, 
-472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472, -472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472, -472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472, -472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472, -472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472, -472,472,472,472,114,114,114,114,114,114,114,114,114,114,114,114, +473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473, +473,473,473,473,473,473,473,473,473,473,115,473,473,473,473,473, +473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473, +473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473, +473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473, +473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473, +473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473, +473,473,473,473,115,115,115,115,115,115,115,115,115,115,115,115, /* block 86 */ -472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472, -472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472, -472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472, -472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472, -472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472, -472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472, -472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472, -472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472, +473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473, +473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473, +473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473, +473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473, +473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473, +473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473, +473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473, +473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473, /* 
block 87 */ -472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472, -472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472, -472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472, -472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472, -472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472, -472,472,472,472,472,472,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,114,114,114,114, +473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473, +473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473, +473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473, +473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473, +473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473, +473,473,473,473,473,473,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115, /* block 88 */ - 3, 4, 4, 4, 19,473,406,474, 6, 7, 6, 7, 6, 7, 6, 7, + 3, 4, 4, 4, 19,474,407,475, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 19, 19, 6, 7, 6, 7, 6, 7, 6, 7, 9, 6, 7, 7, - 19,474,474,474,474,474,474,474,474,474,109,109,109,109,475,475, - 9,107,107,107,107,107, 19, 19,474,474,474,473,406, 4, 19, 19, -114,476,476,476,476,476,476,476,476,476,476,476,476,476,476,476, -476,476,476,476,476,476,476,476,476,476,476,476,476,476,476,476, -476,476,476,476,476,476,476,476,476,476,476,476,476,476,476,476, -476,476,476,476,476,476,476,476,476,476,476,476,476,476,476,476, + 19,475,475,475,475,475,475,475,475,475,110,110,110,110,476,476, + 9,108,108,108,108,108, 19, 19,475,475,475,474,407, 4, 19, 19, +115,477,477,477,477,477,477,477,477,477,477,477,477,477,477,477, +477,477,477,477,477,477,477,477,477,477,477,477,477,477,477,477, +477,477,477,477,477,477,477,477,477,477,477,477,477,477,477,477, 
+477,477,477,477,477,477,477,477,477,477,477,477,477,477,477,477, /* block 89 */ -476,476,476,476,476,476,476,476,476,476,476,476,476,476,476,476, -476,476,476,476,476,476,476,114,114,109,109, 14, 14,477,477,476, - 9,478,478,478,478,478,478,478,478,478,478,478,478,478,478,478, -478,478,478,478,478,478,478,478,478,478,478,478,478,478,478,478, -478,478,478,478,478,478,478,478,478,478,478,478,478,478,478,478, -478,478,478,478,478,478,478,478,478,478,478,478,478,478,478,478, -478,478,478,478,478,478,478,478,478,478,478,478,478,478,478,478, -478,478,478,478,478,478,478,478,478,478,478, 4,107,479,479,478, +477,477,477,477,477,477,477,477,477,477,477,477,477,477,477,477, +477,477,477,477,477,477,477,115,115,110,110, 14, 14,478,478,477, + 9,479,479,479,479,479,479,479,479,479,479,479,479,479,479,479, +479,479,479,479,479,479,479,479,479,479,479,479,479,479,479,479, +479,479,479,479,479,479,479,479,479,479,479,479,479,479,479,479, +479,479,479,479,479,479,479,479,479,479,479,479,479,479,479,479, +479,479,479,479,479,479,479,479,479,479,479,479,479,479,479,479, +479,479,479,479,479,479,479,479,479,479,479, 4,108,480,480,479, /* block 90 */ -114,114,114,114,114,480,480,480,480,480,480,480,480,480,480,480, -480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,480, -480,480,480,480,480,480,480,480,480,480,480,480,480,480,114,114, -114,481,481,481,481,481,481,481,481,481,481,481,481,481,481,481, -481,481,481,481,481,481,481,481,481,481,481,481,481,481,481,481, -481,481,481,481,481,481,481,481,481,481,481,481,481,481,481,481, -481,481,481,481,481,481,481,481,481,481,481,481,481,481,481,481, +115,115,115,115,115,481,481,481,481,481,481,481,481,481,481,481, 481,481,481,481,481,481,481,481,481,481,481,481,481,481,481,481, +481,481,481,481,481,481,481,481,481,481,481,481,481,481,115,115, +115,482,482,482,482,482,482,482,482,482,482,482,482,482,482,482, +482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,482, 
+482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,482, +482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,482, +482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,482, /* block 91 */ -481,481,481,481,481,481,481,481,481,481,481,481,481,481,481,114, +482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,115, 19, 19, 23, 23, 23, 23, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, -480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,480, -480,480,480,480,480,480,480,480,480,480,480,114,114,114,114,114, +481,481,481,481,481,481,481,481,481,481,481,481,481,481,481,481, +481,481,481,481,481,481,481,481,481,481,481,115,115,115,115,115, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19,114,114,114,114,114,114,114,114,114,114,114,114, -478,478,478,478,478,478,478,478,478,478,478,478,478,478,478,478, + 19, 19, 19, 19,115,115,115,115,115,115,115,115,115,115,115,115, +479,479,479,479,479,479,479,479,479,479,479,479,479,479,479,479, /* block 92 */ -482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,482, -482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,114, +483,483,483,483,483,483,483,483,483,483,483,483,483,483,483,483, +483,483,483,483,483,483,483,483,483,483,483,483,483,483,483,115, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 23, 23, 23, 23, 23, 23, 23, 23, 19, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, -482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,482, -482,482,482,482,482,482,482,482,482,482,482,482,482,482,482, 19, +483,483,483,483,483,483,483,483,483,483,483,483,483,483,483,483, +483,483,483,483,483,483,483,483,483,483,483,483,483,483,483, 19, /* block 93 */ 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 19, 19, 19, 19, 19, 19, @@ -2289,1229 +2313,1229 @@ const uint16_t 
PRIV(ucd_stage2)[] = { /* 58112 bytes, block = 128 */ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, -483,483,483,483,483,483,483,483,483,483,483,483,483,483,483,483, -483,483,483,483,483,483,483,483,483,483,483,483,483,483,483,483, -483,483,483,483,483,483,483,483,483,483,483,483,483,483,483,114, +484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, +484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, +484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,115, /* block 94 */ -483,483,483,483,483,483,483,483,483,483,483,483,483,483,483,483, -483,483,483,483,483,483,483,483,483,483,483,483,483,483,483,483, -483,483,483,483,483,483,483,483,483,483,483,483,483,483,483,483, -483,483,483,483,483,483,483,483,483,483,483,483,483,483,483,483, -483,483,483,483,483,483,483,483,483,483,483,483,483,483,483,483, -483,483,483,483,483,483,483,483, 19, 19, 19, 19, 19, 19, 19, 19, +484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, +484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, +484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, +484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, +484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, +484,484,484,484,484,484,484,484, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, /* block 95 */ -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, 
-484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, /* block 96 */ -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,114,114,114,114,114,114,114,114,114,114, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,115,115,115,115,115,115,115,115,115,115, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, /* block 97 */ -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, 
-114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 98 */ -485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, -485,485,485,485,485,486,485,485,485,485,485,485,485,485,485,485, -485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, -485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, -485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, -485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, -485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, -485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486, +486,486,486,486,486,487,486,486,486,486,486,486,486,486,486,486, +486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486, +486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486, +486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486, +486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486, +486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486, +486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486, /* block 99 */ -485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, -485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, -485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, -485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, 
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, -485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, -485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, -485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486, +486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486, +486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486, +486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486, +486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486, +486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486, +486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486, +486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486, /* block 100 */ -485,485,485,485,485,485,485,485,485,485,485,485,485,114,114,114, -487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,487, -487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,487, -487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,487, -487,487,487,487,487,487,487,114,114,114,114,114,114,114,114,114, +486,486,486,486,486,486,486,486,486,486,486,486,486,115,115,115, 488,488,488,488,488,488,488,488,488,488,488,488,488,488,488,488, 488,488,488,488,488,488,488,488,488,488,488,488,488,488,488,488, -488,488,488,488,488,488,488,488,489,489,489,489,489,489,490,490, +488,488,488,488,488,488,488,488,488,488,488,488,488,488,488,488, +488,488,488,488,488,488,488,115,115,115,115,115,115,115,115,115, +489,489,489,489,489,489,489,489,489,489,489,489,489,489,489,489, +489,489,489,489,489,489,489,489,489,489,489,489,489,489,489,489, +489,489,489,489,489,489,489,489,490,490,490,490,490,490,491,491, /* block 101 */ -491,491,491,491,491,491,491,491,491,491,491,491,491,491,491,491, -491,491,491,491,491,491,491,491,491,491,491,491,491,491,491,491, -491,491,491,491,491,491,491,491,491,491,491,491,491,491,491,491, 
-491,491,491,491,491,491,491,491,491,491,491,491,491,491,491,491, -491,491,491,491,491,491,491,491,491,491,491,491,491,491,491,491, -491,491,491,491,491,491,491,491,491,491,491,491,491,491,491,491, -491,491,491,491,491,491,491,491,491,491,491,491,491,491,491,491, -491,491,491,491,491,491,491,491,491,491,491,491,491,491,491,491, +492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,492, +492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,492, +492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,492, +492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,492, +492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,492, +492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,492, +492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,492, +492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,492, /* block 102 */ -491,491,491,491,491,491,491,491,491,491,491,491,492,493,493,493, -491,491,491,491,491,491,491,491,491,491,491,491,491,491,491,491, -494,494,494,494,494,494,494,494,494,494,491,491,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -174,175,174,175,174,175,174,175,174,175,174,175,174,175,174,175, -174,175,174,175,174,175,174,175,174,175,174,175,174,175,174,175, -174,175,174,175,174,175,174,175,174,175,174,175,174,175,495,177, -178,178,178,496,177,177,177,177,177,177,177,177,177,177,496,408, +492,492,492,492,492,492,492,492,492,492,492,492,493,494,494,494, +492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,492, +495,495,495,495,495,495,495,495,495,495,492,492,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +175,176,175,176,175,176,175,176,175,176,175,176,175,176,175,176, +175,176,175,176,175,176,175,176,175,176,175,176,175,176,175,176, +175,176,175,176,175,176,175,176,175,176,175,176,175,176,496,178, +179,179,179,497,178,178,178,178,178,178,178,178,178,178,497,409, /* block 103 */ 
-174,175,174,175,174,175,174,175,174,175,174,175,174,175,174,175, -174,175,174,175,174,175,174,175,174,175,174,175,408,408,114,177, -497,497,497,497,497,497,497,497,497,497,497,497,497,497,497,497, -497,497,497,497,497,497,497,497,497,497,497,497,497,497,497,497, -497,497,497,497,497,497,497,497,497,497,497,497,497,497,497,497, -497,497,497,497,497,497,497,497,497,497,497,497,497,497,497,497, -497,497,497,497,497,497,498,498,498,498,498,498,498,498,498,498, -499,499,500,500,500,500,500,500,114,114,114,114,114,114,114,114, +175,176,175,176,175,176,175,176,175,176,175,176,175,176,175,176, +175,176,175,176,175,176,175,176,175,176,175,176,409,409,178,178, +498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498, +498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498, +498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498, +498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498, +498,498,498,498,498,498,499,499,499,499,499,499,499,499,499,499, +500,500,501,501,501,501,501,501,115,115,115,115,115,115,115,115, /* block 104 */ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14,107,107,107,107,107,107,107,107,107, + 14, 14, 14, 14, 14, 14, 14,108,108,108,108,108,108,108,108,108, 14, 14, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 33, 33, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, -106, 33, 33, 33, 33, 33, 33, 33, 33, 30, 31, 30, 31,501, 30, 31, +107, 33, 33, 33, 33, 33, 33, 33, 33, 30, 31, 30, 31,502, 30, 31, /* block 105 */ - 30, 31, 30, 31, 30, 31, 30, 31,107, 14, 14, 30, 31,502, 33,114, + 30, 31, 30, 31, 30, 31, 30, 31,108, 14, 14, 30, 31,503, 33, 20, 30, 31, 30, 31, 33, 33, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, - 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,503,504,505,506,114,114, 
-507,508,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114, 20,106,106, 33, 20, 20, 20, 20, 20, + 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,504,505,506,507,115,115, +508,509,510,511, 30, 31, 30, 31,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115, 20,107,107, 33, 20, 20, 20, 20, 20, /* block 106 */ -509,509,510,509,509,509,510,509,509,509,509,510,509,509,509,509, -509,509,509,509,509,509,509,509,509,509,509,509,509,509,509,509, -509,509,509,511,511,510,510,511,512,512,512,512,114,114,114,114, - 23, 23, 23, 23, 23, 23, 19, 19, 5, 19,114,114,114,114,114,114, -513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,513, -513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,513, -513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,513, -513,513,513,513,514,514,514,514,114,114,114,114,114,114,114,114, +512,512,513,512,512,512,513,512,512,512,512,513,512,512,512,512, +512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512, +512,512,512,514,514,513,513,514,515,515,515,515,115,115,115,115, + 23, 23, 23, 23, 23, 23, 19, 19, 5, 19,115,115,115,115,115,115, +516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516, +516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516, +516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516, +516,516,516,516,517,517,517,517,115,115,115,115,115,115,115,115, /* block 107 */ -515,515,516,516,516,516,516,516,516,516,516,516,516,516,516,516, -516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516, 
-516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516, -516,516,516,516,515,515,515,515,515,515,515,515,515,515,515,515, -515,515,515,515,517,114,114,114,114,114,114,114,114,114,518,518, -519,519,519,519,519,519,519,519,519,519,114,114,114,114,114,114, -221,221,221,221,221,221,221,221,221,221,221,221,221,221,221,221, -221,221,223,223,223,223,223,223,225,225,225,223,114,114,114,114, +518,518,519,519,519,519,519,519,519,519,519,519,519,519,519,519, +519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,519, +519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,519, +519,519,519,519,518,518,518,518,518,518,518,518,518,518,518,518, +518,518,518,518,520,115,115,115,115,115,115,115,115,115,521,521, +522,522,522,522,522,522,522,522,522,522,115,115,115,115,115,115, +222,222,222,222,222,222,222,222,222,222,222,222,222,222,222,222, +222,222,224,224,224,224,224,224,226,226,226,224,226,224,115,115, /* block 108 */ -520,520,520,520,520,520,520,520,520,520,521,521,521,521,521,521, -521,521,521,521,521,521,521,521,521,521,521,521,521,521,521,521, -521,521,521,521,521,521,522,522,522,522,522,522,522,522, 4,523, +523,523,523,523,523,523,523,523,523,523,524,524,524,524,524,524, 524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524, -524,524,524,524,524,524,524,525,525,525,525,525,525,525,525,525, -525,525,526,526,114,114,114,114,114,114,114,114,114,114,114,527, -314,314,314,314,314,314,314,314,314,314,314,314,314,314,314,314, -314,314,314,314,314,314,314,314,314,314,314,314,314,114,114,114, +524,524,524,524,524,524,525,525,525,525,525,525,525,525, 4,526, +527,527,527,527,527,527,527,527,527,527,527,527,527,527,527,527, +527,527,527,527,527,527,527,528,528,528,528,528,528,528,528,528, +528,528,529,529,115,115,115,115,115,115,115,115,115,115,115,530, +315,315,315,315,315,315,315,315,315,315,315,315,315,315,315,315, +315,315,315,315,315,315,315,315,315,315,315,315,315,115,115,115, /* block 109 */ 
-528,528,528,529,530,530,530,530,530,530,530,530,530,530,530,530, -530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530, -530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530, -530,530,530,528,529,529,528,528,528,528,529,529,528,529,529,529, -529,531,531,531,531,531,531,531,531,531,531,531,531,531,114,107, -532,532,532,532,532,532,532,532,532,532,114,114,114,114,531,531, -304,304,304,304,304,306,533,304,304,304,304,304,304,304,304,304, -308,308,308,308,308,308,308,308,308,308,304,304,304,304,304,114, +531,531,531,532,533,533,533,533,533,533,533,533,533,533,533,533, +533,533,533,533,533,533,533,533,533,533,533,533,533,533,533,533, +533,533,533,533,533,533,533,533,533,533,533,533,533,533,533,533, +533,533,533,531,532,532,531,531,531,531,532,532,531,532,532,532, +532,534,534,534,534,534,534,534,534,534,534,534,534,534,115,108, +535,535,535,535,535,535,535,535,535,535,115,115,115,115,534,534, +305,305,305,305,305,307,536,305,305,305,305,305,305,305,305,305, +309,309,309,309,309,309,309,309,309,309,305,305,305,305,305,115, /* block 110 */ -534,534,534,534,534,534,534,534,534,534,534,534,534,534,534,534, -534,534,534,534,534,534,534,534,534,534,534,534,534,534,534,534, -534,534,534,534,534,534,534,534,534,535,535,535,535,535,535,536, -536,535,535,536,536,535,535,114,114,114,114,114,114,114,114,114, -534,534,534,535,534,534,534,534,534,534,534,534,535,536,114,114, -537,537,537,537,537,537,537,537,537,537,114,114,538,538,538,538, -304,304,304,304,304,304,304,304,304,304,304,304,304,304,304,304, -533,304,304,304,304,304,304,310,310,310,304,305,306,305,304,304, +537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537, +537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537, +537,537,537,537,537,537,537,537,537,538,538,538,538,538,538,539, +539,538,538,539,539,538,538,115,115,115,115,115,115,115,115,115, +537,537,537,538,537,537,537,537,537,537,537,537,538,539,115,115, +540,540,540,540,540,540,540,540,540,540,115,115,541,541,541,541, 
+305,305,305,305,305,305,305,305,305,305,305,305,305,305,305,305, +536,305,305,305,305,305,305,311,311,311,305,306,307,306,305,305, /* block 111 */ -539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,539, -539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,539, -539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,539, -540,539,540,540,540,539,539,540,540,539,539,539,539,539,540,540, -539,540,539,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,539,539,541,542,542, -543,543,543,543,543,543,543,543,543,543,543,544,545,545,544,544, -546,546,543,547,547,544,545,114,114,114,114,114,114,114,114,114, +542,542,542,542,542,542,542,542,542,542,542,542,542,542,542,542, +542,542,542,542,542,542,542,542,542,542,542,542,542,542,542,542, +542,542,542,542,542,542,542,542,542,542,542,542,542,542,542,542, +543,542,543,543,543,542,542,543,543,542,542,542,542,542,543,543, +542,543,542,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,542,542,544,545,545, +546,546,546,546,546,546,546,546,546,546,546,547,548,548,547,547, +549,549,546,550,550,547,548,115,115,115,115,115,115,115,115,115, /* block 112 */ -114,317,317,317,317,317,317,114,114,317,317,317,317,317,317,114, -114,317,317,317,317,317,317,114,114,114,114,114,114,114,114,114, -317,317,317,317,317,317,317,114,317,317,317,317,317,317,317,114, +115,318,318,318,318,318,318,115,115,318,318,318,318,318,318,115, +115,318,318,318,318,318,318,115,115,115,115,115,115,115,115,115, +318,318,318,318,318,318,318,115,318,318,318,318,318,318,318,115, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 14,106,106,106,106, -114,114,114,114, 33,122,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, + 33, 33, 33,551, 33, 33, 33, 33, 33, 33, 33, 
14,107,107,107,107, + 33, 33, 33, 33, 33,123,115,115,115,115,115,115,115,115,115,115, +552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552, /* block 113 */ -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -543,543,543,543,543,543,543,543,543,543,543,543,543,543,543,543, -543,543,543,543,543,543,543,543,543,543,543,543,543,543,543,543, -543,543,543,544,544,545,544,544,545,544,544,546,544,545,114,114, -548,548,548,548,548,548,548,548,548,548,114,114,114,114,114,114, +552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552, +552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552, +552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552, +552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552, +546,546,546,546,546,546,546,546,546,546,546,546,546,546,546,546, +546,546,546,546,546,546,546,546,546,546,546,546,546,546,546,546, +546,546,546,547,547,548,547,547,548,547,547,549,547,548,115,115, +553,553,553,553,553,553,553,553,553,553,115,115,115,115,115,115, /* block 114 */ -549,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, -550,550,550,550,550,550,550,550,550,550,550,550,549,550,550,550, -550,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, -550,550,550,550,550,550,550,550,549,550,550,550,550,550,550,550, -550,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, -550,550,550,550,549,550,550,550,550,550,550,550,550,550,550,550, -550,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, -549,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, +554,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, +555,555,555,555,555,555,555,555,555,555,555,555,554,555,555,555, +555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, 
+555,555,555,555,555,555,555,555,554,555,555,555,555,555,555,555, +555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, +555,555,555,555,554,555,555,555,555,555,555,555,555,555,555,555, +555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, +554,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, /* block 115 */ -550,550,550,550,550,550,550,550,550,550,550,550,549,550,550,550, -550,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, -550,550,550,550,550,550,550,550,549,550,550,550,550,550,550,550, -550,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, -550,550,550,550,549,550,550,550,550,550,550,550,550,550,550,550, -550,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, -549,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, -550,550,550,550,550,550,550,550,550,550,550,550,549,550,550,550, +555,555,555,555,555,555,555,555,555,555,555,555,554,555,555,555, +555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, +555,555,555,555,555,555,555,555,554,555,555,555,555,555,555,555, +555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, +555,555,555,555,554,555,555,555,555,555,555,555,555,555,555,555, +555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, +554,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, +555,555,555,555,555,555,555,555,555,555,555,555,554,555,555,555, /* block 116 */ -550,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, -550,550,550,550,550,550,550,550,549,550,550,550,550,550,550,550, -550,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, -550,550,550,550,549,550,550,550,550,550,550,550,550,550,550,550, -550,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, -549,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, -550,550,550,550,550,550,550,550,550,550,550,550,549,550,550,550, -550,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, 
+555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, +555,555,555,555,555,555,555,555,554,555,555,555,555,555,555,555, +555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, +555,555,555,555,554,555,555,555,555,555,555,555,555,555,555,555, +555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, +554,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, +555,555,555,555,555,555,555,555,555,555,555,555,554,555,555,555, +555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, /* block 117 */ -550,550,550,550,550,550,550,550,549,550,550,550,550,550,550,550, -550,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, -550,550,550,550,549,550,550,550,550,550,550,550,550,550,550,550, -550,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, -549,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, -550,550,550,550,550,550,550,550,550,550,550,550,549,550,550,550, -550,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, -550,550,550,550,550,550,550,550,549,550,550,550,550,550,550,550, +555,555,555,555,555,555,555,555,554,555,555,555,555,555,555,555, +555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, +555,555,555,555,554,555,555,555,555,555,555,555,555,555,555,555, +555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, +554,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, +555,555,555,555,555,555,555,555,555,555,555,555,554,555,555,555, +555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, +555,555,555,555,555,555,555,555,554,555,555,555,555,555,555,555, /* block 118 */ -550,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, -550,550,550,550,549,550,550,550,550,550,550,550,550,550,550,550, -550,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, -549,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, -550,550,550,550,550,550,550,550,550,550,550,550,549,550,550,550, 
-550,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, -550,550,550,550,550,550,550,550,549,550,550,550,550,550,550,550, -550,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, +555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, +555,555,555,555,554,555,555,555,555,555,555,555,555,555,555,555, +555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, +554,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, +555,555,555,555,555,555,555,555,555,555,555,555,554,555,555,555, +555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, +555,555,555,555,555,555,555,555,554,555,555,555,555,555,555,555, +555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, /* block 119 */ -550,550,550,550,549,550,550,550,550,550,550,550,550,550,550,550, -550,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, -549,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, -550,550,550,550,550,550,550,550,550,550,550,550,549,550,550,550, -550,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, -550,550,550,550,550,550,550,550,549,550,550,550,550,550,550,550, -550,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, -550,550,550,550,549,550,550,550,550,550,550,550,550,550,550,550, +555,555,555,555,554,555,555,555,555,555,555,555,555,555,555,555, +555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, +554,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, +555,555,555,555,555,555,555,555,555,555,555,555,554,555,555,555, +555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, +555,555,555,555,555,555,555,555,554,555,555,555,555,555,555,555, +555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, +555,555,555,555,554,555,555,555,555,555,555,555,555,555,555,555, /* block 120 */ -550,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, -549,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, 
-550,550,550,550,550,550,550,550,550,550,550,550,549,550,550,550, -550,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, -550,550,550,550,550,550,550,550,549,550,550,550,550,550,550,550, -550,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, -550,550,550,550,549,550,550,550,550,550,550,550,550,550,550,550, -550,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, +555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, +554,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, +555,555,555,555,555,555,555,555,555,555,555,555,554,555,555,555, +555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, +555,555,555,555,555,555,555,555,554,555,555,555,555,555,555,555, +555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, +555,555,555,555,554,555,555,555,555,555,555,555,555,555,555,555, +555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, /* block 121 */ -550,550,550,550,550,550,550,550,549,550,550,550,550,550,550,550, -550,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550, -550,550,550,550,114,114,114,114,114,114,114,114,114,114,114,114, -315,315,315,315,315,315,315,315,315,315,315,315,315,315,315,315, -315,315,315,315,315,315,315,114,114,114,114,316,316,316,316,316, +555,555,555,555,555,555,555,555,554,555,555,555,555,555,555,555, +555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, +555,555,555,555,115,115,115,115,115,115,115,115,115,115,115,115, 316,316,316,316,316,316,316,316,316,316,316,316,316,316,316,316, -316,316,316,316,316,316,316,316,316,316,316,316,316,316,316,316, -316,316,316,316,316,316,316,316,316,316,316,316,114,114,114,114, +316,316,316,316,316,316,316,115,115,115,115,317,317,317,317,317, +317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317, +317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317, +317,317,317,317,317,317,317,317,317,317,317,317,115,115,115,115, /* block 122 */ 
-551,551,551,551,551,551,551,551,551,551,551,551,551,551,551,551, -551,551,551,551,551,551,551,551,551,551,551,551,551,551,551,551, -551,551,551,551,551,551,551,551,551,551,551,551,551,551,551,551, -551,551,551,551,551,551,551,551,551,551,551,551,551,551,551,551, -551,551,551,551,551,551,551,551,551,551,551,551,551,551,551,551, -551,551,551,551,551,551,551,551,551,551,551,551,551,551,551,551, -551,551,551,551,551,551,551,551,551,551,551,551,551,551,551,551, -551,551,551,551,551,551,551,551,551,551,551,551,551,551,551,551, +556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556, +556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556, +556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556, +556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556, +556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556, +556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556, +556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556, +556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556, /* block 123 */ -552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552, -552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552, -552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552, -552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552, -552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552, -552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552, -552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552, -552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552, +557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557, +557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557, +557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557, +557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557, +557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557, +557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557, 
+557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557, +557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557, /* block 124 */ -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,114,114, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,115,115, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, /* block 125 */ -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, 
+485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 126 */ - 33, 33, 33, 33, 33, 33, 33,114,114,114,114,114,114,114,114,114, -114,114,114,185,185,185,185,185,114,114,114,114,114,192,189,192, -192,192,192,192,192,192,192,192,192,553,192,192,192,192,192,192, -192,192,192,192,192,192,192,114,192,192,192,192,192,114,192,114, -192,192,114,192,192,114,192,192,192,192,192,192,192,192,192,192, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, + 33, 33, 33, 33, 33, 33, 33,115,115,115,115,115,115,115,115,115, +115,115,115,186,186,186,186,186,115,115,115,115,115,193,190,193, +193,193,193,193,193,193,193,193,193,558,193,193,193,193,193,193, +193,193,193,193,193,193,193,115,193,193,193,193,193,115,193,115, +193,193,115,193,193,115,193,193,193,193,193,193,193,193,193,193, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, /* block 127 */ -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,554,554,554,554,554,554,554,554,554,554,554,554,554,554, -554,554,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, 
+200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,559,559,559,559,559,559,559,559,559,559,559,559,559,559, +559,559,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, /* block 128 */ -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, /* block 129 */ -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199, 7, 6, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, 
-199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200, 7, 6, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, /* block 130 */ -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -114,114,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -199,199,199,199,199,199,199,199,199,199,199,199,196,197,114,114, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +115,115,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +200,200,200,200,200,200,200,200,200,200,200,200,197,198,115,115, /* block 131 */ -109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109, - 4, 4, 4, 4, 4, 4, 4, 6, 7, 4,114,114,114,114,114,114, -109,109,109,109,109,109,109,109,109,109,109,109,109,109,114,114, 
+110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, + 4, 4, 4, 4, 4, 4, 4, 6, 7, 4,115,115,115,115,115,115, +110,110,110,110,110,110,110,110,110,110,110,110,110,110,178,178, 4, 9, 9, 15, 15, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 4, 4, 6, 7, 4, 4, 4, 4, 15, 15, 15, - 4, 4, 4,114, 4, 4, 4, 4, 9, 6, 7, 6, 7, 6, 7, 4, - 4, 4, 8, 9, 8, 8, 8,114, 4, 5, 4, 4,114,114,114,114, -199,199,199,199,199,114,199,199,199,199,199,199,199,199,199,199, + 4, 4, 4,115, 4, 4, 4, 4, 9, 6, 7, 6, 7, 6, 7, 4, + 4, 4, 8, 9, 8, 8, 8,115, 4, 5, 4, 4,115,115,115,115, +200,200,200,200,200,115,200,200,200,200,200,200,200,200,200,200, /* block 132 */ -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,114,114, 22, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,115,115, 22, /* block 133 */ -114, 4, 4, 4, 5, 4, 4, 4, 6, 7, 4, 8, 4, 9, 4, 4, +115, 4, 4, 4, 5, 4, 4, 4, 6, 7, 4, 8, 4, 9, 4, 4, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 4, 4, 8, 8, 8, 4, 4, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 6, 4, 7, 
14, 15, 14, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 6, 8, 7, 8, 6, - 7, 4, 6, 7, 4, 4,478,478,478,478,478,478,478,478,478,478, -107,478,478,478,478,478,478,478,478,478,478,478,478,478,478,478, + 7, 4, 6, 7, 4, 4,479,479,479,479,479,479,479,479,479,479, +108,479,479,479,479,479,479,479,479,479,479,479,479,479,479,479, /* block 134 */ -478,478,478,478,478,478,478,478,478,478,478,478,478,478,478,478, -478,478,478,478,478,478,478,478,478,478,478,478,478,478,555,555, -481,481,481,481,481,481,481,481,481,481,481,481,481,481,481,481, -481,481,481,481,481,481,481,481,481,481,481,481,481,481,481,114, -114,114,481,481,481,481,481,481,114,114,481,481,481,481,481,481, -114,114,481,481,481,481,481,481,114,114,481,481,481,114,114,114, - 5, 5, 8, 14, 19, 5, 5,114, 19, 8, 8, 8, 8, 19, 19,114, -436,436,436,436,436,436,436,436,436, 22, 22, 22, 19, 19,114,114, +479,479,479,479,479,479,479,479,479,479,479,479,479,479,479,479, +479,479,479,479,479,479,479,479,479,479,479,479,479,479,560,560, +482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,482, +482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,115, +115,115,482,482,482,482,482,482,115,115,482,482,482,482,482,482, +115,115,482,482,482,482,482,482,115,115,482,482,482,115,115,115, + 5, 5, 8, 14, 19, 5, 5,115, 19, 8, 8, 8, 8, 19, 19,115, +437,437,437,437,437,437,437,437,437, 22, 22, 22, 19, 19,115,115, /* block 135 */ -556,556,556,556,556,556,556,556,556,556,556,556,114,556,556,556, -556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556, -556,556,556,556,556,556,556,114,556,556,556,556,556,556,556,556, -556,556,556,556,556,556,556,556,556,556,556,114,556,556,114,556, -556,556,556,556,556,556,556,556,556,556,556,556,556,556,114,114, -556,556,556,556,556,556,556,556,556,556,556,556,556,556,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, 
+561,561,561,561,561,561,561,561,561,561,561,561,115,561,561,561, +561,561,561,561,561,561,561,561,561,561,561,561,561,561,561,561, +561,561,561,561,561,561,561,115,561,561,561,561,561,561,561,561, +561,561,561,561,561,561,561,561,561,561,561,115,561,561,115,561, +561,561,561,561,561,561,561,561,561,561,561,561,561,561,115,115, +561,561,561,561,561,561,561,561,561,561,561,561,561,561,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 136 */ -556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556, -556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556, -556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556, -556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556, -556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556, -556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556, -556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556, -556,556,556,556,556,556,556,556,556,556,556,114,114,114,114,114, +561,561,561,561,561,561,561,561,561,561,561,561,561,561,561,561, +561,561,561,561,561,561,561,561,561,561,561,561,561,561,561,561, +561,561,561,561,561,561,561,561,561,561,561,561,561,561,561,561, +561,561,561,561,561,561,561,561,561,561,561,561,561,561,561,561, +561,561,561,561,561,561,561,561,561,561,561,561,561,561,561,561, +561,561,561,561,561,561,561,561,561,561,561,561,561,561,561,561, +561,561,561,561,561,561,561,561,561,561,561,561,561,561,561,561, +561,561,561,561,561,561,561,561,561,561,561,115,115,115,115,115, /* block 137 */ - 4, 4, 4,114,114,114,114, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 4, 4, 4,115,115,115,115, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23,114,114,114, 19, 19, 19, 19, 19, 19, 19, 19, 19, 
-557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557, -557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557, -557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557, -557,557,557,557,557,558,558,558,558,559,559,559,559,559,559,559, + 23, 23, 23, 23,115,115,115, 19, 19, 19, 19, 19, 19, 19, 19, 19, +562,562,562,562,562,562,562,562,562,562,562,562,562,562,562,562, +562,562,562,562,562,562,562,562,562,562,562,562,562,562,562,562, +562,562,562,562,562,562,562,562,562,562,562,562,562,562,562,562, +562,562,562,562,562,563,563,563,563,564,564,564,564,564,564,564, /* block 138 */ -559,559,559,559,559,559,559,559,559,559,558,558,559,114,114,114, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,114,114,114,114, -559,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +564,564,564,564,564,564,564,564,564,564,563,563,564,115,115,115, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115, +564,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,109,114,114, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,110,115,115, /* block 139 */ -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, 
-114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 140 */ -560,560,560,560,560,560,560,560,560,560,560,560,560,560,560,560, -560,560,560,560,560,560,560,560,560,560,560,560,560,114,114,114, -561,561,561,561,561,561,561,561,561,561,561,561,561,561,561,561, -561,561,561,561,561,561,561,561,561,561,561,561,561,561,561,561, -561,561,561,561,561,561,561,561,561,561,561,561,561,561,561,561, -561,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -109, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,114,114,114,114, +565,565,565,565,565,565,565,565,565,565,565,565,565,565,565,565, +565,565,565,565,565,565,565,565,565,565,565,565,565,115,115,115, +566,566,566,566,566,566,566,566,566,566,566,566,566,566,566,566, +566,566,566,566,566,566,566,566,566,566,566,566,566,566,566,566, +566,566,566,566,566,566,566,566,566,566,566,566,566,566,566,566, +566,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +110, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,115,115,115,115, /* block 141 */ -562,562,562,562,562,562,562,562,562,562,562,562,562,562,562,562, -562,562,562,562,562,562,562,562,562,562,562,562,562,562,562,562, -563,563,563,563,114,114,114,114,114,114,114,114,114,114,114,114, 
-564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564, -564,565,564,564,564,564,564,564,564,564,565,114,114,114,114,114, -566,566,566,566,566,566,566,566,566,566,566,566,566,566,566,566, -566,566,566,566,566,566,566,566,566,566,566,566,566,566,566,566, -566,566,566,566,566,566,567,567,567,567,567,114,114,114,114,114, +567,567,567,567,567,567,567,567,567,567,567,567,567,567,567,567, +567,567,567,567,567,567,567,567,567,567,567,567,567,567,567,567, +568,568,568,568,115,115,115,115,115,115,115,115,115,115,115,115, +569,569,569,569,569,569,569,569,569,569,569,569,569,569,569,569, +569,570,569,569,569,569,569,569,569,569,570,115,115,115,115,115, +571,571,571,571,571,571,571,571,571,571,571,571,571,571,571,571, +571,571,571,571,571,571,571,571,571,571,571,571,571,571,571,571, +571,571,571,571,571,571,572,572,572,572,572,115,115,115,115,115, /* block 142 */ -568,568,568,568,568,568,568,568,568,568,568,568,568,568,568,568, -568,568,568,568,568,568,568,568,568,568,568,568,568,568,114,569, -570,570,570,570,570,570,570,570,570,570,570,570,570,570,570,570, -570,570,570,570,570,570,570,570,570,570,570,570,570,570,570,570, -570,570,570,570,114,114,114,114,570,570,570,570,570,570,570,570, -571,572,572,572,572,572,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573, +573,573,573,573,573,573,573,573,573,573,573,573,573,573,115,574, +575,575,575,575,575,575,575,575,575,575,575,575,575,575,575,575, +575,575,575,575,575,575,575,575,575,575,575,575,575,575,575,575, +575,575,575,575,115,115,115,115,575,575,575,575,575,575,575,575, +576,577,577,577,577,577,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 143 */ 
-573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573, -573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573, -573,573,573,573,573,573,573,573,574,574,574,574,574,574,574,574, -574,574,574,574,574,574,574,574,574,574,574,574,574,574,574,574, -574,574,574,574,574,574,574,574,574,574,574,574,574,574,574,574, -575,575,575,575,575,575,575,575,575,575,575,575,575,575,575,575, -575,575,575,575,575,575,575,575,575,575,575,575,575,575,575,575, -575,575,575,575,575,575,575,575,575,575,575,575,575,575,575,575, +578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, +578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, +578,578,578,578,578,578,578,578,579,579,579,579,579,579,579,579, +579,579,579,579,579,579,579,579,579,579,579,579,579,579,579,579, +579,579,579,579,579,579,579,579,579,579,579,579,579,579,579,579, +580,580,580,580,580,580,580,580,580,580,580,580,580,580,580,580, +580,580,580,580,580,580,580,580,580,580,580,580,580,580,580,580, +580,580,580,580,580,580,580,580,580,580,580,580,580,580,580,580, /* block 144 */ -576,576,576,576,576,576,576,576,576,576,576,576,576,576,576,576, -576,576,576,576,576,576,576,576,576,576,576,576,576,576,114,114, -577,577,577,577,577,577,577,577,577,577,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,581, +581,581,581,581,581,581,581,581,581,581,581,581,581,581,115,115, +582,582,582,582,582,582,582,582,582,582,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, 
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 145 */ -578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, -578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, -578,578,578,578,578,578,578,578,114,114,114,114,114,114,114,114, -579,579,579,579,579,579,579,579,579,579,579,579,579,579,579,579, -579,579,579,579,579,579,579,579,579,579,579,579,579,579,579,579, -579,579,579,579,579,579,579,579,579,579,579,579,579,579,579,579, -579,579,579,579,114,114,114,114,114,114,114,114,114,114,114,580, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +583,583,583,583,583,583,583,583,583,583,583,583,583,583,583,583, +583,583,583,583,583,583,583,583,583,583,583,583,583,583,583,583, +583,583,583,583,583,583,583,583,115,115,115,115,115,115,115,115, +584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,584, +584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,584, +584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,584, +584,584,584,584,115,115,115,115,115,115,115,115,115,115,115,585, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 146 */ -581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,581, -581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,581, -581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,581, -581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,581, -581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,581, -581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,581, -581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,581, -581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,581, +586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, +586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, +586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, 
+586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, +586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, +586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, +586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, +586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, /* block 147 */ -581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,581, -581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,581, -581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,581, -581,581,581,581,581,581,581,114,114,114,114,114,114,114,114,114, -581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,581, -581,581,581,581,581,581,114,114,114,114,114,114,114,114,114,114, -581,581,581,581,581,581,581,581,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, +586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, +586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, +586,586,586,586,586,586,586,115,115,115,115,115,115,115,115,115, +586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, +586,586,586,586,586,586,115,115,115,115,115,115,115,115,115,115, +586,586,586,586,586,586,586,586,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 148 */ -582,582,582,582,582,582,114,114,582,114,582,582,582,582,582,582, -582,582,582,582,582,582,582,582,582,582,582,582,582,582,582,582, -582,582,582,582,582,582,582,582,582,582,582,582,582,582,582,582, -582,582,582,582,582,582,114,582,582,114,114,114,582,114,114,582, -583,583,583,583,583,583,583,583,583,583,583,583,583,583,583,583, -583,583,583,583,583,583,114,584,585,585,585,585,585,585,585,585, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,587,587,588,588,588,588,588,588,588, 
+587,587,587,587,587,587,115,115,587,115,587,587,587,587,587,587, +587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587, +587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587, +587,587,587,587,587,587,115,587,587,115,115,115,587,115,115,587, +588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,588, +588,588,588,588,588,588,115,589,590,590,590,590,590,590,590,590, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,592,592,593,593,593,593,593,593,593, /* block 149 */ -589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589, -589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,114, -114,114,114,114,114,114,114,590,590,590,590,590,590,590,590,590, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +594,594,594,594,594,594,594,594,594,594,594,594,594,594,594,594, +594,594,594,594,594,594,594,594,594,594,594,594,594,594,594,115, +115,115,115,115,115,115,115,595,595,595,595,595,595,595,595,595, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596, +596,596,596,115,596,596,115,115,115,115,115,597,597,597,597,597, /* block 150 */ -591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, -591,591,591,591,591,591,592,592,592,592,592,592,114,114,114,593, -594,594,594,594,594,594,594,594,594,594,594,594,594,594,594,594, -594,594,594,594,594,594,594,594,594,594,114,114,114,114,114,595, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, 
-114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +598,598,598,598,598,598,598,598,598,598,598,598,598,598,598,598, +598,598,598,598,598,598,599,599,599,599,599,599,115,115,115,600, +601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601, +601,601,601,601,601,601,601,601,601,601,115,115,115,115,115,602, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 151 */ -596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596, -596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596, -597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597, -597,597,597,597,597,597,597,597,114,114,114,114,114,114,597,597, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +603,603,603,603,603,603,603,603,603,603,603,603,603,603,603,603, +603,603,603,603,603,603,603,603,603,603,603,603,603,603,603,603, +604,604,604,604,604,604,604,604,604,604,604,604,604,604,604,604, +604,604,604,604,604,604,604,604,115,115,115,115,605,605,604,604, +605,605,605,605,605,605,605,605,605,605,605,605,605,605,605,605, +115,115,605,605,605,605,605,605,605,605,605,605,605,605,605,605, +605,605,605,605,605,605,605,605,605,605,605,605,605,605,605,605, +605,605,605,605,605,605,605,605,605,605,605,605,605,605,605,605, /* block 152 */ -598,599,599,599,114,599,599,114,114,114,114,114,599,599,599,599, -598,598,598,598,114,598,598,598,114,598,598,598,598,598,598,598, 
-598,598,598,598,598,598,598,598,598,598,598,598,598,598,598,598, -598,598,598,598,114,114,114,114,599,599,599,114,114,114,114,599, -600,600,600,600,600,600,600,600,114,114,114,114,114,114,114,114, -601,601,601,601,601,601,601,601,601,114,114,114,114,114,114,114, -602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602, -602,602,602,602,602,602,602,602,602,602,602,602,602,603,603,604, +606,607,607,607,115,607,607,115,115,115,115,115,607,607,607,607, +606,606,606,606,115,606,606,606,115,606,606,606,606,606,606,606, +606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,606, +606,606,606,606,115,115,115,115,607,607,607,115,115,115,115,607, +608,608,608,608,608,608,608,608,115,115,115,115,115,115,115,115, +609,609,609,609,609,609,609,609,609,115,115,115,115,115,115,115, +610,610,610,610,610,610,610,610,610,610,610,610,610,610,610,610, +610,610,610,610,610,610,610,610,610,610,610,610,610,611,611,612, /* block 153 */ -605,605,605,605,605,605,605,605,605,605,605,605,605,605,605,605, -605,605,605,605,605,605,605,605,605,605,605,605,605,606,606,606, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -607,607,607,607,607,607,607,607,608,607,607,607,607,607,607,607, -607,607,607,607,607,607,607,607,607,607,607,607,607,607,607,607, -607,607,607,607,607,609,609,114,114,114,114,610,610,610,610,610, -611,611,611,611,611,611,611,114,114,114,114,114,114,114,114,114, +613,613,613,613,613,613,613,613,613,613,613,613,613,613,613,613, +613,613,613,613,613,613,613,613,613,613,613,613,613,614,614,614, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +615,615,615,615,615,615,615,615,616,615,615,615,615,615,615,615, +615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,615, +615,615,615,615,615,617,617,115,115,115,115,618,618,618,618,618, +619,619,619,619,619,619,619,115,115,115,115,115,115,115,115,115, /* 
block 154 */ -612,612,612,612,612,612,612,612,612,612,612,612,612,612,612,612, -612,612,612,612,612,612,612,612,612,612,612,612,612,612,612,612, -612,612,612,612,612,612,612,612,612,612,612,612,612,612,612,612, -612,612,612,612,612,612,114,114,114,613,613,613,613,613,613,613, -614,614,614,614,614,614,614,614,614,614,614,614,614,614,614,614, -614,614,614,614,614,614,114,114,615,615,615,615,615,615,615,615, -616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,616, -616,616,616,114,114,114,114,114,617,617,617,617,617,617,617,617, +620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620, +620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620, +620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620, +620,620,620,620,620,620,115,115,115,621,621,621,621,621,621,621, +622,622,622,622,622,622,622,622,622,622,622,622,622,622,622,622, +622,622,622,622,622,622,115,115,623,623,623,623,623,623,623,623, +624,624,624,624,624,624,624,624,624,624,624,624,624,624,624,624, +624,624,624,115,115,115,115,115,625,625,625,625,625,625,625,625, /* block 155 */ -618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,618, -618,618,114,114,114,114,114,114,114,619,619,619,619,114,114,114, -114,114,114,114,114,114,114,114,114,620,620,620,620,620,620,620, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +626,626,626,626,626,626,626,626,626,626,626,626,626,626,626,626, +626,626,115,115,115,115,115,115,115,627,627,627,627,115,115,115, +115,115,115,115,115,115,115,115,115,628,628,628,628,628,628,628, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, 
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 156 */ -621,621,621,621,621,621,621,621,621,621,621,621,621,621,621,621, -621,621,621,621,621,621,621,621,621,621,621,621,621,621,621,621, -621,621,621,621,621,621,621,621,621,621,621,621,621,621,621,621, -621,621,621,621,621,621,621,621,621,621,621,621,621,621,621,621, -621,621,621,621,621,621,621,621,621,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +629,629,629,629,629,629,629,629,629,629,629,629,629,629,629,629, +629,629,629,629,629,629,629,629,629,629,629,629,629,629,629,629, +629,629,629,629,629,629,629,629,629,629,629,629,629,629,629,629, +629,629,629,629,629,629,629,629,629,629,629,629,629,629,629,629, +629,629,629,629,629,629,629,629,629,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 157 */ -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -622,622,622,622,622,622,622,622,622,622,622,622,622,622,622,622, -622,622,622,622,622,622,622,622,622,622,622,622,622,622,622,114, +630,630,630,630,630,630,630,630,630,630,630,630,630,630,630,630, +630,630,630,630,630,630,630,630,630,630,630,630,630,630,630,630, 
+630,630,630,630,630,630,630,630,630,630,630,630,630,630,630,630, +630,630,630,115,115,115,115,115,115,115,115,115,115,115,115,115, +631,631,631,631,631,631,631,631,631,631,631,631,631,631,631,631, +631,631,631,631,631,631,631,631,631,631,631,631,631,631,631,631, +631,631,631,631,631,631,631,631,631,631,631,631,631,631,631,631, +631,631,631,115,115,115,115,115,115,115,632,632,632,632,632,632, /* block 158 */ -623,624,623,625,625,625,625,625,625,625,625,625,625,625,625,625, -625,625,625,625,625,625,625,625,625,625,625,625,625,625,625,625, -625,625,625,625,625,625,625,625,625,625,625,625,625,625,625,625, -625,625,625,625,625,625,625,625,624,624,624,624,624,624,624,624, -624,624,624,624,624,624,624,626,626,626,626,626,626,626,114,114, -114,114,627,627,627,627,627,627,627,627,627,627,627,627,627,627, -627,627,627,627,627,627,628,628,628,628,628,628,628,628,628,628, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,624, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +633,633,633,633,633,633,633,633,633,633,633,633,633,633,633,633, +633,633,633,633,633,633,633,633,633,633,633,633,633,633,633,115, /* block 159 */ -629,629,630,631,631,631,631,631,631,631,631,631,631,631,631,631, -631,631,631,631,631,631,631,631,631,631,631,631,631,631,631,631, -631,631,631,631,631,631,631,631,631,631,631,631,631,631,631,631, -630,630,630,629,629,629,629,630,630,629,629,632,632,633,632,632, -632,632,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -634,634,634,634,634,634,634,634,634,634,634,634,634,634,634,634, -634,634,634,634,634,634,634,634,634,114,114,114,114,114,114,114, 
-635,635,635,635,635,635,635,635,635,635,114,114,114,114,114,114, +634,635,634,636,636,636,636,636,636,636,636,636,636,636,636,636, +636,636,636,636,636,636,636,636,636,636,636,636,636,636,636,636, +636,636,636,636,636,636,636,636,636,636,636,636,636,636,636,636, +636,636,636,636,636,636,636,636,635,635,635,635,635,635,635,635, +635,635,635,635,635,635,635,637,637,637,637,637,637,637,115,115, +115,115,638,638,638,638,638,638,638,638,638,638,638,638,638,638, +638,638,638,638,638,638,639,639,639,639,639,639,639,639,639,639, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,635, /* block 160 */ -636,636,636,637,637,637,637,637,637,637,637,637,637,637,637,637, -637,637,637,637,637,637,637,637,637,637,637,637,637,637,637,637, -637,637,637,637,637,637,637,636,636,636,636,636,638,636,636,636, -636,636,636,636,636,114,639,639,639,639,639,639,639,639,639,639, -640,640,640,640,114,114,114,114,114,114,114,114,114,114,114,114, -641,641,641,641,641,641,641,641,641,641,641,641,641,641,641,641, -641,641,641,641,641,641,641,641,641,641,641,641,641,641,641,641, -641,641,641,642,643,643,641,114,114,114,114,114,114,114,114,114, +640,640,641,642,642,642,642,642,642,642,642,642,642,642,642,642, +642,642,642,642,642,642,642,642,642,642,642,642,642,642,642,642, +642,642,642,642,642,642,642,642,642,642,642,642,642,642,642,642, +641,641,641,640,640,640,640,641,641,640,640,643,643,644,643,643, +643,643,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +645,645,645,645,645,645,645,645,645,645,645,645,645,645,645,645, +645,645,645,645,645,645,645,645,645,115,115,115,115,115,115,115, +646,646,646,646,646,646,646,646,646,646,115,115,115,115,115,115, /* block 161 */ -644,644,645,646,646,646,646,646,646,646,646,646,646,646,646,646, -646,646,646,646,646,646,646,646,646,646,646,646,646,646,646,646, -646,646,646,646,646,646,646,646,646,646,646,646,646,646,646,646, -646,646,646,645,645,645,644,644,644,644,644,644,644,644,644,645, 
-645,646,646,646,646,647,647,647,647,114,114,114,114,647,114,114, -648,648,648,648,648,648,648,648,648,648,646,114,114,114,114,114, -114,649,649,649,649,649,649,649,649,649,649,649,649,649,649,649, -649,649,649,649,649,114,114,114,114,114,114,114,114,114,114,114, +647,647,647,648,648,648,648,648,648,648,648,648,648,648,648,648, +648,648,648,648,648,648,648,648,648,648,648,648,648,648,648,648, +648,648,648,648,648,648,648,647,647,647,647,647,649,647,647,647, +647,647,647,647,647,115,650,650,650,650,650,650,650,650,650,650, +651,651,651,651,115,115,115,115,115,115,115,115,115,115,115,115, +652,652,652,652,652,652,652,652,652,652,652,652,652,652,652,652, +652,652,652,652,652,652,652,652,652,652,652,652,652,652,652,652, +652,652,652,653,654,654,652,115,115,115,115,115,115,115,115,115, /* block 162 */ -650,650,650,650,650,650,650,650,650,650,650,650,650,650,650,650, -650,650,114,650,650,650,650,650,650,650,650,650,650,650,650,650, -650,650,650,650,650,650,650,650,650,650,650,650,651,651,651,652, -652,652,651,651,652,651,652,652,653,653,653,653,653,653,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +655,655,656,657,657,657,657,657,657,657,657,657,657,657,657,657, +657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657, +657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657, +657,657,657,656,656,656,655,655,655,655,655,655,655,655,655,656, +656,657,657,657,657,658,658,658,658,658,655,655,655,658,115,115, +659,659,659,659,659,659,659,659,659,659,657,658,657,658,658,658, +115,660,660,660,660,660,660,660,660,660,660,660,660,660,660,660, +660,660,660,660,660,115,115,115,115,115,115,115,115,115,115,115, /* block 163 */ -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, 
-114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -654,654,654,654,654,654,654,654,654,654,654,654,654,654,654,654, -654,654,654,654,654,654,654,654,654,654,654,654,654,654,654,654, -654,654,654,654,654,654,654,654,654,654,654,654,654,654,654,655, -656,656,656,655,655,655,655,655,655,655,655,114,114,114,114,114, -657,657,657,657,657,657,657,657,657,657,114,114,114,114,114,114, +661,661,661,661,661,661,661,661,661,661,661,661,661,661,661,661, +661,661,115,661,661,661,661,661,661,661,661,661,661,661,661,661, +661,661,661,661,661,661,661,661,661,661,661,661,662,662,662,663, +663,663,662,662,663,662,663,663,664,664,664,664,664,664,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 164 */ -114,658,659,659,114,660,660,660,660,660,660,660,660,114,114,660, -660,114,114,660,660,660,660,660,660,660,660,660,660,660,660,660, -660,660,660,660,660,660,660,660,660,114,660,660,660,660,660,660, -660,114,660,660,114,660,660,660,660,660,114,114,658,660,661,659, -658,659,659,659,659,114,114,659,659,114,114,659,659,659,114,114, -114,114,114,114,114,114,114,661,114,114,114,114,114,660,660,660, -660,660,659,659,114,114,658,658,658,658,658,658,658,114,114,114, -658,658,658,658,658,114,114,114,114,114,114,114,114,114,114,114, +665,665,665,665,665,665,665,115,665,115,665,665,665,665,115,665, +665,665,665,665,665,665,665,665,665,665,665,665,665,665,115,665, +665,665,665,665,665,665,665,665,665,666,115,115,115,115,115,115, +667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,667, +667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,667, +667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,668, +669,669,669,668,668,668,668,668,668,668,668,115,115,115,115,115, 
+670,670,670,670,670,670,670,670,670,670,115,115,115,115,115,115, /* block 165 */ -662,662,662,662,662,662,662,662,662,662,662,662,662,662,662,662, -662,662,662,662,662,662,662,662,662,662,662,662,662,662,662,662, -662,662,662,662,662,662,662,662,662,662,662,662,662,662,662,662, -663,664,664,665,665,665,665,665,665,664,665,664,664,663,664,665, -665,664,665,665,662,662,666,662,114,114,114,114,114,114,114,114, -667,667,667,667,667,667,667,667,667,667,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +671,671,672,672,115,673,673,673,673,673,673,673,673,115,115,673, +673,115,115,673,673,673,673,673,673,673,673,673,673,673,673,673, +673,673,673,673,673,673,673,673,673,115,673,673,673,673,673,673, +673,115,673,673,115,673,673,673,673,673,115,115,671,673,674,672, +671,672,672,672,672,115,115,672,672,115,115,672,672,672,115,115, +673,115,115,115,115,115,115,674,115,115,115,115,115,673,673,673, +673,673,672,672,115,115,671,671,671,671,671,671,671,115,115,115, +671,671,671,671,671,115,115,115,115,115,115,115,115,115,115,115, /* block 166 */ -668,668,668,668,668,668,668,668,668,668,668,668,668,668,668,668, -668,668,668,668,668,668,668,668,668,668,668,668,668,668,668,668, -668,668,668,668,668,668,668,668,668,668,668,668,668,668,668,669, -670,670,671,671,671,671,114,114,670,670,670,670,671,671,670,671, -671,672,672,672,672,672,672,672,672,672,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +675,675,675,675,675,675,675,675,675,675,675,675,675,675,675,675, +675,675,675,675,675,675,675,675,675,675,675,675,675,675,675,675, +675,675,675,675,675,675,675,675,675,675,675,675,675,675,675,675, +676,677,677,678,678,678,678,678,678,677,678,677,677,676,677,678, 
+678,677,678,678,675,675,679,675,115,115,115,115,115,115,115,115, +680,680,680,680,680,680,680,680,680,680,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 167 */ -673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673, -673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673, -673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673, -674,674,674,675,675,675,675,675,675,675,675,674,674,675,674,675, -675,676,676,676,673,114,114,114,114,114,114,114,114,114,114,114, -677,677,677,677,677,677,677,677,677,677,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +681,681,681,681,681,681,681,681,681,681,681,681,681,681,681,681, +681,681,681,681,681,681,681,681,681,681,681,681,681,681,681,681, +681,681,681,681,681,681,681,681,681,681,681,681,681,681,681,682, +683,683,684,684,684,684,115,115,683,683,683,683,684,684,683,684, +684,685,685,685,685,685,685,685,685,685,685,685,685,685,685,685, +685,685,685,685,685,685,685,685,681,681,681,681,684,684,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 168 */ -678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678, -678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678, -678,678,678,678,678,678,678,678,678,678,678,679,680,679,680,680, -679,679,679,679,679,679,680,679,114,114,114,114,114,114,114,114, -681,681,681,681,681,681,681,681,681,681,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +686,686,686,686,686,686,686,686,686,686,686,686,686,686,686,686, 
+686,686,686,686,686,686,686,686,686,686,686,686,686,686,686,686, +686,686,686,686,686,686,686,686,686,686,686,686,686,686,686,686, +687,687,687,688,688,688,688,688,688,688,688,687,687,688,687,688, +688,689,689,689,686,115,115,115,115,115,115,115,115,115,115,115, +690,690,690,690,690,690,690,690,690,690,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 169 */ -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -682,682,682,682,682,682,682,682,682,682,682,682,682,682,682,682, -682,682,682,682,682,682,682,682,682,682,682,682,682,682,682,682, -683,683,683,683,683,683,683,683,683,683,683,683,683,683,683,683, -683,683,683,683,683,683,683,683,683,683,683,683,683,683,683,683, -684,684,684,684,684,684,684,684,684,684,685,685,685,685,685,685, -685,685,685,114,114,114,114,114,114,114,114,114,114,114,114,686, +691,691,691,691,691,691,691,691,691,691,691,691,691,691,691,691, +691,691,691,691,691,691,691,691,691,691,691,691,691,691,691,691, +691,691,691,691,691,691,691,691,691,691,691,692,693,692,693,693, +692,692,692,692,692,692,693,692,115,115,115,115,115,115,115,115, +694,694,694,694,694,694,694,694,694,694,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 170 */ -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -687,687,687,687,687,687,687,687,687,687,687,687,687,687,687,687, -687,687,687,687,687,687,687,687,687,687,687,687,687,687,687,687, 
-687,687,687,687,687,687,687,687,687,687,687,687,687,687,687,687, -687,687,687,687,687,687,687,687,687,114,114,114,114,114,114,114, +695,695,695,695,695,695,695,695,695,695,695,695,695,695,695,695, +695,695,695,695,695,695,695,695,695,695,115,115,115,696,696,696, +697,697,696,696,696,696,697,696,696,696,696,696,115,115,115,115, +698,698,698,698,698,698,698,698,698,698,699,699,700,700,700,701, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 171 */ -688,688,688,688,688,688,688,688,688,688,688,688,688,688,688,688, -688,688,688,688,688,688,688,688,688,688,688,688,688,688,688,688, -688,688,688,688,688,688,688,688,688,688,688,688,688,688,688,688, -688,688,688,688,688,688,688,688,688,688,688,688,688,688,688,688, -688,688,688,688,688,688,688,688,688,688,688,688,688,688,688,688, -688,688,688,688,688,688,688,688,688,688,688,688,688,688,688,688, -688,688,688,688,688,688,688,688,688,688,688,688,688,688,688,688, -688,688,688,688,688,688,688,688,688,688,688,688,688,688,688,688, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +702,702,702,702,702,702,702,702,702,702,702,702,702,702,702,702, +702,702,702,702,702,702,702,702,702,702,702,702,702,702,702,702, +703,703,703,703,703,703,703,703,703,703,703,703,703,703,703,703, +703,703,703,703,703,703,703,703,703,703,703,703,703,703,703,703, +704,704,704,704,704,704,704,704,704,704,705,705,705,705,705,705, +705,705,705,115,115,115,115,115,115,115,115,115,115,115,115,706, /* block 172 */ -688,688,688,688,688,688,688,688,688,688,688,688,688,688,688,688, -688,688,688,688,688,688,688,688,688,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, 
-114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +707,707,707,707,707,707,707,707,707,707,707,707,707,707,707,707, +707,707,707,707,707,707,707,707,707,707,707,707,707,707,707,707, +707,707,707,707,707,707,707,707,707,707,707,707,707,707,707,707, +707,707,707,707,707,707,707,707,707,115,115,115,115,115,115,115, /* block 173 */ -689,689,689,689,689,689,689,689,689,689,689,689,689,689,689,689, -689,689,689,689,689,689,689,689,689,689,689,689,689,689,689,689, -689,689,689,689,689,689,689,689,689,689,689,689,689,689,689,689, -689,689,689,689,689,689,689,689,689,689,689,689,689,689,689,689, -689,689,689,689,689,689,689,689,689,689,689,689,689,689,689,689, -689,689,689,689,689,689,689,689,689,689,689,689,689,689,689,689, -689,689,689,689,689,689,689,689,689,689,689,689,689,689,689,114, -690,690,690,690,690,114,114,114,114,114,114,114,114,114,114,114, +708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708, +708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708, +708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708, +708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708, +708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708, +708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708, +708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708, +708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708, /* block 174 */ 
-691,691,691,691,691,691,691,691,691,691,691,691,691,691,691,691, -691,691,691,691,691,691,691,691,691,691,691,691,691,691,691,691, -691,691,691,691,691,691,691,691,691,691,691,691,691,691,691,691, -691,691,691,691,691,691,691,691,691,691,691,691,691,691,691,691, -691,691,691,691,691,691,691,691,691,691,691,691,691,691,691,691, -691,691,691,691,691,691,691,691,691,691,691,691,691,691,691,691, -691,691,691,691,691,691,691,691,691,691,691,691,691,691,691,691, -691,691,691,691,691,691,691,691,691,691,691,691,691,691,691,691, +708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708, +708,708,708,708,708,708,708,708,708,708,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 175 */ -691,691,691,691,691,691,691,691,691,691,691,691,691,691,691,691, -691,691,691,691,691,691,691,691,691,691,691,691,691,691,691,691, -691,691,691,691,691,691,691,691,691,691,691,691,691,691,691,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,709, +709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,709, +709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,709, +709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,709, +709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,709, +709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,709, 
+709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,115, +710,710,710,710,710,115,115,115,115,115,115,115,115,115,115,115, /* block 176 */ -497,497,497,497,497,497,497,497,497,497,497,497,497,497,497,497, -497,497,497,497,497,497,497,497,497,497,497,497,497,497,497,497, -497,497,497,497,497,497,497,497,497,497,497,497,497,497,497,497, -497,497,497,497,497,497,497,497,497,497,497,497,497,497,497,497, -497,497,497,497,497,497,497,497,497,497,497,497,497,497,497,497, -497,497,497,497,497,497,497,497,497,497,497,497,497,497,497,497, -497,497,497,497,497,497,497,497,497,497,497,497,497,497,497,497, -497,497,497,497,497,497,497,497,497,497,497,497,497,497,497,497, +708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708, +708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708, +708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708, +708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708, +708,708,708,708,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 177 */ -497,497,497,497,497,497,497,497,497,497,497,497,497,497,497,497, -497,497,497,497,497,497,497,497,497,497,497,497,497,497,497,497, -497,497,497,497,497,497,497,497,497,497,497,497,497,497,497,497, -497,497,497,497,497,497,497,497,497,114,114,114,114,114,114,114, -692,692,692,692,692,692,692,692,692,692,692,692,692,692,692,692, -692,692,692,692,692,692,692,692,692,692,692,692,692,692,692,114, -693,693,693,693,693,693,693,693,693,693,114,114,114,114,694,694, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711, +711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711, +711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711, 
+711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711, +711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711, +711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711, +711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711, +711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711, /* block 178 */ -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -695,695,695,695,695,695,695,695,695,695,695,695,695,695,695,695, -695,695,695,695,695,695,695,695,695,695,695,695,695,695,114,114, -696,696,696,696,696,697,114,114,114,114,114,114,114,114,114,114, +711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711, +711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711, +711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 179 */ -698,698,698,698,698,698,698,698,698,698,698,698,698,698,698,698, -698,698,698,698,698,698,698,698,698,698,698,698,698,698,698,698, -698,698,698,698,698,698,698,698,698,698,698,698,698,698,698,698, -699,699,699,699,699,699,699,700,700,700,700,700,701,701,701,701, -702,702,702,702,700,701,114,114,114,114,114,114,114,114,114,114, -703,703,703,703,703,703,703,703,703,703,114,704,704,704,704,704, -704,704,114,698,698,698,698,698,698,698,698,698,698,698,698,698, -698,698,698,698,698,698,698,698,114,114,114,114,114,698,698,698, 
+712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712, +712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712, +712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712, +712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712, +712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712, +712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712, +712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712, +712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712, /* block 180 */ -698,698,698,698,698,698,698,698,698,698,698,698,698,698,698,698, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712, +712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712, +712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712, +712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712, +712,712,712,712,712,712,712,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 181 */ -705,705,705,705,705,705,705,705,705,705,705,705,705,705,705,705, -705,705,705,705,705,705,705,705,705,705,705,705,705,705,705,705, -705,705,705,705,705,705,705,705,705,705,705,705,705,705,705,705, -705,705,705,705,705,705,705,705,705,705,705,705,705,705,705,705, -705,705,705,705,705,114,114,114,114,114,114,114,114,114,114,114, 
-705,706,706,706,706,706,706,706,706,706,706,706,706,706,706,706, -706,706,706,706,706,706,706,706,706,706,706,706,706,706,706,706, -706,706,706,706,706,706,706,706,706,706,706,706,706,706,706,114, +498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498, +498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498, +498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498, +498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498, +498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498, +498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498, +498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498, +498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498, /* block 182 */ -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,707, -707,707,707,708,708,708,708,708,708,708,708,708,708,708,708,708, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498, +498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498, +498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498, +498,498,498,498,498,498,498,498,498,115,115,115,115,115,115,115, +713,713,713,713,713,713,713,713,713,713,713,713,713,713,713,713, +713,713,713,713,713,713,713,713,713,713,713,713,713,713,713,115, +714,714,714,714,714,714,714,714,714,714,115,115,115,115,715,715, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 183 */ -478,476,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, 
-114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +716,716,716,716,716,716,716,716,716,716,716,716,716,716,716,716, +716,716,716,716,716,716,716,716,716,716,716,716,716,716,115,115, +717,717,717,717,717,718,115,115,115,115,115,115,115,115,115,115, /* block 184 */ -709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,709, -709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,709, -709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,709, -709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,709, -709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,709, -709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,709, -709,709,709,709,709,709,709,709,709,709,709,114,114,114,114,114, -709,709,709,709,709,709,709,709,709,709,709,709,709,114,114,114, +719,719,719,719,719,719,719,719,719,719,719,719,719,719,719,719, +719,719,719,719,719,719,719,719,719,719,719,719,719,719,719,719, +719,719,719,719,719,719,719,719,719,719,719,719,719,719,719,719, +720,720,720,720,720,720,720,721,721,721,721,721,722,722,722,722, +723,723,723,723,721,722,115,115,115,115,115,115,115,115,115,115, +724,724,724,724,724,724,724,724,724,724,115,725,725,725,725,725, +725,725,115,719,719,719,719,719,719,719,719,719,719,719,719,719, +719,719,719,719,719,719,719,719,115,115,115,115,115,719,719,719, /* 
block 185 */ -709,709,709,709,709,709,709,709,709,114,114,114,114,114,114,114, -709,709,709,709,709,709,709,709,709,709,114,114,710,711,711,712, - 22, 22, 22, 22,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +719,719,719,719,719,719,719,719,719,719,719,719,719,719,719,719, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 186 */ - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19,114,114,114,114,114,114,114,114,114,114, +726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,726, +726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,726, +726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,726, +726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,726, +726,726,726,726,726,115,115,115,115,115,115,115,115,115,115,115, 
+726,727,727,727,727,727,727,727,727,727,727,727,727,727,727,727, +727,727,727,727,727,727,727,727,727,727,727,727,727,727,727,727, +727,727,727,727,727,727,727,727,727,727,727,727,727,727,727,115, /* block 187 */ - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19,114,114, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19,713,405,109,109,109, 19, 19, 19,405,713,713, -713,713,713, 22, 22, 22, 22, 22, 22, 22, 22,109,109,109,109,109, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,728, +728,728,728,729,729,729,729,729,729,729,729,729,729,729,729,729, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 188 */ -109,109,109, 19, 19,109,109,109,109,109,109,109, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,109,109,109,109, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +479,477,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, 
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 189 */ -559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559, -559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559, -559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559, -559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559, -559,559,714,714,714,559,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,730, +730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,730, +730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,730, +730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,730, +730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,730, +730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,730, +730,730,730,730,730,730,730,730,730,730,730,115,115,115,115,115, +730,730,730,730,730,730,730,730,730,730,730,730,730,115,115,115, /* block 190 */ - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19,114,114,114,114,114,114,114,114,114, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 
23,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +730,730,730,730,730,730,730,730,730,115,115,115,115,115,115,115, +730,730,730,730,730,730,730,730,730,730,115,115,731,732,732,733, + 22, 22, 22, 22,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 191 */ -437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437, -437,437,437,437,437,437,437,437,437,437,438,438,438,438,438,438, -438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438, -438,438,438,438,437,437,437,437,437,437,437,437,437,437,437,437, -437,437,437,437,437,437,437,437,437,437,437,437,437,437,438,438, -438,438,438,438,438,114,438,438,438,438,438,438,438,438,438,438, -438,438,438,438,438,438,438,438,437,437,437,437,437,437,437,437, -437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115,115,115,115, /* block 192 */ -437,437,438,438,438,438,438,438,438,438,438,438,438,438,438,438, -438,438,438,438,438,438,438,438,438,438,438,438,437,114,437,437, -114,114,437,114,114,437,437,114,114,437,437,437,437,114,437,437, -437,437,437,437,437,437,438,438,438,438,114,438,114,438,438,438, 
-438,438,438,438,114,438,438,438,438,438,438,438,438,438,438,438, -437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437, -437,437,437,437,437,437,437,437,437,437,438,438,438,438,438,438, -438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19,115,115, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19,734,406,110,110,110, 19, 19, 19,406,734,734, +734,734,734, 22, 22, 22, 22, 22, 22, 22, 22,110,110,110,110,110, /* block 193 */ -438,438,438,438,437,437,114,437,437,437,437,114,114,437,437,437, -437,437,437,437,437,114,437,437,437,437,437,437,437,114,438,438, -438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438, -438,438,438,438,438,438,438,438,437,437,114,437,437,437,437,114, -437,437,437,437,437,114,437,114,114,114,437,437,437,437,437,437, -437,114,438,438,438,438,438,438,438,438,438,438,438,438,438,438, -438,438,438,438,438,438,438,438,438,438,438,438,437,437,437,437, -437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437, +110,110,110, 19, 19,110,110,110,110,110,110,110, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,110,110,110,110, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 194 */ -437,437,437,437,437,437,438,438,438,438,438,438,438,438,438,438, 
-438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438, -437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437, -437,437,437,437,437,437,437,437,437,437,438,438,438,438,438,438, -438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438, -438,438,438,438,437,437,437,437,437,437,437,437,437,437,437,437, -437,437,437,437,437,437,437,437,437,437,437,437,437,437,438,438, -438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438, +564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564, +564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564, +564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564, +564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564, +564,564,735,735,735,564,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 195 */ -438,438,438,438,438,438,438,438,437,437,437,437,437,437,437,437, -437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437, -437,437,438,438,438,438,438,438,438,438,438,438,438,438,438,438, -438,438,438,438,438,438,438,438,438,438,438,438,437,437,437,437, -437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437, -437,437,437,437,437,437,438,438,438,438,438,438,438,438,438,438, -438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438, -437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115,115,115, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 
23, 23,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 196 */ -437,437,437,437,437,437,437,437,437,437,438,438,438,438,438,438, 438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438, -438,438,438,438,438,438,114,114,437,437,437,437,437,437,437,437, -437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437, -437, 8,438,438,438,438,438,438,438,438,438,438,438,438,438,438, -438,438,438,438,438,438,438,438,438,438,438, 8,438,438,438,438, -438,438,437,437,437,437,437,437,437,437,437,437,437,437,437,437, -437,437,437,437,437,437,437,437,437,437,437, 8,438,438,438,438, +438,438,438,438,438,438,438,438,438,438,439,439,439,439,439,439, +439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,439, +439,439,439,439,438,438,438,438,438,438,438,438,438,438,438,438, +438,438,438,438,438,438,438,438,438,438,438,438,438,438,439,439, +439,439,439,439,439,115,439,439,439,439,439,439,439,439,439,439, +439,439,439,439,439,439,439,439,438,438,438,438,438,438,438,438, +438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438, /* block 197 */ +438,438,439,439,439,439,439,439,439,439,439,439,439,439,439,439, +439,439,439,439,439,439,439,439,439,439,439,439,438,115,438,438, +115,115,438,115,115,438,438,115,115,438,438,438,438,115,438,438, +438,438,438,438,438,438,439,439,439,439,115,439,115,439,439,439, +439,439,439,439,115,439,439,439,439,439,439,439,439,439,439,439, 438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438, -438,438,438,438,438, 8,438,438,438,438,438,438,437,437,437,437, -437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437, -437,437,437,437,437, 8,438,438,438,438,438,438,438,438,438,438, -438,438,438,438,438,438,438,438,438,438,438,438,438,438,438, 8, -438,438,438,438,438,438,437,437,437,437,437,437,437,437,437,437, -437,437,437,437,437,437,437,437,437,437,437,437,437,437,437, 8, -438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438, 
+438,438,438,438,438,438,438,438,438,438,439,439,439,439,439,439, +439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,439, /* block 198 */ -438,438,438,438,438,438,438,438,438, 8,438,438,438,438,438,438, -437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437, -437,437,437,437,437,437,437,437,437, 8,438,438,438,438,438,438, +439,439,439,439,438,438,115,438,438,438,438,115,115,438,438,438, +438,438,438,438,438,115,438,438,438,438,438,438,438,115,439,439, +439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,439, +439,439,439,439,439,439,439,439,438,438,115,438,438,438,438,115, +438,438,438,438,438,115,438,115,115,115,438,438,438,438,438,438, +438,115,439,439,439,439,439,439,439,439,439,439,439,439,439,439, +439,439,439,439,439,439,439,439,439,439,439,439,438,438,438,438, 438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438, -438,438,438, 8,438,438,438,438,438,438,437,438,114,114, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, /* block 199 */ -715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,715, -715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,715, -715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,715, -715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,715, -715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,715, -715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,715, -715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,715, -715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,715, +438,438,438,438,438,438,439,439,439,439,439,439,439,439,439,439, +439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,439, +438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438, +438,438,438,438,438,438,438,438,438,438,439,439,439,439,439,439, 
+439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,439, +439,439,439,439,438,438,438,438,438,438,438,438,438,438,438,438, +438,438,438,438,438,438,438,438,438,438,438,438,438,438,439,439, +439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,439, /* block 200 */ -715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,715, -715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,715, -715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,715, -715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,715, -715,715,715,715,715,114,114,716,716,716,716,716,716,716,716,716, -717,717,717,717,717,717,717,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +439,439,439,439,439,439,439,439,438,438,438,438,438,438,438,438, +438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438, +438,438,439,439,439,439,439,439,439,439,439,439,439,439,439,439, +439,439,439,439,439,439,439,439,439,439,439,439,438,438,438,438, +438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438, +438,438,438,438,438,438,439,439,439,439,439,439,439,439,439,439, +439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,439, +438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438, /* block 201 */ -199,199,199,199,114,199,199,199,199,199,199,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199, -114,199,199,114,199,114,114,199,114,199,199,199,199,199,199,199, -199,199,199,114,199,199,199,199,114,199,114,199,114,114,114,114, -114,114,199,114,114,114,114,199,114,199,114,199,114,199,199,199, -114,199,199,114,199,114,114,199,114,199,114,199,114,199,114,199, -114,199,199,114,199,114,114,199,199,199,199,114,199,199,199,199, -199,199,199,114,199,199,199,199,114,199,199,199,199,114,199,114, +438,438,438,438,438,438,438,438,438,438,439,439,439,439,439,439, 
+439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,439, +439,439,439,439,439,439,115,115,438,438,438,438,438,438,438,438, +438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438, +438, 8,439,439,439,439,439,439,439,439,439,439,439,439,439,439, +439,439,439,439,439,439,439,439,439,439,439, 8,439,439,439,439, +439,439,438,438,438,438,438,438,438,438,438,438,438,438,438,438, +438,438,438,438,438,438,438,438,438,438,438, 8,439,439,439,439, /* block 202 */ -199,199,199,199,199,199,199,199,199,199,114,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,114,114,114,114, -114,199,199,199,114,199,199,199,199,199,114,199,199,199,199,199, -199,199,199,199,199,199,199,199,199,199,199,199,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -194,194,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,439, +439,439,439,439,439, 8,439,439,439,439,439,439,438,438,438,438, +438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438, +438,438,438,438,438, 8,439,439,439,439,439,439,439,439,439,439, +439,439,439,439,439,439,439,439,439,439,439,439,439,439,439, 8, +439,439,439,439,439,439,438,438,438,438,438,438,438,438,438,438, +438,438,438,438,438,438,438,438,438,438,438,438,438,438,438, 8, +439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,439, /* block 203 */ - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,114,114,114,114, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 
19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, +439,439,439,439,439,439,439,439,439, 8,439,439,439,439,439,439, +438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438, +438,438,438,438,438,438,438,438,438, 8,439,439,439,439,439,439, +439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,439, +439,439,439, 8,439,439,439,439,439,439,438,439,115,115, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, /* block 204 */ - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19,114,114,114,114,114,114,114,114,114,114,114,114, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,114, -114, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, -114, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, -114, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19,114,114,114,114,114,114,114,114,114,114, +736,736,736,736,736,736,736,736,736,736,736,736,736,736,736,736, +736,736,736,736,736,736,736,736,736,736,736,736,736,736,736,736, +736,736,736,736,736,736,736,736,736,736,736,736,736,736,736,736, +736,736,736,736,736,736,736,736,736,736,736,736,736,736,736,736, +736,736,736,736,736,736,736,736,736,736,736,736,736,736,736,736, +736,736,736,736,736,736,736,736,736,736,736,736,736,736,736,736, +736,736,736,736,736,736,736,736,736,736,736,736,736,736,736,736, +736,736,736,736,736,736,736,736,736,736,736,736,736,736,736,736, /* block 205 */ - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,114,114,114, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,114, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 
19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,114,114,114,114, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, +737,737,737,737,737,737,737,737,737,737,737,737,737,737,737,737, +737,737,737,737,737,737,737,737,737,737,737,737,737,737,737,737, +737,737,737,737,737,737,737,737,737,737,737,737,737,737,737,737, +737,737,737,737,737,737,737,736,736,736,736,737,737,737,737,737, +737,737,737,737,737,737,737,737,737,737,737,737,737,737,737,737, +737,737,737,737,737,737,737,737,737,737,737,737,737,737,737,737, +737,737,737,737,737,737,737,737,737,737,737,737,737,736,736,736, +736,736,736,736,736,737,736,736,736,736,736,736,736,736,736,736, /* block 206 */ - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,718,718,718,718,718,718,718,718,718,718, -718,718,718,718,718,718,718,718,718,718,718,718,718,718,718,718, +736,736,736,736,737,736,736,738,738,738,738,738,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,737,737,737,737,737, +115,737,737,737,737,737,737,737,737,737,737,737,737,737,737,737, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 207 */ -719, 19, 19,114,114,114,114,114,114,114,114,114,114,114,114,114, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 
19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,114,114,114,114,114, - 19, 19, 19, 19, 19, 19, 19, 19, 19,114,114,114,114,114,114,114, - 19, 19,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, +739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,739, +739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,739, +739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,739, +739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,739, +739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,739, +739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,739, +739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,739, +739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,739, /* block 208 */ - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,114,114,114, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,114,114, +739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,739, +739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,739, +739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,739, +739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,739, +739,739,739,739,739,115,115,740,740,740,740,740,740,740,740,740, +741,741,741,741,741,741,741,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 209 */ - 19, 19, 19, 19, 19, 
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,114, -114,114,114,114, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19,114,114,114,114,114,114,114,114, +200,200,200,200,115,200,200,200,200,200,200,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200, +115,200,200,115,200,115,115,200,115,200,200,200,200,200,200,200, +200,200,200,115,200,200,200,200,115,200,115,200,115,115,115,115, +115,115,200,115,115,115,115,200,115,200,115,200,115,200,200,200, +115,200,200,115,200,115,115,200,115,200,115,200,115,200,115,200, +115,200,200,115,200,115,115,200,200,200,200,115,200,200,200,200, +200,200,200,115,200,200,200,200,115,200,200,200,200,115,200,115, /* block 210 */ - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,114, +200,200,200,200,200,200,200,200,200,200,115,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,115,115,115,115, +115,200,200,200,115,200,200,200,200,200,115,200,200,200,200,200, +200,200,200,200,200,200,200,200,200,200,200,200,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, 
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +195,195,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 211 */ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,114,114,114,114,114, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,114, 19, 19, 19, 19, 19, /* block 212 */ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19,115,115,115,115,115,115,115,115,115,115,115,115, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115, +115, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, +115, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, +115, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19,114, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115,115,115,115, /* block 213 */ + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,115,115,115, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19,114,114, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, /* block 214 */ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,114,114,114, - 19, 19, 19, 19,114,114,114,114,114,114,114,114,114,114,114,114, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,742,742,742,742,742,742,742,742,742,742, +742,742,742,742,742,742,742,742,742,742,742,742,742,742,742,742, /* block 215 */ +743, 19, 19,115,115,115,115,115,115,115,115,115,115,115,115,115, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 
19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19,114,114,114,114,114,114,114,114,114,114,114,114, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115, + 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115, + 19, 19,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 216 */ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, @@ -3519,109 +3543,199 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 58112 bytes, block = 128 */ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 14, 14, 14, 14, 14, /* block 217 */ - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,114,114,114,114, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19,114,114,114,114,114,114,114,114, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,114,114,114,114,114,114, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115, 19, 19, 19, 19, 19, /* block 218 */ - 19, 19, 19, 
19, 19, 19, 19, 19,114,114,114,114,114,114,114,114, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19,115, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, /* block 219 */ -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115, + 19, 19, 19, 
19,115,115,115,115,115,115,115,115,115,115,115,115, /* block 220 */ -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,114,114,114,114,114,114,114,114,114,114,114, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19,115,115,115,115,115,115,115,115,115,115,115,115, /* block 221 */ -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, -114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 
19, 19, 19, 19, 19,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 222 */ -436, 22,436,436,436,436,436,436,436,436,436,436,436,436,436,436, -436,436,436,436,436,436,436,436,436,436,436,436,436,436,436,436, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115,115, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115,115, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, /* block 223 */ -436,436,436,436,436,436,436,436,436,436,436,436,436,436,436,436, -436,436,436,436,436,436,436,436,436,436,436,436,436,436,436,436, -436,436,436,436,436,436,436,436,436,436,436,436,436,436,436,436, -436,436,436,436,436,436,436,436,436,436,436,436,436,436,436,436, -436,436,436,436,436,436,436,436,436,436,436,436,436,436,436,436, -436,436,436,436,436,436,436,436,436,436,436,436,436,436,436,436, -436,436,436,436,436,436,436,436,436,436,436,436,436,436,436,436, -436,436,436,436,436,436,436,436,436,436,436,436,436,436,436,436, + 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115,115, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 
19,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 224 */ -109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109, -109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109, -109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109, -109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109, -109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109, -109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109, -109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109, -109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, + 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 225 */ -109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109, -109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109, -109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109, -109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109, -109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109, -109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109, -109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109, -436,436,436,436,436,436,436,436,436,436,436,436,436,436,436,436, + 19, 19, 19, 19, 
19,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, + 19,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, /* block 226 */ -552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552, -552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552, -552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552, -552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552, -552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552, -552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552, -552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552, -552,552,552,552,552,552,552,552,552,552,552,552,552,552,114,114, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, + +/* block 227 */ +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,115,115,115,115,115,115,115,115,115,115,115, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, 
+485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, + +/* block 228 */ +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,115,115, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, + +/* block 229 */ +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, + +/* block 230 */ +485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485, +485,485,485,485,485,485,485,485,485,485,485,485,485,485,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, +115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, + +/* block 231 */ +437, 22,437,437,437,437,437,437,437,437,437,437,437,437,437,437, +437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 
22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + +/* block 232 */ +437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437, +437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437, +437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437, +437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437, +437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437, +437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437, +437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437, +437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437, + +/* block 233 */ +110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, +110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, +110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, +110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, +110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, +110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, +110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, +110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, + +/* block 234 */ +110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, +110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, +110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, +110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, +110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, +110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, +110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, +437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437, + +/* block 235 */ 
+557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557, +557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557, +557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557, +557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557, +557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557, +557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557, +557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557, +557,557,557,557,557,557,557,557,557,557,557,557,557,557,115,115, }; diff --git a/pcre2/src/pcre2_ucp.h b/pcre2/src/pcre2_ucp.h index e7db0c015..02e5012c2 100644 --- a/pcre2/src/pcre2_ucp.h +++ b/pcre2/src/pcre2_ucp.h @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2014 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -39,8 +39,8 @@ POSSIBILITY OF SUCH DAMAGE. */ -#ifndef _PCRE2_UCP_H -#define _PCRE2_UCP_H +#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD +#define PCRE2_UCP_H_IDEMPOTENT_GUARD /* This file contains definitions of the property values that are returned by the UCD access macros. 
New values that are added for new releases of Unicode @@ -253,9 +253,16 @@ enum { ucp_Pau_Cin_Hau, ucp_Siddham, ucp_Tirhuta, - ucp_Warang_Citi + ucp_Warang_Citi, + /* New for Unicode 8.0.0: */ + ucp_Ahom, + ucp_Anatolian_Hieroglyphs, + ucp_Hatran, + ucp_Multani, + ucp_Old_Hungarian, + ucp_SignWriting }; -#endif +#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */ /* End of pcre2_ucp.h */ diff --git a/pcre2/src/pcre2_valid_utf.c b/pcre2/src/pcre2_valid_utf.c index a97847ab9..3e18f1200 100644 --- a/pcre2/src/pcre2_valid_utf.c +++ b/pcre2/src/pcre2_valid_utf.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2014 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -93,8 +93,8 @@ Returns: == 0 if the string is a valid UTF string int PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset) { -register PCRE2_SPTR p; -register uint32_t c; +PCRE2_SPTR p; +uint32_t c; /* ----------------- Check a UTF-8 string ----------------- */ @@ -131,11 +131,13 @@ PCRE2_ERROR_UTF8_ERR20 Isolated 0x80 byte (not within UTF-8 character) PCRE2_ERROR_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff */ -for (p = string; length-- > 0; p++) +for (p = string; length > 0; p++) { - register uint32_t ab, d; + uint32_t ab, d; c = *p; + length--; + if (c < 128) continue; /* ASCII character */ if (c < 0xc0) /* Isolated 10xx xxxx byte */ @@ -324,9 +326,10 @@ PCRE2_ERROR_UTF16_ERR2 Invalid low surrogate PCRE2_ERROR_UTF16_ERR3 Isolated low surrogate */ -for (p = string; length-- > 0; p++) +for (p = string; length > 0; p++) { c = *p; + length--; if ((c & 0xf800) != 0xd800) { @@ -368,7 +371,7 @@ PCRE2_ERROR_UTF32_ERR1 Surrogate character PCRE2_ERROR_UTF32_ERR2 
Character > 0x10ffff */ -for (p = string; length-- > 0; p++) +for (p = string; length > 0; length--, p++) { c = *p; if ((c & 0xfffff800u) != 0xd800u) diff --git a/pcre2/src/pcre2_xclass.c b/pcre2/src/pcre2_xclass.c index 2ea89c4b8..407d3f5b8 100644 --- a/pcre2/src/pcre2_xclass.c +++ b/pcre2/src/pcre2_xclass.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2014 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -247,7 +247,7 @@ while ((t = *data++) != XCL_END) case PT_PXPUNCT: if ((PRIV(ucp_gentype)[prop->chartype] == ucp_P || - (c < 256 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop) + (c < 128 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop) return !negated; break; diff --git a/pcre2/src/pcre2demo.c b/pcre2/src/pcre2demo.c index ec51cf11c..8ae49f100 100644 --- a/pcre2/src/pcre2demo.c +++ b/pcre2/src/pcre2demo.c @@ -3,28 +3,31 @@ *************************************************/ /* This is a demonstration program to illustrate a straightforward way of -calling the PCRE2 regular expression library from a C program. See the +using the PCRE2 regular expression library from a C program. See the pcre2sample documentation for a short discussion ("man pcre2sample" if you have the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is incompatible with the original PCRE API. There are actually three libraries, each supporting a different code unit -width. This demonstration program uses the 8-bit library. +width. This demonstration program uses the 8-bit library. 
The default is to +process each code unit as a separate character, but if the pattern begins with +"(*UTF)", both it and the subject are treated as UTF-8 strings, where +characters may occupy multiple code units. In Unix-like environments, if PCRE2 is installed in your standard system libraries, you should be able to compile this program using this command: -gcc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo +cc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo If PCRE2 is not installed in a standard place, it is likely to be installed with support for the pkg-config mechanism. If you have pkg-config, you can compile this program using this command: -gcc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo +cc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo -If you do not have pkg-config, you may have to use this: +If you do not have pkg-config, you may have to use something like this: -gcc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \ +cc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \ -R/usr/local/lib -lpcre2-8 -o pcre2demo Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and @@ -39,9 +42,14 @@ the following line. */ /* #define PCRE2_STATIC */ -/* This macro must be defined before including pcre2.h. For a program that uses -only one code unit width, it makes it possible to use generic function names -such as pcre2_compile(). */ +/* The PCRE2_CODE_UNIT_WIDTH macro must be defined before including pcre2.h. +For a program that uses only one code unit width, setting it to 8, 16, or 32 +makes it possible to use generic function names such as pcre2_compile(). Note +that just changing 8 to 16 (for example) is not sufficient to convert this +program to process 16-bit characters. Even in a fully 16-bit environment, where +string-handling functions such as strcmp() and printf() work with 16-bit +characters, the code for handling the table of named substrings will still need +to be modified. 
*/ #define PCRE2_CODE_UNIT_WIDTH 8 @@ -62,19 +70,19 @@ int main(int argc, char **argv) { pcre2_code *re; PCRE2_SPTR pattern; /* PCRE2_SPTR is a pointer to unsigned code units of */ -PCRE2_SPTR subject; /* the appropriate width (8, 16, or 32 bits). */ +PCRE2_SPTR subject; /* the appropriate width (in this case, 8 bits). */ PCRE2_SPTR name_table; int crlf_is_newline; int errornumber; int find_all; int i; -int namecount; -int name_entry_size; int rc; int utf8; uint32_t option_bits; +uint32_t namecount; +uint32_t name_entry_size; uint32_t newline; PCRE2_SIZE erroroffset; @@ -89,15 +97,19 @@ pcre2_match_data *match_data; * First, sort out the command line. There is only one possible option at * * the moment, "-g" to request repeated matching to find all occurrences, * * like Perl's /g option. We set the variable find_all to a non-zero value * -* if the -g option is present. Apart from that, there must be exactly two * -* arguments. * +* if the -g option is present. * **************************************************************************/ find_all = 0; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-g") == 0) find_all = 1; - else break; + else if (argv[i][0] == '-') + { + printf("Unrecognised option %s\n", argv[i]); + return 1; + } + else break; } /* After the options, we require exactly two arguments, which are the pattern, @@ -105,7 +117,7 @@ and the subject string. */ if (argc - i != 2) { - printf("Two arguments required: a regex and a subject string\n"); + printf("Exactly two arguments required: a regex and a subject string\n"); return 1; } @@ -184,7 +196,7 @@ if (rc < 0) stored. */ ovector = pcre2_get_ovector_pointer(match_data); -printf("\nMatch succeeded at offset %d\n", (int)ovector[0]); +printf("Match succeeded at offset %d\n", (int)ovector[0]); /************************************************************************* @@ -225,7 +237,7 @@ we have to extract the count of named parentheses from the pattern. 
*/ PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */ &namecount); /* where to put the answer */ -if (namecount <= 0) printf("No named substrings\n"); else +if (namecount == 0) printf("No named substrings\n"); else { PCRE2_SPTR tabptr; printf("Named substrings\n"); @@ -313,8 +325,8 @@ crlf_is_newline = newline == PCRE2_NEWLINE_ANY || for (;;) { - uint32_t options = 0; /* Normally no options */ - PCRE2_SIZE start_offset = ovector[1]; /* Start at end of previous match */ + uint32_t options = 0; /* Normally no options */ + PCRE2_SIZE start_offset = ovector[1]; /* Start at end of previous match */ /* If the previous match was for an empty string, we are finished if we are at the end of the subject. Otherwise, arrange to run another match at the @@ -354,7 +366,7 @@ for (;;) { if (options == 0) break; /* All matches found */ ovector[1] = start_offset + 1; /* Advance one code unit */ - if (crlf_is_newline && /* If CRLF is newline & */ + if (crlf_is_newline && /* If CRLF is a newline & */ start_offset < subject_length - 1 && /* we are at CRLF, */ subject[start_offset] == '\r' && subject[start_offset + 1] == '\n') @@ -400,7 +412,7 @@ for (;;) printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start); } - if (namecount <= 0) printf("No named substrings\n"); else + if (namecount == 0) printf("No named substrings\n"); else { PCRE2_SPTR tabptr = name_table; printf("Named substrings\n"); diff --git a/pcre2/src/pcre2grep.c b/pcre2/src/pcre2grep.c index d5a5d6db9..e98d743de 100644 --- a/pcre2/src/pcre2grep.c +++ b/pcre2/src/pcre2grep.c @@ -13,7 +13,7 @@ distribution because other apparatus is needed to compile pcre2grep for z/OS. The header can be found in the special z/OS distribution, which is available from www.zaconsultants.net or from www.cbttape.org. 
- Copyright (c) 1997-2014 University of Cambridge + Copyright (c) 1997-2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -58,6 +58,23 @@ POSSIBILITY OF SUCH DAMAGE. #include #include +#if (defined _WIN32 || (defined HAVE_WINDOWS_H && HAVE_WINDOWS_H)) && !defined WIN32 +#define WIN32 +#endif + +#ifdef WIN32 +#include /* For _setmode() */ +#include /* For _O_BINARY */ +#endif + +#ifdef SUPPORT_PCRE2GREP_CALLOUT +#ifdef WIN32 +#include +#else +#include +#endif +#endif + #ifdef HAVE_UNISTD_H #include #endif @@ -121,6 +138,20 @@ apply to fprintf(). */ #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {} +/* Under Windows, we have to set stdout to be binary, so that it does not +convert \r\n at the ends of output lines to \r\r\n. However, that means that +any messages written to stdout must have \r\n as their line terminator. This is +handled by using STDOUT_NL as the newline string. We also use a normal double +quote for the example, as single quotes aren't usually available. 
*/ + +#ifdef WIN32 +#define STDOUT_NL "\r\n" +#define QUOT "\"" +#else +#define STDOUT_NL "\n" +#define QUOT "'" +#endif + /************************************************* @@ -138,25 +169,29 @@ static const char *jfriedl_prefix = ""; static const char *jfriedl_postfix = ""; #endif -static char *colour_string = (char *)"1;31"; -static char *colour_option = NULL; -static char *dee_option = NULL; -static char *DEE_option = NULL; -static char *locale = NULL; +static const char *colour_string = "1;31"; +static const char *colour_option = NULL; +static const char *dee_option = NULL; +static const char *DEE_option = NULL; +static const char *locale = NULL; +static const char *newline_arg = NULL; +static const char *om_separator = ""; +static const char *stdin_name = "(standard input)"; + static char *main_buffer = NULL; -static char *newline_arg = NULL; -static char *om_separator = (char *)""; -static char *stdin_name = (char *)"(standard input)"; static int after_context = 0; static int before_context = 0; static int binary_files = BIN_BINARY; static int both_context = 0; static int bufthird = PCRE2GREP_BUFSIZE; +static int max_bufthird = PCRE2GREP_MAX_BUFSIZE; static int bufsize = 3*PCRE2GREP_BUFSIZE; static int endlinetype; +static int total_count = 0; +static int counts_printed = 0; -#if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H +#ifdef WIN32 static int dee_action = dee_SKIP; #else static int dee_action = dee_READ; @@ -185,6 +220,9 @@ static PCRE2_SIZE *offsets; static BOOL count_only = FALSE; static BOOL do_colour = FALSE; +#ifdef WIN32 +static BOOL do_ansi = FALSE; +#endif static BOOL file_offsets = FALSE; static BOOL hyphenpending = FALSE; static BOOL invert = FALSE; @@ -196,6 +234,7 @@ static BOOL omit_zero_count = FALSE; static BOOL resource_error = FALSE; static BOOL quiet = FALSE; static BOOL show_only_matching = FALSE; +static BOOL show_total_count = FALSE; static BOOL silent = FALSE; static BOOL utf = FALSE; @@ -324,6 +363,7 @@ used to identify them. 
*/ #define N_EXCLUDE_FROM (-19) #define N_INCLUDE_FROM (-20) #define N_OM_SEPARATOR (-21) +#define N_MAX_BUFSIZE (-22) static option_item optionlist[] = { { OP_NODATA, N_NULL, NULL, "", "terminate options" }, @@ -332,7 +372,8 @@ static option_item optionlist[] = { { OP_NODATA, 'a', NULL, "text", "treat binary files as text" }, { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" }, { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" }, - { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer size parameter" }, + { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer starting size" }, + { OP_NUMBER, N_MAX_BUFSIZE,&max_bufthird, "max-buffer-size=number", "set processing buffer maximum size" }, { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" }, { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" }, { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" }, @@ -348,11 +389,6 @@ static option_item optionlist[] = { { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" }, { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" }, { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" }, -#ifdef SUPPORT_PCRE2GREP_JIT - { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" }, -#else - { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcre2grep does not support JIT" }, -#endif { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" }, { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" }, { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" }, @@ -364,6 +400,11 @@ static option_item optionlist[] = { { OP_NODATA, 'M', NULL, "multiline", "run in 
multiline mode" }, { OP_STRING, 'N', &newline_arg, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" }, { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" }, +#ifdef SUPPORT_PCRE2GREP_JIT + { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" }, +#else + { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcre2grep does not support JIT" }, +#endif { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" }, { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" }, { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" }, @@ -378,6 +419,7 @@ static option_item optionlist[] = { { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" }, #endif { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" }, + { OP_NODATA, 't', NULL, "total-count", "print total count of matching lines" }, { OP_NODATA, 'u', NULL, "utf", "use UTF mode" }, { OP_NODATA, 'V', NULL, "version", "print version information and exit" }, { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" }, @@ -435,6 +477,34 @@ return 0; } +/************************************************* +* Parse GREP_COLORS * +*************************************************/ + +/* Extract ms or mt from GREP_COLORS. 
+ +Argument: the string, possibly NULL +Returns: the value of ms or mt, or NULL if neither present +*/ + +static char * +parse_grep_colors(const char *gc) +{ +static char seq[16]; +char *col; +uint32_t len; +if (gc == NULL) return NULL; +col = strstr(gc, "ms="); +if (col == NULL) col = strstr(gc, "mt="); +if (col == NULL) return NULL; +len = 0; +col += 3; +while (*col != ':' && *col != 0 && len < sizeof(seq)-1) + seq[len++] = *col++; +seq[len] = 0; +return seq; +} + /************************************************* * Exit from the program * @@ -657,6 +727,18 @@ return isatty(fileno(f)); } #endif + +/************* Print optionally coloured match Unix-style and z/OS **********/ + +static void +print_match(const char* buf, int length) +{ +if (length == 0) return; +if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string); +FWRITE(buf, 1, length, stdout); +if (do_colour) fprintf(stdout, "%c[0m", 0x1b); +} + /* End of Unix-style or native z/OS environment functions. */ @@ -665,11 +747,9 @@ return isatty(fileno(f)); /* I (Philip Hazel) have no means of testing this code. It was contributed by Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES when it did not exist. David Byron added a patch that moved the #include of - to before the INVALID_FILE_ATTRIBUTES definition rather than after. -The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is -undefined when it is indeed undefined. */ + to before the INVALID_FILE_ATTRIBUTES definition rather than after. */ -#elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H +#elif defined WIN32 #ifndef STRICT # define STRICT @@ -684,6 +764,11 @@ undefined when it is indeed undefined. */ #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF #endif +/* Allow opendirectory to provide globbing, since Microsoft started doing it +wrong (expanding quoted arguments). 
*/ + +#define iswild(name) (strpbrk(name, "*?") != NULL) + typedef struct directory_type { HANDLE handle; @@ -718,7 +803,10 @@ if ((pattern == NULL) || (dir == NULL)) pcre2grep_exit(2); } memcpy(pattern, filename, len); -memcpy(&(pattern[len]), "\\*", 3); +if (iswild(filename)) + pattern[len] = 0; +else + memcpy(&(pattern[len]), "\\*", 3); dir->handle = FindFirstFile(pattern, &(dir->data)); if (dir->handle != INVALID_HANDLE_VALUE) { @@ -776,18 +864,92 @@ return !isdirectory(filename); /************* Test for a terminal in Windows **********/ -/* I don't know how to do this; assume never */ - static BOOL is_stdout_tty(void) { -return FALSE; +return _isatty(_fileno(stdout)); } static BOOL is_file_tty(FILE *f) { -return FALSE; +return _isatty(_fileno(f)); +} + + +/************* Print optionally coloured match in Windows **********/ + +static HANDLE hstdout; +static CONSOLE_SCREEN_BUFFER_INFO csbi; +static WORD match_colour; + +static void +print_match(const char* buf, int length) +{ +if (length == 0) return; +if (do_colour) + { + if (do_ansi) fprintf(stdout, "%c[%sm", 0x1b, colour_string); + else SetConsoleTextAttribute(hstdout, match_colour); + } +FWRITE(buf, 1, length, stdout); +if (do_colour) + { + if (do_ansi) fprintf(stdout, "%c[0m", 0x1b); + else SetConsoleTextAttribute(hstdout, csbi.wAttributes); + } +} + +/* Convert ANSI BGR format to RGB used by Windows */ +#define BGR_RGB(x) ((x & 1 ? 4 : 0) | (x & 2) | (x & 4 ? 
1 : 0)) + +static WORD +decode_ANSI_colour(const char *cs) +{ +WORD result = csbi.wAttributes; +while (*cs) + { + if (isdigit(*cs)) + { + int code = atoi(cs); + if (code == 1) result |= 0x08; + else if (code == 4) result |= 0x8000; + else if (code == 5) result |= 0x80; + else if (code >= 30 && code <= 37) result = (result & 0xF8) | BGR_RGB(code - 30); + else if (code == 39) result = (result & 0xF0) | (csbi.wAttributes & 0x0F); + else if (code >= 40 && code <= 47) result = (result & 0x8F) | (BGR_RGB(code - 40) << 4); + else if (code == 49) result = (result & 0x0F) | (csbi.wAttributes & 0xF0); + /* aixterm high intensity colour codes */ + else if (code >= 90 && code <= 97) result = (result & 0xF0) | BGR_RGB(code - 90) | 0x08; + else if (code >= 100 && code <= 107) result = (result & 0x0F) | (BGR_RGB(code - 100) << 4) | 0x80; + + while (isdigit(*cs)) cs++; + } + + if (*cs) cs++; + } + +return result; +} + +static void +init_colour_output() +{ +if (do_colour) + { + hstdout = GetStdHandle(STD_OUTPUT_HANDLE); + /* This fails when redirected to con; try again if so. 
*/ + if (!GetConsoleScreenBufferInfo(hstdout, &csbi) && !do_ansi) + { + HANDLE hcon = CreateFile("CONOUT$", GENERIC_READ | GENERIC_WRITE, + FILE_SHARE_WRITE, NULL, OPEN_EXISTING, 0, NULL); + GetConsoleScreenBufferInfo(hcon, &csbi); + CloseHandle(hcon); + } + match_colour = decode_ANSI_colour(colour_string); + /* No valid colour found - turn off colouring */ + if (!match_colour) do_colour = FALSE; + } } /* End of Windows functions */ @@ -829,6 +991,16 @@ is_file_tty(FILE *f) return FALSE; } + +/************* Print optionally coloured match when we can't do it **********/ + +static void +print_match(const char* buf, int length) +{ +if (length == 0) return; +FWRITE(buf, 1, length, stdout); +} + #endif /* End of system-specific functions */ @@ -869,7 +1041,7 @@ for (op = optionlist; op->one_char != 0; op++) if (op->one_char > 0) fprintf(stderr, "%c", op->one_char); } fprintf(stderr, "] [long options] [pattern] [files]\n"); -fprintf(stderr, "Type `pcre2grep --help' for more information and the long " +fprintf(stderr, "Type \"pcre2grep --help\" for more information and the long " "options.\n"); return rc; } @@ -885,27 +1057,34 @@ help(void) { option_item *op; -printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n"); -printf("Search for PATTERN in each FILE or standard input.\n"); -printf("PATTERN must be present if neither -e nor -f is used.\n"); -printf("\"-\" can be used as a file name to mean STDIN.\n"); +printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL); +printf("Search for PATTERN in each FILE or standard input." STDOUT_NL); +printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL); + +#ifdef SUPPORT_PCRE2GREP_CALLOUT +printf("Callout scripts in patterns are supported." STDOUT_NL); +#else +printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL); +#endif + +printf("\"-\" can be used as a file name to mean STDIN." 
STDOUT_NL); #ifdef SUPPORT_LIBZ -printf("Files whose names end in .gz are read using zlib.\n"); +printf("Files whose names end in .gz are read using zlib." STDOUT_NL); #endif #ifdef SUPPORT_LIBBZ2 -printf("Files whose names end in .bz2 are read using bzlib2.\n"); +printf("Files whose names end in .bz2 are read using bzlib2." STDOUT_NL); #endif #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2 -printf("Other files and the standard input are read as plain files.\n\n"); +printf("Other files and the standard input are read as plain files." STDOUT_NL STDOUT_NL); #else -printf("All files are read as plain files, without any interpretation.\n\n"); +printf("All files are read as plain files, without any interpretation." STDOUT_NL STDOUT_NL); #endif -printf("Example: pcre2grep -i 'hello.*world' menu.h main.c\n\n"); -printf("Options:\n"); +printf("Example: pcre2grep -i " QUOT "hello.*world" QUOT " menu.h main.c" STDOUT_NL STDOUT_NL); +printf("Options:" STDOUT_NL); for (op = optionlist; op->one_char != 0; op++) { @@ -922,17 +1101,18 @@ for (op = optionlist; op->one_char != 0; op++) } if (n < 1) n = 1; - printf("%.*s%s\n", n, " ", op->help_text); + printf("%.*s%s" STDOUT_NL, n, " ", op->help_text); } -printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n"); -printf("The default value for --buffer-size is %d.\n", PCRE2GREP_BUFSIZE); -printf("When reading patterns or file names from a file, trailing white\n"); -printf("space is removed and blank lines are ignored.\n"); -printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN); +printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --max-buffer-size=100K." STDOUT_NL); +printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE); +printf("The default value for --max-buffer-size is %d." STDOUT_NL, PCRE2GREP_MAX_BUFSIZE); +printf("When reading patterns or file names from a file, trailing white" STDOUT_NL); +printf("space is removed and blank lines are ignored." 
STDOUT_NL); +printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN); -printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n"); -printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n"); +printf(STDOUT_NL "With no FILEs, read standard input. If fewer than two FILEs given, assume -h." STDOUT_NL); +printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble." STDOUT_NL); } @@ -1073,12 +1253,12 @@ return om; * Read one line of input * *************************************************/ -/* Normally, input is read using fread() into a large buffer, so many lines may -be read at once. However, doing this for tty input means that no output appears -until a lot of input has been typed. Instead, tty input is handled line by -line. We cannot use fgets() for this, because it does not stop at a binary -zero, and therefore there is no way of telling how many characters it has read, -because there may be binary zeros embedded in the data. +/* Normally, input is read using fread() (or gzread, or BZ2_read) into a large +buffer, so many lines may be read at once. However, doing this for tty input +means that no output appears until a lot of input has been typed. Instead, tty +input is handled line by line. We cannot use fgets() for this, because it does +not stop at a binary zero, and therefore there is no way of telling how many +characters it has read, because there may be binary zeros embedded in the data. 
Arguments: buffer the buffer to read into @@ -1166,7 +1346,7 @@ switch(endlinetype) while (p < endptr) { int extra = 0; - register int c = *((unsigned char *)p); + int c = *((unsigned char *)p); if (utf && c >= 0xc0) { @@ -1210,7 +1390,7 @@ switch(endlinetype) while (p < endptr) { int extra = 0; - register int c = *((unsigned char *)p); + int c = *((unsigned char *)p); if (utf && c >= 0xc0) { @@ -1312,7 +1492,7 @@ switch(endlinetype) while (p > startptr) { - register unsigned int c; + unsigned int c; char *pp = p - 1; if (utf) @@ -1392,22 +1572,23 @@ Returns: nothing static void do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr, - char *printname) + const char *printname) { if (after_context > 0 && lastmatchnumber > 0) { int count = 0; - while (lastmatchrestart < endptr && count++ < after_context) + while (lastmatchrestart < endptr && count < after_context) { int ellength; - char *pp = lastmatchrestart; + char *pp = end_of_line(lastmatchrestart, endptr, &ellength); + if (ellength == 0 && pp == main_buffer + bufsize) break; if (printname != NULL) fprintf(stdout, "%s-", printname); if (number) fprintf(stdout, "%d-", lastmatchnumber++); - pp = end_of_line(pp, endptr, &ellength); FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout); lastmatchrestart = pp; + count++; } - hyphenpending = TRUE; + if (count > 0) hyphenpending = TRUE; } } @@ -1473,6 +1654,309 @@ return FALSE; /* No match, no errors */ } +#ifdef SUPPORT_PCRE2GREP_CALLOUT + +/************************************************* +* Parse and execute callout scripts * +*************************************************/ + +/* This function parses a callout string block and executes the +program specified by the string. The string is a list of substrings +separated by pipe characters. The first substring represents the +executable name, and the following substrings specify the arguments: + + program_name|param1|param2|... 
+ +Any substirng (including the program name) can contain escape sequences +started by the dollar character. The escape sequences are substituted as +follows: + + $ or ${} is replaced by the captured substring of the given + decimal number, which must be greater than zero. If the number is greater + than the number of capturing substrings, or if the capture is unset, the + replacement is empty. + + Any other character is substituted by itself. E.g: $$ is replaced by a single + dollar or $| replaced by a pipe character. + +Example: + + echo -e "abcde\n12345" | pcre2grep \ + '(.)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' - + + Output: + + Arg1: [a] [bcd] [d] Arg2: |a| () + abcde + Arg1: [1] [234] [4] Arg2: |1| () + 12345 + +Arguments: + blockptr the callout block + +Returns: currently it always returns with 0 +*/ + +static int +pcre2grep_callout(pcre2_callout_block *calloutptr, void *unused) +{ +PCRE2_SIZE length = calloutptr->callout_string_length; +PCRE2_SPTR string = calloutptr->callout_string; +PCRE2_SPTR subject = calloutptr->subject; +PCRE2_SIZE *ovector = calloutptr->offset_vector; +PCRE2_SIZE capture_top = calloutptr->capture_top; +PCRE2_SIZE argsvectorlen = 2; +PCRE2_SIZE argslen = 1; +char *args; +char *argsptr; +char **argsvector; +char **argsvectorptr; +#ifndef WIN32 +pid_t pid; +#endif +int result = 0; + +(void)unused; /* Avoid compiler warning */ + +/* Only callout with strings are supported. */ +if (string == NULL || length == 0) return 0; + +/* Checking syntax and compute the number of string fragments. Callout strings +are ignored in case of a syntax error. */ + +while (length > 0) + { + if (*string == '|') + { + argsvectorlen++; + + /* Maximum 10000 arguments allowed. */ + if (argsvectorlen > 10000) return 0; + } + else if (*string == '$') + { + PCRE2_SIZE capture_id = 0; + + string++; + length--; + + /* Syntax error: a character must be present after $. 
*/ + if (length == 0) return 0; + + if (*string >= '1' && *string <= '9') + { + do + { + /* Maximum capture id is 65535. */ + if (capture_id <= 65535) + capture_id = capture_id * 10 + (*string - '0'); + + string++; + length--; + } + while (length > 0 && *string >= '0' && *string <= '9'); + + /* To negate the effect of string++ below. */ + string--; + length++; + } + else if (*string == '{') + { + /* Must be a decimal number in braces, e.g: {5} or {38} */ + string++; + length--; + + /* Syntax error: a decimal number required. */ + if (length == 0) return 0; + if (*string < '1' || *string > '9') return 0; + + do + { + /* Maximum capture id is 65535. */ + if (capture_id <= 65535) + capture_id = capture_id * 10 + (*string - '0'); + + string++; + length--; + + /* Syntax error: no more characters */ + if (length == 0) return 0; + } + while (*string >= '0' && *string <= '9'); + + /* Syntax error: closing brace is missing. */ + if (*string != '}') return 0; + } + + if (capture_id > 0) + { + if (capture_id < capture_top) + { + capture_id *= 2; + argslen += ovector[capture_id + 1] - ovector[capture_id]; + } + + /* To negate the effect of argslen++ below. */ + argslen--; + } + } + + string++; + length--; + argslen++; + } + +args = (char*)malloc(argslen); +if (args == NULL) return 0; + +argsvector = (char**)malloc(argsvectorlen * sizeof(char*)); +if (argsvector == NULL) + { + free(args); + return 0; + } + +argsptr = args; +argsvectorptr = argsvector; + +*argsvectorptr++ = argsptr; + +length = calloutptr->callout_string_length; +string = calloutptr->callout_string; + +while (length > 0) + { + if (*string == '|') + { + *argsptr++ = '\0'; + *argsvectorptr++ = argsptr; + } + else if (*string == '$') + { + string++; + length--; + + if ((*string >= '1' && *string <= '9') || *string == '{') + { + PCRE2_SIZE capture_id = 0; + + if (*string != '{') + { + do + { + /* Maximum capture id is 65535. 
*/ + if (capture_id <= 65535) + capture_id = capture_id * 10 + (*string - '0'); + + string++; + length--; + } + while (length > 0 && *string >= '0' && *string <= '9'); + + /* To negate the effect of string++ below. */ + string--; + length++; + } + else + { + string++; + length--; + + do + { + /* Maximum capture id is 65535. */ + if (capture_id <= 65535) + capture_id = capture_id * 10 + (*string - '0'); + + string++; + length--; + } + while (*string != '}'); + } + + if (capture_id < capture_top) + { + PCRE2_SIZE capturesize; + capture_id *= 2; + + capturesize = ovector[capture_id + 1] - ovector[capture_id]; + memcpy(argsptr, subject + ovector[capture_id], capturesize); + argsptr += capturesize; + } + } + else + { + *argsptr++ = *string; + } + } + else + { + *argsptr++ = *string; + } + + string++; + length--; + } + +*argsptr++ = '\0'; +*argsvectorptr = NULL; + +#ifdef WIN32 +result = _spawnvp(_P_WAIT, argsvector[0], (const char * const *)argsvector); +#else +pid = fork(); + +if (pid == 0) + { + (void)execv(argsvector[0], argsvector); + /* Control gets here if there is an error, e.g. a non-existent program */ + exit(1); + } +else if (pid > 0) + (void)waitpid(pid, &result, 0); +#endif + +free(args); +free(argsvector); + +/* Currently negative return values are not supported, only zero (match +continues) or non-zero (match fails). */ + +return result != 0; +} + +#endif + + + +/************************************************* +* Read a portion of the file into buffer * +*************************************************/ + +static int +fill_buffer(void *handle, int frtype, char *buffer, int length, + BOOL input_line_buffered) +{ +(void)frtype; /* Avoid warning when not used */ + +#ifdef SUPPORT_LIBZ +if (frtype == FR_LIBZ) + return gzread((gzFile)handle, buffer, length); +else +#endif + +#ifdef SUPPORT_LIBBZ2 +if (frtype == FR_LIBBZ2) + return BZ2_bzread((BZFILE *)handle, buffer, length); +else +#endif + +return (input_line_buffered ? 
+ read_one_line(buffer, length, (FILE *)handle) : + fread(buffer, 1, length, (FILE *)handle)); +} + + /************************************************* * Grep an individual file * @@ -1502,7 +1986,7 @@ Returns: 0 if there was at least one match */ static int -pcre2grep(void *handle, int frtype, char *filename, char *printname) +pcre2grep(void *handle, int frtype, const char *filename, const char *printname) { int rc = 1; int linenumber = 1; @@ -1518,49 +2002,24 @@ BOOL endhyphenpending = FALSE; BOOL input_line_buffered = line_buffered; FILE *in = NULL; /* Ensure initialized */ -#ifdef SUPPORT_LIBZ -gzFile ingz = NULL; -#endif - -#ifdef SUPPORT_LIBBZ2 -BZFILE *inbz2 = NULL; -#endif - - /* Do the first read into the start of the buffer and set up the pointer to end of what we have. In the case of libz, a non-zipped .gz file will be read as a plain file. However, if a .bz2 file isn't actually bzipped, the first read will fail. */ -(void)frtype; - -#ifdef SUPPORT_LIBZ -if (frtype == FR_LIBZ) - { - ingz = (gzFile)handle; - bufflength = gzread (ingz, main_buffer, bufsize); - } -else -#endif - -#ifdef SUPPORT_LIBBZ2 -if (frtype == FR_LIBBZ2) - { - inbz2 = (BZFILE *)handle; - bufflength = BZ2_bzread(inbz2, main_buffer, bufsize); - if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */ - } /* without the cast it is unsigned. */ -else -#endif - +if (frtype != FR_LIBZ && frtype != FR_LIBBZ2) { in = (FILE *)handle; if (is_file_tty(in)) input_line_buffered = TRUE; - bufflength = input_line_buffered? - read_one_line(main_buffer, bufsize, in) : - fread(main_buffer, 1, bufsize, in); } +bufflength = fill_buffer(handle, frtype, main_buffer, bufsize, + input_line_buffered); + +#ifdef SUPPORT_LIBBZ2 +if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */ +#endif + endptr = main_buffer + bufflength; /* Unless binary-files=text, see if we have a binary file. 
This uses the same @@ -1591,7 +2050,7 @@ while (ptr < endptr) size_t startoffset = 0; /* At this point, ptr is at the start of a line. We need to find the length - of the subject string to pass to pcre_exec(). In multiline mode, it is the + of the subject string to pass to pcre2_match(). In multiline mode, it is the length remainder of the data in the buffer. Otherwise, it is the length of the next line, excluding the terminating newline. After matching, we always advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE @@ -1604,16 +2063,61 @@ while (ptr < endptr) /* Check to see if the line we are looking at extends right to the very end of the buffer without a line terminator. This means the line is too long to - handle. */ + handle at the current buffer size. Until the buffer reaches its maximum size, + try doubling it and reading more data. */ if (endlinelength == 0 && t == main_buffer + bufsize) { - fprintf(stderr, "pcre2grep: line %d%s%s is too long for the internal buffer\n" - "pcre2grep: check the --buffer-size option\n", - linenumber, - (filename == NULL)? "" : " of file ", - (filename == NULL)? "" : filename); - return 2; + if (bufthird < max_bufthird) + { + char *new_buffer; + int new_bufthird = 2*bufthird; + + if (new_bufthird > max_bufthird) new_bufthird = max_bufthird; + new_buffer = (char *)malloc(3*new_bufthird); + + if (new_buffer == NULL) + { + fprintf(stderr, + "pcre2grep: line %d%s%s is too long for the internal buffer\n" + "pcre2grep: not enough memory to increase the buffer size to %d\n", + linenumber, + (filename == NULL)? "" : " of file ", + (filename == NULL)? "" : filename, + new_bufthird); + return 2; + } + + /* Copy the data and adjust pointers to the new buffer location. 
*/ + + memcpy(new_buffer, main_buffer, bufsize); + bufthird = new_bufthird; + bufsize = 3*bufthird; + ptr = new_buffer + (ptr - main_buffer); + lastmatchrestart = new_buffer + (lastmatchrestart - main_buffer); + free(main_buffer); + main_buffer = new_buffer; + + /* Read more data into the buffer and then try to find the line ending + again. */ + + bufflength += fill_buffer(handle, frtype, main_buffer + bufflength, + bufsize - bufflength, input_line_buffered); + endptr = main_buffer + bufflength; + continue; + } + else + { + fprintf(stderr, + "pcre2grep: line %d%s%s is too long for the internal buffer\n" + "pcre2grep: the maximum buffer size is %d\n" + "pcre2grep: use the --max-buffer-size option to change it\n", + linenumber, + (filename == NULL)? "" : " of file ", + (filename == NULL)? "" : filename, + bufthird); + return 2; + } } /* Extra processing for Jeffrey Friedl's debugging. */ @@ -1691,9 +2195,13 @@ while (ptr < endptr) if (filenames == FN_NOMATCH_ONLY) return 1; + /* If all we want is a yes/no answer, we can return immediately. */ + + if (quiet) return 0; + /* Just count if just counting is wanted. */ - if (count_only) count++; + else if (count_only || show_total_count) count++; /* When handling a binary file and binary-files==binary, the "binary" variable will be set true (it's false in all other cases). In this @@ -1701,23 +2209,19 @@ while (ptr < endptr) else if (binary) { - fprintf(stdout, "Binary file %s matches\n", filename); + fprintf(stdout, "Binary file %s matches" STDOUT_NL, filename); return 0; } - /* If all we want is a file name, there is no need to scan any more lines - in the file. */ + /* Likewise, if all we want is a file name, there is no need to scan any + more lines in the file. */ else if (filenames == FN_MATCH_ONLY) { - fprintf(stdout, "%s\n", printname); + fprintf(stdout, "%s" STDOUT_NL, printname); return 0; } - /* Likewise, if all we want is a yes/no answer. 
*/ - - else if (quiet) return 0; - /* The --only-matching option prints just the substring that matched, and/or one or more captured portions of it, as long as these strings are not empty. The --file-offsets and --line-offsets options output offsets for @@ -1739,13 +2243,13 @@ while (ptr < endptr) /* Handle --line-offsets */ if (line_offsets) - fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr), + fprintf(stdout, "%d,%d" STDOUT_NL, (int)(matchptr + offsets[0] - ptr), (int)(offsets[1] - offsets[0])); /* Handle --file-offsets */ else if (file_offsets) - fprintf(stdout, "%d,%d\n", + fprintf(stdout, "%d,%d" STDOUT_NL, (int)(filepos + matchptr + offsets[0] - ptr), (int)(offsets[1] - offsets[0])); @@ -1765,34 +2269,51 @@ while (ptr < endptr) if (plen > 0) { if (printed) fprintf(stdout, "%s", om_separator); - if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string); - FWRITE(matchptr + offsets[n*2], 1, plen, stdout); - if (do_colour) fprintf(stdout, "%c[00m", 0x1b); + print_match(matchptr + offsets[n*2], plen); printed = TRUE; } } } - if (printed || printname != NULL || number) fprintf(stdout, "\n"); + if (printed || printname != NULL || number) + fprintf(stdout, STDOUT_NL); } - /* Prepare to repeat to find the next match. If the pattern contained a - lookbehind that included \K, it is possible that the end of the match - might be at or before the actual starting offset we have just used. In - this case, start one character further on. */ + /* Prepare to repeat to find the next match in the line. */ match = FALSE; if (line_buffered) fflush(stdout); rc = 0; /* Had some success */ + + /* If the pattern contained a lookbehind that included \K, it is + possible that the end of the match might be at or before the actual + starting offset we have just used. In this case, start one character + further on. 
*/ + startoffset = offsets[1]; /* Restart after the match */ oldstartoffset = pcre2_get_startchar(match_data); if (startoffset <= oldstartoffset) { if (startoffset >= length) goto END_ONE_MATCH; /* Were at end */ startoffset = oldstartoffset + 1; - if (utf) - while ((matchptr[startoffset] & 0xc0) == 0x80) startoffset++; + if (utf) while ((matchptr[startoffset] & 0xc0) == 0x80) startoffset++; } + + /* If the current match ended past the end of the line (only possible + in multiline mode), we must move on to the line in which it did end + before searching for more matches. */ + + while (startoffset > linelength) + { + matchptr = ptr += linelength + endlinelength; + filepos += (int)(linelength + endlinelength); + linenumber++; + startoffset -= (int)(linelength + endlinelength); + t = end_of_line(ptr, endptr, &endlinelength); + linelength = t - ptr - endlinelength; + length = (size_t)(endptr - ptr); + } + goto ONLY_MATCHING_RESTART; } } @@ -1838,7 +2359,7 @@ while (ptr < endptr) if (hyphenpending) { - fprintf(stdout, "--\n"); + fprintf(stdout, "--" STDOUT_NL); hyphenpending = FALSE; hyphenprinted = TRUE; } @@ -1859,7 +2380,7 @@ while (ptr < endptr) } if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted) - fprintf(stdout, "--\n"); + fprintf(stdout, "--" STDOUT_NL); while (p < ptr) { @@ -1926,9 +2447,7 @@ while (ptr < endptr) { int plength; FWRITE(ptr, 1, offsets[0], stdout); - fprintf(stdout, "%c[%sm", 0x1b, colour_string); - FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout); - fprintf(stdout, "%c[00m", 0x1b); + print_match(ptr + offsets[0], offsets[1] - offsets[0]); for (;;) { startoffset = offsets[1]; @@ -1936,9 +2455,7 @@ while (ptr < endptr) !match_patterns(matchptr, length, options, startoffset, &mrc)) break; FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout); - fprintf(stdout, "%c[%sm", 0x1b, colour_string); - FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout); - fprintf(stdout, "%c[00m", 0x1b); + 
print_match(matchptr + offsets[0], offsets[1] - offsets[0]); } /* In multiline mode, we may have already printed the complete line @@ -2015,7 +2532,7 @@ while (ptr < endptr) lastmatchrestart < main_buffer + bufthird) { do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname); - lastmatchnumber = 0; + lastmatchnumber = 0; /* Indicates no after lines pending */ } /* Now do the shuffle */ @@ -2023,24 +2540,8 @@ while (ptr < endptr) memmove(main_buffer, main_buffer + bufthird, 2*bufthird); ptr -= bufthird; -#ifdef SUPPORT_LIBZ - if (frtype == FR_LIBZ) - bufflength = 2*bufthird + - gzread (ingz, main_buffer + 2*bufthird, bufthird); - else -#endif - -#ifdef SUPPORT_LIBBZ2 - if (frtype == FR_LIBBZ2) - bufflength = 2*bufthird + - BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird); - else -#endif - - bufflength = 2*bufthird + - (input_line_buffered? - read_one_line(main_buffer + 2*bufthird, bufthird, in) : - fread(main_buffer + 2*bufthird, 1, bufthird, in)); + bufflength = 2*bufthird + fill_buffer(handle, frtype, + main_buffer + 2*bufthird, bufthird, input_line_buffered); endptr = main_buffer + bufflength; /* Adjust any last match point */ @@ -2052,7 +2553,7 @@ while (ptr < endptr) /* End of file; print final "after" lines if wanted; do_after_lines sets hyphenpending if it prints something. */ -if (!show_only_matching && !count_only) +if (!show_only_matching && !(count_only|show_total_count)) { do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname); hyphenpending |= endhyphenpending; @@ -2063,22 +2564,24 @@ were none. If we found a match, we won't have got this far. 
*/ if (filenames == FN_NOMATCH_ONLY) { - fprintf(stdout, "%s\n", printname); + fprintf(stdout, "%s" STDOUT_NL, printname); return 0; } /* Print the match count if wanted */ -if (count_only) +if (count_only && !quiet) { if (count > 0 || !omit_zero_count) { if (printname != NULL && filenames != FN_NONE) fprintf(stdout, "%s:", printname); - fprintf(stdout, "%d\n", count); + fprintf(stdout, "%d" STDOUT_NL, count); + counts_printed++; } } +total_count += count; /* Can be set without count_only */ return rc; } @@ -2223,6 +2726,36 @@ if (isdirectory(pathname)) } } +#ifdef WIN32 +if (iswild(pathname)) + { + char buffer[1024]; + char *nextfile; + char *name; + directory_type *dir = opendirectory(pathname); + + if (dir == NULL) + return 0; + + for (nextfile = name = pathname; *nextfile != 0; nextfile++) + if (*nextfile == '/' || *nextfile == '\\') + name = nextfile + 1; + *name = 0; + + while ((nextfile = readdirectory(dir)) != NULL) + { + int frc; + sprintf(buffer, "%.512s%.128s", pathname, nextfile); + frc = grep_or_recurse(buffer, dir_recurse, FALSE); + if (frc > 1) rc = frc; + else if (frc == 0 && rc == 1) rc = 0; + } + + closedirectory(dir); + return rc; + } +#endif + #if defined NATIVE_ZOS } #endif @@ -2387,6 +2920,7 @@ switch(letter) case 'q': quiet = TRUE; break; case 'r': dee_action = dee_RECURSE; break; case 's': silent = TRUE; break; + case 't': show_total_count = TRUE; break; case 'u': options |= PCRE2_UTF; utf = TRUE; break; case 'v': invert = TRUE; break; case 'w': process_options |= PO_WORD_MATCH; break; @@ -2396,7 +2930,7 @@ switch(letter) { unsigned char buffer[128]; (void)pcre2_config(PCRE2_CONFIG_VERSION, buffer); - fprintf(stdout, "pcre2grep version %s\n", buffer); + fprintf(stdout, "pcre2grep version %s" STDOUT_NL, buffer); } pcre2grep_exit(0); break; @@ -2421,10 +2955,12 @@ return options; static char * ordin(int n) { -static char buffer[8]; +static char buffer[14]; char *p = buffer; sprintf(p, "%d", n); while (*p != 0) p++; +n %= 100; +if (n >= 11 && n 
<= 13) n = 0; switch (n%10) { case 1: strcpy(p, "st"); break; @@ -2488,9 +3024,20 @@ if ((popts & PO_FIXED_STRINGS) != 0) } sprintf((char *)buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]); -p->compiled = pcre2_compile(buffer, -1, options, &errcode, &erroffset, - compile_context); -if (p->compiled != NULL) return TRUE; +p->compiled = pcre2_compile(buffer, PCRE2_ZERO_TERMINATED, options, &errcode, + &erroffset, compile_context); + +/* Handle successful compile. Try JIT-compiling if supported and enabled. We +ignore any JIT compiler errors, relying falling back to interpreting if +anything goes wrong with JIT. */ + +if (p->compiled != NULL) + { +#ifdef SUPPORT_PCRE2GREP_JIT + if (use_jit) (void)pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE); +#endif + return TRUE; + } /* Handle compile errors */ @@ -2538,7 +3085,7 @@ read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts) { int linenumber = 0; FILE *f; -char *filename; +const char *filename; char buffer[PATBUFSIZE]; if (strcmp(name, "-") == 0) @@ -2623,6 +3170,16 @@ const char *locale_from = "--locale"; pcre2_jit_stack *jit_stack = NULL; #endif +/* In Windows, stdout is set up as a text stream, which means that \n is +converted to \r\n. This causes output lines that are copied from the input to +change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure +that stdout is a binary stream. Note that this means all other output to stdout +must use STDOUT_NL to terminate lines. */ + +#ifdef WIN32 +_setmode(_fileno(stdout), _O_BINARY); +#endif + /* Set up a default compile and match contexts and a match data block. */ compile_context = pcre2_compile_context_create(NULL); @@ -2630,6 +3187,13 @@ match_context = pcre2_match_context_create(NULL); match_data = pcre2_match_data_create(OFFSET_SIZE, NULL); offsets = pcre2_get_ovector_pointer(match_data); +/* If string (script) callouts are supported, set up the callout processing +function. 
*/ + +#ifdef SUPPORT_PCRE2GREP_CALLOUT +pcre2_set_callout(match_context, pcre2grep_callout, NULL); +#endif + /* Process the options */ for (i = 1; i < argc; i++) @@ -2836,7 +3400,7 @@ for (i = 1; i < argc; i++) switch (op->one_char) { case N_COLOUR: - colour_option = (char *)"auto"; + colour_option = "auto"; break; case 'o': @@ -2977,7 +3541,7 @@ LC_ALL environment variable is set, and if so, use it. */ if (locale == NULL) { locale = getenv("LC_ALL"); - locale_from = "LCC_ALL"; + locale_from = "LC_ALL"; } if (locale == NULL) @@ -3005,7 +3569,11 @@ if (locale != NULL) if (colour_option != NULL && strcmp(colour_option, "never") != 0) { - if (strcmp(colour_option, "always") == 0) do_colour = TRUE; + if (strcmp(colour_option, "always") == 0) +#ifdef WIN32 + do_ansi = !is_stdout_tty(), +#endif + do_colour = TRUE; else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty(); else { @@ -3017,7 +3585,17 @@ if (colour_option != NULL && strcmp(colour_option, "never") != 0) { char *cs = getenv("PCRE2GREP_COLOUR"); if (cs == NULL) cs = getenv("PCRE2GREP_COLOR"); - if (cs != NULL) colour_string = cs; + if (cs == NULL) cs = getenv("PCREGREP_COLOUR"); + if (cs == NULL) cs = getenv("PCREGREP_COLOR"); + if (cs == NULL) cs = parse_grep_colors(getenv("GREP_COLORS")); + if (cs == NULL) cs = getenv("GREP_COLOR"); + if (cs != NULL) + { + if (strspn(cs, ";0123456789") == strlen(cs)) colour_string = cs; + } +#ifdef WIN32 + init_colour_output(); +#endif } } @@ -3087,8 +3665,24 @@ if (jfriedl_XT != 0 || jfriedl_XR != 0) } #endif +/* If use_jit is set, check whether JIT is available. If not, do not try +to use JIT. */ + +if (use_jit) + { + uint32_t answer; + (void)pcre2_config(PCRE2_CONFIG_JIT, &answer); + if (!answer) use_jit = FALSE; + } + /* Get memory for the main buffer. 
*/ +if (bufthird <= 0) + { + fprintf(stderr, "pcre2grep: --buffer-size must be greater than zero\n"); + goto EXIT2; + } + bufsize = 3*bufthird; main_buffer = (char *)malloc(bufsize); @@ -3230,6 +3824,16 @@ for (; i < argc; i++) else if (frc == 0 && rc == 1) rc = 0; } +/* Show the total number of matches if requested, but not if only one file's +count was printed. */ + +if (show_total_count && counts_printed != 1 && filenames != FN_NOMATCH_ONLY) + { + if (counts_printed != 0 && filenames >= FN_DEFAULT) + fprintf(stdout, "TOTAL:"); + fprintf(stdout, "%d" STDOUT_NL, total_count); + } + EXIT: #ifdef SUPPORT_PCRE2GREP_JIT if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack); diff --git a/pcre2/src/pcre2posix.c b/pcre2/src/pcre2posix.c index da212fc4d..4ecc701c2 100644 --- a/pcre2/src/pcre2posix.c +++ b/pcre2/src/pcre2posix.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2014 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -58,16 +58,49 @@ previously been set. */ # define PCRE2POSIX_EXP_DEFN __declspec(dllexport) #endif -/* We include pcre2.h before pcre2_internal.h so that the PCRE2 library -functions are declared as "import" for Windows by defining PCRE2_EXP_DECL as -"import". This is needed even though pcre2_internal.h itself includes pcre2.h, -because it does so after it has set PCRE2_EXP_DECL to "export" if it is not -already set. */ +/* Older versions of MSVC lack snprintf(). This define allows for +warning/error-free compilation and testing with MSVC compilers back to at least +MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). 
*/ + +#if defined(_MSC_VER) && (_MSC_VER < 1900) +#define snprintf _snprintf +#endif + + +/* Compile-time error numbers start at this value. It should probably never be +changed. This #define is a copy of the one in pcre2_internal.h. */ + +#define COMPILE_ERROR_BASE 100 + + +/* Standard C headers */ + +#include +#include +#include +#include +#include +#include + +/* PCRE2 headers */ #include "pcre2.h" -#include "pcre2_internal.h" #include "pcre2posix.h" +/* When compiling with the MSVC compiler, it is sometimes necessary to include +a "calling convention" before exported function names. (This is secondhand +information; I know nothing about MSVC myself). For example, something like + + void __cdecl function(....) + +might be needed. In order so make this easy, all the exported functions have +PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not +set, we ensure here that it has no effect. */ + +#ifndef PCRE2_CALL_CONVENTION +#define PCRE2_CALL_CONVENTION +#endif + /* Table to translate PCRE2 compile time error codes into POSIX error codes. Only a few PCRE2 errors with a value greater than 23 turn into special POSIX codes: most go to REG_BADPAT. The second table lists, in pairs, those that @@ -106,7 +139,7 @@ static const int eint1[] = { static const int eint2[] = { 30, REG_ECTYPE, /* unknown POSIX class name */ - 32, REG_INVARG, /* this version of PCRE does not have UTF or UCP support */ + 32, REG_INVARG, /* this version of PCRE2 does not have Unicode support */ 37, REG_EESCAPE, /* PCRE2 does not support \L, \l, \N{name}, \U, or \u */ 56, REG_INVARG, /* internal error: unknown newline setting */ }; @@ -144,29 +177,23 @@ static const char *const pstring[] = { PCRE2POSIX_EXP_DEFN size_t PCRE2_CALL_CONVENTION regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size) { -const char *message, *addmessage; -size_t length, addlength; +int used; +const char *message; -message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))? 
+message = (errcode <= 0 || errcode >= (int)(sizeof(pstring)/sizeof(char *)))? "unknown error code" : pstring[errcode]; -length = strlen(message) + 1; -addmessage = " at offset "; -addlength = (preg != NULL && (int)preg->re_erroffset != -1)? - strlen(addmessage) + 6 : 0; - -if (errbuf_size > 0) +if (preg != NULL && (int)preg->re_erroffset != -1) { - if (addlength > 0 && errbuf_size >= length + addlength) - sprintf(errbuf, "%s%s%-6d", message, addmessage, (int)preg->re_erroffset); - else - { - strncpy(errbuf, message, errbuf_size - 1); - errbuf[errbuf_size-1] = 0; - } + used = snprintf(errbuf, errbuf_size, "%s at offset %-6d", message, + (int)preg->re_erroffset); + } +else + { + used = snprintf(errbuf, errbuf_size, "%s", message); } -return length + addlength; +return used + 1; } @@ -211,11 +238,11 @@ int re_nsub = 0; if ((cflags & REG_ICASE) != 0) options |= PCRE2_CASELESS; if ((cflags & REG_NEWLINE) != 0) options |= PCRE2_MULTILINE; if ((cflags & REG_DOTALL) != 0) options |= PCRE2_DOTALL; -if ((cflags & REG_NOSUB) != 0) options |= PCRE2_NO_AUTO_CAPTURE; if ((cflags & REG_UTF) != 0) options |= PCRE2_UTF; if ((cflags & REG_UCP) != 0) options |= PCRE2_UCP; if ((cflags & REG_UNGREEDY) != 0) options |= PCRE2_UNGREEDY; +preg->re_cflags = cflags; preg->re_pcre2_code = pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED, options, &errorcode, &erroffset, NULL); preg->re_erroffset = erroffset; @@ -223,8 +250,13 @@ preg->re_erroffset = erroffset; if (preg->re_pcre2_code == NULL) { unsigned int i; - if (errorcode < 0) return REG_BADPAT; /* UTF error */ + + /* A negative value is a UTF error; otherwise all error codes are greater + than COMPILE_ERROR_BASE, but check, just in case. 
*/ + + if (errorcode < COMPILE_ERROR_BASE) return REG_BADPAT; errorcode -= COMPILE_ERROR_BASE; + if (errorcode < (int)(sizeof(eint1)/sizeof(const int))) return eint1[errorcode]; for (i = 0; i < sizeof(eint2)/(2*sizeof(const int)); i += 2) @@ -235,8 +267,14 @@ if (preg->re_pcre2_code == NULL) (void)pcre2_pattern_info((const pcre2_code *)preg->re_pcre2_code, PCRE2_INFO_CAPTURECOUNT, &re_nsub); preg->re_nsub = (size_t)re_nsub; -if ((options & PCRE2_NO_AUTO_CAPTURE) != 0) re_nsub = -1; preg->re_match_data = pcre2_match_data_create(re_nsub + 1, NULL); + +if (preg->re_match_data == NULL) + { + pcre2_code_free(preg->re_pcre2_code); + return REG_ESPACE; + } + return 0; } @@ -248,8 +286,7 @@ return 0; /* A suitable match_data block, large enough to hold all possible captures, was obtained when the pattern was compiled, to save having to allocate and free it -for each match. If REG_NOSUB was specified at compile time, the -PCRE_NO_AUTO_CAPTURE flag will be set. When this is the case, the nmatch and +for each match. If REG_NOSUB was specified at compile time, the nmatch and pmatch arguments are ignored, and the only result is yes/no/error. */ PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION @@ -266,11 +303,11 @@ if ((eflags & REG_NOTEMPTY) != 0) options |= PCRE2_NOTEMPTY; ((regex_t *)preg)->re_erroffset = (size_t)(-1); /* Only has meaning after compile */ -/* When no string data is being returned, or no vector has been passed in which -to put it, ensure that nmatch is zero. */ +/* When REG_NOSUB was specified, or if no vector has been passed in which to +put captured strings, ensure that nmatch is zero. This will stop any attempt to +write to pmatch. */ -if ((((pcre2_real_code *)(preg->re_pcre2_code))->compile_options & - PCRE2_NO_AUTO_CAPTURE) != 0 || pmatch == NULL) nmatch = 0; +if ((preg->re_cflags & REG_NOSUB) != 0 || pmatch == NULL) nmatch = 0; /* REG_STARTEND is a BSD extension, to allow for non-NUL-terminated strings. 
The man page from OS X says "REG_STARTEND affects only the location of the @@ -279,6 +316,7 @@ start location rather than being passed as a PCRE2 "starting offset". */ if ((eflags & REG_STARTEND) != 0) { + if (pmatch == NULL) return REG_INVARG; so = pmatch[0].rm_so; eo = pmatch[0].rm_eo; } @@ -296,11 +334,12 @@ rc = pcre2_match((const pcre2_code *)preg->re_pcre2_code, if (rc >= 0) { size_t i; + PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md); if ((size_t)rc > nmatch) rc = (int)nmatch; for (i = 0; i < (size_t)rc; i++) { - pmatch[i].rm_so = md->ovector[i*2]; - pmatch[i].rm_eo = md->ovector[i*2+1]; + pmatch[i].rm_so = ovector[i*2]; + pmatch[i].rm_eo = ovector[i*2+1]; } for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1; return 0; diff --git a/pcre2/src/pcre2posix.h b/pcre2/src/pcre2posix.h index 6f19b51b2..6505976aa 100644 --- a/pcre2/src/pcre2posix.h +++ b/pcre2/src/pcre2posix.h @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2014 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -56,7 +56,7 @@ extern "C" { #define REG_NOTBOL 0x0004 /* Maps to PCRE2_NOTBOL */ #define REG_NOTEOL 0x0008 /* Maps to PCRE2_NOTEOL */ #define REG_DOTALL 0x0010 /* NOT defined by POSIX; maps to PCRE2_DOTALL */ -#define REG_NOSUB 0x0020 /* Maps to PCRE2_NO_AUTO_CAPTURE */ +#define REG_NOSUB 0x0020 /* Do not report what was matched */ #define REG_UTF 0x0040 /* NOT defined by POSIX; maps to PCRE2_UTF */ #define REG_STARTEND 0x0080 /* BSD feature: pass subject string by so,eo */ #define REG_NOTEMPTY 0x0100 /* NOT defined by POSIX; maps to PCRE2_NOTEMPTY */ @@ -98,6 +98,7 @@ typedef struct { void *re_match_data; size_t re_nsub; size_t re_erroffset; 
+ int re_cflags; } regex_t; /* The structure in which a captured offset is returned. */ diff --git a/pcre2/src/pcre2test.c b/pcre2/src/pcre2test.c index 34cc3a5ed..241c22c46 100644 --- a/pcre2/src/pcre2test.c +++ b/pcre2/src/pcre2test.c @@ -11,7 +11,7 @@ hacked-up (non-) design had also run out of steam. Written by Philip Hazel Original code Copyright (c) 1997-2012 University of Cambridge - Rewritten code Copyright (c) 2015 University of Cambridge + Rewritten code Copyright (c) 2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -66,6 +66,14 @@ it references only the enabled library functions. */ #include #include +#if defined NATIVE_ZOS +#include "pcrzoscs.h" +/* That header is not included in the main PCRE2 distribution because other +apparatus is needed to compile pcre2test for z/OS. The header can be found in +the special z/OS distribution, which is available from www.zaconsultants.net or +from www.cbttape.org. */ +#endif + #ifdef HAVE_UNISTD_H #include #endif @@ -150,6 +158,13 @@ patterns. */ void vms_setsymbol( char *, char *, int ); #endif +/* VC doesn't support "%td". */ +#ifdef _MSC_VER +#define PTR_SPEC "%lu" +#else +#define PTR_SPEC "%td" +#endif + /* ------------------End of system-specific definitions -------------------- */ /* Glueing macros that are used in several places below. 
*/ @@ -167,20 +182,20 @@ void vms_setsymbol( char *, char *, int ); #endif #endif -#define CFAIL_UNSET UINT32_MAX /* Unset value for cfail fields */ +#define CFORE_UNSET UINT32_MAX /* Unset value for cfail/cerror fields */ #define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */ #define DEFAULT_OVECCOUNT 15 /* Default ovector count */ #define JUNK_OFFSET 0xdeadbeef /* For initializing ovector */ #define LOCALESIZE 32 /* Size of locale name */ #define LOOPREPEAT 500000 /* Default loop count for timing */ #define PATSTACKSIZE 20 /* Pattern stack for save/restore testing */ -#define REPLACE_MODSIZE 96 /* Field for reading 8-bit replacement */ +#define REPLACE_MODSIZE 100 /* Field for reading 8-bit replacement */ #define VERSION_SIZE 64 /* Size of buffer for the version strings */ /* Make sure the buffer into which replacement strings are copied is big enough to hold them as 32-bit code units. */ -#define REPLACE_BUFFSIZE (4*REPLACE_MODSIZE) +#define REPLACE_BUFFSIZE 1024 /* This is a byte value */ /* Execution modes */ @@ -203,7 +218,7 @@ systems that differ in their output from isprint() even in the "C" locale. */ #define PRINTABLE(c) ((c) >= 32 && (c) < 127) #endif -#define PRINTOK(c) ((locale_tables != NULL)? isprint(c) : PRINTABLE(c)) +#define PRINTOK(c) ((use_tables != NULL && c < 256)? isprint(c) : PRINTABLE(c)) /* We have to include some of the library source files because we need to use some of the macros, internal structure definitions, and other internal @@ -231,6 +246,22 @@ of PRIV avoids name clashes. */ #include "pcre2_tables.c" #include "pcre2_ucd.c" +/* 32-bit integer values in the input are read by strtoul() or strtol(). The +check needed for overflow depends on whether long ints are in fact longer than +ints. They are defined not to be shorter. 
*/ + +#if ULONG_MAX > UINT32_MAX +#define U32OVERFLOW(x) (x > UINT32_MAX) +#else +#define U32OVERFLOW(x) (x == UINT32_MAX) +#endif + +#if LONG_MAX > INT32_MAX +#define S32OVERFLOW(x) (x > INT32_MAX || x < INT32_MIN) +#else +#define S32OVERFLOW(x) (x == INT32_MAX || x == INT32_MIN) +#endif + /* When PCRE2_CODE_UNIT_WIDTH is zero, pcre2_internal.h does not include pcre2_intmodedep.h, which is where mode-dependent macros and structures are defined. We can now include it for each supported code unit width. Because @@ -328,17 +359,19 @@ typedef struct cmdstruct { int value; } cmdstruct; -enum { CMD_FORBID_UTF, CMD_LOAD, CMD_PATTERN, CMD_PERLTEST, CMD_POP, CMD_SAVE, - CMD_SUBJECT, CMD_UNKNOWN }; +enum { CMD_FORBID_UTF, CMD_LOAD, CMD_NEWLINE_DEFAULT, CMD_PATTERN, + CMD_PERLTEST, CMD_POP, CMD_POPCOPY, CMD_SAVE, CMD_SUBJECT, CMD_UNKNOWN }; static cmdstruct cmdlist[] = { - { "forbid_utf", CMD_FORBID_UTF }, - { "load", CMD_LOAD }, - { "pattern", CMD_PATTERN }, - { "perltest", CMD_PERLTEST }, - { "pop", CMD_POP }, - { "save", CMD_SAVE }, - { "subject", CMD_SUBJECT }}; + { "forbid_utf", CMD_FORBID_UTF }, + { "load", CMD_LOAD }, + { "newline_default", CMD_NEWLINE_DEFAULT }, + { "pattern", CMD_PATTERN }, + { "perltest", CMD_PERLTEST }, + { "pop", CMD_POP }, + { "popcopy", CMD_POPCOPY }, + { "save", CMD_SAVE }, + { "subject", CMD_SUBJECT }}; #define cmdlistcount sizeof(cmdlist)/sizeof(cmdstruct) @@ -370,38 +403,56 @@ enum { MOD_CTC, /* Applies to a compile context */ MOD_NL, /* Is a newline value */ MOD_NN, /* Is a number or a name; more than one may occur */ MOD_OPT, /* Is an option bit */ + MOD_SIZ, /* Is a PCRE2_SIZE value */ MOD_STR }; /* Is a string */ /* Control bits. Some apply to compiling, some to matching, but some can be set -either on a pattern or a data line, so they must all be distinct. */ +either on a pattern or a data line, so they must all be distinct. There are now +so many of them that they are split into two fields. 
*/ -#define CTL_AFTERTEXT 0x00000001u -#define CTL_ALLAFTERTEXT 0x00000002u -#define CTL_ALLCAPTURES 0x00000004u -#define CTL_ALLUSEDTEXT 0x00000008u -#define CTL_ALTGLOBAL 0x00000010u -#define CTL_BINCODE 0x00000020u -#define CTL_CALLOUT_CAPTURE 0x00000040u -#define CTL_CALLOUT_INFO 0x00000080u -#define CTL_CALLOUT_NONE 0x00000100u -#define CTL_DFA 0x00000200u -#define CTL_FINDLIMITS 0x00000400u -#define CTL_FULLBINCODE 0x00000800u -#define CTL_GETALL 0x00001000u -#define CTL_GLOBAL 0x00002000u -#define CTL_HEXPAT 0x00004000u -#define CTL_INFO 0x00008000u -#define CTL_JITFAST 0x00010000u -#define CTL_JITVERIFY 0x00020000u -#define CTL_MARK 0x00040000u -#define CTL_MEMORY 0x00080000u -#define CTL_POSIX 0x00100000u -#define CTL_PUSH 0x00200000u -#define CTL_STARTCHAR 0x00400000u -#define CTL_ZERO_TERMINATE 0x00800000u +#define CTL_AFTERTEXT 0x00000001u +#define CTL_ALLAFTERTEXT 0x00000002u +#define CTL_ALLCAPTURES 0x00000004u +#define CTL_ALLUSEDTEXT 0x00000008u +#define CTL_ALTGLOBAL 0x00000010u +#define CTL_BINCODE 0x00000020u +#define CTL_CALLOUT_CAPTURE 0x00000040u +#define CTL_CALLOUT_INFO 0x00000080u +#define CTL_CALLOUT_NONE 0x00000100u +#define CTL_DFA 0x00000200u +#define CTL_EXPAND 0x00000400u +#define CTL_FINDLIMITS 0x00000800u +#define CTL_FULLBINCODE 0x00001000u +#define CTL_GETALL 0x00002000u +#define CTL_GLOBAL 0x00004000u +#define CTL_HEXPAT 0x00008000u /* Same word as USE_LENGTH */ +#define CTL_INFO 0x00010000u +#define CTL_JITFAST 0x00020000u +#define CTL_JITVERIFY 0x00040000u +#define CTL_MARK 0x00080000u +#define CTL_MEMORY 0x00100000u +#define CTL_NULLCONTEXT 0x00200000u +#define CTL_POSIX 0x00400000u +#define CTL_POSIX_NOSUB 0x00800000u +#define CTL_PUSH 0x01000000u /* These three must be */ +#define CTL_PUSHCOPY 0x02000000u /* all in the same */ +#define CTL_PUSHTABLESCOPY 0x04000000u /* word. 
*/ +#define CTL_STARTCHAR 0x08000000u +#define CTL_USE_LENGTH 0x10000000u /* Same word as HEXPAT */ +#define CTL_UTF8_INPUT 0x20000000u +#define CTL_ZERO_TERMINATE 0x40000000u -#define CTL_BSR_SET 0x80000000u /* This is informational */ -#define CTL_NL_SET 0x40000000u /* This is informational */ +/* Second control word */ + +#define CTL2_SUBSTITUTE_EXTENDED 0x00000001u +#define CTL2_SUBSTITUTE_OVERFLOW_LENGTH 0x00000002u +#define CTL2_SUBSTITUTE_UNKNOWN_UNSET 0x00000004u +#define CTL2_SUBSTITUTE_UNSET_EMPTY 0x00000008u + +#define CTL_NL_SET 0x40000000u /* Informational */ +#define CTL_BSR_SET 0x80000000u /* Informational */ + +/* Combinations */ #define CTL_DEBUG (CTL_FULLBINCODE|CTL_INFO) /* For setting */ #define CTL_ANYINFO (CTL_DEBUG|CTL_BINCODE|CTL_CALLOUT_INFO) @@ -418,7 +469,13 @@ data line. */ CTL_GLOBAL|\ CTL_MARK|\ CTL_MEMORY|\ - CTL_STARTCHAR) + CTL_STARTCHAR|\ + CTL_UTF8_INPUT) + +#define CTL2_ALLPD (CTL2_SUBSTITUTE_EXTENDED|\ + CTL2_SUBSTITUTE_OVERFLOW_LENGTH|\ + CTL2_SUBSTITUTE_UNKNOWN_UNSET|\ + CTL2_SUBSTITUTE_UNSET_EMPTY) /* Structures for holding modifier information for patterns and subject strings (data). Fields containing modifiers that can be set either for a pattern or a @@ -428,10 +485,12 @@ same offset in the big table below works for both. */ typedef struct patctl { /* Structure for pattern modifiers. */ uint32_t options; /* Must be in same position as datctl */ uint32_t control; /* Must be in same position as datctl */ + uint32_t control2; /* Must be in same position as datctl */ uint8_t replacement[REPLACE_MODSIZE]; /* So must this */ uint32_t jit; uint32_t stackguard_test; uint32_t tables_id; + uint32_t regerror_buffsize; uint8_t locale[LOCALESIZE]; } patctl; @@ -441,7 +500,9 @@ typedef struct patctl { /* Structure for pattern modifiers. */ typedef struct datctl { /* Structure for data line modifiers. 
*/ uint32_t options; /* Must be in same position as patctl */ uint32_t control; /* Must be in same position as patctl */ + uint32_t control2; /* Must be in same position as patctl */ uint8_t replacement[REPLACE_MODSIZE]; /* So must this */ + uint32_t cerror[2]; uint32_t cfail[2]; int32_t callout_data; int32_t copy_numbers[MAXCPYGET]; @@ -481,82 +542,101 @@ typedef struct modstruct { } modstruct; static modstruct modlist[] = { - { "aftertext", MOD_PNDP, MOD_CTL, CTL_AFTERTEXT, PO(control) }, - { "allaftertext", MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT, PO(control) }, - { "allcaptures", MOD_PND, MOD_CTL, CTL_ALLCAPTURES, PO(control) }, - { "allow_empty_class", MOD_PAT, MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS, PO(options) }, - { "allusedtext", MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT, PO(control) }, - { "alt_bsux", MOD_PAT, MOD_OPT, PCRE2_ALT_BSUX, PO(options) }, - { "alt_circumflex", MOD_PAT, MOD_OPT, PCRE2_ALT_CIRCUMFLEX, PO(options) }, - { "altglobal", MOD_PND, MOD_CTL, CTL_ALTGLOBAL, PO(control) }, - { "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) }, - { "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) }, - { "bincode", MOD_PAT, MOD_CTL, CTL_BINCODE, PO(control) }, - { "bsr", MOD_CTC, MOD_BSR, 0, CO(bsr_convention) }, - { "callout_capture", MOD_DAT, MOD_CTL, CTL_CALLOUT_CAPTURE, DO(control) }, - { "callout_data", MOD_DAT, MOD_INS, 0, DO(callout_data) }, - { "callout_fail", MOD_DAT, MOD_IN2, 0, DO(cfail) }, - { "callout_info", MOD_PAT, MOD_CTL, CTL_CALLOUT_INFO, PO(control) }, - { "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) }, - { "caseless", MOD_PATP, MOD_OPT, PCRE2_CASELESS, PO(options) }, - { "copy", MOD_DAT, MOD_NN, DO(copy_numbers), DO(copy_names) }, - { "debug", MOD_PAT, MOD_CTL, CTL_DEBUG, PO(control) }, - { "dfa", MOD_DAT, MOD_CTL, CTL_DFA, DO(control) }, - { "dfa_restart", MOD_DAT, MOD_OPT, PCRE2_DFA_RESTART, DO(options) }, - { "dfa_shortest", MOD_DAT, MOD_OPT, PCRE2_DFA_SHORTEST, DO(options) }, - { "dollar_endonly", MOD_PAT, 
MOD_OPT, PCRE2_DOLLAR_ENDONLY, PO(options) }, - { "dotall", MOD_PATP, MOD_OPT, PCRE2_DOTALL, PO(options) }, - { "dupnames", MOD_PATP, MOD_OPT, PCRE2_DUPNAMES, PO(options) }, - { "extended", MOD_PATP, MOD_OPT, PCRE2_EXTENDED, PO(options) }, - { "find_limits", MOD_DAT, MOD_CTL, CTL_FINDLIMITS, DO(control) }, - { "firstline", MOD_PAT, MOD_OPT, PCRE2_FIRSTLINE, PO(options) }, - { "fullbincode", MOD_PAT, MOD_CTL, CTL_FULLBINCODE, PO(control) }, - { "get", MOD_DAT, MOD_NN, DO(get_numbers), DO(get_names) }, - { "getall", MOD_DAT, MOD_CTL, CTL_GETALL, DO(control) }, - { "global", MOD_PNDP, MOD_CTL, CTL_GLOBAL, PO(control) }, - { "hex", MOD_PAT, MOD_CTL, CTL_HEXPAT, PO(control) }, - { "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) }, - { "jit", MOD_PAT, MOD_IND, 7, PO(jit) }, - { "jitfast", MOD_PAT, MOD_CTL, CTL_JITFAST, PO(control) }, - { "jitstack", MOD_DAT, MOD_INT, 0, DO(jitstack) }, - { "jitverify", MOD_PAT, MOD_CTL, CTL_JITVERIFY, PO(control) }, - { "locale", MOD_PAT, MOD_STR, LOCALESIZE, PO(locale) }, - { "mark", MOD_PNDP, MOD_CTL, CTL_MARK, PO(control) }, - { "match_limit", MOD_CTM, MOD_INT, 0, MO(match_limit) }, - { "match_unset_backref", MOD_PAT, MOD_OPT, PCRE2_MATCH_UNSET_BACKREF, PO(options) }, - { "memory", MOD_PD, MOD_CTL, CTL_MEMORY, PD(control) }, - { "multiline", MOD_PATP, MOD_OPT, PCRE2_MULTILINE, PO(options) }, - { "never_backslash_c", MOD_PAT, MOD_OPT, PCRE2_NEVER_BACKSLASH_C, PO(options) }, - { "never_ucp", MOD_PAT, MOD_OPT, PCRE2_NEVER_UCP, PO(options) }, - { "never_utf", MOD_PAT, MOD_OPT, PCRE2_NEVER_UTF, PO(options) }, - { "newline", MOD_CTC, MOD_NL, 0, CO(newline_convention) }, - { "no_auto_capture", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_CAPTURE, PO(options) }, - { "no_auto_possess", MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS, PO(options) }, - { "no_dotstar_anchor", MOD_PAT, MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR, PO(options) }, - { "no_start_optimize", MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PO(options) }, - { "no_utf_check", MOD_PD, MOD_OPT, 
PCRE2_NO_UTF_CHECK, PD(options) }, - { "notbol", MOD_DAT, MOD_OPT, PCRE2_NOTBOL, DO(options) }, - { "notempty", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY, DO(options) }, - { "notempty_atstart", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY_ATSTART, DO(options) }, - { "noteol", MOD_DAT, MOD_OPT, PCRE2_NOTEOL, DO(options) }, - { "offset", MOD_DAT, MOD_INT, 0, DO(offset) }, - { "ovector", MOD_DAT, MOD_INT, 0, DO(oveccount) }, - { "parens_nest_limit", MOD_CTC, MOD_INT, 0, CO(parens_nest_limit) }, - { "partial_hard", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) }, - { "partial_soft", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) }, - { "ph", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) }, - { "posix", MOD_PAT, MOD_CTL, CTL_POSIX, PO(control) }, - { "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) }, - { "push", MOD_PAT, MOD_CTL, CTL_PUSH, PO(control) }, - { "recursion_limit", MOD_CTM, MOD_INT, 0, MO(recursion_limit) }, - { "replace", MOD_PND, MOD_STR, REPLACE_MODSIZE, PO(replacement) }, - { "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) }, - { "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) }, - { "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) }, - { "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) }, - { "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) }, - { "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) }, - { "zero_terminate", MOD_DAT, MOD_CTL, CTL_ZERO_TERMINATE, DO(control) } + { "aftertext", MOD_PNDP, MOD_CTL, CTL_AFTERTEXT, PO(control) }, + { "allaftertext", MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT, PO(control) }, + { "allcaptures", MOD_PND, MOD_CTL, CTL_ALLCAPTURES, PO(control) }, + { "allow_empty_class", MOD_PAT, MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS, PO(options) }, + { "allusedtext", MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT, PO(control) }, + { "alt_bsux", MOD_PAT, MOD_OPT, PCRE2_ALT_BSUX, PO(options) }, + { "alt_circumflex", MOD_PAT, MOD_OPT, PCRE2_ALT_CIRCUMFLEX, PO(options) }, + { "alt_verbnames", MOD_PAT, MOD_OPT, PCRE2_ALT_VERBNAMES, PO(options) 
}, + { "altglobal", MOD_PND, MOD_CTL, CTL_ALTGLOBAL, PO(control) }, + { "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) }, + { "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) }, + { "bincode", MOD_PAT, MOD_CTL, CTL_BINCODE, PO(control) }, + { "bsr", MOD_CTC, MOD_BSR, 0, CO(bsr_convention) }, + { "callout_capture", MOD_DAT, MOD_CTL, CTL_CALLOUT_CAPTURE, DO(control) }, + { "callout_data", MOD_DAT, MOD_INS, 0, DO(callout_data) }, + { "callout_error", MOD_DAT, MOD_IN2, 0, DO(cerror) }, + { "callout_fail", MOD_DAT, MOD_IN2, 0, DO(cfail) }, + { "callout_info", MOD_PAT, MOD_CTL, CTL_CALLOUT_INFO, PO(control) }, + { "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) }, + { "caseless", MOD_PATP, MOD_OPT, PCRE2_CASELESS, PO(options) }, + { "copy", MOD_DAT, MOD_NN, DO(copy_numbers), DO(copy_names) }, + { "debug", MOD_PAT, MOD_CTL, CTL_DEBUG, PO(control) }, + { "dfa", MOD_DAT, MOD_CTL, CTL_DFA, DO(control) }, + { "dfa_restart", MOD_DAT, MOD_OPT, PCRE2_DFA_RESTART, DO(options) }, + { "dfa_shortest", MOD_DAT, MOD_OPT, PCRE2_DFA_SHORTEST, DO(options) }, + { "dollar_endonly", MOD_PAT, MOD_OPT, PCRE2_DOLLAR_ENDONLY, PO(options) }, + { "dotall", MOD_PATP, MOD_OPT, PCRE2_DOTALL, PO(options) }, + { "dupnames", MOD_PATP, MOD_OPT, PCRE2_DUPNAMES, PO(options) }, + { "expand", MOD_PAT, MOD_CTL, CTL_EXPAND, PO(control) }, + { "extended", MOD_PATP, MOD_OPT, PCRE2_EXTENDED, PO(options) }, + { "find_limits", MOD_DAT, MOD_CTL, CTL_FINDLIMITS, DO(control) }, + { "firstline", MOD_PAT, MOD_OPT, PCRE2_FIRSTLINE, PO(options) }, + { "fullbincode", MOD_PAT, MOD_CTL, CTL_FULLBINCODE, PO(control) }, + { "get", MOD_DAT, MOD_NN, DO(get_numbers), DO(get_names) }, + { "getall", MOD_DAT, MOD_CTL, CTL_GETALL, DO(control) }, + { "global", MOD_PNDP, MOD_CTL, CTL_GLOBAL, PO(control) }, + { "hex", MOD_PAT, MOD_CTL, CTL_HEXPAT, PO(control) }, + { "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) }, + { "jit", MOD_PAT, MOD_IND, 7, PO(jit) }, + { "jitfast", MOD_PAT, MOD_CTL, 
CTL_JITFAST, PO(control) }, + { "jitstack", MOD_DAT, MOD_INT, 0, DO(jitstack) }, + { "jitverify", MOD_PAT, MOD_CTL, CTL_JITVERIFY, PO(control) }, + { "locale", MOD_PAT, MOD_STR, LOCALESIZE, PO(locale) }, + { "mark", MOD_PNDP, MOD_CTL, CTL_MARK, PO(control) }, + { "match_limit", MOD_CTM, MOD_INT, 0, MO(match_limit) }, + { "match_unset_backref", MOD_PAT, MOD_OPT, PCRE2_MATCH_UNSET_BACKREF, PO(options) }, + { "max_pattern_length", MOD_CTC, MOD_SIZ, 0, CO(max_pattern_length) }, + { "memory", MOD_PD, MOD_CTL, CTL_MEMORY, PD(control) }, + { "multiline", MOD_PATP, MOD_OPT, PCRE2_MULTILINE, PO(options) }, + { "never_backslash_c", MOD_PAT, MOD_OPT, PCRE2_NEVER_BACKSLASH_C, PO(options) }, + { "never_ucp", MOD_PAT, MOD_OPT, PCRE2_NEVER_UCP, PO(options) }, + { "never_utf", MOD_PAT, MOD_OPT, PCRE2_NEVER_UTF, PO(options) }, + { "newline", MOD_CTC, MOD_NL, 0, CO(newline_convention) }, + { "no_auto_capture", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_CAPTURE, PO(options) }, + { "no_auto_possess", MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS, PO(options) }, + { "no_dotstar_anchor", MOD_PAT, MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR, PO(options) }, + { "no_jit", MOD_DAT, MOD_OPT, PCRE2_NO_JIT, DO(options) }, + { "no_start_optimize", MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PO(options) }, + { "no_utf_check", MOD_PD, MOD_OPT, PCRE2_NO_UTF_CHECK, PD(options) }, + { "notbol", MOD_DAT, MOD_OPT, PCRE2_NOTBOL, DO(options) }, + { "notempty", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY, DO(options) }, + { "notempty_atstart", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY_ATSTART, DO(options) }, + { "noteol", MOD_DAT, MOD_OPT, PCRE2_NOTEOL, DO(options) }, + { "null_context", MOD_PD, MOD_CTL, CTL_NULLCONTEXT, PO(control) }, + { "offset", MOD_DAT, MOD_INT, 0, DO(offset) }, + { "offset_limit", MOD_CTM, MOD_SIZ, 0, MO(offset_limit)}, + { "ovector", MOD_DAT, MOD_INT, 0, DO(oveccount) }, + { "parens_nest_limit", MOD_CTC, MOD_INT, 0, CO(parens_nest_limit) }, + { "partial_hard", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) }, + { 
"partial_soft", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) }, + { "ph", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) }, + { "posix", MOD_PAT, MOD_CTL, CTL_POSIX, PO(control) }, + { "posix_nosub", MOD_PAT, MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB, PO(control) }, + { "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) }, + { "push", MOD_PAT, MOD_CTL, CTL_PUSH, PO(control) }, + { "pushcopy", MOD_PAT, MOD_CTL, CTL_PUSHCOPY, PO(control) }, + { "pushtablescopy", MOD_PAT, MOD_CTL, CTL_PUSHTABLESCOPY, PO(control) }, + { "recursion_limit", MOD_CTM, MOD_INT, 0, MO(recursion_limit) }, + { "regerror_buffsize", MOD_PAT, MOD_INT, 0, PO(regerror_buffsize) }, + { "replace", MOD_PND, MOD_STR, REPLACE_MODSIZE, PO(replacement) }, + { "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) }, + { "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) }, + { "startoffset", MOD_DAT, MOD_INT, 0, DO(offset) }, + { "substitute_extended", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_EXTENDED, PO(control2) }, + { "substitute_overflow_length", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) }, + { "substitute_unknown_unset", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) }, + { "substitute_unset_empty", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNSET_EMPTY, PO(control2) }, + { "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) }, + { "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) }, + { "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) }, + { "use_length", MOD_PAT, MOD_CTL, CTL_USE_LENGTH, PO(control) }, + { "use_offset_limit", MOD_PAT, MOD_OPT, PCRE2_USE_OFFSET_LIMIT, PO(options) }, + { "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) }, + { "utf8_input", MOD_PAT, MOD_CTL, CTL_UTF8_INPUT, PO(control) }, + { "zero_terminate", MOD_DAT, MOD_CTL, CTL_ZERO_TERMINATE, DO(control) } }; #define MODLISTCOUNT sizeof(modlist)/sizeof(modstruct) @@ -564,34 +644,57 @@ static modstruct modlist[] = { /* Controls and options that are supported for use with the POSIX interface. 
*/ #define POSIX_SUPPORTED_COMPILE_OPTIONS ( \ - PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| \ - PCRE2_UCP|PCRE2_UTF|PCRE2_UNGREEDY) + PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_MULTILINE|PCRE2_UCP|PCRE2_UTF| \ + PCRE2_UNGREEDY) #define POSIX_SUPPORTED_COMPILE_CONTROLS ( \ - CTL_AFTERTEXT|CTL_ALLAFTERTEXT|CTL_POSIX) + CTL_AFTERTEXT|CTL_ALLAFTERTEXT|CTL_EXPAND|CTL_POSIX|CTL_POSIX_NOSUB) + +#define POSIX_SUPPORTED_COMPILE_CONTROLS2 (0) #define POSIX_SUPPORTED_MATCH_OPTIONS ( \ PCRE2_NOTBOL|PCRE2_NOTEMPTY|PCRE2_NOTEOL) -#define POSIX_SUPPORTED_MATCH_CONTROLS (CTL_AFTERTEXT|CTL_ALLAFTERTEXT) - -/* Controls that are mutually exclusive. */ - -#define EXCLUSIVE_DAT_CONTROLS (CTL_ALLUSEDTEXT|CTL_STARTCHAR) +#define POSIX_SUPPORTED_MATCH_CONTROLS (CTL_AFTERTEXT|CTL_ALLAFTERTEXT) +#define POSIX_SUPPORTED_MATCH_CONTROLS2 (0) /* Control bits that are not ignored with 'push'. */ #define PUSH_SUPPORTED_COMPILE_CONTROLS ( \ CTL_BINCODE|CTL_CALLOUT_INFO|CTL_FULLBINCODE|CTL_HEXPAT|CTL_INFO| \ - CTL_JITVERIFY|CTL_MEMORY|CTL_PUSH|CTL_BSR_SET|CTL_NL_SET) + CTL_JITVERIFY|CTL_MEMORY|CTL_PUSH|CTL_PUSHCOPY|CTL_PUSHTABLESCOPY| \ + CTL_USE_LENGTH) + +#define PUSH_SUPPORTED_COMPILE_CONTROLS2 (CTL_BSR_SET|CTL_NL_SET) /* Controls that apply only at compile time with 'push'. */ -#define PUSH_COMPILE_ONLY_CONTROLS CTL_JITVERIFY +#define PUSH_COMPILE_ONLY_CONTROLS CTL_JITVERIFY +#define PUSH_COMPILE_ONLY_CONTROLS2 (0) -/* Controls that are forbidden with #pop. */ +/* Controls that are forbidden with #pop or #popcopy. */ -#define NOTPOP_CONTROLS (CTL_HEXPAT|CTL_POSIX|CTL_PUSH) +#define NOTPOP_CONTROLS (CTL_HEXPAT|CTL_POSIX|CTL_POSIX_NOSUB|CTL_PUSH| \ + CTL_PUSHCOPY|CTL_PUSHTABLESCOPY|CTL_USE_LENGTH) + +/* Pattern controls that are mutually exclusive. At present these are all in +the first control word. Note that CTL_POSIX_NOSUB is always accompanied by +CTL_POSIX, so it doesn't need its own entries. 
*/ + +static uint32_t exclusive_pat_controls[] = { + CTL_POSIX | CTL_HEXPAT, + CTL_POSIX | CTL_PUSH, + CTL_POSIX | CTL_PUSHCOPY, + CTL_POSIX | CTL_PUSHTABLESCOPY, + CTL_POSIX | CTL_USE_LENGTH, + CTL_EXPAND | CTL_HEXPAT }; + +/* Data controls that are mutually exclusive. At present these are all in the +first control word. */ + +static uint32_t exclusive_dat_controls[] = { + CTL_ALLUSEDTEXT | CTL_STARTCHAR, + CTL_FINDLIMITS | CTL_NULLCONTEXT }; /* Table of single-character abbreviated modifiers. The index field is initialized to -1, but the first time the modifier is encountered, it is filled @@ -648,6 +751,12 @@ table itself easier to read. */ #define EBCDIC_NL 0 #endif +#ifdef NEVER_BACKSLASH_C +#define BACKSLASH_C 0 +#else +#define BACKSLASH_C 1 +#endif + typedef struct coptstruct { const char *name; uint32_t type; @@ -662,16 +771,17 @@ enum { CONF_BSR, }; static coptstruct coptlist[] = { - { "bsr", CONF_BSR, PCRE2_CONFIG_BSR }, - { "ebcdic", CONF_FIX, SUPPORT_EBCDIC }, - { "ebcdic-nl", CONF_FIZ, EBCDIC_NL }, - { "jit", CONF_INT, PCRE2_CONFIG_JIT }, - { "linksize", CONF_INT, PCRE2_CONFIG_LINKSIZE }, - { "newline", CONF_NL, PCRE2_CONFIG_NEWLINE }, - { "pcre2-16", CONF_FIX, SUPPORT_16 }, - { "pcre2-32", CONF_FIX, SUPPORT_32 }, - { "pcre2-8", CONF_FIX, SUPPORT_8 }, - { "unicode", CONF_INT, PCRE2_CONFIG_UNICODE } + { "backslash-C", CONF_FIX, BACKSLASH_C }, + { "bsr", CONF_BSR, PCRE2_CONFIG_BSR }, + { "ebcdic", CONF_FIX, SUPPORT_EBCDIC }, + { "ebcdic-nl", CONF_FIZ, EBCDIC_NL }, + { "jit", CONF_INT, PCRE2_CONFIG_JIT }, + { "linksize", CONF_INT, PCRE2_CONFIG_LINKSIZE }, + { "newline", CONF_NL, PCRE2_CONFIG_NEWLINE }, + { "pcre2-16", CONF_FIX, SUPPORT_16 }, + { "pcre2-32", CONF_FIX, SUPPORT_32 }, + { "pcre2-8", CONF_FIX, SUPPORT_8 }, + { "unicode", CONF_INT, PCRE2_CONFIG_UNICODE } }; #define COPTLISTCOUNT sizeof(coptlist)/sizeof(coptstruct) @@ -697,6 +807,7 @@ static BOOL restrict_for_perl_test = FALSE; static BOOL show_memory = FALSE; static int code_unit_size; /* Bytes 
*/ +static int jitrc; /* Return from JIT compile */ static int test_mode = DEFAULT_TEST_MODE; static int timeit = 0; static int timeitm = 0; @@ -711,6 +822,8 @@ static uint32_t maxlookbehind; static uint32_t max_oveccount; static uint32_t callout_count; +static uint16_t local_newline_default = 0; + static VERSION_TYPE jittarget[VERSION_SIZE]; static VERSION_TYPE version[VERSION_SIZE]; static VERSION_TYPE uversion[VERSION_SIZE]; @@ -724,11 +837,12 @@ static void *patstack[PATSTACKSIZE]; static int patstacknext = 0; #ifdef SUPPORT_PCRE2_8 -static regex_t preg = { NULL, NULL, 0, 0 }; +static regex_t preg = { NULL, NULL, 0, 0, 0 }; #endif static int *dfa_workspace = NULL; static const uint8_t *locale_tables = NULL; +static const uint8_t *use_tables = NULL; static uint8_t locale_name[32]; /* We need buffers for building 16/32-bit strings; 8-bit strings don't need @@ -737,7 +851,7 @@ buffer is where all input lines are read. Its size is the same as pbuffer8. Pattern lines are always copied to pbuffer8 for use in callouts, even if they are actually compiled from pbuffer16 or pbuffer32. */ -static int pbuffer8_size = 50000; /* Initial size, bytes */ +static size_t pbuffer8_size = 50000; /* Initial size, bytes */ static uint8_t *pbuffer8 = NULL; static uint8_t *buffer = NULL; @@ -856,21 +970,45 @@ are supported. 
*/ a = pcre2_callout_enumerate_32(compiled_code32, \ (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c) +#define PCRE2_CODE_COPY_FROM_VOID(a,b) \ + if (test_mode == PCRE8_MODE) \ + G(a,8) = pcre2_code_copy_8(b); \ + else if (test_mode == PCRE16_MODE) \ + G(a,16) = pcre2_code_copy_16(b); \ + else \ + G(a,32) = pcre2_code_copy_32(b) + +#define PCRE2_CODE_COPY_TO_VOID(a,b) \ + if (test_mode == PCRE8_MODE) \ + a = (void *)pcre2_code_copy_8(G(b,8)); \ + else if (test_mode == PCRE16_MODE) \ + a = (void *)pcre2_code_copy_16(G(b,16)); \ + else \ + a = (void *)pcre2_code_copy_32(G(b,32)) + +#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \ + if (test_mode == PCRE8_MODE) \ + a = (void *)pcre2_code_copy_with_tables_8(G(b,8)); \ + else if (test_mode == PCRE16_MODE) \ + a = (void *)pcre2_code_copy_with_tables_16(G(b,16)); \ + else \ + a = (void *)pcre2_code_copy_with_tables_32(G(b,32)) + #define PCRE2_COMPILE(a,b,c,d,e,f,g) \ if (test_mode == PCRE8_MODE) \ - G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,G(g,8)); \ + G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g); \ else if (test_mode == PCRE16_MODE) \ - G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,G(g,16)); \ + G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,g); \ else \ - G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,G(g,32)) + G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g) #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \ if (test_mode == PCRE8_MODE) \ - a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8),i,j); \ + a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j); \ else if (test_mode == PCRE16_MODE) \ - a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16),i,j); \ + a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j); \ else \ - a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32),i,j) + a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j) #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \ if (test_mode == PCRE8_MODE) \ @@ -896,10 +1034,10 @@ are 
supported. */ else \ a = pcre2_get_startchar_32(G(b,32)) -#define PCRE2_JIT_COMPILE(a,b) \ - if (test_mode == PCRE8_MODE) pcre2_jit_compile_8(G(a,8),b); \ - else if (test_mode == PCRE16_MODE) pcre2_jit_compile_16(G(a,16),b); \ - else pcre2_jit_compile_32(G(a,32),b) +#define PCRE2_JIT_COMPILE(r,a,b) \ + if (test_mode == PCRE8_MODE) r = pcre2_jit_compile_8(G(a,8),b); \ + else if (test_mode == PCRE16_MODE) r = pcre2_jit_compile_16(G(a,16),b); \ + else r = pcre2_jit_compile_32(G(a,32),b) #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \ if (test_mode == PCRE8_MODE) pcre2_jit_free_unused_memory_8(G(a,8)); \ @@ -908,11 +1046,11 @@ are supported. */ #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \ if (test_mode == PCRE8_MODE) \ - a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8)); \ + a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \ else if (test_mode == PCRE16_MODE) \ - a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16)); \ + a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \ else \ - a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32)) + a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h) #define PCRE2_JIT_STACK_CREATE(a,b,c,d) \ if (test_mode == PCRE8_MODE) \ @@ -945,11 +1083,11 @@ are supported. */ #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \ if (test_mode == PCRE8_MODE) \ - a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8)); \ + a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \ else if (test_mode == PCRE16_MODE) \ - a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16)); \ + a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \ else \ - a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32)) + a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h) #define PCRE2_MATCH_DATA_CREATE(a,b,c) \ if (test_mode == PCRE8_MODE) \ @@ -1055,6 +1193,22 @@ are supported. 
*/ else \ pcre2_set_match_limit_32(G(a,32),b) +#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \ + if (test_mode == PCRE8_MODE) \ + pcre2_set_max_pattern_length_8(G(a,8),b); \ + else if (test_mode == PCRE16_MODE) \ + pcre2_set_max_pattern_length_16(G(a,16),b); \ + else \ + pcre2_set_max_pattern_length_32(G(a,32),b) + +#define PCRE2_SET_OFFSET_LIMIT(a,b) \ + if (test_mode == PCRE8_MODE) \ + pcre2_set_offset_limit_8(G(a,8),b); \ + else if (test_mode == PCRE16_MODE) \ + pcre2_set_offset_limit_16(G(a,16),b); \ + else \ + pcre2_set_offset_limit_32(G(a,32),b) + #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \ if (test_mode == PCRE8_MODE) \ pcre2_set_parens_nest_limit_8(G(a,8),b); \ @@ -1291,19 +1445,37 @@ the three different cases. */ a = G(pcre2_callout_enumerate,BITTWO)(G(compiled_code,BITTWO), \ (int (*)(struct G(pcre2_callout_enumerate_block_,BITTWO) *, void *))b,c) +#define PCRE2_CODE_COPY_FROM_VOID(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(a,BITONE) = G(pcre2_code_copy_,BITONE)(b); \ + else \ + G(a,BITTWO) = G(pcre2_code_copy_,BITTWO)(b) + +#define PCRE2_CODE_COPY_TO_VOID(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + a = (void *)G(pcre2_code_copy_,BITONE)(G(b,BITONE)); \ + else \ + a = (void *)G(pcre2_code_copy_,BITTWO)(G(b,BITTWO)) + +#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + a = (void *)G(pcre2_code_copy_with_tables_,BITONE)(G(b,BITONE)); \ + else \ + a = (void *)G(pcre2_code_copy_with_tables_,BITTWO)(G(b,BITTWO)) + #define PCRE2_COMPILE(a,b,c,d,e,f,g) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ - G(a,BITONE) = G(pcre2_compile_,BITONE)(G(b,BITONE),c,d,e,f,G(g,BITONE)); \ + G(a,BITONE) = G(pcre2_compile_,BITONE)(G(b,BITONE),c,d,e,f,g); \ else \ - G(a,BITTWO) = G(pcre2_compile_,BITTWO)(G(b,BITTWO),c,d,e,f,G(g,BITTWO)) + G(a,BITTWO) = G(pcre2_compile_,BITTWO)(G(b,BITTWO),c,d,e,f,g) #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ a = 
G(pcre2_dfa_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \ - G(g,BITONE),G(h,BITONE),i,j); \ + G(g,BITONE),h,i,j); \ else \ a = G(pcre2_dfa_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \ - G(g,BITTWO),G(h,BITTWO),i,j) + G(g,BITTWO),h,i,j) #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ @@ -1323,11 +1495,11 @@ the three different cases. */ else \ a = G(pcre2_get_startchar_,BITTWO)(G(b,BITTWO)) -#define PCRE2_JIT_COMPILE(a,b) \ +#define PCRE2_JIT_COMPILE(r,a,b) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ - G(pcre2_jit_compile_,BITONE)(G(a,BITONE),b); \ + r = G(pcre2_jit_compile_,BITONE)(G(a,BITONE),b); \ else \ - G(pcre2_jit_compile_,BITTWO)(G(a,BITTWO),b) + r = G(pcre2_jit_compile_,BITTWO)(G(a,BITTWO),b) #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ @@ -1338,10 +1510,10 @@ the three different cases. */ #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ a = G(pcre2_jit_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \ - G(g,BITONE),G(h,BITONE)); \ + G(g,BITONE),h); \ else \ a = G(pcre2_jit_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \ - G(g,BITTWO),G(h,BITTWO)) + G(g,BITTWO),h) #define PCRE2_JIT_STACK_CREATE(a,b,c,d) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ @@ -1370,10 +1542,10 @@ the three different cases. */ #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ a = G(pcre2_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \ - G(g,BITONE),G(h,BITONE)); \ + G(g,BITONE),h); \ else \ a = G(pcre2_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \ - G(g,BITTWO),G(h,BITTWO)) + G(g,BITTWO),h) #define PCRE2_MATCH_DATA_CREATE(a,b,c) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ @@ -1455,6 +1627,18 @@ the three different cases. 
*/ else \ G(pcre2_set_match_limit_,BITTWO)(G(a,BITTWO),b) +#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_set_max_pattern_length_,BITONE)(G(a,BITONE),b); \ + else \ + G(pcre2_set_max_pattern_length_,BITTWO)(G(a,BITTWO),b) + +#define PCRE2_SET_OFFSET_LIMIT(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_set_offset_limit_,BITONE)(G(a,BITONE),b); \ + else \ + G(pcre2_set_offset_limit_,BITTWO)(G(a,BITTWO),b) + #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ G(pcre2_set_parens_nest_limit_,BITONE)(G(a,BITONE),b); \ @@ -1614,18 +1798,21 @@ the three different cases. */ #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \ a = pcre2_callout_enumerate_8(compiled_code8, \ (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c) +#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,8) = pcre2_code_copy_8(b) +#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_8(G(b,8)) +#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_8(G(b,8)) #define PCRE2_COMPILE(a,b,c,d,e,f,g) \ - G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,G(g,8)) + G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g) #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \ - a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8),i,j) + a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j) #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \ r = pcre2_get_error_message_8(a,G(b,8),G(G(b,8),_size)) #define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_8(G(b,8)) #define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_8(G(b,8)) -#define PCRE2_JIT_COMPILE(a,b) pcre2_jit_compile_8(G(a,8),b) +#define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_8(G(a,8),b) #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_8(G(a,8)) #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \ - a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8)) + a = 
pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h) #define PCRE2_JIT_STACK_CREATE(a,b,c,d) \ a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d); #define PCRE2_JIT_STACK_ASSIGN(a,b,c) \ @@ -1633,7 +1820,7 @@ the three different cases. */ #define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a); #define PCRE2_MAKETABLES(a) a = pcre2_maketables_8(NULL) #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \ - a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8)) + a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h) #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,8) = pcre2_match_data_create_8(b,c) #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \ G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),c) @@ -1653,6 +1840,8 @@ the three different cases. */ #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ pcre2_set_compile_recursion_guard_8(G(a,8),b,c) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b) +#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_8(G(a,8),b) +#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a,8),b) #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b) #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_8(G(a,8),b) #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ @@ -1705,18 +1894,21 @@ the three different cases. 
*/ #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \ a = pcre2_callout_enumerate_16(compiled_code16, \ (int (*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c) +#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,16) = pcre2_code_copy_16(b) +#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_16(G(b,16)) +#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_16(G(b,16)) #define PCRE2_COMPILE(a,b,c,d,e,f,g) \ - G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,G(g,16)) + G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,g) #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \ - a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16),i,j) + a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j) #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \ r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size)) #define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_16(G(b,16)) #define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_16(G(b,16)) -#define PCRE2_JIT_COMPILE(a,b) pcre2_jit_compile_16(G(a,16),b) +#define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_16(G(a,16),b) #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_16(G(a,16)) #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \ - a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16)) + a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h) #define PCRE2_JIT_STACK_CREATE(a,b,c,d) \ a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d); #define PCRE2_JIT_STACK_ASSIGN(a,b,c) \ @@ -1724,7 +1916,7 @@ the three different cases. 
*/ #define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a); #define PCRE2_MAKETABLES(a) a = pcre2_maketables_16(NULL) #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \ - a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16)) + a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h) #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,16) = pcre2_match_data_create_16(b,c) #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \ G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),c) @@ -1744,6 +1936,8 @@ the three different cases. */ #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ pcre2_set_compile_recursion_guard_16(G(a,16),b,c) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b) +#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b) +#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b) #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b) #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_16(G(a,16),b) #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ @@ -1796,18 +1990,21 @@ the three different cases. 
*/ #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \ a = pcre2_callout_enumerate_32(compiled_code32, \ (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c) +#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,32) = pcre2_code_copy_32(b) +#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_32(G(b,32)) +#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_32(G(b,32)) #define PCRE2_COMPILE(a,b,c,d,e,f,g) \ - G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,G(g,32)) + G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g) #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \ - a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32),i,j) + a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j) #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \ r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size)) #define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_32(G(b,32)) #define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_32(G(b,32)) -#define PCRE2_JIT_COMPILE(a,b) pcre2_jit_compile_32(G(a,32),b) +#define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_32(G(a,32),b) #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_32(G(a,32)) #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \ - a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32)) + a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h) #define PCRE2_JIT_STACK_CREATE(a,b,c,d) \ a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d); #define PCRE2_JIT_STACK_ASSIGN(a,b,c) \ @@ -1815,7 +2012,7 @@ the three different cases. 
*/ #define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a); #define PCRE2_MAKETABLES(a) a = pcre2_maketables_32(NULL) #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \ - a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32)) + a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h) #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,32) = pcre2_match_data_create_32(b,c) #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \ G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c) @@ -1835,6 +2032,8 @@ the three different cases. */ #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ pcre2_set_compile_recursion_guard_32(G(a,32),b,c) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b) +#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b) +#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b) #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b) #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_32(G(a,32),b) #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ @@ -2203,6 +2402,27 @@ static const uint8_t tables2[] = { }; +#ifndef HAVE_STRERROR +/************************************************* +* Provide strerror() for non-ANSI libraries * +*************************************************/ + +/* Some old-fashioned systems (e.g. SunOS4) didn't have strerror() in their +libraries. They may no longer be around, but just in case, we can try to +provide the same facility by this simple alternative function. 
*/ + +extern int sys_nerr; +extern char *sys_errlist[]; + +char * +strerror(int n) +{ +if (n < 0 || n >= sys_nerr) return "unknown error number"; +return sys_errlist[n]; +} +#endif /* HAVE_STRERROR */ + + /************************************************* * Local memory functions * @@ -2363,6 +2583,8 @@ static int pchar(uint32_t c, BOOL utf, FILE *f) { int n = 0; +char tempbuffer[16]; + if (PRINTOK(c)) { if (f != NULL) fprintf(f, "%c", c); @@ -2384,6 +2606,8 @@ if (c < 0x100) } if (f != NULL) n = fprintf(f, "\\x{%02x}", c); + else n = sprintf(tempbuffer, "\\x{%02x}", c); + return n >= 0 ? n : 0; } @@ -2424,13 +2648,15 @@ return (int)(pp - p); *************************************************/ /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed. -If handed a NULL file, just counts chars without printing. */ +For printing *MARK strings, a negative length is given. If handed a NULL file, +just counts chars without printing (because pchar() does that). */ static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f) { uint32_t c = 0; int yield = 0; -if (length < 0) length = strlen((char *)p); + +if (length < 0) length = p[-1]; while (length-- > 0) { if (utf) @@ -2447,6 +2673,7 @@ while (length-- > 0) c = *p++; yield += pchar(c, utf, f); } + return yield; } #endif @@ -2458,12 +2685,13 @@ return yield; *************************************************/ /* Must handle UTF-16 strings in utf mode. Yields number of characters printed. -If handed a NULL file, just counts chars without printing. */ +For printing *MARK strings, a negative length is given. If handed a NULL file, +just counts chars without printing. 
*/ static int pchars16(PCRE2_SPTR16 p, int length, BOOL utf, FILE *f) { int yield = 0; -if (length < 0) length = strlen16(p); +if (length < 0) length = p[-1]; while (length-- > 0) { uint32_t c = *p++ & 0xffff; @@ -2491,13 +2719,14 @@ return yield; *************************************************/ /* Must handle UTF-32 strings in utf mode. Yields number of characters printed. -If handed a NULL file, just counts chars without printing. */ +For printing *MARK strings, a negative length is given.If handed a NULL file, +just counts chars without printing. */ static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f) { int yield = 0; (void)(utf); /* Avoid compiler warning */ -if (length < 0) length = strlen32(p); +if (length < 0) length = p[-1]; while (length-- > 0) { uint32_t c = *p++; @@ -2528,7 +2757,7 @@ Returns: number of characters placed in the buffer static int ord2utf8(uint32_t cvalue, uint8_t *utf8bytes) { -register int i, j; +int i, j; if (cvalue > 0x7fffffffu) return -1; for (i = 0; i < utf8_table1_size; i++) @@ -2548,16 +2777,22 @@ return i + 1; #ifdef SUPPORT_PCRE2_16 /************************************************* -* Convert pattern to 16-bit * +* Convert string to 16-bit * *************************************************/ -/* In UTF mode the input is always interpreted as a string of UTF-8 bytes. If -all the input bytes are ASCII, the space needed for a 16-bit string is exactly -double the 8-bit size. Otherwise, the size needed for a 16-bit string is no -more than double, because up to 0xffff uses no more than 3 bytes in UTF-8 but -possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in -UTF-16. The result is always left in pbuffer16. Impose a minimum size to save -repeated re-sizing. +/* In UTF mode the input is always interpreted as a string of UTF-8 bytes using +the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and +code values from 0 to 0x7fffffff. 
However, values greater than the later UTF +limit of 0x10ffff cause an error. In non-UTF mode the input is interpreted as +UTF-8 if the utf8_input modifier is set, but an error is generated for values +greater than 0xffff. + +If all the input bytes are ASCII, the space needed for a 16-bit string is +exactly double the 8-bit size. Otherwise, the size needed for a 16-bit string +is no more than double, because up to 0xffff uses no more than 3 bytes in UTF-8 +but possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes +in UTF-16. The result is always left in pbuffer16. Impose a minimum size to +save repeated re-sizing. Note that this function does not object to surrogate values. This is deliberate; it makes it possible to construct UTF-16 strings that are invalid, @@ -2565,7 +2800,7 @@ for the purpose of testing that they are correctly faulted. Arguments: p points to a byte string - utf non-zero if converting to UTF-16 + utf true in UTF mode lenptr points to number of bytes in the string (excluding trailing zero) Returns: 0 on success, with the length updated to the number of 16-bit @@ -2596,21 +2831,21 @@ if (pbuffer16_size < 2*len + 2) } pp = pbuffer16; -if (!utf) +if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0) { - while (len-- > 0) *pp++ = *p++; + for (; len > 0; len--) *pp++ = *p++; } else while (len > 0) { uint32_t c; int chlen = utf82ord(p, &c); if (chlen <= 0) return -1; + if (!utf && c > 0xffff) return -3; if (c > 0x10ffff) return -2; p += chlen; len -= chlen; if (c < 0x10000) *pp++ = c; else { - if (!utf) return -3; c -= 0x10000; *pp++ = 0xD800 | (c >> 10); *pp++ = 0xDC00 | (c & 0x3ff); @@ -2627,15 +2862,25 @@ return 0; #ifdef SUPPORT_PCRE2_32 /************************************************* -* Convert pattern to 32-bit * +* Convert string to 32-bit * *************************************************/ -/* In UTF mode the input is always interpreted as a string of UTF-8 bytes. 
If -all the input bytes are ASCII, the space needed for a 32-bit string is exactly -four times the 8-bit size. Otherwise, the size needed for a 32-bit string is no -more than four times, because the number of characters must be less than the -number of bytes. The result is always left in pbuffer32. Impose a minimum size -to save repeated re-sizing. +/* In UTF mode the input is always interpreted as a string of UTF-8 bytes using +the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and +code values from 0 to 0x7fffffff. However, values greater than the later UTF +limit of 0x10ffff cause an error. + +In non-UTF mode the input is interpreted as UTF-8 if the utf8_input modifier +is set, and no limit is imposed. There is special interpretation of the 0xff +byte (which is illegal in UTF-8) in this case: it causes the top bit of the +next character to be set. This provides a way of generating 32-bit characters +greater than 0x7fffffff. + +If all the input bytes are ASCII, the space needed for a 32-bit string is +exactly four times the 8-bit size. Otherwise, the size needed for a 32-bit +string is no more than four times, because the number of characters must be +less than the number of bytes. The result is always left in pbuffer32. Impose a +minimum size to save repeated re-sizing. Note that this function does not object to surrogate values. This is deliberate; it makes it possible to construct UTF-32 strings that are invalid, @@ -2643,7 +2888,7 @@ for the purpose of testing that they are correctly faulted. 
Arguments: p points to a byte string - utf true if UTF-8 (to be converted to UTF-32) + utf true in UTF mode lenptr points to number of bytes in the string (excluding trailing zero) Returns: 0 on success, with the length updated to the number of 32-bit @@ -2673,19 +2918,29 @@ if (pbuffer32_size < 4*len + 4) } pp = pbuffer32; -if (!utf) + +if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0) { - while (len-- > 0) *pp++ = *p++; + for (; len > 0; len--) *pp++ = *p++; } + else while (len > 0) { + int chlen; uint32_t c; - int chlen = utf82ord(p, &c); + uint32_t topbit = 0; + if (!utf && *p == 0xff && len > 1) + { + topbit = 0x80000000u; + p++; + len--; + } + chlen = utf82ord(p, &c); if (chlen <= 0) return -1; if (utf && c > 0x10ffff) return -2; p += chlen; len -= chlen; - *pp++ = c; + *pp++ = c | topbit; } *pp = 0; @@ -2715,9 +2970,8 @@ Returns: a possibly changed offset static PCRE2_SIZE backchars(uint8_t *subject, PCRE2_SIZE offset, uint32_t count, BOOL utf) { -long int yield; - -if (!utf || test_mode == PCRE32_MODE) yield = offset - count; +if (!utf || test_mode == PCRE32_MODE) + return (count >= offset)? 0 : (offset - count); else if (test_mode == PCRE8_MODE) { @@ -2727,7 +2981,7 @@ else if (test_mode == PCRE8_MODE) pp--; while ((*pp & 0xc0) == 0x80) pp--; } - yield = pp - (PCRE2_SPTR8)subject; + return pp - (PCRE2_SPTR8)subject; } else /* 16-bit mode */ @@ -2738,13 +2992,51 @@ else /* 16-bit mode */ pp--; if ((*pp & 0xfc00) == 0xdc00) pp--; } - yield = pp - (PCRE2_SPTR16)subject; + return pp - (PCRE2_SPTR16)subject; + } +} + + + +/************************************************* +* Expand input buffers * +*************************************************/ + +/* This function doubles the size of the input buffer and the buffer for +keeping an 8-bit copy of patterns (pbuffer8), and copies the current buffers to +the new ones. 
+ +Arguments: none +Returns: nothing (aborts if malloc() fails) +*/ + +static void +expand_input_buffers(void) +{ +int new_pbuffer8_size = 2*pbuffer8_size; +uint8_t *new_buffer = (uint8_t *)malloc(new_pbuffer8_size); +uint8_t *new_pbuffer8 = (uint8_t *)malloc(new_pbuffer8_size); + +if (new_buffer == NULL || new_pbuffer8 == NULL) + { + fprintf(stderr, "pcre2test: malloc(%d) failed\n", new_pbuffer8_size); + exit(1); } -return (yield >= 0)? yield : 0; +memcpy(new_buffer, buffer, pbuffer8_size); +memcpy(new_pbuffer8, pbuffer8, pbuffer8_size); + +pbuffer8_size = new_pbuffer8_size; + +free(buffer); +free(pbuffer8); + +buffer = new_buffer; +pbuffer8 = new_pbuffer8; } + /************************************************* * Read or extend an input line * *************************************************/ @@ -2752,10 +3044,11 @@ return (yield >= 0)? yield : 0; /* Input lines are read into buffer, but both patterns and data lines can be continued over multiple input lines. In addition, if the buffer fills up, we want to automatically expand it so as to be able to handle extremely large -lines that are needed for certain stress tests. When the input buffer is -expanded, the other two buffers must also be expanded likewise, and the -contents of pbuffer, which are a copy of the input for callouts, must be -preserved (for when expansion happens for a data line). This is not the most +lines that are needed for certain stress tests, although this is less likely +now that there are repetition features for both patterns and data. When the +input buffer is expanded, the other two buffers must also be expanded likewise, +and the contents of pbuffer, which are a copy of the input for callouts, must +be preserved (for when expansion happens for a data line). This is not the most optimal way of handling this, but hey, this is just a test program! 
Arguments: @@ -2779,7 +3072,7 @@ for (;;) if (rlen > 1000) { - int dlen; + size_t dlen; /* If libreadline or libedit support is required, use readline() to read a line if the input is a terminal. Note that readline() removes the trailing @@ -2810,36 +3103,36 @@ for (;;) return (here == start)? NULL : start; } - dlen = (int)strlen((char *)here); - if (dlen > 0 && here[dlen - 1] == '\n') return start; + dlen = strlen((char *)here); here += dlen; + + /* Check for end of line reached. Take care not to read data from before + start (dlen will be zero for a file starting with a binary zero). */ + + if (here > start && here[-1] == '\n') return start; + + /* If we have not read a newline when reading a file, we have either filled + the buffer or reached the end of the file. We can detect the former by + checking that the string fills the buffer, and the latter by feof(). If + neither of these is true, it means we read a binary zero which has caused + strlen() to give a short length. This is a hard error because pcre2test + expects to work with C strings. 
*/ + + if (!INTERACTIVE(f) && dlen < rlen - 1 && !feof(f)) + { + fprintf(outfile, "** Binary zero encountered in input\n"); + fprintf(outfile, "** pcre2test run abandoned\n"); + exit(1); + } } else { - int new_pbuffer8_size = 2*pbuffer8_size; - uint8_t *new_buffer = (uint8_t *)malloc(new_pbuffer8_size); - uint8_t *new_pbuffer8 = (uint8_t *)malloc(new_pbuffer8_size); - - if (new_buffer == NULL || new_pbuffer8 == NULL) - { - fprintf(stderr, "pcre2test: malloc(%d) failed\n", new_pbuffer8_size); - exit(1); - } - - memcpy(new_buffer, buffer, pbuffer8_size); - memcpy(new_pbuffer8, pbuffer8, pbuffer8_size); - - pbuffer8_size = new_pbuffer8_size; - - start = new_buffer + (start - buffer); - here = new_buffer + (here - buffer); - - free(buffer); - free(pbuffer8); - - buffer = new_buffer; - pbuffer8 = new_pbuffer8; + size_t start_offset = start - buffer; + size_t here_offset = here - buffer; + expand_input_buffers(); + start = buffer + start_offset; + here = buffer + here_offset; } } @@ -2874,33 +3167,6 @@ return 0; -/************************************************* -* Read number from string * -*************************************************/ - -/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess -around with conditional compilation, just do the job by hand. It is only used -for unpicking arguments, so just keep it simple. 
- -Arguments: - str string to be converted - endptr where to put the end pointer - -Returns: the unsigned long -*/ - -static int -get_value(const char *str, const char **endptr) -{ -int result = 0; -while(*str != 0 && isspace(*str)) str++; -while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0'); -*endptr = str; -return(result); -} - - - /************************************************* * Scan the main modifier list * *************************************************/ @@ -2928,7 +3194,7 @@ while (top > bot) if (c == 0) { if (len == mlen) return mid; - c = len - mlen; + c = (int)len - (int)mlen; } if (c > 0) bot = mid + 1; else top = mid; } @@ -3050,6 +3316,8 @@ static BOOL decode_modifiers(uint8_t *p, int ctx, patctl *pctl, datctl *dctl) { uint8_t *ep, *pp; +long li; +unsigned long uli; BOOL first = TRUE; for (;;) @@ -3066,9 +3334,14 @@ for (;;) while (isspace(*p) || *p == ',') p++; if (*p == 0) break; - /* Find the end of the item. */ + /* Find the end of the item; lose trailing whitespace at end of line. */ - for (ep = p; *ep != 0 && *ep != ',' && !isspace(*ep); ep++); + for (ep = p; *ep != 0 && *ep != ','; ep++); + if (*ep == 0) + { + while (ep > p && isspace(ep[-1])) ep--; + *ep = 0; + } /* Remember if the first character is '-'. 
*/ @@ -3192,8 +3465,8 @@ for (;;) #else *((uint16_t *)field) = PCRE2_BSR_UNICODE; #endif - if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control &= ~CTL_BSR_SET; - else dctl->control &= ~CTL_BSR_SET; + if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL_BSR_SET; + else dctl->control2 &= ~CTL_BSR_SET; } else { @@ -3202,21 +3475,42 @@ for (;;) else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0) *((uint16_t *)field) = PCRE2_BSR_UNICODE; else goto INVALID_VALUE; - if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control |= CTL_BSR_SET; - else dctl->control |= CTL_BSR_SET; + if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL_BSR_SET; + else dctl->control2 |= CTL_BSR_SET; } pp = ep; break; case MOD_IN2: /* One or two unsigned integers */ if (!isdigit(*pp)) goto INVALID_VALUE; - ((uint32_t *)field)[0] = (uint32_t)strtoul((const char *)pp, &endptr, 10); + uli = strtoul((const char *)pp, &endptr, 10); + if (U32OVERFLOW(uli)) goto INVALID_VALUE; + ((uint32_t *)field)[0] = (uint32_t)uli; if (*endptr == ':') - ((uint32_t *)field)[1] = (uint32_t)strtoul((const char *)endptr+1, &endptr, 10); + { + uli = strtoul((const char *)endptr+1, &endptr, 10); + if (U32OVERFLOW(uli)) goto INVALID_VALUE; + ((uint32_t *)field)[1] = (uint32_t)uli; + } else ((uint32_t *)field)[1] = 0; pp = (uint8_t *)endptr; break; + /* PCRE2_SIZE_MAX is usually SIZE_MAX, which may be greater, equal to, or + less than ULONG_MAX. So first test for overflowing the long int, and then + test for overflowing PCRE2_SIZE_MAX if it is smaller than ULONG_MAX. 
*/ + + case MOD_SIZ: /* PCRE2_SIZE value */ + if (!isdigit(*pp)) goto INVALID_VALUE; + uli = strtoul((const char *)pp, &endptr, 10); + if (uli == ULONG_MAX) goto INVALID_VALUE; +#if ULONG_MAX > PCRE2_SIZE_MAX + if (uli > PCRE2_SIZE_MAX) goto INVALID_VALUE; +#endif + *((PCRE2_SIZE *)field) = (PCRE2_SIZE)uli; + pp = (uint8_t *)endptr; + break; + case MOD_IND: /* Unsigned integer with default */ if (len == 0) { @@ -3227,13 +3521,17 @@ for (;;) case MOD_INT: /* Unsigned integer */ if (!isdigit(*pp)) goto INVALID_VALUE; - *((uint32_t *)field) = (uint32_t)strtoul((const char *)pp, &endptr, 10); + uli = strtoul((const char *)pp, &endptr, 10); + if (U32OVERFLOW(uli)) goto INVALID_VALUE; + *((uint32_t *)field) = (uint32_t)uli; pp = (uint8_t *)endptr; break; case MOD_INS: /* Signed integer */ if (!isdigit(*pp) && *pp != '-') goto INVALID_VALUE; - *((int32_t *)field) = (int32_t)strtol((const char *)pp, &endptr, 10); + li = strtol((const char *)pp, &endptr, 10); + if (S32OVERFLOW(li)) goto INVALID_VALUE; + *((int32_t *)field) = (int32_t)li; pp = (uint8_t *)endptr; break; @@ -3245,14 +3543,14 @@ for (;;) if (i == 0) { *((uint16_t *)field) = NEWLINE_DEFAULT; - if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control &= ~CTL_NL_SET; - else dctl->control &= ~CTL_NL_SET; + if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL_NL_SET; + else dctl->control2 &= ~CTL_NL_SET; } else { *((uint16_t *)field) = i; - if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control |= CTL_NL_SET; - else dctl->control |= CTL_NL_SET; + if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL_NL_SET; + else dctl->control2 |= CTL_NL_SET; } pp = ep; break; @@ -3261,7 +3559,10 @@ for (;;) if (isdigit(*pp) || *pp == '-') { int ct = MAXCPYGET - 1; - int32_t value = (int32_t)strtol((const char *)pp, &endptr, 10); + int32_t value; + li = strtol((const char *)pp, &endptr, 10); + if (S32OVERFLOW(li)) goto INVALID_VALUE; + value = (int32_t)li; field = (char *)field - m->offset + m->value; /* Adjust 
field ptr */ if (value >= 0) /* Add new number */ { @@ -3285,10 +3586,16 @@ for (;;) char *nn = (char *)field; if (len > 0) /* Add new name */ { - while (*nn != 0) nn += strlen(nn) + 1; - if (nn + len + 1 - (char *)field > LENCPYGET) + if (len > MAX_NAME_SIZE) { - fprintf(outfile, "** Too many named '%s' modifiers\n", m->name); + fprintf(outfile, "** Group name in '%s' is too long\n", m->name); + return FALSE; + } + while (*nn != 0) nn += strlen(nn) + 1; + if (nn + len + 2 - (char *)field > LENCPYGET) + { + fprintf(outfile, "** Too many characters in named '%s' modifiers\n", + m->name); return FALSE; } memcpy(nn, pp, len); @@ -3405,15 +3712,16 @@ words. Arguments: controls control bits + controls2 more control bits before text to print before Returns: nothing */ static void -show_controls(uint32_t controls, const char *before) +show_controls(uint32_t controls, uint32_t controls2, const char *before) { -fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", +fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", before, ((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "", ((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "", @@ -3421,10 +3729,12 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", ((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "", ((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "", ((controls & CTL_BINCODE) != 0)? " bincode" : "", + ((controls2 & CTL_BSR_SET) != 0)? " bsr" : "", ((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "", ((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "", ((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "", ((controls & CTL_DFA) != 0)? " dfa" : "", + ((controls & CTL_EXPAND) != 0)? " expand" : "", ((controls & CTL_FINDLIMITS) != 0)? " find_limits" : "", ((controls & CTL_FULLBINCODE) != 0)? " fullbincode" : "", ((controls & CTL_GETALL) != 0)? 
" getall" : "", @@ -3435,9 +3745,20 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", ((controls & CTL_JITVERIFY) != 0)? " jitverify" : "", ((controls & CTL_MARK) != 0)? " mark" : "", ((controls & CTL_MEMORY) != 0)? " memory" : "", + ((controls2 & CTL_NL_SET) != 0)? " newline" : "", + ((controls & CTL_NULLCONTEXT) != 0)? " null_context" : "", ((controls & CTL_POSIX) != 0)? " posix" : "", + ((controls & CTL_POSIX_NOSUB) != 0)? " posix_nosub" : "", ((controls & CTL_PUSH) != 0)? " push" : "", + ((controls & CTL_PUSHCOPY) != 0)? " pushcopy" : "", + ((controls & CTL_PUSHTABLESCOPY) != 0)? " pushtablescopy" : "", ((controls & CTL_STARTCHAR) != 0)? " startchar" : "", + ((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "", + ((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "", + ((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "", + ((controls2 & CTL2_SUBSTITUTE_UNSET_EMPTY) != 0)? " substitute_unset_empty" : "", + ((controls & CTL_USE_LENGTH) != 0)? " use_length" : "", + ((controls & CTL_UTF8_INPUT) != 0)? " utf8_input" : "", ((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : ""); } @@ -3461,10 +3782,11 @@ static void show_compile_options(uint32_t options, const char *before, const char *after) { if (options == 0) fprintf(outfile, "%s %s", before, after); -else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", +else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", before, ((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "", ((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "", + ((options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "", ((options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "", ((options & PCRE2_ANCHORED) != 0)? " anchored" : "", ((options & PCRE2_AUTO_CALLOUT) != 0)? 
" auto_callout" : "", @@ -3486,6 +3808,7 @@ else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", ((options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "", ((options & PCRE2_UCP) != 0)? " ucp" : "", ((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "", + ((options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "", ((options & PCRE2_UTF) != 0)? " utf" : "", after); } @@ -3528,14 +3851,18 @@ show_memory_info(void) uint32_t name_count, name_entry_size; size_t size, cblock_size; +/* One of the test_mode values will always be true, but to stop a compiler +warning we must initialize cblock_size. */ + +cblock_size = 0; #ifdef SUPPORT_PCRE2_8 -if (test_mode == 8) cblock_size = sizeof(pcre2_real_code_8); +if (test_mode == PCRE8_MODE) cblock_size = sizeof(pcre2_real_code_8); #endif #ifdef SUPPORT_PCRE2_16 -if (test_mode == 16) cblock_size = sizeof(pcre2_real_code_16); +if (test_mode == PCRE16_MODE) cblock_size = sizeof(pcre2_real_code_16); #endif #ifdef SUPPORT_PCRE2_32 -if (test_mode == 32) cblock_size = sizeof(pcre2_real_code_32); +if (test_mode == PCRE32_MODE) cblock_size = sizeof(pcre2_real_code_32); #endif (void)pattern_info(PCRE2_INFO_SIZE, &size, FALSE); @@ -3629,12 +3956,13 @@ if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0) if ((pat_patctl.control & CTL_INFO) != 0) { - const void *nametable; - const uint8_t *start_bits; + void *nametable; + uint8_t *start_bits; BOOL match_limit_set, recursion_limit_set; uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit, - hascrorlf, jchanged, last_ctype, last_cunit, match_empty, match_limit, - minlength, nameentrysize, namecount, newline_convention, recursion_limit; + hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty, + match_limit, minlength, nameentrysize, namecount, newline_convention, + recursion_limit; /* These info requests may return PCRE2_ERROR_UNSET. 
*/ @@ -3674,6 +4002,7 @@ if ((pat_patctl.control & CTL_INFO) != 0) pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits, FALSE) + pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit, FALSE) + pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype, FALSE) + + pattern_info(PCRE2_INFO_HASBACKSLASHC, &hasbackslashc, FALSE) + pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf, FALSE) + pattern_info(PCRE2_INFO_JCHANGED, &jchanged, FALSE) + pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) + @@ -3704,7 +4033,7 @@ if ((pat_patctl.control & CTL_INFO) != 0) if (namecount > 0) { fprintf(outfile, "Named capturing subpatterns:\n"); - while (namecount-- > 0) + for (; namecount > 0; namecount--) { int imm2_size = test_mode == PCRE8_MODE ? 2 : 1; uint32_t length = (uint32_t)STRLEN(nametable + imm2_size); @@ -3728,8 +4057,9 @@ if ((pat_patctl.control & CTL_INFO) != 0) } } - if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n"); - if (match_empty) fprintf(outfile, "May match empty string\n"); + if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n"); + if (hasbackslashc) fprintf(outfile, "Contains \\C\n"); + if (match_empty) fprintf(outfile, "May match empty string\n"); pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE); pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE); @@ -3762,13 +4092,12 @@ if ((pat_patctl.control & CTL_INFO) != 0) if (jchanged) fprintf(outfile, "Duplicate name status changes\n"); - if ((pat_patctl.control & CTL_BSR_SET) != 0 || + if ((pat_patctl.control2 & CTL_BSR_SET) != 0 || (FLD(compiled_code, flags) & PCRE2_BSR_SET) != 0) fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)? 
"any Unicode newline" : "CR, LF, or CRLF"); - if ((pat_patctl.control & CTL_NL_SET) != 0 || - (FLD(compiled_code, flags) & PCRE2_NL_SET) != 0) + if ((FLD(compiled_code, flags) & PCRE2_NL_SET) != 0) { switch (newline_convention) { @@ -3866,11 +4195,22 @@ if ((pat_patctl.control & CTL_INFO) != 0) if (FLD(compiled_code, executable_jit) != NULL) fprintf(outfile, "JIT compilation was successful\n"); else + { #ifdef SUPPORT_JIT - fprintf(outfile, "JIT compilation was not successful\n"); + int len; + fprintf(outfile, "JIT compilation was not successful"); + if (jitrc != 0) + { + fprintf(outfile, " ("); + PCRE2_GET_ERROR_MESSAGE(len, jitrc, pbuffer); + PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile); + fprintf(outfile, ")"); + } + fprintf(outfile, "\n"); #else fprintf(outfile, "JIT support is not available in this version of PCRE2\n"); #endif + } } } @@ -3954,7 +4294,7 @@ if (endf == filename) *fptr = fopen((const char *)filename, mode); if (*fptr == NULL) { - fprintf(outfile, "** Failed to open '%s'\n", filename); + fprintf(outfile, "** Failed to open '%s': %s\n", filename, strerror(errno)); return PR_ABEND; } @@ -3985,6 +4325,7 @@ FILE *f; PCRE2_SIZE serial_size; size_t i; int rc, cmd, cmdlen; +uint16_t first_listed_newline; const char *cmdname; uint8_t *argptr, *serial; @@ -4039,11 +4380,37 @@ switch(cmd) (void)decode_modifiers(argptr, CTX_DEFDAT, NULL, &def_datctl); break; - /* Pop a compiled pattern off the stack. Modifiers that do not affect the - compiled pattern (e.g. to give information) are permitted. The default + /* Check the default newline, and if not one of those listed, set up the + first one to be forced. An empty list unsets. 
*/ + + case CMD_NEWLINE_DEFAULT: + local_newline_default = 0; /* Unset */ + first_listed_newline = 0; + for (;;) + { + while (isspace(*argptr)) argptr++; + if (*argptr == 0) break; + for (i = 1; i < sizeof(newlines)/sizeof(char *); i++) + { + size_t nlen = strlen(newlines[i]); + if (strncmpic(argptr, (const uint8_t *)newlines[i], nlen) == 0 && + isspace(argptr[nlen])) + { + if (i == NEWLINE_DEFAULT) return PR_OK; /* Default is valid */ + if (first_listed_newline == 0) first_listed_newline = i; + } + } + while (*argptr != 0 && !isspace(*argptr)) argptr++; + } + local_newline_default = first_listed_newline; + break; + + /* Pop or copy a compiled pattern off the stack. Modifiers that do not affect + the compiled pattern (e.g. to give information) are permitted. The default pattern modifiers are ignored. */ case CMD_POP: + case CMD_POPCOPY: if (patstacknext <= 0) { fprintf(outfile, "** Can't pop off an empty stack\n"); @@ -4052,10 +4419,19 @@ switch(cmd) memset(&pat_patctl, 0, sizeof(patctl)); /* Completely unset */ if (!decode_modifiers(argptr, CTX_POPPAT, &pat_patctl, NULL)) return PR_SKIP; - SET(compiled_code, patstack[--patstacknext]); + + if (cmd == CMD_POP) + { + SET(compiled_code, patstack[--patstacknext]); + } + else + { + PCRE2_CODE_COPY_FROM_VOID(compiled_code, patstack[patstacknext - 1]); + } + if (pat_patctl.jit != 0) { - PCRE2_JIT_COMPILE(compiled_code, pat_patctl.jit); + PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit); } if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info(); if ((pat_patctl.control & CTL_ANYINFO) != 0) @@ -4128,6 +4504,7 @@ switch(cmd) if (fread(serial, 1, serial_size, f) != serial_size) { fprintf(outfile, "** Wrong return from fread()\n"); + free(serial); return PR_ABEND; } fclose(f); @@ -4178,11 +4555,13 @@ static int process_pattern(void) { BOOL utf; +uint32_t k; uint8_t *p = buffer; -const uint8_t *use_tables; unsigned int delimiter = *p++; int errorcode; +void *use_pat_context; PCRE2_SIZE patlen; +PCRE2_SIZE 
valgrind_access_length; PCRE2_SIZE erroroffset; /* Initialize the context and pattern/data controls for this test from the @@ -4226,6 +4605,37 @@ patlen = p - buffer - 2; if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP; utf = (pat_patctl.options & PCRE2_UTF) != 0; +/* The utf8_input modifier is not allowed in 8-bit mode, and is mutually +exclusive with the utf modifier. */ + +if ((pat_patctl.control & CTL_UTF8_INPUT) != 0) + { + if (test_mode == PCRE8_MODE) + { + fprintf(outfile, "** The utf8_input modifier is not allowed in 8-bit mode\n"); + return PR_SKIP; + } + if (utf) + { + fprintf(outfile, "** The utf and utf8_input modifiers are mutually exclusive\n"); + return PR_SKIP; + } + } + +/* Check for mutually exclusive modifiers. At present, these are all in the +first control word. */ + +for (k = 0; k < sizeof(exclusive_pat_controls)/sizeof(uint32_t); k++) + { + uint32_t c = pat_patctl.control & exclusive_pat_controls[k]; + if (c != 0 && c != (c & (~c+1))) + { + show_controls(c, 0, "** Not allowed together:"); + fprintf(outfile, "\n"); + return PR_SKIP; + } + } + /* Assume full JIT compile for jitverify and/or jitfast if nothing else was specified. */ @@ -4233,50 +4643,159 @@ if (pat_patctl.jit == 0 && (pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0) pat_patctl.jit = 7; -/* POSIX and 'push' do not play together. */ - -if ((pat_patctl.control & (CTL_POSIX|CTL_PUSH)) == (CTL_POSIX|CTL_PUSH)) - { - fprintf(outfile, "** The POSIX interface is incompatible with 'push'\n"); - return PR_ABEND; - } - /* Now copy the pattern to pbuffer8 for use in 8-bit testing and for reflecting -in callouts. Convert to binary if required. */ +in callouts. Convert from hex if requested (literal strings in quotes may be +present within the hexadecimal pairs). The result must necessarily be fewer +characters so will always fit in pbuffer8. 
*/ if ((pat_patctl.control & CTL_HEXPAT) != 0) { uint8_t *pp, *pt; uint32_t c, d; - if ((pat_patctl.control & CTL_POSIX) != 0) - { - fprintf(outfile, "** Hex patterns are not supported for the POSIX API\n"); - return PR_SKIP; - } - pt = pbuffer8; for (pp = buffer + 1; *pp != 0; pp++) { if (isspace(*pp)) continue; - c = toupper(*pp++); - if (*pp == 0) + c = *pp++; + + /* Handle a literal substring */ + + if (c == '\'' || c == '"') { - fprintf(outfile, "** Odd number of digits in hex pattern.\n"); - return PR_SKIP; + uint8_t *pq = pp; + for (;; pp++) + { + d = *pp; + if (d == 0) + { + fprintf(outfile, "** Missing closing quote in hex pattern: " + "opening quote is at offset " PTR_SPEC ".\n", pq - buffer - 2); + return PR_SKIP; + } + if (d == c) break; + *pt++ = d; + } } - d = toupper(*pp); - if (!isxdigit(c) || !isxdigit(d)) + + /* Expect a hex pair */ + + else { - fprintf(outfile, "** Non-hex-digit in hex pattern.\n"); - return PR_SKIP; + if (!isxdigit(c)) + { + fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset " + PTR_SPEC " in hex pattern: quote missing?\n", c, pp - buffer - 2); + return PR_SKIP; + } + if (*pp == 0) + { + fprintf(outfile, "** Odd number of digits in hex pattern\n"); + return PR_SKIP; + } + d = *pp; + if (!isxdigit(d)) + { + fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset " + PTR_SPEC " in hex pattern: quote missing?\n", d, pp - buffer - 1); + return PR_SKIP; + } + c = toupper(c); + d = toupper(d); + *pt++ = ((isdigit(c)? (c - '0') : (c - 'A' + 10)) << 4) + + (isdigit(d)? (d - '0') : (d - 'A' + 10)); } - *pt++ = ((isdigit(c)? (c - '0') : (c - 'A' + 10)) << 4) + - (isdigit(d)? (d - '0') : (d - 'A' + 10)); } *pt = 0; patlen = pt - pbuffer8; } + +/* If not a hex string, process for repetition expansion if requested. 
*/ + +else if ((pat_patctl.control & CTL_EXPAND) != 0) + { + uint8_t *pp, *pt; + + pt = pbuffer8; + for (pp = buffer + 1; *pp != 0; pp++) + { + uint8_t *pc = pp; + uint32_t count = 1; + size_t length = 1; + + /* Check for replication syntax; if not found, the defaults just set will + prevail and one character will be copied. */ + + if (pp[0] == '\\' && pp[1] == '[') + { + uint8_t *pe; + for (pe = pp + 2; *pe != 0; pe++) + { + if (pe[0] == ']' && pe[1] == '{') + { + uint32_t clen = pe - pc - 2; + uint32_t i = 0; + unsigned long uli; + char *endptr; + + pe += 2; + uli = strtoul((const char *)pe, &endptr, 10); + if (U32OVERFLOW(uli)) + { + fprintf(outfile, "** Pattern repeat count too large\n"); + return PR_SKIP; + } + + i = (uint32_t)uli; + pe = (uint8_t *)endptr; + if (*pe == '}') + { + if (i == 0) + { + fprintf(outfile, "** Zero repeat not allowed\n"); + return PR_SKIP; + } + pc += 2; + count = i; + length = clen; + pp = pe; + break; + } + } + } + } + + /* Add to output. If the buffer is too small expand it. The function for + expanding buffers always keeps buffer and pbuffer8 in step as far as their + size goes. */ + + while (pt + count * length > pbuffer8 + pbuffer8_size) + { + size_t pc_offset = pc - buffer; + size_t pp_offset = pp - buffer; + size_t pt_offset = pt - pbuffer8; + expand_input_buffers(); + pc = buffer + pc_offset; + pp = buffer + pp_offset; + pt = pbuffer8 + pt_offset; + } + + for (; count > 0; count--) + { + memcpy(pt, pc, length); + pt += length; + } + } + + *pt = 0; + patlen = pt - pbuffer8; + + if ((pat_patctl.control & CTL_INFO) != 0) + fprintf(outfile, "Expanded: %s\n", pbuffer8); + } + +/* Neither hex nor expanded, just copy the input verbatim. 
*/ + else { strncpy((char *)pbuffer8, (char *)(buffer+1), patlen + 1); @@ -4288,7 +4807,7 @@ if (pat_patctl.locale[0] != 0) { if (pat_patctl.tables_id != 0) { - fprintf(outfile, "** 'Locale' and 'tables' must not both be set.\n"); + fprintf(outfile, "** 'Locale' and 'tables' must not both be set\n"); return PR_SKIP; } if (setlocale(LC_CTYPE, (const char *)pat_patctl.locale) == NULL) @@ -4336,7 +4855,7 @@ if ((pat_patctl.control & CTL_POSIX) != 0) const char *msg = "** Ignored with POSIX interface:"; #endif - if (test_mode != 8) + if (test_mode != PCRE8_MODE) { fprintf(outfile, "** The POSIX interface is available only in 8-bit mode\n"); return PR_SKIP; @@ -4358,32 +4877,83 @@ if ((pat_patctl.control & CTL_POSIX) != 0) pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS, msg, ""); msg = ""; } - if ((pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS) != 0) + if ((pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS) != 0 || + (pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2) != 0) { - show_controls(pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS, msg); + show_controls(pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS, + pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2, msg); msg = ""; } + if (local_newline_default != 0) prmsg(&msg, "#newline_default"); + if (msg[0] == 0) fprintf(outfile, "\n"); - /* Translate PCRE2 options to POSIX options and then compile. On success, set - up a match_data block to be used for all matches. */ + /* Translate PCRE2 options to POSIX options and then compile. 
*/ if (utf) cflags |= REG_UTF; + if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0) cflags |= REG_NOSUB; if ((pat_patctl.options & PCRE2_UCP) != 0) cflags |= REG_UCP; if ((pat_patctl.options & PCRE2_CASELESS) != 0) cflags |= REG_ICASE; if ((pat_patctl.options & PCRE2_MULTILINE) != 0) cflags |= REG_NEWLINE; if ((pat_patctl.options & PCRE2_DOTALL) != 0) cflags |= REG_DOTALL; - if ((pat_patctl.options & PCRE2_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB; if ((pat_patctl.options & PCRE2_UNGREEDY) != 0) cflags |= REG_UNGREEDY; rc = regcomp(&preg, (char *)pbuffer8, cflags); - if (rc != 0) /* Failure */ + + /* Compiling failed */ + + if (rc != 0) { - (void)regerror(rc, &preg, (char *)pbuffer8, pbuffer8_size); - fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, pbuffer8); + size_t bsize, usize; + int psize; + + preg.re_pcre2_code = NULL; /* In case something was left in there */ + preg.re_match_data = NULL; + + bsize = (pat_patctl.regerror_buffsize != 0)? + pat_patctl.regerror_buffsize : pbuffer8_size; + if (bsize + 8 < pbuffer8_size) + memcpy(pbuffer8 + bsize, "DEADBEEF", 8); + usize = regerror(rc, &preg, (char *)pbuffer8, bsize); + + /* Inside regerror(), snprintf() is used. If the buffer is too small, some + versions of snprintf() put a zero byte at the end, but others do not. + Therefore, we print a maximum of one less than the size of the buffer. */ + + psize = (int)bsize - 1; + fprintf(outfile, "Failed: POSIX code %d: %.*s\n", rc, psize, pbuffer8); + if (usize > bsize) + { + fprintf(outfile, "** regerror() message truncated\n"); + if (memcmp(pbuffer8 + bsize, "DEADBEEF", 8) != 0) + fprintf(outfile, "** regerror() buffer overflow\n"); + } return PR_SKIP; } + + /* Compiling succeeded. Check that the values in the preg block are sensible. + It can happen that pcre2test is accidentally linked with a different POSIX + library which succeeds, but of course puts different things into preg. 
In + this situation, calling regfree() may cause a segfault (or invalid free() in + valgrind), so ensure that preg.re_pcre2_code is NULL, which suppresses the + calling of regfree() on exit. */ + + if (preg.re_pcre2_code == NULL || + ((pcre2_real_code_8 *)preg.re_pcre2_code)->magic_number != MAGIC_NUMBER || + ((pcre2_real_code_8 *)preg.re_pcre2_code)->top_bracket != preg.re_nsub || + preg.re_match_data == NULL || + preg.re_cflags != cflags) + { + fprintf(outfile, + "** The regcomp() function returned zero (success), but the values set\n" + "** in the preg block are not valid for PCRE2. Check that pcre2test is\n" + "** linked with PCRE2's pcre2posix module (-lpcre2-posix) and not with\n" + "** some other POSIX regex library.\n**\n"); + preg.re_pcre2_code = NULL; + return PR_ABEND; + } + return PR_OK; #endif /* SUPPORT_PCRE2_8 */ } @@ -4391,22 +4961,26 @@ if ((pat_patctl.control & CTL_POSIX) != 0) /* Handle compiling via the native interface. Controls that act later are ignored with "push". Replacements are locked out. 
*/ -if ((pat_patctl.control & CTL_PUSH) != 0) +if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0) { if (pat_patctl.replacement[0] != 0) { fprintf(outfile, "** Replacement text is not supported with 'push'.\n"); return PR_OK; } - if ((pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS) != 0) + if ((pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS) != 0 || + (pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2) != 0) { show_controls(pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS, + pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2, "** Ignored when compiled pattern is stacked with 'push':"); fprintf(outfile, "\n"); } - if ((pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS) != 0) + if ((pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS) != 0 || + (pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2) != 0) { show_controls(pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS, + pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2, "** Applies only to compile when pattern is stacked with 'push':"); fprintf(outfile, "\n"); } @@ -4414,9 +4988,7 @@ if ((pat_patctl.control & CTL_PUSH) != 0) /* Convert the input in non-8-bit modes. */ -#ifdef SUPPORT_PCRE2_8 -if (test_mode == PCRE8_MODE) errorcode = 0; -#endif +errorcode = 0; #ifdef SUPPORT_PCRE2_16 if (test_mode == PCRE16_MODE) errorcode = to16(pbuffer8, utf, &patlen); @@ -4447,23 +5019,71 @@ switch(errorcode) break; } -/* The pattern is now in pbuffer[8|16|32], with the length in patlen. By -default, however, we pass a zero-terminated pattern. The length is passed only -if we had a hex pattern. */ +/* The pattern is now in pbuffer[8|16|32], with the length in code units in +patlen. By default we pass a zero-terminated pattern, but a length is passed if +"use_length" was specified or this is a hex pattern (which might contain binary +zeros). When valgrind is supported, arrange for the unused part of the buffer +to be marked as no access. 
*/ -if ((pat_patctl.control & CTL_HEXPAT) == 0) patlen = PCRE2_ZERO_TERMINATED; +valgrind_access_length = patlen; +if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) == 0) + { + patlen = PCRE2_ZERO_TERMINATED; + valgrind_access_length += 1; /* For the terminating zero */ + } + +#ifdef SUPPORT_VALGRIND +#ifdef SUPPORT_PCRE2_8 +if (test_mode == PCRE8_MODE && pbuffer8 != NULL) + { + VALGRIND_MAKE_MEM_NOACCESS(pbuffer8 + valgrind_access_length, + pbuffer8_size - valgrind_access_length); + } +#endif +#ifdef SUPPORT_PCRE2_16 +if (test_mode == PCRE16_MODE && pbuffer16 != NULL) + { + VALGRIND_MAKE_MEM_NOACCESS(pbuffer16 + valgrind_access_length, + pbuffer16_size - valgrind_access_length*sizeof(uint16_t)); + } +#endif +#ifdef SUPPORT_PCRE2_32 +if (test_mode == PCRE32_MODE && pbuffer32 != NULL) + { + VALGRIND_MAKE_MEM_NOACCESS(pbuffer32 + valgrind_access_length, + pbuffer32_size - valgrind_access_length*sizeof(uint32_t)); + } +#endif +#else /* Valgrind not supported */ +(void)valgrind_access_length; /* Avoid compiler warning */ +#endif + +/* If #newline_default has been used and the library was not compiled with an +appropriate default newline setting, local_newline_default will be non-zero. We +use this if there is no explicit newline modifier. */ + +if ((pat_patctl.control2 & CTL_NL_SET) == 0 && local_newline_default != 0) + { + SETFLD(pat_context, newline_convention, local_newline_default); + } + +/* The null_context modifier is used to test calling pcre2_compile() with a +NULL context. */ + +use_pat_context = ((pat_patctl.control & CTL_NULLCONTEXT) != 0)? + NULL : PTR(pat_context); /* Compile many times when timing. 
*/ if (timeit > 0) { - register int i; + int i; clock_t time_taken = 0; for (i = 0; i < timeit; i++) { clock_t start_time = clock(); PCRE2_COMPILE(compiled_code, pbuffer, patlen, - pat_patctl.options|forbid_utf, &errorcode, &erroroffset, pat_context); + pat_patctl.options|forbid_utf, &errorcode, &erroroffset, use_pat_context); time_taken += clock() - start_time; if (TEST(compiled_code, !=, NULL)) { SUB1(pcre2_code_free, compiled_code); } @@ -4477,7 +5097,66 @@ if (timeit > 0) /* A final compile that is used "for real". */ PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options|forbid_utf, - &errorcode, &erroroffset, pat_context); + &errorcode, &erroroffset, use_pat_context); + +/* Call the JIT compiler if requested. When timing, we must free and recompile +the pattern each time because that is the only way to free the JIT compiled +code. We know that compilation will always succeed. */ + +if (TEST(compiled_code, !=, NULL) && pat_patctl.jit != 0) + { + if (timeit > 0) + { + int i; + clock_t time_taken = 0; + for (i = 0; i < timeit; i++) + { + clock_t start_time; + SUB1(pcre2_code_free, compiled_code); + PCRE2_COMPILE(compiled_code, pbuffer, patlen, + pat_patctl.options|forbid_utf, &errorcode, &erroroffset, + use_pat_context); + start_time = clock(); + PCRE2_JIT_COMPILE(jitrc,compiled_code, pat_patctl.jit); + time_taken += clock() - start_time; + } + total_jit_compile_time += time_taken; + fprintf(outfile, "JIT compile %.4f milliseconds\n", + (((double)time_taken * 1000.0) / (double)timeit) / + (double)CLOCKS_PER_SEC); + } + else + { + PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit); + } + } + +/* If valgrind is supported, mark the pbuffer as accessible again. The 16-bit +and 32-bit buffers can be marked completely undefined, but we must leave the +pattern in the 8-bit buffer defined because it may be read from a callout +during matching. 
*/ + +#ifdef SUPPORT_VALGRIND +#ifdef SUPPORT_PCRE2_8 +if (test_mode == PCRE8_MODE) + { + VALGRIND_MAKE_MEM_UNDEFINED(pbuffer8 + valgrind_access_length, + pbuffer8_size - valgrind_access_length); + } +#endif +#ifdef SUPPORT_PCRE2_16 +if (test_mode == PCRE16_MODE) + { + VALGRIND_MAKE_MEM_UNDEFINED(pbuffer16, pbuffer16_size); + } +#endif +#ifdef SUPPORT_PCRE2_32 +if (test_mode == PCRE32_MODE) + { + VALGRIND_MAKE_MEM_UNDEFINED(pbuffer32, pbuffer32_size); + } +#endif +#endif /* Compilation failed; go back for another re, skipping to blank line if non-interactive. */ @@ -4512,35 +5191,12 @@ if (forbid_utf != 0) if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0) return PR_ABEND; -/* Call the JIT compiler if requested. When timing, we must free and recompile -the pattern each time because that is the only way to free the JIT compiled -code. We know that compilation will always succeed. */ +/* If an explicit newline modifier was given, set the information flag in the +pattern so that it is preserved over push/pop. */ -if (pat_patctl.jit != 0) +if ((pat_patctl.control2 & CTL_NL_SET) != 0) { - if (timeit > 0) - { - register int i; - clock_t time_taken = 0; - for (i = 0; i < timeit; i++) - { - clock_t start_time; - SUB1(pcre2_code_free, compiled_code); - PCRE2_COMPILE(compiled_code, pbuffer, patlen, - pat_patctl.options|forbid_utf, &errorcode, &erroroffset, pat_context); - start_time = clock(); - PCRE2_JIT_COMPILE(compiled_code, pat_patctl.jit); - time_taken += clock() - start_time; - } - total_jit_compile_time += time_taken; - fprintf(outfile, "JIT compile %.4f milliseconds\n", - (((double)time_taken * 1000.0) / (double)timeit) / - (double)CLOCKS_PER_SEC); - } - else - { - PCRE2_JIT_COMPILE(compiled_code, pat_patctl.jit); - } + SETFLD(compiled_code, flags, FLD(compiled_code, flags) | PCRE2_NL_SET); } /* Output code size and other information if requested. 
*/ @@ -4566,6 +5222,27 @@ if ((pat_patctl.control & CTL_PUSH) != 0) SET(compiled_code, NULL); } +/* The "pushcopy" and "pushtablescopy" controls are similar, but push a +copy of the pattern, the latter with a copy of its character tables. This tests +the pcre2_code_copy() and pcre2_code_copy_with_tables() functions. */ + +if ((pat_patctl.control & (CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0) + { + if (patstacknext >= PATSTACKSIZE) + { + fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE); + return PR_ABEND; + } + if ((pat_patctl.control & CTL_PUSHCOPY) != 0) + { + PCRE2_CODE_COPY_TO_VOID(patstack[patstacknext++], compiled_code); + } + else + { + PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(patstack[patstacknext++], + compiled_code); } + } + return PR_OK; } @@ -4599,10 +5276,10 @@ for (;;) if ((pat_patctl.control & CTL_JITFAST) != 0) PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset, - dat_datctl.options, match_data, dat_context); + dat_datctl.options, match_data, PTR(dat_context)); else PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset, - dat_datctl.options, match_data, dat_context); + dat_datctl.options, match_data, PTR(dat_context)); if (capcount == errnumber) { @@ -4648,6 +5325,7 @@ static int callout_function(pcre2_callout_block_8 *cb, void *callout_data_ptr) { uint32_t i, pre_start, post_start, subject_length; +PCRE2_SIZE current_position; BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0; BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0; @@ -4698,22 +5376,37 @@ if (callout_capture) } } -/* Re-print the subject in canonical form, the first time or if giving full -datails. On subsequent calls in the same match, we use pchars just to find the -printed lengths of the substrings. */ +/* Re-print the subject in canonical form (with escapes for non-printing +characters), the first time, or if giving full details. 
On subsequent calls in +the same match, we use PCHARS() just to find the printed lengths of the +substrings. */ if (f != NULL) fprintf(f, "--->"); +/* The subject before the match start. */ + PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f); +/* If a lookbehind is involved, the current position may be earlier than the +match start. If so, use the match start instead. */ + +current_position = (cb->current_position >= cb->start_match)? + cb->current_position : cb->start_match; + +/* The subject between the match start and the current position. */ + PCHARS(post_start, cb->subject, cb->start_match, - cb->current_position - cb->start_match, utf, f); + current_position - cb->start_match, utf, f); + +/* Print from the current position to the end. */ + +PCHARSV(cb->subject, current_position, cb->subject_length - current_position, + utf, f); + +/* Calculate the total subject printed length (no print). */ PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL); -PCHARSV(cb->subject, cb->current_position, - cb->subject_length - cb->current_position, utf, f); - if (f != NULL) fprintf(f, "\n"); /* For automatic callouts, show the pattern offset. Otherwise, for a numerical @@ -4745,9 +5438,9 @@ if (post_start > 0) for (i = 0; i < subject_length - pre_start - post_start + 4; i++) fprintf(outfile, " "); -fprintf(outfile, "%.*s", - (int)((cb->next_item_length == 0)? 1 : cb->next_item_length), - pbuffer8 + cb->pattern_position); +if (cb->next_item_length != 0) + fprintf(outfile, "%.*s", (int)(cb->next_item_length), + pbuffer8 + cb->pattern_position); fprintf(outfile, "\n"); first_callout = FALSE; @@ -4775,8 +5468,17 @@ if (callout_data_ptr != NULL) } } -return (cb->callout_number != dat_datctl.cfail[0])? 0 : - (++callout_count >= dat_datctl.cfail[1])? 
1 : 0; +callout_count++; + +if (cb->callout_number == dat_datctl.cerror[0] && + callout_count >= dat_datctl.cerror[1]) + return PCRE2_ERROR_CALLOUT; + +if (cb->callout_number == dat_datctl.cfail[0] && + callout_count >= dat_datctl.cfail[1]) + return 1; + +return 0; } @@ -5025,10 +5727,11 @@ process_data(void) { PCRE2_SIZE len, ulen; uint32_t gmatched; -uint32_t c; +uint32_t c, k; uint32_t g_notempty = 0; uint8_t *p, *pp, *start_rep; size_t needlen; +void *use_dat_context; BOOL utf; #ifdef SUPPORT_PCRE2_8 @@ -5050,6 +5753,7 @@ matching. */ DATCTXCPY(dat_context, default_dat_context); memcpy(&dat_datctl, &def_datctl, sizeof(datctl)); dat_datctl.control |= (pat_patctl.control & CTL_ALLPD); +dat_datctl.control2 |= (pat_patctl.control2 & CTL2_ALLPD); strcpy((char *)dat_datctl.replacement, (char *)pat_patctl.replacement); /* Initialize for scanning the data line. */ @@ -5098,7 +5802,7 @@ if (dbuffer != NULL) the number of code units that will be needed (though the buffer may have to be extended if replication is involved). */ -needlen = (size_t)(len * code_unit_size); +needlen = (size_t)((len+1) * code_unit_size); if (dbuffer == NULL || needlen >= dbuffer_size) { while (needlen >= dbuffer_size) dbuffer_size *= 2; @@ -5112,17 +5816,21 @@ if (dbuffer == NULL || needlen >= dbuffer_size) SETCASTPTR(q, dbuffer); /* Sets q8, q16, or q32, as appropriate. */ /* Scan the data line, interpreting data escapes, and put the result into a -buffer of the appropriate width. In UTF mode, input can be UTF-8. */ +buffer of the appropriate width. In UTF mode, input is always UTF-8; otherwise, +in 16- and 32-bit modes, it can be forced to UTF-8 by the utf8_input modifier. 
+*/ while ((c = *p++) != 0) { - int i = 0; + int32_t i = 0; size_t replen; /* ] may mark the end of a replicated sequence */ if (c == ']' && start_rep != NULL) { + long li; + char *endptr; size_t qoffset = CAST8VAR(q) - dbuffer; size_t rep_offset = start_rep - dbuffer; @@ -5131,12 +5839,22 @@ while ((c = *p++) != 0) fprintf(outfile, "** Expected '{' after \\[....]\n"); return PR_OK; } - while (isdigit(*p)) i = i * 10 + *p++ - '0'; + + li = strtol((const char *)p, &endptr, 10); + if (S32OVERFLOW(li)) + { + fprintf(outfile, "** Repeat count too large\n"); + return PR_OK; + } + + p = (uint8_t *)endptr; if (*p++ != '}') { fprintf(outfile, "** Expected '}' after \\[...]{...\n"); return PR_OK; } + + i = (int32_t)li; if (i-- == 0) { fprintf(outfile, "** Zero repeat not allowed\n"); @@ -5169,11 +5887,20 @@ while ((c = *p++) != 0) continue; } - /* Handle a non-escaped character */ + /* Handle a non-escaped character. In non-UTF 32-bit mode with utf8_input + set, do the fudge for setting the top bit. */ if (c != '\\') { - if (utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); } + uint32_t topbit = 0; + if (test_mode == PCRE32_MODE && c == 0xff && *p != 0) + { + topbit = 0x80000000; + c = *p++; + } + if ((utf || (pat_patctl.control & CTL_UTF8_INPUT) != 0) && + HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); } + c |= topbit; } /* Handle backslash escapes */ @@ -5367,38 +6094,44 @@ ulen = len/code_unit_size; /* Length in code units */ if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl)) return PR_OK; -/* Check for mutually exclusive modifiers. */ +/* Check for mutually exclusive modifiers. At present, these are all in the +first control word. 
*/ -c = dat_datctl.control & EXCLUSIVE_DAT_CONTROLS; -if (c - (c & -c) != 0) +for (k = 0; k < sizeof(exclusive_dat_controls)/sizeof(uint32_t); k++) { - show_controls(c, "** Not allowed together:"); - fprintf(outfile, "\n"); + c = dat_datctl.control & exclusive_dat_controls[k]; + if (c != 0 && c != (c & (~c+1))) + { + show_controls(c, 0, "** Not allowed together:"); + fprintf(outfile, "\n"); + return PR_OK; + } + } + +if (pat_patctl.replacement[0] != 0 && + (dat_datctl.control & CTL_NULLCONTEXT) != 0) + { + fprintf(outfile, "** Replacement text is not supported with null_context.\n"); return PR_OK; } -/* If we have explicit valgrind support, mark the data from after its end to -the end of the buffer as unaddressable, so that a read over the end of the -buffer will be seen by valgrind, even if it doesn't cause a crash. If we're not -building with valgrind support, at least move the data to the end of the buffer -so that it might at least cause a crash. If we are using the POSIX interface, -or testing zero-termination, we must include the terminating zero. */ +/* We now have the subject in dbuffer, with len containing the byte length, and +ulen containing the code unit length. Move the data to the end of the buffer so +that a read over the end can be caught by valgrind or other means. If we have +explicit valgrind support, mark the unused start of the buffer unaddressable. +If we are using the POSIX interface, or testing zero-termination, we must +include the terminating zero in the usable data. */ -pp = dbuffer; c = code_unit_size * (((pat_patctl.control & CTL_POSIX) + (dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? 
1:0); - +pp = memmove(dbuffer + dbuffer_size - len - c, dbuffer, len + c); #ifdef SUPPORT_VALGRIND - VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len + c, dbuffer_size - (len + c)); -#else - pp = memmove(pp + dbuffer_size - len - c, pp, len + c); + VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c)); #endif -/* We now have len containing the byte length, ulen containing the code unit -length, and pp pointing to the subject string. POSIX matching is only possible -in 8-bit mode, and it does not support timing or other fancy features. Some -were checked at compile time, but we need to check the match-time settings -here. */ +/* Now pp points to the subject string. POSIX matching is only possible in +8-bit mode, and it does not support timing or other fancy features. Some were +checked at compile time, but we need to check the match-time settings here. */ #ifdef SUPPORT_PCRE2_8 if ((pat_patctl.control & CTL_POSIX) != 0) @@ -5408,13 +6141,16 @@ if ((pat_patctl.control & CTL_POSIX) != 0) regmatch_t *pmatch = NULL; const char *msg = "** Ignored with POSIX interface:"; - if (dat_datctl.cfail[0] != CFAIL_UNSET || dat_datctl.cfail[1] != CFAIL_UNSET) + if (dat_datctl.cerror[0] != CFORE_UNSET || dat_datctl.cerror[1] != CFORE_UNSET) + prmsg(&msg, "callout_error"); + if (dat_datctl.cfail[0] != CFORE_UNSET || dat_datctl.cfail[1] != CFORE_UNSET) prmsg(&msg, "callout_fail"); if (dat_datctl.copy_numbers[0] >= 0 || dat_datctl.copy_names[0] != 0) prmsg(&msg, "copy"); if (dat_datctl.get_numbers[0] >= 0 || dat_datctl.get_names[0] != 0) prmsg(&msg, "get"); if (dat_datctl.jitstack != 0) prmsg(&msg, "jitstack"); + if (dat_datctl.offset != 0) prmsg(&msg, "offset"); if ((dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS) != 0) { @@ -5422,9 +6158,11 @@ if ((pat_patctl.control & CTL_POSIX) != 0) show_match_options(dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS); msg = ""; } - if ((dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS) != 0) + if ((dat_datctl.control & 
~POSIX_SUPPORTED_MATCH_CONTROLS) != 0 || + (dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2) != 0) { - show_controls(dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS, msg); + show_controls(dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS, + dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2, msg); msg = ""; } @@ -5436,14 +6174,13 @@ if ((pat_patctl.control & CTL_POSIX) != 0) if ((dat_datctl.options & PCRE2_NOTEOL) != 0) eflags |= REG_NOTEOL; if ((dat_datctl.options & PCRE2_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY; - rc = regexec(&preg, (const char *)pp + dat_datctl.offset, - dat_datctl.oveccount, pmatch, eflags); + rc = regexec(&preg, (const char *)pp, dat_datctl.oveccount, pmatch, eflags); if (rc != 0) { (void)regerror(rc, &preg, (char *)pbuffer8, pbuffer8_size); fprintf(outfile, "No match: POSIX code %d: %s\n", rc, pbuffer8); } - else if ((pat_patctl.options & PCRE2_NO_AUTO_CAPTURE) != 0) + else if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0) fprintf(outfile, "Matched with REG_NOSUB\n"); else if (dat_datctl.oveccount == 0) fprintf(outfile, "Matched without capture\n"); @@ -5454,18 +6191,27 @@ if ((pat_patctl.control & CTL_POSIX) != 0) { if (pmatch[i].rm_so >= 0) { + PCRE2_SIZE start = pmatch[i].rm_so; + PCRE2_SIZE end = pmatch[i].rm_eo; + if (start > end) + { + start = pmatch[i].rm_eo; + end = pmatch[i].rm_so; + fprintf(outfile, "Start of matched string is beyond its end - " + "displaying from end to start.\n"); + } fprintf(outfile, "%2d: ", (int)i); - PCHARSV(dbuffer, pmatch[i].rm_so, - pmatch[i].rm_eo - pmatch[i].rm_so, utf, outfile); + PCHARSV(pp, start, end - start, utf, outfile); fprintf(outfile, "\n"); + if ((i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0) || (dat_datctl.control & CTL_ALLAFTERTEXT) != 0) { fprintf(outfile, "%2d+ ", (int)i); - PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo, - utf, outfile); - fprintf(outfile, "\n"); - } + /* Note: don't use the start/end variables here because we want to + show the text from 
what is reported as the end. */ + PCHARSV(pp, pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf, outfile); + fprintf(outfile, "\n"); } } } } @@ -5498,6 +6244,12 @@ if ((dat_datctl.control & (CTL_ALLUSEDTEXT|CTL_DFA)) == CTL_ALLUSEDTEXT && if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0) ulen = PCRE2_ZERO_TERMINATED; +/* The nullcontext modifier is used to test calling pcre2_[jit_]match() with a +NULL context. */ + +use_dat_context = ((dat_datctl.control & CTL_NULLCONTEXT) != 0)? + NULL : PTR(dat_context); + /* Enable display of malloc/free if wanted. */ show_memory = (dat_datctl.control & CTL_MEMORY) != 0; @@ -5571,7 +6323,7 @@ if (dat_datctl.replacement[0] != 0) uint8_t *pr; uint8_t rbuffer[REPLACE_BUFFSIZE]; uint8_t nbuffer[REPLACE_BUFFSIZE]; - uint32_t goption; + uint32_t xoptions; PCRE2_SIZE rlen, nsize, erroroffset; BOOL badutf = FALSE; @@ -5588,8 +6340,17 @@ if (dat_datctl.replacement[0] != 0) if (timeitm) fprintf(outfile, "** Timing is not supported with replace: ignored\n"); - goption = ((dat_datctl.control & CTL_GLOBAL) == 0)? 0 : - PCRE2_SUBSTITUTE_GLOBAL; + xoptions = (((dat_datctl.control & CTL_GLOBAL) == 0)? 0 : + PCRE2_SUBSTITUTE_GLOBAL) | + (((dat_datctl.control2 & CTL2_SUBSTITUTE_EXTENDED) == 0)? 0 : + PCRE2_SUBSTITUTE_EXTENDED) | + (((dat_datctl.control2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) == 0)? 0 : + PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) | + (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) == 0)? 0 : + PCRE2_SUBSTITUTE_UNKNOWN_UNSET) | + (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNSET_EMPTY) == 0)? 0 : + PCRE2_SUBSTITUTE_UNSET_EMPTY); + SETCASTPTR(r, rbuffer); /* Sets r8, r16, or r32, as appropriate. 
*/ pr = dat_datctl.replacement; @@ -5676,14 +6437,21 @@ if (dat_datctl.replacement[0] != 0) else rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size; PCRE2_SUBSTITUTE(rc, compiled_code, pp, ulen, dat_datctl.offset, - dat_datctl.options|goption, match_data, dat_context, + dat_datctl.options|xoptions, match_data, dat_context, rbuffer, rlen, nbuffer, &nsize); if (rc < 0) { - fprintf(outfile, "Failed: error %d: ", rc); - PCRE2_GET_ERROR_MESSAGE(nsize, rc, pbuffer); - PCHARSV(CASTVAR(void *, pbuffer), 0, nsize, FALSE, outfile); + PCRE2_SIZE msize; + fprintf(outfile, "Failed: error %d", rc); + if (rc != PCRE2_ERROR_NOMEMORY && nsize != PCRE2_UNSET) + fprintf(outfile, " at offset %ld in replacement", (long int)nsize); + fprintf(outfile, ": "); + PCRE2_GET_ERROR_MESSAGE(msize, rc, pbuffer); + PCHARSV(CASTVAR(void *, pbuffer), 0, msize, FALSE, outfile); + if (rc == PCRE2_ERROR_NOMEMORY && + (xoptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0) + fprintf(outfile, ": %ld code units are needed", (long int)nsize); } else { @@ -5735,7 +6503,7 @@ else for (gmatched = 0;; gmatched++) if (timeitm > 0) { - register int i; + int i; clock_t start_time, time_taken; if ((dat_datctl.control & CTL_DFA) != 0) @@ -5752,7 +6520,7 @@ else for (gmatched = 0;; gmatched++) { PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset, dat_datctl.options | g_notempty, match_data, - dat_context, dfa_workspace, DFA_WS_DIMENSION); + use_dat_context, dfa_workspace, DFA_WS_DIMENSION); } } @@ -5763,7 +6531,7 @@ else for (gmatched = 0;; gmatched++) { PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset, dat_datctl.options | g_notempty, match_data, - dat_context); + use_dat_context); } } @@ -5774,7 +6542,7 @@ else for (gmatched = 0;; gmatched++) { PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset, dat_datctl.options | g_notempty, match_data, - dat_context); + use_dat_context); } } total_match_time += (time_taken = clock() - start_time); @@ -5795,7 +6563,7 @@ else 
for (gmatched = 0;; gmatched++) } /* Otherwise just run a single match, setting up a callout if required (the - default). */ + default). There is a copy of the pattern in pbuffer8 for use by callouts. */ else { @@ -5822,7 +6590,7 @@ else for (gmatched = 0;; gmatched++) dfa_workspace[0] = -1; /* To catch bad restart */ PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset, dat_datctl.options | g_notempty, match_data, - dat_context, dfa_workspace, DFA_WS_DIMENSION); + use_dat_context, dfa_workspace, DFA_WS_DIMENSION); if (capcount == 0) { fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n"); @@ -5833,10 +6601,10 @@ else for (gmatched = 0;; gmatched++) { if ((pat_patctl.control & CTL_JITFAST) != 0) PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset, - dat_datctl.options | g_notempty, match_data, dat_context); + dat_datctl.options | g_notempty, match_data, use_dat_context); else PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset, - dat_datctl.options | g_notempty, match_data, dat_context); + dat_datctl.options | g_notempty, match_data, use_dat_context); if (capcount == 0) { fprintf(outfile, "Matched, but too many substrings\n"); @@ -5884,15 +6652,23 @@ else for (gmatched = 0;; gmatched++) /* "allcaptures" requests showing of all captures in the pattern, to check unset ones at the end. It may be set on the pattern or the data. Implement - by setting capcount to the maximum. */ + by setting capcount to the maximum. This is not relevant for DFA matching, + so ignore it. 
*/ if ((dat_datctl.control & CTL_ALLCAPTURES) != 0) { uint32_t maxcapcount; - if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0) - return PR_SKIP; - capcount = maxcapcount + 1; /* Allow for full match */ - if (capcount > (int)oveccount) capcount = oveccount; + if ((dat_datctl.control & CTL_DFA) != 0) + { + fprintf(outfile, "** Ignored after DFA matching: allcaptures\n"); + } + else + { + if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0) + return PR_SKIP; + capcount = maxcapcount + 1; /* Allow for full match */ + if (capcount > (int)oveccount) capcount = oveccount; + } } /* Output the captured substrings. Note that, for the matched string, @@ -6046,7 +6822,8 @@ else for (gmatched = 0;; gmatched++) TESTFLD(match_data, mark, !=, NULL)) { fprintf(outfile, ", mark="); - PCHARS(rubriclength, CASTFLD(void *, match_data, mark), 0, -1, utf, outfile); + PCHARS(rubriclength, CASTFLD(void *, match_data, mark), 0, -1, utf, + outfile); rubriclength += 7; } fprintf(outfile, ": "); @@ -6340,10 +7117,12 @@ printf(" -16 use the 16-bit library\n"); #ifdef SUPPORT_PCRE2_32 printf(" -32 use the 32-bit library\n"); #endif -printf(" -b set default pattern control 'fullbincode'\n"); +printf(" -ac set default pattern option PCRE2_AUTO_CALLOUT\n"); +printf(" -b set default pattern modifier 'fullbincode'\n"); printf(" -C show PCRE2 compile-time options and exit\n"); printf(" -C arg show a specific compile-time option and exit with its\n"); printf(" value if numeric (else 0). 
The arg can be:\n"); +printf(" backslash-C use of \\C is enabled [0, 1]\n"); printf(" bsr \\R type [ANYCRLF, ANY]\n"); printf(" ebcdic compiled for EBCDIC character code [0,1]\n"); printf(" ebcdic-nl NL code if compiled for EBCDIC\n"); @@ -6354,14 +7133,15 @@ printf(" pcre2-8 8 bit library support enabled [0, 1]\n"); printf(" pcre2-16 16 bit library support enabled [0, 1]\n"); printf(" pcre2-32 32 bit library support enabled [0, 1]\n"); printf(" unicode Unicode and UTF support enabled [0, 1]\n"); -printf(" -d set default pattern control 'debug'\n"); -printf(" -dfa set default subject control 'dfa'\n"); +printf(" -d set default pattern modifier 'debug'\n"); +printf(" -dfa set default subject modifier 'dfa'\n"); +printf(" -error show messages for error numbers, then exit\n"); printf(" -help show usage information\n"); -printf(" -i set default pattern control 'info'\n"); -printf(" -jit set default pattern control 'jit'\n"); +printf(" -i set default pattern modifier 'info'\n"); +printf(" -jit set default pattern modifier 'jit'\n"); printf(" -q quiet: do not output PCRE2 version number at start\n"); -printf(" -pattern set default pattern control fields\n"); -printf(" -subject set default subject control fields\n"); +printf(" -pattern set default pattern modifier fields\n"); +printf(" -subject set default subject modifier fields\n"); printf(" -S set stack size to megabytes\n"); printf(" -t [] time compilation and execution, repeating times\n"); printf(" -tm [] time execution (matching) only, repeating times\n"); @@ -6454,6 +7234,9 @@ printf("Compiled with\n"); #ifdef EBCDIC printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF); +#if defined NATIVE_ZOS +printf(" EBCDIC code page %s or similar\n", pcrz_cpversion()); +#endif #endif #ifdef SUPPORT_PCRE2_8 @@ -6465,6 +7248,9 @@ printf(" 16-bit support\n"); #ifdef SUPPORT_PCRE2_32 printf(" 32-bit support\n"); #endif +#ifdef SUPPORT_VALGRIND +printf(" Valgrind support\n"); +#endif (void)PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, 
&optval); if (optval != 0) @@ -6492,6 +7278,11 @@ print_newline_config(optval, FALSE); (void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &optval); printf(" \\R matches %s\n", optval? "CR, LF, or CRLF only" : "all Unicode newlines"); +#ifdef NEVER_BACKSLASH_C +printf(" \\C is not supported\n"); +#else +printf(" \\C is supported\n"); +#endif (void)PCRE2_CONFIG(PCRE2_CONFIG_LINKSIZE, &optval); printf(" Internal link size = %d\n", optval); (void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval); @@ -6518,13 +7309,13 @@ main(int argc, char **argv) { uint32_t yield = 0; uint32_t op = 1; -uint32_t stack_size; BOOL notdone = TRUE; BOOL quiet = FALSE; BOOL showtotaltimes = FALSE; BOOL skipping = FALSE; char *arg_subject = NULL; char *arg_pattern = NULL; +char *arg_error = NULL; /* The offsets to the options and control bits fields of the pattern and data control blocks must be the same so that common options and controls such as @@ -6532,7 +7323,8 @@ control blocks must be the same so that common options and controls such as We cannot test this till runtime because "offsetof" does not work in the preprocessor. */ -if (PO(options) != DO(options) || PO(control) != DO(control)) +if (PO(options) != DO(options) || PO(control) != DO(control) || + PO(control2) != DO(control2)) { fprintf(stderr, "** Coding error: " "options and control offsets for pattern and data must be the same.\n"); @@ -6583,14 +7375,16 @@ memset(&def_datctl, 0, sizeof(datctl)); def_datctl.oveccount = DEFAULT_OVECCOUNT; def_datctl.copy_numbers[0] = -1; def_datctl.get_numbers[0] = -1; -def_datctl.cfail[0] = def_datctl.cfail[1] = CFAIL_UNSET; +def_datctl.cerror[0] = def_datctl.cerror[1] = CFORE_UNSET; +def_datctl.cfail[0] = def_datctl.cfail[1] = CFORE_UNSET; /* Scan command line options. */ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) { - const char *endptr; + char *endptr; char *arg = argv[op]; + unsigned long uli; /* Display and/or set return code for configuration options. 
*/ @@ -6640,14 +7434,21 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) /* Set system stack size */ else if (strcmp(arg, "-S") == 0 && argc > 2 && - ((stack_size = get_value(argv[op+1], &endptr)), *endptr == 0)) + ((uli = strtoul(argv[op+1], &endptr, 10)), *endptr == 0)) { #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS) fprintf(stderr, "pcre2test: -S is not supported on this OS\n"); exit(1); #else int rc; + uint32_t stack_size; struct rlimit rlim; + if (U32OVERFLOW(uli)) + { + fprintf(stderr, "** Argument for -S is too big\n"); + exit(1); + } + stack_size = (uint32_t)uli; getrlimit(RLIMIT_STACK, &rlim); rlim.rlim_cur = stack_size * 1024 * 1024; if (rlim.rlim_cur > rlim.rlim_max) @@ -6672,10 +7473,11 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) /* Set some common pattern and subject controls */ + else if (strcmp(arg, "-ac") == 0) def_patctl.options |= PCRE2_AUTO_CALLOUT; + else if (strcmp(arg, "-b") == 0) def_patctl.control |= CTL_FULLBINCODE; + else if (strcmp(arg, "-d") == 0) def_patctl.control |= CTL_DEBUG; else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA; - else if (strcmp(arg, "-b") == 0) def_patctl.control |= CTL_FULLBINCODE; - else if (strcmp(arg, "-d") == 0) def_patctl.control |= CTL_DEBUG; - else if (strcmp(arg, "-i") == 0) def_patctl.control |= CTL_INFO; + else if (strcmp(arg, "-i") == 0) def_patctl.control |= CTL_INFO; else if (strcmp(arg, "-jit") == 0) { def_patctl.jit = 7; /* full & partial */ @@ -6690,12 +7492,16 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 || strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0) { - int temp; int both = arg[2] == 0; showtotaltimes = arg[1] == 'T'; - if (argc > 2 && (temp = get_value(argv[op+1], &endptr), *endptr == 0)) + if (argc > 2 && (uli = strtoul(argv[op+1], &endptr, 10), *endptr == 0)) { - timeitm = temp; + if (U32OVERFLOW(uli)) + { + 
fprintf(stderr, "** Argument for %s is too big\n", arg); + exit(1); + } + timeitm = (int)uli; op++; argc--; } @@ -6724,6 +7530,12 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) /* The following options save their data for processing once we know what the running mode is. */ + else if (strcmp(arg, "-error") == 0) + { + arg_error = argv[op+1]; + goto CHECK_VALUE_EXISTS; + } + else if (strcmp(arg, "-subject") == 0) { arg_subject = argv[op+1]; @@ -6757,6 +7569,88 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) argc--; } +/* If -error was present, get the error numbers, show the messages, and exit. +We wait to do this until we know which mode we are in. */ + +if (arg_error != NULL) + { + int len; + int errcode; + char *endptr; + +/* Ensure the relevant non-8-bit buffer is available. */ + +#ifdef SUPPORT_PCRE2_16 + if (test_mode == PCRE16_MODE) + { + pbuffer16_size = 256; + pbuffer16 = (uint16_t *)malloc(pbuffer16_size); + if (pbuffer16 == NULL) + { + fprintf(stderr, "pcre2test: malloc(%lu) failed for pbuffer16\n", + (unsigned long int)pbuffer16_size); + yield = 1; + goto EXIT; + } + } +#endif + +#ifdef SUPPORT_PCRE2_32 + if (test_mode == PCRE32_MODE) + { + pbuffer32_size = 256; + pbuffer32 = (uint32_t *)malloc(pbuffer32_size); + if (pbuffer32 == NULL) + { + fprintf(stderr, "pcre2test: malloc(%lu) failed for pbuffer32\n", + (unsigned long int)pbuffer32_size); + yield = 1; + goto EXIT; + } + } +#endif + + /* Loop along a list of error numbers. 
*/ + + for (;;) + { + errcode = strtol(arg_error, &endptr, 10); + if (*endptr != 0 && *endptr != CHAR_COMMA) + { + fprintf(stderr, "** '%s' is not a valid error number list\n", arg_error); + yield = 1; + goto EXIT; + } + printf("Error %d: ", errcode); + PCRE2_GET_ERROR_MESSAGE(len, errcode, pbuffer); + if (len < 0) + { + switch (len) + { + case PCRE2_ERROR_BADDATA: + printf("PCRE2_ERROR_BADDATA (unknown error number)"); + break; + + case PCRE2_ERROR_NOMEMORY: + printf("PCRE2_ERROR_NOMEMORY (buffer too small)"); + break; + + default: + printf("Unexpected return (%d) from pcre2_get_error_message()", len); + break; + } + } + else + { + PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, stdout); + } + printf("\n"); + if (*endptr == 0) goto EXIT; + arg_error = endptr + 1; + } + /* Control never reaches here */ + } /* End of -error handling */ + /* Initialize things that cannot be done until we know which test mode we are running in. When HEAP_MATCH_RECURSE is undefined, calling pcre2_set_recursion_ memory_management() is a no-op, but we call it in order to exercise it. Also @@ -6847,18 +7741,22 @@ if (argc > 1 && strcmp(argv[op], "-") != 0) infile = fopen(argv[op], INPUT_MODE); if (infile == NULL) { - printf("** Failed to open '%s'\n", argv[op]); + printf("** Failed to open '%s': %s\n", argv[op], strerror(errno)); yield = 1; goto EXIT; } } +#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT) +if (INTERACTIVE(infile)) using_history(); +#endif + if (argc > 2) { outfile = fopen(argv[op+1], OUTPUT_MODE); if (outfile == NULL) { - printf("** Failed to open '%s'\n", argv[op+1]); + printf("** Failed to open '%s': %s\n", argv[op+1], strerror(errno)); yield = 1; goto EXIT; } @@ -6891,8 +7789,7 @@ while (notdone) p = buffer; /* If we have a pattern set up for testing, or we are skipping after a - compile failure, a blank line terminates this test; otherwise process the - line as a data line. */ + compile failure, a blank line terminates this test. 
*/ if (expectdata || skipping) { @@ -6915,13 +7812,21 @@ while (notdone) skipping = FALSE; setlocale(LC_CTYPE, "C"); } - else if (!skipping) rc = process_data(); + + /* Otherwise, if we are not skipping, and the line is not a data comment + line starting with "\=", process a data line. */ + + else if (!skipping && !(p[0] == '\\' && p[1] == '=' && isspace(p[2]))) + { + rc = process_data(); + } } /* We do not have a pattern set up for testing. Lines starting with # are either comments or special commands. Blank lines are ignored. Otherwise, the line must start with a valid delimiter. It is then processed as a pattern - line. */ + line. A copy of the pattern is left in pbuffer8 for use by callouts. Under + valgrind, make the unused part of the buffer undefined, to catch overruns. */ else if (*p == '#') { @@ -6982,6 +7887,10 @@ if (showtotaltimes) EXIT: +#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT) +if (infile != NULL && INTERACTIVE(infile)) clear_history(); +#endif + if (infile != NULL && infile != stdin) fclose(infile); if (outfile != NULL && outfile != stdout) fclose(outfile); @@ -7016,7 +7925,7 @@ if (jit_stack != NULL) #ifdef SUPPORT_PCRE2_8 #undef BITS #define BITS 8 -regfree(&preg); +if (preg.re_pcre2_code != NULL) regfree(&preg); FREECONTEXTS; #endif diff --git a/pcre2/src/sljit/sljitConfig.h b/pcre2/src/sljit/sljitConfig.h index 1c8a521aa..2e70224da 100644 --- a/pcre2/src/sljit/sljitConfig.h +++ b/pcre2/src/sljit/sljitConfig.h @@ -82,7 +82,7 @@ /* --------------------------------------------------------------------- */ /* If SLJIT_STD_MACROS_DEFINED is not defined, the application should - define SLJIT_MALLOC, SLJIT_FREE, SLJIT_MEMMOVE, and NULL. */ + define SLJIT_MALLOC, SLJIT_FREE, SLJIT_MEMCPY, and NULL. */ #ifndef SLJIT_STD_MACROS_DEFINED /* Disabled by default. 
*/ #define SLJIT_STD_MACROS_DEFINED 0 @@ -90,10 +90,20 @@ /* Executable code allocation: If SLJIT_EXECUTABLE_ALLOCATOR is not defined, the application should - define both SLJIT_MALLOC_EXEC and SLJIT_FREE_EXEC. */ + define SLJIT_MALLOC_EXEC, SLJIT_FREE_EXEC, and SLJIT_EXEC_OFFSET. */ #ifndef SLJIT_EXECUTABLE_ALLOCATOR /* Enabled by default. */ #define SLJIT_EXECUTABLE_ALLOCATOR 1 + +/* When SLJIT_PROT_EXECUTABLE_ALLOCATOR is enabled SLJIT uses + an allocator which does not set writable and executable + permission flags at the same time. The trade-of is increased + memory consumption and disabled dynamic code modifications. */ +#ifndef SLJIT_PROT_EXECUTABLE_ALLOCATOR +/* Disabled by default. */ +#define SLJIT_PROT_EXECUTABLE_ALLOCATOR 0 +#endif + #endif /* Force cdecl calling convention even if a better calling diff --git a/pcre2/src/sljit/sljitConfigInternal.h b/pcre2/src/sljit/sljitConfigInternal.h index 8a4b9664f..5d461017e 100644 --- a/pcre2/src/sljit/sljitConfigInternal.h +++ b/pcre2/src/sljit/sljitConfigInternal.h @@ -31,14 +31,14 @@ SLJIT defines the following architecture dependent types and macros: Types: - sljit_sb, sljit_ub : signed and unsigned 8 bit byte - sljit_sh, sljit_uh : signed and unsigned 16 bit half-word (short) type - sljit_si, sljit_ui : signed and unsigned 32 bit integer type - sljit_sw, sljit_uw : signed and unsigned machine word, enough to store a pointer - sljit_p : unsgined pointer value (usually the same as sljit_uw, but - some 64 bit ABIs may use 32 bit pointers) - sljit_s : single precision floating point value - sljit_d : double precision floating point value + sljit_s8, sljit_u8 : signed and unsigned 8 bit integer type + sljit_s16, sljit_u16 : signed and unsigned 16 bit integer type + sljit_s32, sljit_u32 : signed and unsigned 32 bit integer type + sljit_sw, sljit_uw : signed and unsigned machine word, enough to store a pointer + sljit_p : unsgined pointer value (usually the same as sljit_uw, but + some 64 bit ABIs may use 32 bit 
pointers) + sljit_f32 : 32 bit single precision floating point value + sljit_f64 : 64 bit double precision floating point value Macros for feature detection (boolean): SLJIT_32BIT_ARCHITECTURE : 32 bit architecture @@ -56,10 +56,10 @@ SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS : number of available floating point scratch registers SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS : number of available floating point saved registers SLJIT_WORD_SHIFT : the shift required to apply when accessing a sljit_sw/sljit_uw array by index - SLJIT_DOUBLE_SHIFT : the shift required to apply when accessing - a double precision floating point array by index - SLJIT_SINGLE_SHIFT : the shift required to apply when accessing - a single precision floating point array by index + SLJIT_F32_SHIFT : the shift required to apply when accessing + a single precision floating point array by index + SLJIT_F64_SHIFT : the shift required to apply when accessing + a double precision floating point array by index SLJIT_LOCALS_OFFSET : local space starting offset (SLJIT_SP + SLJIT_LOCALS_OFFSET) SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address @@ -187,14 +187,6 @@ /* External function definitions. */ /**********************************/ -#if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED) - -/* These libraries are needed for the macros below. */ -#include -#include - -#endif /* SLJIT_STD_MACROS_DEFINED */ - /* General macros: Note: SLJIT is designed to be independent from them as possible. @@ -210,8 +202,8 @@ #define SLJIT_FREE(ptr, allocator_data) free(ptr) #endif -#ifndef SLJIT_MEMMOVE -#define SLJIT_MEMMOVE(dest, src, len) memmove(dest, src, len) +#ifndef SLJIT_MEMCPY +#define SLJIT_MEMCPY(dest, src, len) memcpy(dest, src, len) #endif #ifndef SLJIT_ZEROMEM @@ -252,11 +244,6 @@ #endif #endif /* !SLJIT_INLINE */ -#ifndef SLJIT_CONST -/* Const variables. */ -#define SLJIT_CONST const -#endif - #ifndef SLJIT_UNUSED_ARG /* Unused arguments. 
*/ #define SLJIT_UNUSED_ARG(arg) (void)arg @@ -284,6 +271,15 @@ /* Instruction cache flush. */ /****************************/ +#if (!defined SLJIT_CACHE_FLUSH && defined __has_builtin) +#if __has_builtin(__builtin___clear_cache) + +#define SLJIT_CACHE_FLUSH(from, to) \ + __builtin___clear_cache((char*)from, (char*)to) + +#endif /* __has_builtin(__builtin___clear_cache) */ +#endif /* (!defined SLJIT_CACHE_FLUSH && defined __has_builtin) */ + #ifndef SLJIT_CACHE_FLUSH #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) @@ -300,6 +296,11 @@ #define SLJIT_CACHE_FLUSH(from, to) \ sys_icache_invalidate((char*)(from), (char*)(to) - (char*)(from)) +#elif (defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) + +#define SLJIT_CACHE_FLUSH(from, to) \ + __builtin___clear_cache((char*)from, (char*)to) + #elif defined __ANDROID__ /* Android lacks __clear_cache; instead, cacheflush should be used. */ @@ -312,12 +313,14 @@ /* The __clear_cache() implementation of GCC is a dummy function on PowerPC. */ #define SLJIT_CACHE_FLUSH(from, to) \ ppc_cache_flush((from), (to)) +#define SLJIT_CACHE_FLUSH_OWN_IMPL 1 #elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) /* The __clear_cache() implementation of GCC is a dummy function on Sparc. */ #define SLJIT_CACHE_FLUSH(from, to) \ sparc_cache_flush((from), (to)) +#define SLJIT_CACHE_FLUSH_OWN_IMPL 1 #else @@ -330,20 +333,20 @@ #endif /* !SLJIT_CACHE_FLUSH */ /******************************************************/ -/* Byte/half/int/word/single/double type definitions. */ +/* Integer and floating point type definitions. */ /******************************************************/ /* 8 bit byte type. */ -typedef unsigned char sljit_ub; -typedef signed char sljit_sb; +typedef unsigned char sljit_u8; +typedef signed char sljit_s8; /* 16 bit half-word type. 
*/ -typedef unsigned short int sljit_uh; -typedef signed short int sljit_sh; +typedef unsigned short int sljit_u16; +typedef signed short int sljit_s16; /* 32 bit integer type. */ -typedef unsigned int sljit_ui; -typedef signed int sljit_si; +typedef unsigned int sljit_u32; +typedef signed int sljit_s32; /* Machine word type. Enough for storing a pointer. 32 bit for 32 bit machines. @@ -377,15 +380,15 @@ typedef long int sljit_sw; typedef sljit_uw sljit_p; /* Floating point types. */ -typedef float sljit_s; -typedef double sljit_d; +typedef float sljit_f32; +typedef double sljit_f64; /* Shift for pointer sized data. */ #define SLJIT_POINTER_SHIFT SLJIT_WORD_SHIFT /* Shift for double precision sized data. */ -#define SLJIT_DOUBLE_SHIFT 3 -#define SLJIT_SINGLE_SHIFT 2 +#define SLJIT_F32_SHIFT 2 +#define SLJIT_F64_SHIFT 3 #ifndef SLJIT_W @@ -534,6 +537,14 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr); SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void); #define SLJIT_MALLOC_EXEC(size) sljit_malloc_exec(size) #define SLJIT_FREE_EXEC(ptr) sljit_free_exec(ptr) + +#if (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR) +SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); +#define SLJIT_EXEC_OFFSET(ptr) sljit_exec_offset(ptr) +#else +#define SLJIT_EXEC_OFFSET(ptr) 0 +#endif + #endif /**********************************************/ @@ -613,6 +624,12 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void); #define SLJIT_LOCALS_OFFSET_BASE ((23 + 1) * sizeof(sljit_sw)) #endif +#elif (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX) + +#define SLJIT_NUMBER_OF_REGISTERS 10 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 5 +#define SLJIT_LOCALS_OFFSET_BASE 0 + #elif (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) #define SLJIT_NUMBER_OF_REGISTERS 0 diff --git a/pcre2/src/sljit/sljitExecAllocator.c b/pcre2/src/sljit/sljitExecAllocator.c index f24ed3379..9f88f990b 100644 --- 
a/pcre2/src/sljit/sljitExecAllocator.c +++ b/pcre2/src/sljit/sljitExecAllocator.c @@ -86,7 +86,7 @@ static SLJIT_INLINE void* alloc_chunk(sljit_uw size) return VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE); } -static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size) +static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size) { SLJIT_UNUSED_ARG(size); VirtualFree(chunk, 0, MEM_RELEASE); @@ -96,7 +96,7 @@ static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size) static SLJIT_INLINE void* alloc_chunk(sljit_uw size) { - void* retval; + void *retval; #ifdef MAP_ANON retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0); @@ -111,7 +111,7 @@ static SLJIT_INLINE void* alloc_chunk(sljit_uw size) return (retval != MAP_FAILED) ? retval : NULL; } -static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size) +static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size) { munmap(chunk, size); } @@ -137,10 +137,10 @@ struct free_block { }; #define AS_BLOCK_HEADER(base, offset) \ - ((struct block_header*)(((sljit_ub*)base) + offset)) + ((struct block_header*)(((sljit_u8*)base) + offset)) #define AS_FREE_BLOCK(base, offset) \ - ((struct free_block*)(((sljit_ub*)base) + offset)) -#define MEM_START(base) ((void*)(((sljit_ub*)base) + sizeof(struct block_header))) + ((struct free_block*)(((sljit_u8*)base) + offset)) +#define MEM_START(base) ((void*)(((sljit_u8*)base) + sizeof(struct block_header))) #define ALIGN_SIZE(size) (((size) + sizeof(struct block_header) + 7) & ~7) static struct free_block* free_blocks; @@ -153,7 +153,7 @@ static SLJIT_INLINE void sljit_insert_free_block(struct free_block *free_block, free_block->size = size; free_block->next = free_blocks; - free_block->prev = 0; + free_block->prev = NULL; if (free_blocks) free_blocks->prev = free_block; free_blocks = free_block; @@ -180,8 +180,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size) sljit_uw chunk_size; 
allocator_grab_lock(); - if (size < sizeof(struct free_block)) - size = sizeof(struct free_block); + if (size < (64 - sizeof(struct block_header))) + size = (64 - sizeof(struct block_header)); size = ALIGN_SIZE(size); free_block = free_blocks; diff --git a/pcre2/src/sljit/sljitLir.c b/pcre2/src/sljit/sljitLir.c index 0f1b1c9cc..0b39ec90a 100644 --- a/pcre2/src/sljit/sljitLir.c +++ b/pcre2/src/sljit/sljitLir.c @@ -26,6 +26,14 @@ #include "sljitLir.h" +#if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED) + +/* These libraries are needed for the macros below. */ +#include +#include + +#endif /* SLJIT_STD_MACROS_DEFINED */ + #define CHECK_ERROR() \ do { \ if (SLJIT_UNLIKELY(compiler->error)) \ @@ -77,16 +85,16 @@ #if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) #define GET_OPCODE(op) \ - ((op) & ~(SLJIT_INT_OP | SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS)) + ((op) & ~(SLJIT_I32_OP | SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS)) #define GET_FLAGS(op) \ ((op) & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C)) #define GET_ALL_FLAGS(op) \ - ((op) & (SLJIT_INT_OP | SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS)) + ((op) & (SLJIT_I32_OP | SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS)) #define TYPE_CAST_NEEDED(op) \ - (((op) >= SLJIT_MOV_UB && (op) <= SLJIT_MOV_SH) || ((op) >= SLJIT_MOVU_UB && (op) <= SLJIT_MOVU_SH)) + (((op) >= SLJIT_MOV_U8 && (op) <= SLJIT_MOV_S16) || ((op) >= SLJIT_MOVU_U8 && (op) <= SLJIT_MOVU_S16)) #define BUF_SIZE 4096 @@ -242,9 +250,21 @@ #if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) #if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) + +#if (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR) +#include "sljitProtExecAllocator.c" +#else #include 
"sljitExecAllocator.c" #endif +#endif + +#if (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR) +#define SLJIT_ADD_EXEC_OFFSET(ptr, exec_offset) ((sljit_u8 *)(ptr) + (exec_offset)) +#else +#define SLJIT_ADD_EXEC_OFFSET(ptr, exec_offset) ((sljit_u8 *)(ptr)) +#endif + /* Argument checking features. */ #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -257,7 +277,7 @@ return 1; \ } while (0) -#define CHECK_RETURN_TYPE sljit_si +#define CHECK_RETURN_TYPE sljit_s32 #define CHECK_RETURN_OK return 0 #define CHECK(x) \ @@ -320,7 +340,7 @@ #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) #define SLJIT_NEEDS_COMPILER_INIT 1 -static sljit_si compiler_initialized = 0; +static sljit_s32 compiler_initialized = 0; /* A thread safe initialization. */ static void init_compiler(void); #endif @@ -333,17 +353,17 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allo SLJIT_ZEROMEM(compiler, sizeof(struct sljit_compiler)); SLJIT_COMPILE_ASSERT( - sizeof(sljit_sb) == 1 && sizeof(sljit_ub) == 1 - && sizeof(sljit_sh) == 2 && sizeof(sljit_uh) == 2 - && sizeof(sljit_si) == 4 && sizeof(sljit_ui) == 4 + sizeof(sljit_s8) == 1 && sizeof(sljit_u8) == 1 + && sizeof(sljit_s16) == 2 && sizeof(sljit_u16) == 2 + && sizeof(sljit_s32) == 4 && sizeof(sljit_u32) == 4 && (sizeof(sljit_p) == 4 || sizeof(sljit_p) == 8) && sizeof(sljit_p) <= sizeof(sljit_sw) && (sizeof(sljit_sw) == 4 || sizeof(sljit_sw) == 8) && (sizeof(sljit_uw) == 4 || sizeof(sljit_uw) == 8), invalid_integer_types); - SLJIT_COMPILE_ASSERT(SLJIT_INT_OP == SLJIT_SINGLE_OP, + SLJIT_COMPILE_ASSERT(SLJIT_I32_OP == SLJIT_F32_OP, int_op_and_single_op_must_be_the_same); - SLJIT_COMPILE_ASSERT(SLJIT_REWRITABLE_JUMP != SLJIT_SINGLE_OP, + SLJIT_COMPILE_ASSERT(SLJIT_REWRITABLE_JUMP != SLJIT_F32_OP, rewritable_jump_and_single_op_must_not_be_the_same); /* Only the non-zero members must be set. 
*/ @@ -379,14 +399,14 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allo #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) compiler->cpool = (sljit_uw*)SLJIT_MALLOC(CPOOL_SIZE * sizeof(sljit_uw) - + CPOOL_SIZE * sizeof(sljit_ub), allocator_data); + + CPOOL_SIZE * sizeof(sljit_u8), allocator_data); if (!compiler->cpool) { SLJIT_FREE(compiler->buf, allocator_data); SLJIT_FREE(compiler->abuf, allocator_data); SLJIT_FREE(compiler, allocator_data); return NULL; } - compiler->cpool_unique = (sljit_ub*)(compiler->cpool + CPOOL_SIZE); + compiler->cpool_unique = (sljit_u8*)(compiler->cpool + CPOOL_SIZE); compiler->cpool_diff = 0xffffffff; #endif @@ -485,7 +505,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw static void* ensure_buf(struct sljit_compiler *compiler, sljit_uw size) { - sljit_ub *ret; + sljit_u8 *ret; struct sljit_memory_fragment *new_frag; SLJIT_ASSERT(size <= 256); @@ -504,7 +524,7 @@ static void* ensure_buf(struct sljit_compiler *compiler, sljit_uw size) static void* ensure_abuf(struct sljit_compiler *compiler, sljit_uw size) { - sljit_ub *ret; + sljit_u8 *ret; struct sljit_memory_fragment *new_frag; SLJIT_ASSERT(size <= 256); @@ -521,7 +541,7 @@ static void* ensure_abuf(struct sljit_compiler *compiler, sljit_uw size) return new_frag->memory; } -SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_si size) +SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_s32 size) { CHECK_ERROR_PTR(); @@ -554,8 +574,8 @@ static SLJIT_INLINE void reverse_buf(struct sljit_compiler *compiler) } static SLJIT_INLINE void set_emit_enter(struct sljit_compiler *compiler, - sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, - sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 
local_size) { SLJIT_UNUSED_ARG(args); SLJIT_UNUSED_ARG(local_size); @@ -571,8 +591,8 @@ static SLJIT_INLINE void set_emit_enter(struct sljit_compiler *compiler, } static SLJIT_INLINE void set_set_context(struct sljit_compiler *compiler, - sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, - sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { SLJIT_UNUSED_ARG(args); SLJIT_UNUSED_ARG(local_size); @@ -598,7 +618,7 @@ static SLJIT_INLINE void set_label(struct sljit_label *label, struct sljit_compi compiler->last_label = label; } -static SLJIT_INLINE void set_jump(struct sljit_jump *jump, struct sljit_compiler *compiler, sljit_si flags) +static SLJIT_INLINE void set_jump(struct sljit_jump *jump, struct sljit_compiler *compiler, sljit_s32 flags) { jump->next = NULL; jump->flags = flags; @@ -654,19 +674,19 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp break; \ case SLJIT_BREAKPOINT: \ case SLJIT_NOP: \ - case SLJIT_LUMUL: \ - case SLJIT_LSMUL: \ + case SLJIT_LMUL_UW: \ + case SLJIT_LMUL_SW: \ case SLJIT_MOV: \ - case SLJIT_MOV_UI: \ + case SLJIT_MOV_U32: \ case SLJIT_MOV_P: \ case SLJIT_MOVU: \ - case SLJIT_MOVU_UI: \ + case SLJIT_MOVU_U32: \ case SLJIT_MOVU_P: \ /* Nothing allowed */ \ - CHECK_ARGUMENT(!(op & (SLJIT_INT_OP | SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \ + CHECK_ARGUMENT(!(op & (SLJIT_I32_OP | SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \ break; \ default: \ - /* Only SLJIT_INT_OP or SLJIT_SINGLE_OP is allowed. */ \ + /* Only SLJIT_I32_OP or SLJIT_F32_OP is allowed. 
*/ \ CHECK_ARGUMENT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \ break; \ } @@ -674,12 +694,12 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp #define FUNCTION_CHECK_FOP() \ CHECK_ARGUMENT(!GET_FLAGS(op) || !(op & SLJIT_KEEP_FLAGS)); \ switch (GET_OPCODE(op)) { \ - case SLJIT_DCMP: \ + case SLJIT_CMP_F64: \ CHECK_ARGUMENT(!(op & (SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \ CHECK_ARGUMENT((op & (SLJIT_SET_E | SLJIT_SET_S))); \ break; \ default: \ - /* Only SLJIT_INT_OP or SLJIT_SINGLE_OP is allowed. */ \ + /* Only SLJIT_I32_OP or SLJIT_F32_OP is allowed. */ \ CHECK_ARGUMENT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \ break; \ } @@ -844,38 +864,38 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *comp fprintf(compiler->verbose, "fs%d", SLJIT_NUMBER_OF_FLOAT_REGISTERS - (p)); \ } -static SLJIT_CONST char* op0_names[] = { - (char*)"breakpoint", (char*)"nop", (char*)"lumul", (char*)"lsmul", - (char*)"udivmod", (char*)"sdivmod", (char*)"udivi", (char*)"sdivi" +static const char* op0_names[] = { + (char*)"breakpoint", (char*)"nop", (char*)"lmul.uw", (char*)"lmul.sw", + (char*)"divmod.u", (char*)"divmod.s", (char*)"div.u", (char*)"div.s" }; -static SLJIT_CONST char* op1_names[] = { - (char*)"mov", (char*)"mov_ub", (char*)"mov_sb", (char*)"mov_uh", - (char*)"mov_sh", (char*)"mov_ui", (char*)"mov_si", (char*)"mov_p", - (char*)"movu", (char*)"movu_ub", (char*)"movu_sb", (char*)"movu_uh", - (char*)"movu_sh", (char*)"movu_ui", (char*)"movu_si", (char*)"movu_p", +static const char* op1_names[] = { + (char*)"", (char*)".u8", (char*)".s8", (char*)".u16", + (char*)".s16", (char*)".u32", (char*)".s32", (char*)".p", + (char*)"", (char*)".u8", (char*)".s8", (char*)".u16", + (char*)".s16", (char*)".u32", (char*)".s32", (char*)".p", (char*)"not", (char*)"neg", (char*)"clz", }; -static 
SLJIT_CONST char* op2_names[] = { +static const char* op2_names[] = { (char*)"add", (char*)"addc", (char*)"sub", (char*)"subc", (char*)"mul", (char*)"and", (char*)"or", (char*)"xor", (char*)"shl", (char*)"lshr", (char*)"ashr", }; -static SLJIT_CONST char* fop1_names[] = { +static const char* fop1_names[] = { (char*)"mov", (char*)"conv", (char*)"conv", (char*)"conv", (char*)"conv", (char*)"conv", (char*)"cmp", (char*)"neg", (char*)"abs", }; -static SLJIT_CONST char* fop2_names[] = { +static const char* fop2_names[] = { (char*)"add", (char*)"sub", (char*)"mul", (char*)"div" }; -#define JUMP_PREFIX(type) \ - ((type & 0xff) <= SLJIT_MUL_NOT_OVERFLOW ? ((type & SLJIT_INT_OP) ? "i_" : "") \ - : ((type & 0xff) <= SLJIT_D_ORDERED ? ((type & SLJIT_SINGLE_OP) ? "s_" : "d_") : "")) +#define JUMP_POSTFIX(type) \ + ((type & 0xff) <= SLJIT_MUL_NOT_OVERFLOW ? ((type & SLJIT_I32_OP) ? "32" : "") \ + : ((type & 0xff) <= SLJIT_ORDERED_F64 ? ((type & SLJIT_F32_OP) ? ".f32" : ".f64") : "")) static char* jump_names[] = { (char*)"equal", (char*)"not_equal", @@ -923,8 +943,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_generate_code(struct sljit_com } static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compiler *compiler, - sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, - sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { SLJIT_UNUSED_ARG(compiler); @@ -949,8 +969,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compil } static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compiler *compiler, - sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, - sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 
fsaveds, sljit_s32 local_size) { if (SLJIT_UNLIKELY(compiler->skip_checks)) { compiler->skip_checks = 0; @@ -977,7 +997,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compi CHECK_RETURN_OK; } -static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(compiler->scratches >= 0); @@ -993,7 +1013,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return(struct sljit_compi if (op == SLJIT_UNUSED) fprintf(compiler->verbose, " return\n"); else { - fprintf(compiler->verbose, " return.%s ", op1_names[op - SLJIT_OP1_BASE]); + fprintf(compiler->verbose, " return%s ", op1_names[op - SLJIT_OP1_BASE]); sljit_verbose_param(compiler, src, srcw); fprintf(compiler->verbose, "\n"); } @@ -1002,7 +1022,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return(struct sljit_compi CHECK_RETURN_OK; } -static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) FUNCTION_CHECK_DST(dst, dstw); @@ -1017,7 +1037,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fast_enter(struct sljit_c CHECK_RETURN_OK; } -static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) FUNCTION_CHECK_SRC(src, srcw); @@ -1032,23 +1052,29 @@ static 
SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fast_return(struct sljit_ CHECK_RETURN_OK; } -static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT((op >= SLJIT_BREAKPOINT && op <= SLJIT_LSMUL) - || ((op & ~SLJIT_INT_OP) >= SLJIT_UDIVMOD && (op & ~SLJIT_INT_OP) <= SLJIT_SDIVI)); - CHECK_ARGUMENT(op < SLJIT_LUMUL || compiler->scratches >= 2); + CHECK_ARGUMENT((op >= SLJIT_BREAKPOINT && op <= SLJIT_LMUL_SW) + || ((op & ~SLJIT_I32_OP) >= SLJIT_DIVMOD_UW && (op & ~SLJIT_I32_OP) <= SLJIT_DIV_SW)); + CHECK_ARGUMENT(op < SLJIT_LMUL_UW || compiler->scratches >= 2); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) - fprintf(compiler->verbose, " %s%s\n", !(op & SLJIT_INT_OP) ? "" : "i", op0_names[GET_OPCODE(op) - SLJIT_OP0_BASE]); + { + fprintf(compiler->verbose, " %s", op0_names[GET_OPCODE(op) - SLJIT_OP0_BASE]); + if (GET_OPCODE(op) >= SLJIT_DIVMOD_UW) { + fprintf(compiler->verbose, (op & SLJIT_I32_OP) ? "32" : "w"); + } + fprintf(compiler->verbose, "\n"); + } #endif CHECK_RETURN_OK; } -static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { if (SLJIT_UNLIKELY(compiler->skip_checks)) { compiler->skip_checks = 0; @@ -1064,9 +1090,18 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %s%s%s%s%s%s%s%s ", !(op & SLJIT_INT_OP) ? 
"" : "i", op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE], - !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_U) ? "" : ".u", !(op & SLJIT_SET_S) ? "" : ".s", - !(op & SLJIT_SET_O) ? "" : ".o", !(op & SLJIT_SET_C) ? "" : ".c", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k"); + if (GET_OPCODE(op) <= SLJIT_MOVU_P) + { + fprintf(compiler->verbose, " mov%s%s%s ", (GET_OPCODE(op) >= SLJIT_MOVU) ? "u" : "", + !(op & SLJIT_I32_OP) ? "" : "32", (op != SLJIT_MOV32 && op != SLJIT_MOVU32) ? op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE] : ""); + } + else + { + fprintf(compiler->verbose, " %s%s%s%s%s%s%s%s ", op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE], !(op & SLJIT_I32_OP) ? "" : "32", + !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_U) ? "" : ".u", !(op & SLJIT_SET_S) ? "" : ".s", + !(op & SLJIT_SET_O) ? "" : ".o", !(op & SLJIT_SET_C) ? "" : ".c", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k"); + } + sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, ", "); sljit_verbose_param(compiler, src, srcw); @@ -1076,10 +1111,10 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler CHECK_RETURN_OK; } -static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { if (SLJIT_UNLIKELY(compiler->skip_checks)) { compiler->skip_checks = 0; @@ -1095,7 +1130,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %s%s%s%s%s%s%s%s ", !(op & SLJIT_INT_OP) ? 
"" : "i", op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], + fprintf(compiler->verbose, " %s%s%s%s%s%s%s%s ", op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], !(op & SLJIT_I32_OP) ? "" : "32", !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_U) ? "" : ".u", !(op & SLJIT_SET_S) ? "" : ".s", !(op & SLJIT_SET_O) ? "" : ".o", !(op & SLJIT_SET_C) ? "" : ".c", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k"); sljit_verbose_param(compiler, dst, dstw); @@ -1109,7 +1144,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler CHECK_RETURN_OK; } -static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_register_index(sljit_si reg) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_register_index(sljit_s32 reg) { SLJIT_UNUSED_ARG(reg); #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -1118,7 +1153,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_register_index(sljit_si re CHECK_RETURN_OK; } -static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_float_register_index(sljit_si reg) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_float_register_index(sljit_s32 reg) { SLJIT_UNUSED_ARG(reg); #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -1128,7 +1163,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_float_register_index(sljit } static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_si size) + void *instruction, sljit_s32 size) { #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) int i; @@ -1152,16 +1187,16 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_custom(struct sljit_co if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " op_custom"); for (i = 0; i < size; i++) - fprintf(compiler->verbose, " 0x%x", ((sljit_ub*)instruction)[i]); + fprintf(compiler->verbose, " 0x%x", ((sljit_u8*)instruction)[i]); fprintf(compiler->verbose, "\n"); } #endif CHECK_RETURN_OK; } -static SLJIT_INLINE CHECK_RETURN_TYPE 
check_sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { if (SLJIT_UNLIKELY(compiler->skip_checks)) { compiler->skip_checks = 0; @@ -1170,19 +1205,19 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1(struct sljit_compile #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(sljit_is_fpu_available()); - CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_DMOV && GET_OPCODE(op) <= SLJIT_DABS); + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV_F64 && GET_OPCODE(op) <= SLJIT_ABS_F64); FUNCTION_CHECK_FOP(); FUNCTION_FCHECK(src, srcw); FUNCTION_FCHECK(dst, dstw); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) - fprintf(compiler->verbose, " %s%s ", fop1_names[SLJIT_CONVD_FROMS - SLJIT_FOP1_BASE], - (op & SLJIT_SINGLE_OP) ? "s.fromd" : "d.froms"); + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) + fprintf(compiler->verbose, " %s%s ", fop1_names[SLJIT_CONV_F64_FROM_F32 - SLJIT_FOP1_BASE], + (op & SLJIT_F32_OP) ? ".f32.from.f64" : ".f64.from.f32"); else - fprintf(compiler->verbose, " %s%s ", (op & SLJIT_SINGLE_OP) ? "s" : "d", - fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE]); + fprintf(compiler->verbose, " %s%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], + (op & SLJIT_F32_OP) ? 
".f32" : ".f64"); sljit_verbose_fparam(compiler, dst, dstw); fprintf(compiler->verbose, ", "); @@ -1193,9 +1228,9 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1(struct sljit_compile CHECK_RETURN_OK; } -static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { if (SLJIT_UNLIKELY(compiler->skip_checks)) { compiler->skip_checks = 0; @@ -1204,14 +1239,14 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_cmp(struct sljit_com #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(sljit_is_fpu_available()); - CHECK_ARGUMENT(GET_OPCODE(op) == SLJIT_DCMP); + CHECK_ARGUMENT(GET_OPCODE(op) == SLJIT_CMP_F64); FUNCTION_CHECK_FOP(); FUNCTION_FCHECK(src1, src1w); FUNCTION_FCHECK(src2, src2w); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %s%s%s%s ", (op & SLJIT_SINGLE_OP) ? "s" : "d", fop1_names[SLJIT_DCMP - SLJIT_FOP1_BASE], + fprintf(compiler->verbose, " %s%s%s%s ", fop1_names[SLJIT_CMP_F64 - SLJIT_FOP1_BASE], (op & SLJIT_F32_OP) ? ".f32" : ".f64", (op & SLJIT_SET_E) ? ".e" : "", (op & SLJIT_SET_S) ? 
".s" : ""); sljit_verbose_fparam(compiler, src1, src1w); fprintf(compiler->verbose, ", "); @@ -1222,9 +1257,9 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_cmp(struct sljit_com CHECK_RETURN_OK; } -static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { if (SLJIT_UNLIKELY(compiler->skip_checks)) { compiler->skip_checks = 0; @@ -1233,7 +1268,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_convw_fromd(struct s #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(sljit_is_fpu_available()); - CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_CONVW_FROMD && GET_OPCODE(op) <= SLJIT_CONVI_FROMD); + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_CONV_SW_FROM_F64 && GET_OPCODE(op) <= SLJIT_CONV_S32_FROM_F64); FUNCTION_CHECK_FOP(); FUNCTION_FCHECK(src, srcw); FUNCTION_CHECK_DST(dst, dstw); @@ -1241,8 +1276,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_convw_fromd(struct s #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " %s%s.from%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], - (GET_OPCODE(op) == SLJIT_CONVI_FROMD) ? "i" : "w", - (op & SLJIT_SINGLE_OP) ? "s" : "d"); + (GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) ? ".s32" : ".sw", + (op & SLJIT_F32_OP) ? 
".f32" : ".f64"); sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, ", "); sljit_verbose_fparam(compiler, src, srcw); @@ -1252,9 +1287,9 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_convw_fromd(struct s CHECK_RETURN_OK; } -static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { if (SLJIT_UNLIKELY(compiler->skip_checks)) { compiler->skip_checks = 0; @@ -1263,7 +1298,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_convd_fromw(struct s #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(sljit_is_fpu_available()); - CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_CONVD_FROMW && GET_OPCODE(op) <= SLJIT_CONVD_FROMI); + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_CONV_F64_FROM_SW && GET_OPCODE(op) <= SLJIT_CONV_F64_FROM_S32); FUNCTION_CHECK_FOP(); FUNCTION_CHECK_SRC(src, srcw); FUNCTION_FCHECK(dst, dstw); @@ -1271,8 +1306,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_convd_fromw(struct s #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " %s%s.from%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], - (op & SLJIT_SINGLE_OP) ? "s" : "d", - (GET_OPCODE(op) == SLJIT_CONVD_FROMI) ? "i" : "w"); + (op & SLJIT_F32_OP) ? ".f32" : ".f64", + (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) ? 
".s32" : ".sw"); sljit_verbose_fparam(compiler, dst, dstw); fprintf(compiler->verbose, ", "); sljit_verbose_param(compiler, src, srcw); @@ -1282,14 +1317,14 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_convd_fromw(struct s CHECK_RETURN_OK; } -static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(sljit_is_fpu_available()); - CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_DADD && GET_OPCODE(op) <= SLJIT_DDIV); + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_ADD_F64 && GET_OPCODE(op) <= SLJIT_DIV_F64); FUNCTION_CHECK_FOP(); FUNCTION_FCHECK(src1, src1w); FUNCTION_FCHECK(src2, src2w); @@ -1297,7 +1332,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop2(struct sljit_compile #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %s%s ", (op & SLJIT_SINGLE_OP) ? "s" : "d", fop2_names[GET_OPCODE(op) - SLJIT_FOP2_BASE]); + fprintf(compiler->verbose, " %s%s ", fop2_names[GET_OPCODE(op) - SLJIT_FOP2_BASE], (op & SLJIT_F32_OP) ? 
".f32" : ".f64"); sljit_verbose_fparam(compiler, dst, dstw); fprintf(compiler->verbose, ", "); sljit_verbose_fparam(compiler, src1, src1w); @@ -1320,7 +1355,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_label(struct sljit_compil CHECK_RETURN_OK; } -static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) { if (SLJIT_UNLIKELY(compiler->skip_checks)) { compiler->skip_checks = 0; @@ -1328,33 +1363,33 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_jump(struct sljit_compile } #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_INT_OP))); + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_I32_OP))); CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_CALL3); - CHECK_ARGUMENT((type & 0xff) < SLJIT_JUMP || !(type & SLJIT_INT_OP)); + CHECK_ARGUMENT((type & 0xff) < SLJIT_JUMP || !(type & SLJIT_I32_OP)); CHECK_ARGUMENT((type & 0xff) <= SLJIT_CALL0 || ((type & 0xff) - SLJIT_CALL0) <= compiler->scratches); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) - fprintf(compiler->verbose, " jump%s.%s%s\n", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", - JUMP_PREFIX(type), jump_names[type & 0xff]); + fprintf(compiler->verbose, " jump%s %s%s\n", !(type & SLJIT_REWRITABLE_JUMP) ? 
"" : ".r", + jump_names[type & 0xff], JUMP_POSTFIX(type)); #endif CHECK_RETURN_OK; } -static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_INT_OP))); + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_I32_OP))); CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_SIG_LESS_EQUAL); FUNCTION_CHECK_SRC(src1, src1w); FUNCTION_CHECK_SRC(src2, src2w); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " cmp%s.%s%s ", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", - (type & SLJIT_INT_OP) ? "i_" : "", jump_names[type & 0xff]); + fprintf(compiler->verbose, " cmp%s %s%s, ", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", + jump_names[type & 0xff], (type & SLJIT_I32_OP) ? 
"32" : ""); sljit_verbose_param(compiler, src1, src1w); fprintf(compiler->verbose, ", "); sljit_verbose_param(compiler, src2, src2w); @@ -1364,21 +1399,21 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmp(struct sljit_compiler CHECK_RETURN_OK; } -static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_si type, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(sljit_is_fpu_available()); - CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_SINGLE_OP))); - CHECK_ARGUMENT((type & 0xff) >= SLJIT_D_EQUAL && (type & 0xff) <= SLJIT_D_ORDERED); + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_F32_OP))); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL_F64 && (type & 0xff) <= SLJIT_ORDERED_F64); FUNCTION_FCHECK(src1, src1w); FUNCTION_FCHECK(src2, src2w); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " fcmp%s.%s%s ", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", - (type & SLJIT_SINGLE_OP) ? "s_" : "d_", jump_names[type & 0xff]); + fprintf(compiler->verbose, " fcmp%s %s%s, ", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", + jump_names[type & 0xff], (type & SLJIT_F32_OP) ? 
".f32" : ".f64"); sljit_verbose_fparam(compiler, src1, src1w); fprintf(compiler->verbose, ", "); sljit_verbose_fparam(compiler, src2, src2w); @@ -1388,7 +1423,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcmp(struct sljit_compile CHECK_RETURN_OK; } -static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) { if (SLJIT_UNLIKELY(compiler->skip_checks)) { compiler->skip_checks = 0; @@ -1410,15 +1445,15 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_ijump(struct sljit_compil CHECK_RETURN_OK; } -static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw, - sljit_si type) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw, + sljit_s32 type) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_INT_OP))); - CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_D_ORDERED); - CHECK_ARGUMENT(op == SLJIT_MOV || GET_OPCODE(op) == SLJIT_MOV_UI || GET_OPCODE(op) == SLJIT_MOV_SI + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP))); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64); + CHECK_ARGUMENT(op == SLJIT_MOV || GET_OPCODE(op) == SLJIT_MOV_U32 || GET_OPCODE(op) == SLJIT_MOV_S32 || (GET_OPCODE(op) >= SLJIT_AND && GET_OPCODE(op) <= SLJIT_XOR)); CHECK_ARGUMENT((op & (SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C)) == 0); CHECK_ARGUMENT((op & (SLJIT_SET_E | SLJIT_KEEP_FLAGS)) != (SLJIT_SET_E | SLJIT_KEEP_FLAGS)); @@ -1431,21 +1466,22 @@ static SLJIT_INLINE CHECK_RETURN_TYPE 
check_sljit_emit_op_flags(struct sljit_com #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " flags.%s%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i", - GET_OPCODE(op) >= SLJIT_OP2_BASE ? op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE] : op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE], - !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k"); + fprintf(compiler->verbose, " flags %s%s%s%s, ", + !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k", + GET_OPCODE(op) < SLJIT_OP2_BASE ? "mov" : op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], + GET_OPCODE(op) < SLJIT_OP2_BASE ? op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE] : ((op & SLJIT_I32_OP) ? "32" : "")); sljit_verbose_param(compiler, dst, dstw); if (src != SLJIT_UNUSED) { fprintf(compiler->verbose, ", "); sljit_verbose_param(compiler, src, srcw); } - fprintf(compiler->verbose, ", %s%s\n", JUMP_PREFIX(type), jump_names[type & 0xff]); + fprintf(compiler->verbose, ", %s%s\n", jump_names[type & 0xff], JUMP_POSTFIX(type)); } #endif CHECK_RETURN_OK; } -static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) { SLJIT_UNUSED_ARG(offset); @@ -1462,7 +1498,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_local_base(struct sljit_co CHECK_RETURN_OK; } -static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) { SLJIT_UNUSED_ARG(init_value); @@ -1482,31 +1518,31 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_const(struct sljit_compil #endif /* SLJIT_ARGUMENT_CHECKS || 
SLJIT_VERBOSE */ #define SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw) \ - SLJIT_COMPILE_ASSERT(!(SLJIT_CONVW_FROMD & 0x1) && !(SLJIT_CONVD_FROMW & 0x1), \ + SLJIT_COMPILE_ASSERT(!(SLJIT_CONV_SW_FROM_F64 & 0x1) && !(SLJIT_CONV_F64_FROM_SW & 0x1), \ invalid_float_opcodes); \ - if (GET_OPCODE(op) >= SLJIT_CONVW_FROMD && GET_OPCODE(op) <= SLJIT_DCMP) { \ - if (GET_OPCODE(op) == SLJIT_DCMP) { \ + if (GET_OPCODE(op) >= SLJIT_CONV_SW_FROM_F64 && GET_OPCODE(op) <= SLJIT_CMP_F64) { \ + if (GET_OPCODE(op) == SLJIT_CMP_F64) { \ CHECK(check_sljit_emit_fop1_cmp(compiler, op, dst, dstw, src, srcw)); \ ADJUST_LOCAL_OFFSET(dst, dstw); \ ADJUST_LOCAL_OFFSET(src, srcw); \ return sljit_emit_fop1_cmp(compiler, op, dst, dstw, src, srcw); \ } \ - if ((GET_OPCODE(op) | 0x1) == SLJIT_CONVI_FROMD) { \ - CHECK(check_sljit_emit_fop1_convw_fromd(compiler, op, dst, dstw, src, srcw)); \ + if ((GET_OPCODE(op) | 0x1) == SLJIT_CONV_S32_FROM_F64) { \ + CHECK(check_sljit_emit_fop1_conv_sw_from_f64(compiler, op, dst, dstw, src, srcw)); \ ADJUST_LOCAL_OFFSET(dst, dstw); \ ADJUST_LOCAL_OFFSET(src, srcw); \ - return sljit_emit_fop1_convw_fromd(compiler, op, dst, dstw, src, srcw); \ + return sljit_emit_fop1_conv_sw_from_f64(compiler, op, dst, dstw, src, srcw); \ } \ - CHECK(check_sljit_emit_fop1_convd_fromw(compiler, op, dst, dstw, src, srcw)); \ + CHECK(check_sljit_emit_fop1_conv_f64_from_sw(compiler, op, dst, dstw, src, srcw)); \ ADJUST_LOCAL_OFFSET(dst, dstw); \ ADJUST_LOCAL_OFFSET(src, srcw); \ - return sljit_emit_fop1_convd_fromw(compiler, op, dst, dstw, src, srcw); \ + return sljit_emit_fop1_conv_f64_from_sw(compiler, op, dst, dstw, src, srcw); \ } \ CHECK(check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw)); \ ADJUST_LOCAL_OFFSET(dst, dstw); \ ADJUST_LOCAL_OFFSET(src, srcw); -static SLJIT_INLINE sljit_si emit_mov_before_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) +static SLJIT_INLINE sljit_s32 emit_mov_before_return(struct 
sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { /* Return if don't need to do anything. */ if (op == SLJIT_UNUSED) @@ -1517,7 +1553,7 @@ static SLJIT_INLINE sljit_si emit_mov_before_return(struct sljit_compiler *compi if (src == SLJIT_RETURN_REG && (op == SLJIT_MOV || op == SLJIT_MOV_P)) return SLJIT_SUCCESS; #else - if (src == SLJIT_RETURN_REG && (op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P)) + if (src == SLJIT_RETURN_REG && (op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P)) return SLJIT_SUCCESS; #endif @@ -1576,12 +1612,12 @@ static SLJIT_INLINE sljit_si emit_mov_before_return(struct sljit_compiler *compi #if !(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { /* Default compare for most architectures. 
*/ - sljit_si flags, tmp_src, condition; + sljit_s32 flags, tmp_src, condition; sljit_sw tmp_srcw; CHECK_ERROR_PTR(); @@ -1629,7 +1665,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler condition = SLJIT_SIG_GREATER_EQUAL; break; } - type = condition | (type & (SLJIT_INT_OP | SLJIT_REWRITABLE_JUMP)); + type = condition | (type & (SLJIT_I32_OP | SLJIT_REWRITABLE_JUMP)); tmp_src = src1; src1 = src2; src2 = tmp_src; @@ -1649,7 +1685,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; #endif - PTR_FAIL_IF(sljit_emit_op2(compiler, SLJIT_SUB | flags | (type & SLJIT_INT_OP), + PTR_FAIL_IF(sljit_emit_op2(compiler, SLJIT_SUB | flags | (type & SLJIT_I32_OP), SLJIT_UNUSED, 0, src1, src1w, src2, src2w)); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -1658,25 +1694,25 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler return sljit_emit_jump(compiler, condition | (type & SLJIT_REWRITABLE_JUMP)); } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_si type, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { - sljit_si flags, condition; + sljit_s32 flags, condition; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_fcmp(compiler, type, src1, src1w, src2, src2w)); condition = type & 0xff; - flags = (condition <= SLJIT_D_NOT_EQUAL) ? SLJIT_SET_E : SLJIT_SET_S; - if (type & SLJIT_SINGLE_OP) - flags |= SLJIT_SINGLE_OP; + flags = (condition <= SLJIT_NOT_EQUAL_F64) ? 
SLJIT_SET_E : SLJIT_SET_S; + if (type & SLJIT_F32_OP) + flags |= SLJIT_F32_OP; #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; #endif - sljit_emit_fop1(compiler, SLJIT_DCMP | flags, src1, src1w, src2, src2w); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | flags, src1, src1w, src2, src2w); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -1689,7 +1725,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compile #if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) { CHECK_ERROR(); CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset)); @@ -1710,7 +1746,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *co /* Empty function bodies for those machines, which are not (yet) supported. 
*/ -SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) { return "unsupported"; } @@ -1727,7 +1763,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compile SLJIT_ASSERT_STOP(); } -SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_si size) +SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_s32 size) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(size); @@ -1757,9 +1793,9 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code) SLJIT_ASSERT_STOP(); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, - sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, - sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(options); @@ -1773,9 +1809,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil return SLJIT_ERR_UNSUPPORTED; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, - sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, - sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(options); @@ -1789,7 +1825,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compi return SLJIT_ERR_UNSUPPORTED; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct 
sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(op); @@ -1799,7 +1835,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi return SLJIT_ERR_UNSUPPORTED; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(dst); @@ -1808,7 +1844,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c return SLJIT_ERR_UNSUPPORTED; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(src); @@ -1817,7 +1853,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler * return SLJIT_ERR_UNSUPPORTED; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(op); @@ -1825,9 +1861,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler return SLJIT_ERR_UNSUPPORTED; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(op); @@ 
-1839,10 +1875,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler return SLJIT_ERR_UNSUPPORTED; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(op); @@ -1856,14 +1892,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler return SLJIT_ERR_UNSUPPORTED; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) { SLJIT_ASSERT_STOP(); return reg; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_si size) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_s32 size) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(instruction); @@ -1872,15 +1908,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *co return SLJIT_ERR_UNSUPPORTED; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) { SLJIT_ASSERT_STOP(); return 0; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(op); @@ -1892,10 +1928,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile 
return SLJIT_ERR_UNSUPPORTED; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(op); @@ -1916,7 +1952,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi return NULL; } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(type); @@ -1924,9 +1960,9 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile return NULL; } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(type); @@ -1938,9 +1974,9 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler return NULL; } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_si type, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(type); @@ -1966,7 +2002,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct 
sljit_jump *jump, sljit_uw SLJIT_ASSERT_STOP(); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(type); @@ -1976,10 +2012,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil return SLJIT_ERR_UNSUPPORTED; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw, - sljit_si type) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw, + sljit_s32 type) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(op); @@ -1992,7 +2028,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com return SLJIT_ERR_UNSUPPORTED; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(dst); @@ -2002,7 +2038,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *co return SLJIT_ERR_UNSUPPORTED; } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw initval) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw initval) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(dst); @@ -2012,17 +2048,19 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi return NULL; } -SLJIT_API_FUNC_ATTRIBUTE 
void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) { SLJIT_UNUSED_ARG(addr); - SLJIT_UNUSED_ARG(new_addr); + SLJIT_UNUSED_ARG(new_target); + SLJIT_UNUSED_ARG(executable_offset); SLJIT_ASSERT_STOP(); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { SLJIT_UNUSED_ARG(addr); SLJIT_UNUSED_ARG(new_constant); + SLJIT_UNUSED_ARG(executable_offset); SLJIT_ASSERT_STOP(); } diff --git a/pcre2/src/sljit/sljitLir.h b/pcre2/src/sljit/sljitLir.h index f0969dac2..f24f556b5 100644 --- a/pcre2/src/sljit/sljitLir.h +++ b/pcre2/src/sljit/sljitLir.h @@ -99,6 +99,8 @@ of sljitConfigInternal.h */ #define SLJIT_ERR_UNSUPPORTED 4 /* An ivalid argument is passed to any SLJIT function. */ #define SLJIT_ERR_BAD_ARGUMENT 5 +/* Dynamic code modification is not enabled. */ +#define SLJIT_ERR_DYN_CODE_MOD 6 /* --------------------------------------------------------------------- */ /* Registers */ @@ -226,7 +228,7 @@ of sljitConfigInternal.h */ /* Floating point registers */ /* --------------------------------------------------------------------- */ -/* Each floating point register can store a double or single precision +/* Each floating point register can store a 32 or a 64 bit precision value. The FR and FS register sets are overlap in the same way as R and S register sets. See above. */ @@ -271,7 +273,7 @@ struct sljit_memory_fragment { struct sljit_memory_fragment *next; sljit_uw used_size; /* Must be aligned to sljit_sw. 
*/ - sljit_ub memory[1]; + sljit_u8 memory[1]; }; struct sljit_label { @@ -297,8 +299,8 @@ struct sljit_const { }; struct sljit_compiler { - sljit_si error; - sljit_si options; + sljit_s32 error; + sljit_s32 options; struct sljit_label *labels; struct sljit_jump *jumps; @@ -312,36 +314,38 @@ struct sljit_compiler { struct sljit_memory_fragment *abuf; /* Used scratch registers. */ - sljit_si scratches; + sljit_s32 scratches; /* Used saved registers. */ - sljit_si saveds; + sljit_s32 saveds; /* Used float scratch registers. */ - sljit_si fscratches; + sljit_s32 fscratches; /* Used float saved registers. */ - sljit_si fsaveds; + sljit_s32 fsaveds; /* Local stack size. */ - sljit_si local_size; + sljit_s32 local_size; /* Code size. */ sljit_uw size; - /* For statistical purposes. */ + /* Relative offset of the executable mapping from the writable mapping. */ + sljit_uw executable_offset; + /* Executable size for statistical purposes. */ sljit_uw executable_size; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - sljit_si args; + sljit_s32 args; #endif #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - sljit_si mode32; + sljit_s32 mode32; #endif #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) - sljit_si flags_saved; + sljit_s32 flags_saved; #endif #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) /* Constant pool handling. */ sljit_uw *cpool; - sljit_ub *cpool_unique; + sljit_u8 *cpool_unique; sljit_uw cpool_diff; sljit_uw cpool_fill; /* Other members. */ @@ -352,40 +356,40 @@ struct sljit_compiler { #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) /* Temporary fields. 
*/ sljit_uw shift_imm; - sljit_si cache_arg; + sljit_s32 cache_arg; sljit_sw cache_argw; #endif #if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) - sljit_si cache_arg; + sljit_s32 cache_arg; sljit_sw cache_argw; #endif #if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) - sljit_si cache_arg; + sljit_s32 cache_arg; sljit_sw cache_argw; #endif #if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) sljit_sw imm; - sljit_si cache_arg; + sljit_s32 cache_arg; sljit_sw cache_argw; #endif #if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) - sljit_si delay_slot; - sljit_si cache_arg; + sljit_s32 delay_slot; + sljit_s32 cache_arg; sljit_sw cache_argw; #endif #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - sljit_si delay_slot; - sljit_si cache_arg; + sljit_s32 delay_slot; + sljit_s32 cache_arg; sljit_sw cache_argw; #endif #if (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX) - sljit_si cache_arg; + sljit_s32 cache_arg; sljit_sw cache_argw; #endif @@ -396,13 +400,13 @@ struct sljit_compiler { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ || (defined SLJIT_DEBUG && SLJIT_DEBUG) /* Local size passed to the functions. */ - sljit_si logical_local_size; + sljit_s32 logical_local_size; #endif #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ || (defined SLJIT_DEBUG && SLJIT_DEBUG) \ || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - sljit_si skip_checks; + sljit_s32 skip_checks; #endif }; @@ -427,7 +431,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compile error code. Thus there is no need for checking the error after every call, it is enough to do it before the code is compiled. Removing these checks increases the performance of the compiling process. 
*/ -static SLJIT_INLINE sljit_si sljit_get_compiler_error(struct sljit_compiler *compiler) { return compiler->error; } +static SLJIT_INLINE sljit_s32 sljit_get_compiler_error(struct sljit_compiler *compiler) { return compiler->error; } /* Sets the compiler error code to SLJIT_ERR_ALLOC_FAILED except if an error was detected before. After the error code is set @@ -448,21 +452,40 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_compiler_memory_error(struct sljit_compi indicate that there is no more memory (does not set the current error code of the compiler to out-of-memory status). */ -SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_si size); +SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_s32 size); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) /* Passing NULL disables verbose. */ SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose); #endif +/* + Create executable code from the sljit instruction stream. This is the final step + of the code generation so no more instructions can be added after this call. +*/ + SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler); + +/* Free executable code. */ + SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code); /* - After the machine code generation is finished we can retrieve the allocated - executable memory size, although this area may not be fully filled with - instructions depending on some optimizations. This function is useful only - for statistical purposes. + When the protected executable allocator is used the JIT code is mapped + twice. The first mapping has read/write and the second mapping has read/exec + permissions. This function returns with the relative offset of the executable + mapping using the writable mapping as the base after the machine code is + successfully generated. 
The returned value is always 0 for the normal executable + allocator, since it uses only one mapping with read/write/exec permissions. + Dynamic code modifications requires this value. + + Before a successful code generation, this function returns with 0. +*/ +static SLJIT_INLINE sljit_sw sljit_get_executable_offset(struct sljit_compiler *compiler) { return compiler->executable_offset; } + +/* + The executable memory consumption of the generated code can be retrieved by + this function. The returned value can be used for statistical purposes. Before a successful code generation, this function returns with 0. */ @@ -518,9 +541,9 @@ offset 0 is aligned to sljit_d. Otherwise it is aligned to sljit_uw. */ /* The local_size must be >= 0 and <= SLJIT_MAX_LOCAL_SIZE. */ #define SLJIT_MAX_LOCAL_SIZE 65536 -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, - sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, - sljit_si fscratches, sljit_si fsaveds, sljit_si local_size); +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size); /* The machine code has a context (which contains the local stack space size, number of used registers, etc.) which initialized by sljit_emit_enter. Several @@ -532,9 +555,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil Note: every call of sljit_emit_enter and sljit_set_context overwrites the previous context. 
*/ -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, - sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, - sljit_si fscratches, sljit_si fsaveds, sljit_si local_size); +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size); /* Return from machine code. The op argument can be SLJIT_UNUSED which means the function does not return with anything or any opcode between SLJIT_MOV and @@ -542,8 +565,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compi is SLJIT_UNUSED, otherwise see below the description about source and destination arguments. */ -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, - sljit_si src, sljit_sw srcw); +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw); /* Fast calling mechanism for utility functions (see SLJIT_FAST_CALL). All registers and even the stack frame is passed to the callee. The return address is preserved in @@ -560,8 +583,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi /* Note: although sljit_emit_fast_return could be replaced by an ijump, it is not suggested, since many architectures do clever branch prediction on call / return instruction pairs. 
*/ -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw); -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw); +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw); +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw); /* Source and destination values for arithmetical instructions @@ -624,31 +647,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler * #define SLJIT_MEM2(r1, r2) (SLJIT_MEM | (r1) | ((r2) << 8)) #define SLJIT_IMM 0x40 -/* Set 32 bit operation mode (I) on 64 bit CPUs. The flag is totally ignored on - 32 bit CPUs. If this flag is set for an arithmetic operation, it uses only the - lower 32 bit of the input register(s), and set the CPU status flags according - to the 32 bit result. The higher 32 bits are undefined for both the input and - output. However, the CPU might not ignore those higher 32 bits, like MIPS, which - expects it to be the sign extension of the lower 32 bit. All 32 bit operations - are undefined, if this condition is not fulfilled. Therefore, when SLJIT_INT_OP - is specified, all register arguments must be the result of other operations with - the same SLJIT_INT_OP flag. In other words, although a register can hold either - a 64 or 32 bit value, these values cannot be mixed. The only exceptions are - SLJIT_IMOV and SLJIT_IMOVU (SLJIT_MOV_SI/SLJIT_MOVU_SI with SLJIT_INT_OP flag) - which can convert any source argument to SLJIT_INT_OP compatible result. This - conversion might be unnecessary on some CPUs like x86-64, since the upper 32 - bit is always ignored. In this case SLJIT is clever enough to not generate any - instructions if the source and destination operands are the same registers. 
- Affects sljit_emit_op0, sljit_emit_op1 and sljit_emit_op2. */ -#define SLJIT_INT_OP 0x100 +/* Set 32 bit operation mode (I) on 64 bit CPUs. This flag is ignored on 32 + bit CPUs. When this flag is set for an arithmetic operation, only the + lower 32 bit of the input register(s) are used, and the CPU status flags + are set according to the 32 bit result. Although the higher 32 bit of + the input and the result registers are not defined by SLJIT, it might be + defined by the CPU architecture (e.g. MIPS). To satisfy these requirements + all source registers must be computed by operations where this flag is + also set. In other words 32 and 64 bit arithmetic operations cannot be + mixed. The only exception is SLJIT_IMOV and SLJIT_IMOVU whose source + register can hold any 32 or 64 bit value. This source register is + converted to a 32 bit compatible format. SLJIT does not generate any + instructions on certain CPUs (e.g. on x86 and ARM) if the source and + destination operands are the same registers. Affects sljit_emit_op0, + sljit_emit_op1 and sljit_emit_op2. */ +#define SLJIT_I32_OP 0x100 -/* Single precision mode (SP). This flag is similar to SLJIT_INT_OP, just +/* F32 precision mode (SP). This flag is similar to SLJIT_I32_OP, just it applies to floating point registers (it is even the same bit). When - this flag is passed, the CPU performs single precision floating point - operations. Similar to SLJIT_INT_OP, all register arguments must be the - result of other floating point operations with this flag. Affects + this flag is passed, the CPU performs 32 bit floating point operations. + Similar to SLJIT_I32_OP, all register arguments must be computed by + floating point operations where this flag is also set. Affects sljit_emit_fop1, sljit_emit_fop2 and sljit_emit_fcmp. 
*/ -#define SLJIT_SINGLE_OP 0x100 +#define SLJIT_F32_OP 0x100 /* Common CPU status flags for all architectures (x86, ARM, PPC) - carry flag @@ -697,43 +718,41 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler * /* Flags: - (may destroy flags) Unsigned multiplication of SLJIT_R0 and SLJIT_R1. Result is placed into SLJIT_R1:SLJIT_R0 (high:low) word */ -#define SLJIT_LUMUL (SLJIT_OP0_BASE + 2) +#define SLJIT_LMUL_UW (SLJIT_OP0_BASE + 2) /* Flags: - (may destroy flags) Signed multiplication of SLJIT_R0 and SLJIT_R1. Result is placed into SLJIT_R1:SLJIT_R0 (high:low) word */ -#define SLJIT_LSMUL (SLJIT_OP0_BASE + 3) +#define SLJIT_LMUL_SW (SLJIT_OP0_BASE + 3) /* Flags: I - (may destroy flags) Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1. The result is placed into SLJIT_R0 and the remainder into SLJIT_R1. Note: if SLJIT_R1 is 0, the behaviour is undefined. */ -#define SLJIT_UDIVMOD (SLJIT_OP0_BASE + 4) -#define SLJIT_IUDIVMOD (SLJIT_UDIVMOD | SLJIT_INT_OP) +#define SLJIT_DIVMOD_UW (SLJIT_OP0_BASE + 4) +#define SLJIT_DIVMOD_U32 (SLJIT_DIVMOD_UW | SLJIT_I32_OP) /* Flags: I - (may destroy flags) Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1. The result is placed into SLJIT_R0 and the remainder into SLJIT_R1. Note: if SLJIT_R1 is 0, the behaviour is undefined. Note: if SLJIT_R1 is -1 and SLJIT_R0 is integer min (0x800..00), the behaviour is undefined. */ -#define SLJIT_SDIVMOD (SLJIT_OP0_BASE + 5) -#define SLJIT_ISDIVMOD (SLJIT_SDIVMOD | SLJIT_INT_OP) +#define SLJIT_DIVMOD_SW (SLJIT_OP0_BASE + 5) +#define SLJIT_DIVMOD_S32 (SLJIT_DIVMOD_SW | SLJIT_I32_OP) /* Flags: I - (may destroy flags) Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1. The result is placed into SLJIT_R0. SLJIT_R1 preserves its value. - Note: if SLJIT_R1 is 0, the behaviour is undefined. - Note: SLJIT_SDIV is single precision divide. 
*/ -#define SLJIT_UDIVI (SLJIT_OP0_BASE + 6) -#define SLJIT_IUDIVI (SLJIT_UDIVI | SLJIT_INT_OP) + Note: if SLJIT_R1 is 0, the behaviour is undefined. */ +#define SLJIT_DIV_UW (SLJIT_OP0_BASE + 6) +#define SLJIT_DIV_U32 (SLJIT_DIV_UW | SLJIT_I32_OP) /* Flags: I - (may destroy flags) Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1. The result is placed into SLJIT_R0. SLJIT_R1 preserves its value. Note: if SLJIT_R1 is 0, the behaviour is undefined. Note: if SLJIT_R1 is -1 and SLJIT_R0 is integer min (0x800..00), - the behaviour is undefined. - Note: SLJIT_SDIV is single precision divide. */ -#define SLJIT_SDIVI (SLJIT_OP0_BASE + 7) -#define SLJIT_ISDIVI (SLJIT_SDIVI | SLJIT_INT_OP) + the behaviour is undefined. */ +#define SLJIT_DIV_SW (SLJIT_OP0_BASE + 7) +#define SLJIT_DIV_S32 (SLJIT_DIV_SW | SLJIT_I32_OP) -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op); +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op); /* Starting index of opcodes for sljit_emit_op1. 
*/ #define SLJIT_OP1_BASE 32 @@ -752,216 +771,188 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler /* Flags: - (never set any flags) */ #define SLJIT_MOV (SLJIT_OP1_BASE + 0) /* Flags: I - (never set any flags) */ -#define SLJIT_MOV_UB (SLJIT_OP1_BASE + 1) -#define SLJIT_IMOV_UB (SLJIT_MOV_UB | SLJIT_INT_OP) +#define SLJIT_MOV_U8 (SLJIT_OP1_BASE + 1) +#define SLJIT_MOV32_U8 (SLJIT_MOV_U8 | SLJIT_I32_OP) /* Flags: I - (never set any flags) */ -#define SLJIT_MOV_SB (SLJIT_OP1_BASE + 2) -#define SLJIT_IMOV_SB (SLJIT_MOV_SB | SLJIT_INT_OP) +#define SLJIT_MOV_S8 (SLJIT_OP1_BASE + 2) +#define SLJIT_MOV32_S8 (SLJIT_MOV_S8 | SLJIT_I32_OP) /* Flags: I - (never set any flags) */ -#define SLJIT_MOV_UH (SLJIT_OP1_BASE + 3) -#define SLJIT_IMOV_UH (SLJIT_MOV_UH | SLJIT_INT_OP) +#define SLJIT_MOV_U16 (SLJIT_OP1_BASE + 3) +#define SLJIT_MOV32_U16 (SLJIT_MOV_U16 | SLJIT_I32_OP) /* Flags: I - (never set any flags) */ -#define SLJIT_MOV_SH (SLJIT_OP1_BASE + 4) -#define SLJIT_IMOV_SH (SLJIT_MOV_SH | SLJIT_INT_OP) +#define SLJIT_MOV_S16 (SLJIT_OP1_BASE + 4) +#define SLJIT_MOV32_S16 (SLJIT_MOV_S16 | SLJIT_I32_OP) /* Flags: I - (never set any flags) - Note: see SLJIT_INT_OP for further details. */ -#define SLJIT_MOV_UI (SLJIT_OP1_BASE + 5) -/* No SLJIT_INT_OP form, since it is the same as SLJIT_IMOV. */ + Note: no SLJIT_MOV32_U32 form, since it is the same as SLJIT_MOV32 */ +#define SLJIT_MOV_U32 (SLJIT_OP1_BASE + 5) /* Flags: I - (never set any flags) - Note: see SLJIT_INT_OP for further details. 
*/ -#define SLJIT_MOV_SI (SLJIT_OP1_BASE + 6) -#define SLJIT_IMOV (SLJIT_MOV_SI | SLJIT_INT_OP) + Note: no SLJIT_MOV32_S32 form, since it is the same as SLJIT_MOV32 */ +#define SLJIT_MOV_S32 (SLJIT_OP1_BASE + 6) +/* Flags: I - (never set any flags) */ +#define SLJIT_MOV32 (SLJIT_MOV_S32 | SLJIT_I32_OP) /* Flags: - (never set any flags) */ #define SLJIT_MOV_P (SLJIT_OP1_BASE + 7) /* Flags: - (never set any flags) */ #define SLJIT_MOVU (SLJIT_OP1_BASE + 8) /* Flags: I - (never set any flags) */ -#define SLJIT_MOVU_UB (SLJIT_OP1_BASE + 9) -#define SLJIT_IMOVU_UB (SLJIT_MOVU_UB | SLJIT_INT_OP) +#define SLJIT_MOVU_U8 (SLJIT_OP1_BASE + 9) +#define SLJIT_MOVU32_U8 (SLJIT_MOVU_U8 | SLJIT_I32_OP) /* Flags: I - (never set any flags) */ -#define SLJIT_MOVU_SB (SLJIT_OP1_BASE + 10) -#define SLJIT_IMOVU_SB (SLJIT_MOVU_SB | SLJIT_INT_OP) +#define SLJIT_MOVU_S8 (SLJIT_OP1_BASE + 10) +#define SLJIT_MOVU32_S8 (SLJIT_MOVU_S8 | SLJIT_I32_OP) /* Flags: I - (never set any flags) */ -#define SLJIT_MOVU_UH (SLJIT_OP1_BASE + 11) -#define SLJIT_IMOVU_UH (SLJIT_MOVU_UH | SLJIT_INT_OP) +#define SLJIT_MOVU_U16 (SLJIT_OP1_BASE + 11) +#define SLJIT_MOVU32_U16 (SLJIT_MOVU_U16 | SLJIT_I32_OP) /* Flags: I - (never set any flags) */ -#define SLJIT_MOVU_SH (SLJIT_OP1_BASE + 12) -#define SLJIT_IMOVU_SH (SLJIT_MOVU_SH | SLJIT_INT_OP) +#define SLJIT_MOVU_S16 (SLJIT_OP1_BASE + 12) +#define SLJIT_MOVU32_S16 (SLJIT_MOVU_S16 | SLJIT_I32_OP) /* Flags: I - (never set any flags) - Note: see SLJIT_INT_OP for further details. */ -#define SLJIT_MOVU_UI (SLJIT_OP1_BASE + 13) -/* No SLJIT_INT_OP form, since it is the same as SLJIT_IMOVU. */ + Note: no SLJIT_MOVU32_U32 form, since it is the same as SLJIT_MOVU32 */ +#define SLJIT_MOVU_U32 (SLJIT_OP1_BASE + 13) /* Flags: I - (never set any flags) - Note: see SLJIT_INT_OP for further details. 
*/ -#define SLJIT_MOVU_SI (SLJIT_OP1_BASE + 14) -#define SLJIT_IMOVU (SLJIT_MOVU_SI | SLJIT_INT_OP) + Note: no SLJIT_MOVU32_S32 form, since it is the same as SLJIT_MOVU32 */ +#define SLJIT_MOVU_S32 (SLJIT_OP1_BASE + 14) +/* Flags: I - (never set any flags) */ +#define SLJIT_MOVU32 (SLJIT_MOVU_S32 | SLJIT_I32_OP) /* Flags: - (never set any flags) */ #define SLJIT_MOVU_P (SLJIT_OP1_BASE + 15) /* Flags: I | E | K */ #define SLJIT_NOT (SLJIT_OP1_BASE + 16) -#define SLJIT_INOT (SLJIT_NOT | SLJIT_INT_OP) +#define SLJIT_NOT32 (SLJIT_NOT | SLJIT_I32_OP) /* Flags: I | E | O | K */ #define SLJIT_NEG (SLJIT_OP1_BASE + 17) -#define SLJIT_INEG (SLJIT_NEG | SLJIT_INT_OP) +#define SLJIT_NEG32 (SLJIT_NEG | SLJIT_I32_OP) /* Count leading zeroes Flags: I | E | K Important note! Sparc 32 does not support K flag, since the required popc instruction is introduced only in sparc 64. */ #define SLJIT_CLZ (SLJIT_OP1_BASE + 18) -#define SLJIT_ICLZ (SLJIT_CLZ | SLJIT_INT_OP) +#define SLJIT_CLZ32 (SLJIT_CLZ | SLJIT_I32_OP) -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw); +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw); /* Starting index of opcodes for sljit_emit_op2. 
*/ #define SLJIT_OP2_BASE 96 /* Flags: I | E | O | C | K */ #define SLJIT_ADD (SLJIT_OP2_BASE + 0) -#define SLJIT_IADD (SLJIT_ADD | SLJIT_INT_OP) +#define SLJIT_ADD32 (SLJIT_ADD | SLJIT_I32_OP) /* Flags: I | C | K */ #define SLJIT_ADDC (SLJIT_OP2_BASE + 1) -#define SLJIT_IADDC (SLJIT_ADDC | SLJIT_INT_OP) +#define SLJIT_ADDC32 (SLJIT_ADDC | SLJIT_I32_OP) /* Flags: I | E | U | S | O | C | K */ #define SLJIT_SUB (SLJIT_OP2_BASE + 2) -#define SLJIT_ISUB (SLJIT_SUB | SLJIT_INT_OP) +#define SLJIT_SUB32 (SLJIT_SUB | SLJIT_I32_OP) /* Flags: I | C | K */ #define SLJIT_SUBC (SLJIT_OP2_BASE + 3) -#define SLJIT_ISUBC (SLJIT_SUBC | SLJIT_INT_OP) +#define SLJIT_SUBC32 (SLJIT_SUBC | SLJIT_I32_OP) /* Note: integer mul Flags: I | O (see SLJIT_C_MUL_*) | K */ #define SLJIT_MUL (SLJIT_OP2_BASE + 4) -#define SLJIT_IMUL (SLJIT_MUL | SLJIT_INT_OP) +#define SLJIT_MUL32 (SLJIT_MUL | SLJIT_I32_OP) /* Flags: I | E | K */ #define SLJIT_AND (SLJIT_OP2_BASE + 5) -#define SLJIT_IAND (SLJIT_AND | SLJIT_INT_OP) +#define SLJIT_AND32 (SLJIT_AND | SLJIT_I32_OP) /* Flags: I | E | K */ #define SLJIT_OR (SLJIT_OP2_BASE + 6) -#define SLJIT_IOR (SLJIT_OR | SLJIT_INT_OP) +#define SLJIT_OR32 (SLJIT_OR | SLJIT_I32_OP) /* Flags: I | E | K */ #define SLJIT_XOR (SLJIT_OP2_BASE + 7) -#define SLJIT_IXOR (SLJIT_XOR | SLJIT_INT_OP) +#define SLJIT_XOR32 (SLJIT_XOR | SLJIT_I32_OP) /* Flags: I | E | K Let bit_length be the length of the shift operation: 32 or 64. If src2 is immediate, src2w is masked by (bit_length - 1). Otherwise, if the content of src2 is outside the range from 0 to bit_length - 1, the result is undefined. */ #define SLJIT_SHL (SLJIT_OP2_BASE + 8) -#define SLJIT_ISHL (SLJIT_SHL | SLJIT_INT_OP) +#define SLJIT_SHL32 (SLJIT_SHL | SLJIT_I32_OP) /* Flags: I | E | K Let bit_length be the length of the shift operation: 32 or 64. If src2 is immediate, src2w is masked by (bit_length - 1). Otherwise, if the content of src2 is outside the range from 0 to bit_length - 1, the result is undefined. 
*/ #define SLJIT_LSHR (SLJIT_OP2_BASE + 9) -#define SLJIT_ILSHR (SLJIT_LSHR | SLJIT_INT_OP) +#define SLJIT_LSHR32 (SLJIT_LSHR | SLJIT_I32_OP) /* Flags: I | E | K Let bit_length be the length of the shift operation: 32 or 64. If src2 is immediate, src2w is masked by (bit_length - 1). Otherwise, if the content of src2 is outside the range from 0 to bit_length - 1, the result is undefined. */ #define SLJIT_ASHR (SLJIT_OP2_BASE + 10) -#define SLJIT_IASHR (SLJIT_ASHR | SLJIT_INT_OP) +#define SLJIT_ASHR32 (SLJIT_ASHR | SLJIT_I32_OP) -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w); - -/* The following function is a helper function for sljit_emit_op_custom. - It returns with the real machine register index ( >=0 ) of any SLJIT_R, - SLJIT_S and SLJIT_SP registers. - - Note: it returns with -1 for virtual registers (only on x86-32). */ - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg); - -/* The following function is a helper function for sljit_emit_op_custom. - It returns with the real machine register index of any SLJIT_FLOAT register. - - Note: the index is always an even number on ARM (except ARM-64), MIPS, and SPARC. */ - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg); - -/* Any instruction can be inserted into the instruction stream by - sljit_emit_op_custom. It has a similar purpose as inline assembly. - The size parameter must match to the instruction size of the target - architecture: - - x86: 0 < size <= 15. The instruction argument can be byte aligned. - Thumb2: if size == 2, the instruction argument must be 2 byte aligned. - if size == 4, the instruction argument must be 4 byte aligned. - Otherwise: size must be 4 and instruction argument must be 4 byte aligned. 
*/ - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_si size); +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w); /* Returns with non-zero if fpu is available. */ -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void); +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void); /* Starting index of opcodes for sljit_emit_fop1. */ #define SLJIT_FOP1_BASE 128 /* Flags: SP - (never set any flags) */ -#define SLJIT_DMOV (SLJIT_FOP1_BASE + 0) -#define SLJIT_SMOV (SLJIT_DMOV | SLJIT_SINGLE_OP) +#define SLJIT_MOV_F64 (SLJIT_FOP1_BASE + 0) +#define SLJIT_MOV_F32 (SLJIT_MOV_F64 | SLJIT_F32_OP) /* Convert opcodes: CONV[DST_TYPE].FROM[SRC_TYPE] SRC/DST TYPE can be: D - double, S - single, W - signed word, I - signed int Rounding mode when the destination is W or I: round towards zero. 
*/ /* Flags: SP - (never set any flags) */ -#define SLJIT_CONVD_FROMS (SLJIT_FOP1_BASE + 1) -#define SLJIT_CONVS_FROMD (SLJIT_CONVD_FROMS | SLJIT_SINGLE_OP) +#define SLJIT_CONV_F64_FROM_F32 (SLJIT_FOP1_BASE + 1) +#define SLJIT_CONV_F32_FROM_F64 (SLJIT_CONV_F64_FROM_F32 | SLJIT_F32_OP) /* Flags: SP - (never set any flags) */ -#define SLJIT_CONVW_FROMD (SLJIT_FOP1_BASE + 2) -#define SLJIT_CONVW_FROMS (SLJIT_CONVW_FROMD | SLJIT_SINGLE_OP) +#define SLJIT_CONV_SW_FROM_F64 (SLJIT_FOP1_BASE + 2) +#define SLJIT_CONV_SW_FROM_F32 (SLJIT_CONV_SW_FROM_F64 | SLJIT_F32_OP) /* Flags: SP - (never set any flags) */ -#define SLJIT_CONVI_FROMD (SLJIT_FOP1_BASE + 3) -#define SLJIT_CONVI_FROMS (SLJIT_CONVI_FROMD | SLJIT_SINGLE_OP) +#define SLJIT_CONV_S32_FROM_F64 (SLJIT_FOP1_BASE + 3) +#define SLJIT_CONV_S32_FROM_F32 (SLJIT_CONV_S32_FROM_F64 | SLJIT_F32_OP) /* Flags: SP - (never set any flags) */ -#define SLJIT_CONVD_FROMW (SLJIT_FOP1_BASE + 4) -#define SLJIT_CONVS_FROMW (SLJIT_CONVD_FROMW | SLJIT_SINGLE_OP) +#define SLJIT_CONV_F64_FROM_SW (SLJIT_FOP1_BASE + 4) +#define SLJIT_CONV_F32_FROM_SW (SLJIT_CONV_F64_FROM_SW | SLJIT_F32_OP) /* Flags: SP - (never set any flags) */ -#define SLJIT_CONVD_FROMI (SLJIT_FOP1_BASE + 5) -#define SLJIT_CONVS_FROMI (SLJIT_CONVD_FROMI | SLJIT_SINGLE_OP) +#define SLJIT_CONV_F64_FROM_S32 (SLJIT_FOP1_BASE + 5) +#define SLJIT_CONV_F32_FROM_S32 (SLJIT_CONV_F64_FROM_S32 | SLJIT_F32_OP) /* Note: dst is the left and src is the right operand for SLJIT_CMPD. Note: NaN check is always performed. If SLJIT_C_FLOAT_UNORDERED flag is set, the comparison result is unpredictable. 
Flags: SP | E | S (see SLJIT_C_FLOAT_*) */ -#define SLJIT_DCMP (SLJIT_FOP1_BASE + 6) -#define SLJIT_SCMP (SLJIT_DCMP | SLJIT_SINGLE_OP) +#define SLJIT_CMP_F64 (SLJIT_FOP1_BASE + 6) +#define SLJIT_CMP_F32 (SLJIT_CMP_F64 | SLJIT_F32_OP) /* Flags: SP - (never set any flags) */ -#define SLJIT_DNEG (SLJIT_FOP1_BASE + 7) -#define SLJIT_SNEG (SLJIT_DNEG | SLJIT_SINGLE_OP) +#define SLJIT_NEG_F64 (SLJIT_FOP1_BASE + 7) +#define SLJIT_NEG_F32 (SLJIT_NEG_F64 | SLJIT_F32_OP) /* Flags: SP - (never set any flags) */ -#define SLJIT_DABS (SLJIT_FOP1_BASE + 8) -#define SLJIT_SABS (SLJIT_DABS | SLJIT_SINGLE_OP) +#define SLJIT_ABS_F64 (SLJIT_FOP1_BASE + 8) +#define SLJIT_ABS_F32 (SLJIT_ABS_F64 | SLJIT_F32_OP) -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw); +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw); /* Starting index of opcodes for sljit_emit_fop2. 
*/ #define SLJIT_FOP2_BASE 160 /* Flags: SP - (never set any flags) */ -#define SLJIT_DADD (SLJIT_FOP2_BASE + 0) -#define SLJIT_SADD (SLJIT_DADD | SLJIT_SINGLE_OP) +#define SLJIT_ADD_F64 (SLJIT_FOP2_BASE + 0) +#define SLJIT_ADD_F32 (SLJIT_ADD_F64 | SLJIT_F32_OP) /* Flags: SP - (never set any flags) */ -#define SLJIT_DSUB (SLJIT_FOP2_BASE + 1) -#define SLJIT_SSUB (SLJIT_DSUB | SLJIT_SINGLE_OP) +#define SLJIT_SUB_F64 (SLJIT_FOP2_BASE + 1) +#define SLJIT_SUB_F32 (SLJIT_SUB_F64 | SLJIT_F32_OP) /* Flags: SP - (never set any flags) */ -#define SLJIT_DMUL (SLJIT_FOP2_BASE + 2) -#define SLJIT_SMUL (SLJIT_DMUL | SLJIT_SINGLE_OP) +#define SLJIT_MUL_F64 (SLJIT_FOP2_BASE + 2) +#define SLJIT_MUL_F32 (SLJIT_MUL_F64 | SLJIT_F32_OP) /* Flags: SP - (never set any flags) */ -#define SLJIT_DDIV (SLJIT_FOP2_BASE + 3) -#define SLJIT_SDIV (SLJIT_DDIV | SLJIT_SINGLE_OP) +#define SLJIT_DIV_F64 (SLJIT_FOP2_BASE + 3) +#define SLJIT_DIV_F32 (SLJIT_DIV_F64 | SLJIT_F32_OP) -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w); +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w); /* Label and jump instructions. */ @@ -971,58 +962,58 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi /* Integer comparison types. 
*/ #define SLJIT_EQUAL 0 -#define SLJIT_I_EQUAL (SLJIT_EQUAL | SLJIT_INT_OP) +#define SLJIT_EQUAL32 (SLJIT_EQUAL | SLJIT_I32_OP) #define SLJIT_ZERO 0 -#define SLJIT_I_ZERO (SLJIT_ZERO | SLJIT_INT_OP) +#define SLJIT_ZERO32 (SLJIT_ZERO | SLJIT_I32_OP) #define SLJIT_NOT_EQUAL 1 -#define SLJIT_I_NOT_EQUAL (SLJIT_NOT_EQUAL | SLJIT_INT_OP) +#define SLJIT_NOT_EQUAL32 (SLJIT_NOT_EQUAL | SLJIT_I32_OP) #define SLJIT_NOT_ZERO 1 -#define SLJIT_I_NOT_ZERO (SLJIT_NOT_ZERO | SLJIT_INT_OP) +#define SLJIT_NOT_ZERO32 (SLJIT_NOT_ZERO | SLJIT_I32_OP) #define SLJIT_LESS 2 -#define SLJIT_I_LESS (SLJIT_LESS | SLJIT_INT_OP) +#define SLJIT_LESS32 (SLJIT_LESS | SLJIT_I32_OP) #define SLJIT_GREATER_EQUAL 3 -#define SLJIT_I_GREATER_EQUAL (SLJIT_GREATER_EQUAL | SLJIT_INT_OP) +#define SLJIT_GREATER_EQUAL32 (SLJIT_GREATER_EQUAL | SLJIT_I32_OP) #define SLJIT_GREATER 4 -#define SLJIT_I_GREATER (SLJIT_GREATER | SLJIT_INT_OP) +#define SLJIT_GREATER32 (SLJIT_GREATER | SLJIT_I32_OP) #define SLJIT_LESS_EQUAL 5 -#define SLJIT_I_LESS_EQUAL (SLJIT_LESS_EQUAL | SLJIT_INT_OP) +#define SLJIT_LESS_EQUAL32 (SLJIT_LESS_EQUAL | SLJIT_I32_OP) #define SLJIT_SIG_LESS 6 -#define SLJIT_I_SIG_LESS (SLJIT_SIG_LESS | SLJIT_INT_OP) +#define SLJIT_SIG_LESS32 (SLJIT_SIG_LESS | SLJIT_I32_OP) #define SLJIT_SIG_GREATER_EQUAL 7 -#define SLJIT_I_SIG_GREATER_EQUAL (SLJIT_SIG_GREATER_EQUAL | SLJIT_INT_OP) +#define SLJIT_SIG_GREATER_EQUAL32 (SLJIT_SIG_GREATER_EQUAL | SLJIT_I32_OP) #define SLJIT_SIG_GREATER 8 -#define SLJIT_I_SIG_GREATER (SLJIT_SIG_GREATER | SLJIT_INT_OP) +#define SLJIT_SIG_GREATER32 (SLJIT_SIG_GREATER | SLJIT_I32_OP) #define SLJIT_SIG_LESS_EQUAL 9 -#define SLJIT_I_SIG_LESS_EQUAL (SLJIT_SIG_LESS_EQUAL | SLJIT_INT_OP) +#define SLJIT_SIG_LESS_EQUAL32 (SLJIT_SIG_LESS_EQUAL | SLJIT_I32_OP) #define SLJIT_OVERFLOW 10 -#define SLJIT_I_OVERFLOW (SLJIT_OVERFLOW | SLJIT_INT_OP) +#define SLJIT_OVERFLOW32 (SLJIT_OVERFLOW | SLJIT_I32_OP) #define SLJIT_NOT_OVERFLOW 11 -#define SLJIT_I_NOT_OVERFLOW (SLJIT_NOT_OVERFLOW | 
SLJIT_INT_OP) +#define SLJIT_NOT_OVERFLOW32 (SLJIT_NOT_OVERFLOW | SLJIT_I32_OP) #define SLJIT_MUL_OVERFLOW 12 -#define SLJIT_I_MUL_OVERFLOW (SLJIT_MUL_OVERFLOW | SLJIT_INT_OP) +#define SLJIT_MUL_OVERFLOW32 (SLJIT_MUL_OVERFLOW | SLJIT_I32_OP) #define SLJIT_MUL_NOT_OVERFLOW 13 -#define SLJIT_I_MUL_NOT_OVERFLOW (SLJIT_MUL_NOT_OVERFLOW | SLJIT_INT_OP) +#define SLJIT_MUL_NOT_OVERFLOW32 (SLJIT_MUL_NOT_OVERFLOW | SLJIT_I32_OP) /* Floating point comparison types. */ -#define SLJIT_D_EQUAL 14 -#define SLJIT_S_EQUAL (SLJIT_D_EQUAL | SLJIT_SINGLE_OP) -#define SLJIT_D_NOT_EQUAL 15 -#define SLJIT_S_NOT_EQUAL (SLJIT_D_NOT_EQUAL | SLJIT_SINGLE_OP) -#define SLJIT_D_LESS 16 -#define SLJIT_S_LESS (SLJIT_D_LESS | SLJIT_SINGLE_OP) -#define SLJIT_D_GREATER_EQUAL 17 -#define SLJIT_S_GREATER_EQUAL (SLJIT_D_GREATER_EQUAL | SLJIT_SINGLE_OP) -#define SLJIT_D_GREATER 18 -#define SLJIT_S_GREATER (SLJIT_D_GREATER | SLJIT_SINGLE_OP) -#define SLJIT_D_LESS_EQUAL 19 -#define SLJIT_S_LESS_EQUAL (SLJIT_D_LESS_EQUAL | SLJIT_SINGLE_OP) -#define SLJIT_D_UNORDERED 20 -#define SLJIT_S_UNORDERED (SLJIT_D_UNORDERED | SLJIT_SINGLE_OP) -#define SLJIT_D_ORDERED 21 -#define SLJIT_S_ORDERED (SLJIT_D_ORDERED | SLJIT_SINGLE_OP) +#define SLJIT_EQUAL_F64 14 +#define SLJIT_EQUAL_F32 (SLJIT_EQUAL_F64 | SLJIT_F32_OP) +#define SLJIT_NOT_EQUAL_F64 15 +#define SLJIT_NOT_EQUAL_F32 (SLJIT_NOT_EQUAL_F64 | SLJIT_F32_OP) +#define SLJIT_LESS_F64 16 +#define SLJIT_LESS_F32 (SLJIT_LESS_F64 | SLJIT_F32_OP) +#define SLJIT_GREATER_EQUAL_F64 17 +#define SLJIT_GREATER_EQUAL_F32 (SLJIT_GREATER_EQUAL_F64 | SLJIT_F32_OP) +#define SLJIT_GREATER_F64 18 +#define SLJIT_GREATER_F32 (SLJIT_GREATER_F64 | SLJIT_F32_OP) +#define SLJIT_LESS_EQUAL_F64 19 +#define SLJIT_LESS_EQUAL_F32 (SLJIT_LESS_EQUAL_F64 | SLJIT_F32_OP) +#define SLJIT_UNORDERED_F64 20 +#define SLJIT_UNORDERED_F32 (SLJIT_UNORDERED_F64 | SLJIT_F32_OP) +#define SLJIT_ORDERED_F64 21 +#define SLJIT_ORDERED_F32 (SLJIT_ORDERED_F64 | SLJIT_F32_OP) /* Unconditional jump types. 
*/ #define SLJIT_JUMP 22 @@ -1042,7 +1033,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP Flags: - (never set any flags) for both conditional and unconditional jumps. Flags: destroy all flags for calls. */ -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type); +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type); /* Basic arithmetic comparison. In most architectures it is implemented as an SLJIT_SUB operation (with SLJIT_UNUSED destination and setting @@ -1052,23 +1043,23 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile type must be between SLJIT_EQUAL and SLJIT_I_SIG_LESS_EQUAL type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP Flags: destroy flags. */ -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w); +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w); /* Basic floating point comparison. In most architectures it is implemented as an SLJIT_FCMP operation (setting appropriate flags) followed by a sljit_emit_jump. However some architectures (i.e: MIPS) may employ special optimizations here. It is suggested to use this comparison form when appropriate. - type must be between SLJIT_D_EQUAL and SLJIT_S_ORDERED + type must be between SLJIT_EQUAL_F64 and SLJIT_ORDERED_F32 type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP Flags: destroy flags. Note: if either operand is NaN, the behaviour is undefined for types up to SLJIT_S_LESS_EQUAL. 
*/ -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_si type, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w); +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w); /* Set the destination of the jump to this label. */ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label); @@ -1081,14 +1072,14 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw Indirect form: any other valid addressing mode Flags: - (never set any flags) for unconditional jumps. Flags: destroy all flags for calls. */ -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw); +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw); /* Perform the operation using the conditional flags as the second argument. Type must always be between SLJIT_EQUAL and SLJIT_S_ORDERED. The value represented by the type is 1, if the condition represented by the type is fulfilled, and 0 otherwise. - If op == SLJIT_MOV, SLJIT_MOV_SI, SLJIT_MOV_UI: + If op == SLJIT_MOV, SLJIT_MOV_S32, SLJIT_MOV_U32: Set dst to the value represented by the type (0 or 1). Src must be SLJIT_UNUSED, and srcw must be 0 Flags: - (never set any flags) @@ -1098,18 +1089,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil Important note: only dst=src and dstw=srcw is supported at the moment! Flags: I | E | K Note: sljit_emit_op_flags does nothing, if dst is SLJIT_UNUSED (regardless of op). 
*/ -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw, - sljit_si type); +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw, + sljit_s32 type); /* Copies the base address of SLJIT_SP + offset to dst. Flags: - (never set any flags) */ -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset); +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset); /* The constant can be changed runtime (see: sljit_set_const) Flags: - (never set any flags) */ -SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value); +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value); /* After the code generation the address for label, jump and const instructions are computed. Since these structures are freed by sljit_free_compiler, the @@ -1118,9 +1109,10 @@ static SLJIT_INLINE sljit_uw sljit_get_label_addr(struct sljit_label *label) { r static SLJIT_INLINE sljit_uw sljit_get_jump_addr(struct sljit_jump *jump) { return jump->addr; } static SLJIT_INLINE sljit_uw sljit_get_const_addr(struct sljit_const *const_) { return const_->addr; } -/* Only the address is required to rewrite the code. */ -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr); -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant); +/* Only the address and executable offset are required to perform dynamic + code modifications. See sljit_get_executable_offset function. 
*/ +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset); +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset); /* --------------------------------------------------------------------- */ /* Miscellaneous utility functions */ @@ -1132,7 +1124,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta /* Get the human readable name of the platform. Can be useful on platforms like ARM, where ARM and Thumb2 functions can be mixed, and it is useful to know the type of the code generator. */ -SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void); +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void); /* Portable helper function to get an offset of a member. */ #define SLJIT_OFFSETOF(base, member) ((sljit_sw)(&((base*)0x10)->member) - 0x10) @@ -1214,4 +1206,64 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct #endif /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */ +/* --------------------------------------------------------------------- */ +/* CPU specific functions */ +/* --------------------------------------------------------------------- */ + +/* The following function is a helper function for sljit_emit_op_custom. + It returns with the real machine register index ( >=0 ) of any SLJIT_R, + SLJIT_S and SLJIT_SP registers. + + Note: it returns with -1 for virtual registers (only on x86-32). */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg); + +/* The following function is a helper function for sljit_emit_op_custom. + It returns with the real machine register index of any SLJIT_FLOAT register. + + Note: the index is always an even number on ARM (except ARM-64), MIPS, and SPARC. 
*/ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg); + +/* Any instruction can be inserted into the instruction stream by + sljit_emit_op_custom. It has a similar purpose as inline assembly. + The size parameter must match to the instruction size of the target + architecture: + + x86: 0 < size <= 15. The instruction argument can be byte aligned. + Thumb2: if size == 2, the instruction argument must be 2 byte aligned. + if size == 4, the instruction argument must be 4 byte aligned. + Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_s32 size); + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + +/* Returns with non-zero if sse2 is available. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_sse2_available(void); + +/* Returns with non-zero if cmov instruction is available. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_cmov_available(void); + +/* Emit a conditional mov instruction on x86 CPUs. This instruction + moves src to destination, if the condition is satisfied. Unlike + other arithmetic instructions, destination must be a register. + Before such instructions are emitted, cmov support should be + checked by sljit_x86_is_cmov_available function. 
+ type must be between SLJIT_EQUAL and SLJIT_S_ORDERED + dst_reg must be a valid register and it can be combined + with SLJIT_I32_OP to perform 32 bit arithmetic + Flags: I - (never set any flags) + */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_emit_cmov(struct sljit_compiler *compiler, + sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw); + +#endif + #endif /* _SLJIT_LIR_H_ */ diff --git a/pcre2/src/sljit/sljitNativeARM_32.c b/pcre2/src/sljit/sljitNativeARM_32.c index 5cd4c71a2..09701d53f 100644 --- a/pcre2/src/sljit/sljitNativeARM_32.c +++ b/pcre2/src/sljit/sljitNativeARM_32.c @@ -24,7 +24,7 @@ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) { #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) return "ARMv7" SLJIT_CPUINFO; @@ -52,10 +52,10 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) #define ALIGN_INSTRUCTION(ptr) \ (sljit_uw*)(((sljit_uw)(ptr) + (CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1) & ~((CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1)) #define MAX_DIFFERENCE(max_diff) \ - (((max_diff) / (sljit_si)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1)) + (((max_diff) / (sljit_s32)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1)) /* See sljit_emit_enter and sljit_emit_op0 if you want to change them. 
*/ -static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { 0, 0, 1, 2, 11, 10, 9, 8, 7, 6, 5, 4, 13, 3, 12, 14, 15 }; @@ -126,13 +126,13 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) -static sljit_si push_cpool(struct sljit_compiler *compiler) +static sljit_s32 push_cpool(struct sljit_compiler *compiler) { /* Pushing the constant pool into the instruction stream. */ sljit_uw* inst; sljit_uw* cpool_ptr; sljit_uw* cpool_end; - sljit_si i; + sljit_s32 i; /* The label could point the address after the constant pool. */ if (compiler->last_label && compiler->last_label->size == compiler->size) @@ -164,7 +164,7 @@ static sljit_si push_cpool(struct sljit_compiler *compiler) return SLJIT_SUCCESS; } -static sljit_si push_inst(struct sljit_compiler *compiler, sljit_uw inst) +static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_uw inst) { sljit_uw* ptr; @@ -178,13 +178,13 @@ static sljit_si push_inst(struct sljit_compiler *compiler, sljit_uw inst) return SLJIT_SUCCESS; } -static sljit_si push_inst_with_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal) +static sljit_s32 push_inst_with_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal) { sljit_uw* ptr; sljit_uw cpool_index = CPOOL_SIZE; sljit_uw* cpool_ptr; sljit_uw* cpool_end; - sljit_ub* cpool_unique_ptr; + sljit_u8* cpool_unique_ptr; if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092))) FAIL_IF(push_cpool(compiler)); @@ -228,7 +228,7 @@ static sljit_si push_inst_with_literal(struct sljit_compiler *compiler, sljit_uw return SLJIT_SUCCESS; } -static sljit_si push_inst_with_unique_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal) +static sljit_s32 push_inst_with_unique_literal(struct sljit_compiler *compiler, 
sljit_uw inst, sljit_uw literal) { sljit_uw* ptr; if (SLJIT_UNLIKELY((compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)) || compiler->cpool_fill >= CPOOL_SIZE)) @@ -248,7 +248,7 @@ static sljit_si push_inst_with_unique_literal(struct sljit_compiler *compiler, s return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_si prepare_blx(struct sljit_compiler *compiler) +static SLJIT_INLINE sljit_s32 prepare_blx(struct sljit_compiler *compiler) { /* Place for at least two instruction (doesn't matter whether the first has a literal). */ if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4088))) @@ -256,7 +256,7 @@ static SLJIT_INLINE sljit_si prepare_blx(struct sljit_compiler *compiler) return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_si emit_blx(struct sljit_compiler *compiler) +static SLJIT_INLINE sljit_s32 emit_blx(struct sljit_compiler *compiler) { /* Must follow tightly the previous instruction (to be able to convert it to bl instruction). */ SLJIT_ASSERT(compiler->cpool_diff == CONST_POOL_EMPTY || compiler->size - compiler->cpool_diff < MAX_DIFFERENCE(4092)); @@ -286,7 +286,7 @@ static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ /* Must be a load instruction with immediate offset. */ SLJIT_ASSERT(ind < cpool_size && !(*last_pc_patch & (1 << 25)) && (*last_pc_patch & (1 << 20))); - if ((sljit_si)const_pool[ind] < 0) { + if ((sljit_s32)const_pool[ind] < 0) { const_pool[ind] = counter; ind = counter; counter++; @@ -311,26 +311,26 @@ static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ /* In some rare ocasions we may need future patches. The probability is close to 0 in practice. 
*/ struct future_patch { struct future_patch* next; - sljit_si index; - sljit_si value; + sljit_s32 index; + sljit_s32 value; }; -static sljit_si resolve_const_pool_index(struct sljit_compiler *compiler, struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr) +static sljit_s32 resolve_const_pool_index(struct sljit_compiler *compiler, struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr) { - sljit_si value; + sljit_s32 value; struct future_patch *curr_patch, *prev_patch; SLJIT_UNUSED_ARG(compiler); /* Using the values generated by patch_pc_relative_loads. */ if (!*first_patch) - value = (sljit_si)cpool_start_address[cpool_current_index]; + value = (sljit_s32)cpool_start_address[cpool_current_index]; else { curr_patch = *first_patch; - prev_patch = 0; + prev_patch = NULL; while (1) { if (!curr_patch) { - value = (sljit_si)cpool_start_address[cpool_current_index]; + value = (sljit_s32)cpool_start_address[cpool_current_index]; break; } if ((sljit_uw)curr_patch->index == cpool_current_index) { @@ -370,7 +370,7 @@ static sljit_si resolve_const_pool_index(struct sljit_compiler *compiler, struct #else -static sljit_si push_inst(struct sljit_compiler *compiler, sljit_uw inst) +static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_uw inst) { sljit_uw* ptr; @@ -381,7 +381,7 @@ static sljit_si push_inst(struct sljit_compiler *compiler, sljit_uw inst) return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_si emit_imm(struct sljit_compiler *compiler, sljit_si reg, sljit_sw imm) +static SLJIT_INLINE sljit_s32 emit_imm(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm) { FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff))); return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff)); @@ -389,7 +389,7 @@ static SLJIT_INLINE sljit_si emit_imm(struct sljit_compiler *compiler, 
sljit_si #endif -static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code) +static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code, sljit_sw executable_offset) { sljit_sw diff; @@ -401,7 +401,7 @@ static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_uw code_ptr--; if (jump->flags & JUMP_ADDR) - diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2)); + diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset); else { SLJIT_ASSERT(jump->flags & JUMP_LABEL); diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2)); @@ -426,7 +426,7 @@ static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_uw } #else if (jump->flags & JUMP_ADDR) - diff = ((sljit_sw)jump->u.target - (sljit_sw)code_ptr); + diff = ((sljit_sw)jump->u.target - (sljit_sw)code_ptr - executable_offset); else { SLJIT_ASSERT(jump->flags & JUMP_LABEL); diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)code_ptr); @@ -446,26 +446,28 @@ static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_uw return 0; } -static SLJIT_INLINE void inline_set_jump_addr(sljit_uw addr, sljit_uw new_addr, sljit_si flush) +static SLJIT_INLINE void inline_set_jump_addr(sljit_uw jump_ptr, sljit_sw executable_offset, sljit_uw new_addr, sljit_s32 flush_cache) { #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - sljit_uw *ptr = (sljit_uw*)addr; - sljit_uw *inst = (sljit_uw*)ptr[0]; + sljit_uw *ptr = (sljit_uw *)jump_ptr; + sljit_uw *inst = (sljit_uw *)ptr[0]; sljit_uw mov_pc = ptr[1]; - sljit_si bl = (mov_pc & 0x0000f000) != RD(TMP_PC); - sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2)) >> 2); + sljit_s32 bl = (mov_pc & 0x0000f000) != RD(TMP_PC); + sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2) - executable_offset) >> 2); if (diff <= 0x7fffff && diff >= -0x800000) { /* 
Turn to branch. */ if (!bl) { inst[0] = (mov_pc & COND_MASK) | (B - CONDITIONAL) | (diff & 0xffffff); - if (flush) { + if (flush_cache) { + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 1); } } else { inst[0] = (mov_pc & COND_MASK) | (BL - CONDITIONAL) | (diff & 0xffffff); inst[1] = NOP; - if (flush) { + if (flush_cache) { + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 2); } } @@ -479,12 +481,14 @@ static SLJIT_INLINE void inline_set_jump_addr(sljit_uw addr, sljit_uw new_addr, if (*inst != mov_pc) { inst[0] = mov_pc; if (!bl) { - if (flush) { + if (flush_cache) { + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 1); } } else { inst[1] = BLX | RM(TMP_REG1); - if (flush) { + if (flush_cache) { + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 2); } } @@ -492,11 +496,12 @@ static SLJIT_INLINE void inline_set_jump_addr(sljit_uw addr, sljit_uw new_addr, *ptr = new_addr; } #else - sljit_uw *inst = (sljit_uw*)addr; + sljit_uw *inst = (sljit_uw*)jump_ptr; SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT); inst[0] = MOVW | (inst[0] & 0xf000) | ((new_addr << 4) & 0xf0000) | (new_addr & 0xfff); inst[1] = MOVT | (inst[1] & 0xf000) | ((new_addr >> 12) & 0xf0000) | ((new_addr >> 16) & 0xfff); - if (flush) { + if (flush_cache) { + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 2); } #endif @@ -504,7 +509,7 @@ static SLJIT_INLINE void inline_set_jump_addr(sljit_uw addr, sljit_uw new_addr, static sljit_uw get_imm(sljit_uw imm); -static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw new_constant, sljit_si flush) +static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_offset, sljit_sw new_constant, sljit_s32 flush_cache) { #if (defined SLJIT_CONFIG_ARM_V5 && 
SLJIT_CONFIG_ARM_V5) sljit_uw *ptr = (sljit_uw*)addr; @@ -515,7 +520,8 @@ static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw new_constant, src2 = get_imm(new_constant); if (src2) { *inst = 0xe3a00000 | (ldr_literal & 0xf000) | src2; - if (flush) { + if (flush_cache) { + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 1); } return; @@ -524,7 +530,8 @@ static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw new_constant, src2 = get_imm(~new_constant); if (src2) { *inst = 0xe3e00000 | (ldr_literal & 0xf000) | src2; - if (flush) { + if (flush_cache) { + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 1); } return; @@ -537,7 +544,8 @@ static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw new_constant, if (*inst != ldr_literal) { *inst = ldr_literal; - if (flush) { + if (flush_cache) { + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 1); } } @@ -547,7 +555,8 @@ static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw new_constant, SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT); inst[0] = MOVW | (inst[0] & 0xf000) | ((new_constant << 4) & 0xf0000) | (new_constant & 0xfff); inst[1] = MOVT | (inst[1] & 0xf000) | ((new_constant >> 12) & 0xf0000) | ((new_constant >> 16) & 0xfff); - if (flush) { + if (flush_cache) { + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 2); } #endif @@ -562,6 +571,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_uw *buf_end; sljit_uw size; sljit_uw word_count; + sljit_sw executable_offset; + sljit_sw jump_addr; #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) sljit_uw cpool_size; sljit_uw cpool_skip_alignment; @@ -602,14 +613,14 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil code_ptr = 
code; word_count = 0; + executable_offset = SLJIT_EXEC_OFFSET(code); label = compiler->labels; jump = compiler->jumps; const_ = compiler->consts; if (label && label->size == 0) { - label->addr = (sljit_uw)code; - label->size = 0; + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); label = label->next; } @@ -636,7 +647,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil cpool_size = 0; if (label && label->size == word_count) { /* Points after the current instruction. */ - label->addr = (sljit_uw)code_ptr; + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = code_ptr - code; label = label->next; } @@ -652,19 +663,19 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_ASSERT(!const_ || const_->addr >= word_count); if (jump && jump->addr == word_count) { #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - if (detect_jump_type(jump, code_ptr, code)) + if (detect_jump_type(jump, code_ptr, code, executable_offset)) code_ptr--; jump->addr = (sljit_uw)code_ptr; #else jump->addr = (sljit_uw)(code_ptr - 2); - if (detect_jump_type(jump, code_ptr, code)) + if (detect_jump_type(jump, code_ptr, code, executable_offset)) code_ptr -= 2; #endif jump = jump->next; } if (label && label->size == word_count) { /* code_ptr can be affected above. 
*/ - label->addr = (sljit_uw)(code_ptr + 1); + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr + 1, executable_offset); label->size = (code_ptr + 1) - code; label = label->next; } @@ -729,17 +740,18 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil jump = compiler->jumps; while (jump) { - buf_ptr = (sljit_uw*)jump->addr; + buf_ptr = (sljit_uw *)jump->addr; if (jump->flags & PATCH_B) { + jump_addr = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset); if (!(jump->flags & JUMP_ADDR)) { SLJIT_ASSERT(jump->flags & JUMP_LABEL); - SLJIT_ASSERT(((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) <= 0x01ffffff && ((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) >= -0x02000000); - *buf_ptr |= (((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) >> 2) & 0x00ffffff; + SLJIT_ASSERT(((sljit_sw)jump->u.label->addr - jump_addr) <= 0x01ffffff && ((sljit_sw)jump->u.label->addr - jump_addr) >= -0x02000000); + *buf_ptr |= (((sljit_sw)jump->u.label->addr - jump_addr) >> 2) & 0x00ffffff; } else { - SLJIT_ASSERT(((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) <= 0x01ffffff && ((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) >= -0x02000000); - *buf_ptr |= (((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) >> 2) & 0x00ffffff; + SLJIT_ASSERT(((sljit_sw)jump->u.target - jump_addr) <= 0x01ffffff && ((sljit_sw)jump->u.target - jump_addr) >= -0x02000000); + *buf_ptr |= (((sljit_sw)jump->u.target - jump_addr) >> 2) & 0x00ffffff; } } else if (jump->flags & SLJIT_REWRITABLE_JUMP) { @@ -747,10 +759,10 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil jump->addr = (sljit_uw)code_ptr; code_ptr[0] = (sljit_uw)buf_ptr; code_ptr[1] = *buf_ptr; - inline_set_jump_addr((sljit_uw)code_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); + inline_set_jump_addr((sljit_uw)code_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? 
jump->u.label->addr : jump->u.target, 0); code_ptr += 2; #else - inline_set_jump_addr((sljit_uw)buf_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); + inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); #endif } else { @@ -763,7 +775,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil buf_ptr += 1; *buf_ptr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; #else - inline_set_jump_addr((sljit_uw)buf_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); + inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); #endif } jump = jump->next; @@ -782,17 +794,22 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil else buf_ptr += 1; /* Set the value again (can be a simple constant). */ - inline_set_const((sljit_uw)code_ptr, *buf_ptr, 0); + inline_set_const((sljit_uw)code_ptr, executable_offset, *buf_ptr, 0); code_ptr += 2; const_ = const_->next; } #endif - SLJIT_ASSERT(code_ptr - code <= (sljit_si)size); + SLJIT_ASSERT(code_ptr - code <= (sljit_s32)size); compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; compiler->executable_size = (code_ptr - code) * sizeof(sljit_uw); + + code = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + SLJIT_CACHE_FLUSH(code, code_ptr); return code; } @@ -820,16 +837,16 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil #define EMIT_DATA_PROCESS_INS(opcode, set_flags, dst, src1, src2) \ (0xe0000000 | ((opcode) << 21) | (set_flags) | RD(dst) | RN(src1) | (src2)) -static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si inp_flags, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw 
src1w, - sljit_si src2, sljit_sw src2w); +static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w); -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, - sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, - sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_si size, i, tmp; + sljit_s32 size, i, tmp; sljit_uw push; CHECK_ERROR(); @@ -866,11 +883,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, - sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, - sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_si size; + sljit_s32 size; CHECK_ERROR(); CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); @@ -881,9 +898,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compi return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { - sljit_si i, tmp; + sljit_s32 i, tmp; sljit_uw pop; CHECK_ERROR(); @@ -983,8 +1000,8 @@ static sljit_sw 
data_transfer_insts[16] = { } \ return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, (reg_map[(flags & ARGS_SWAPPED) ? src1 : src2] << 8) | (opcode << 5) | 0x10 | ((flags & ARGS_SWAPPED) ? reg_map[src2] : reg_map[src1]))); -static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, - sljit_si dst, sljit_si src1, sljit_si src2) +static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_s32 src1, sljit_s32 src2) { sljit_sw mul_inst; @@ -1001,17 +1018,17 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj } return SLJIT_SUCCESS; - case SLJIT_MOV_UB: - case SLJIT_MOV_SB: + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) { #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - if (op == SLJIT_MOV_UB) + if (op == SLJIT_MOV_U8) return push_inst(compiler, EMIT_DATA_PROCESS_INS(AND_DP, 0, dst, src2, SRC2_IMM | 0xff)); FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | reg_map[src2]))); - return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | (op == SLJIT_MOV_UB ? 0x20 : 0x40) | reg_map[dst])); + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | (op == SLJIT_MOV_U8 ? 0x20 : 0x40) | reg_map[dst])); #else - return push_inst(compiler, (op == SLJIT_MOV_UB ? UXTB : SXTB) | RD(dst) | RM(src2)); + return push_inst(compiler, (op == SLJIT_MOV_U8 ? 
UXTB : SXTB) | RD(dst) | RM(src2)); #endif } else if (dst != src2) { @@ -1022,15 +1039,15 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj } return SLJIT_SUCCESS; - case SLJIT_MOV_UH: - case SLJIT_MOV_SH: + case SLJIT_MOV_U16: + case SLJIT_MOV_S16: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) { #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | reg_map[src2]))); - return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | (op == SLJIT_MOV_UH ? 0x20 : 0x40) | reg_map[dst])); + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | (op == SLJIT_MOV_U16 ? 0x20 : 0x40) | reg_map[dst])); #else - return push_inst(compiler, (op == SLJIT_MOV_UH ? UXTH : SXTH) | RD(dst) | RM(src2)); + return push_inst(compiler, (op == SLJIT_MOV_U16 ? UXTH : SXTH) | RD(dst) | RM(src2)); #endif } else if (dst != src2) { @@ -1139,7 +1156,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj Returns with 0 if not possible. */ static sljit_uw get_imm(sljit_uw imm) { - sljit_si rol; + sljit_s32 rol; if (imm <= 0xff) return SRC2_IMM | imm; @@ -1175,12 +1192,12 @@ static sljit_uw get_imm(sljit_uw imm) } #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) -static sljit_si generate_int(struct sljit_compiler *compiler, sljit_si reg, sljit_uw imm, sljit_si positive) +static sljit_s32 generate_int(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm, sljit_s32 positive) { sljit_uw mask; sljit_uw imm1; sljit_uw imm2; - sljit_si rol; + sljit_s32 rol; /* Step1: Search a zero byte (8 continous zero bit). 
*/ mask = 0xff000000; @@ -1286,7 +1303,7 @@ static sljit_si generate_int(struct sljit_compiler *compiler, sljit_si reg, slji } #endif -static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si reg, sljit_uw imm) +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm) { sljit_uw tmp; @@ -1317,7 +1334,7 @@ static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si reg, sl } /* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */ -static sljit_si emit_set_delta(struct sljit_compiler *compiler, sljit_si dst, sljit_si reg, sljit_sw value) +static sljit_s32 emit_set_delta(struct sljit_compiler *compiler, sljit_s32 dst, sljit_s32 reg, sljit_sw value) { if (value >= 0) { value = get_imm(value); @@ -1333,7 +1350,7 @@ static sljit_si emit_set_delta(struct sljit_compiler *compiler, sljit_si dst, sl } /* Can perform an operation using at most 1 instruction. */ -static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw) +static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) { sljit_uw imm; @@ -1408,7 +1425,7 @@ static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_fl /* See getput_arg below. Note: can_cache is called only for binary operators. Those operators always uses word arguments without write back. */ -static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) +static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) { /* Immediate caching is not supported as it would be an operation on constant arguments. */ if (arg & SLJIT_IMM) @@ -1456,9 +1473,9 @@ static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_ } /* Emit the necessary instructions. See can_cache above. 
*/ -static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) +static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) { - sljit_si tmp_r; + sljit_s32 tmp_r; sljit_sw max_delta; sljit_sw sign; sljit_uw imm; @@ -1583,7 +1600,7 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, reg_map[tmp_r] | (max_delta & 0xf00 ? SRC2_IMM : 0))); } -static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw) +static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) { if (getput_arg_fast(compiler, flags, reg, arg, argw)) return compiler->error; @@ -1592,17 +1609,17 @@ static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_ return getput_arg(compiler, flags, reg, arg, argw, 0, 0); } -static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w) +static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) { if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) return compiler->error; return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); } -static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si inp_flags, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags, + sljit_s32 dst, 
sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { /* arg1 goes to TMP_REG1 or src reg arg2 goes to TMP_REG2, imm or src reg @@ -1610,25 +1627,25 @@ static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si i result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */ /* We prefers register and simple consts. */ - sljit_si dst_r; - sljit_si src1_r; - sljit_si src2_r = 0; - sljit_si sugg_src2_r = TMP_REG2; - sljit_si flags = GET_FLAGS(op) ? SET_FLAGS : 0; + sljit_s32 dst_r; + sljit_s32 src1_r; + sljit_s32 src2_r = 0; + sljit_s32 sugg_src2_r = TMP_REG2; + sljit_s32 flags = GET_FLAGS(op) ? SET_FLAGS : 0; compiler->cache_arg = 0; compiler->cache_argw = 0; /* Destination check. */ if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { - if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM)) + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM)) return SLJIT_SUCCESS; dst_r = TMP_REG2; } else if (FAST_IS_REG(dst)) { dst_r = dst; flags |= REG_DEST; - if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) sugg_src2_r = dst_r; } else { @@ -1695,7 +1712,7 @@ static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si i if (FAST_IS_REG(src2)) { src2_r = src2; flags |= REG_SOURCE; - if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) + if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) dst_r = src2_r; } else do { /* do { } while(0) is used because of breaks. 
*/ @@ -1804,7 +1821,7 @@ extern int __aeabi_idivmod(int numerator, int denominator); } #endif -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { CHECK_ERROR(); CHECK(check_sljit_emit_op0(compiler, op)); @@ -1817,58 +1834,58 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler case SLJIT_NOP: FAIL_IF(push_inst(compiler, NOP)); break; - case SLJIT_LUMUL: - case SLJIT_LSMUL: + case SLJIT_LMUL_UW: + case SLJIT_LMUL_SW: #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) - return push_inst(compiler, (op == SLJIT_LUMUL ? UMULL : SMULL) + return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL) | (reg_map[SLJIT_R1] << 16) | (reg_map[SLJIT_R0] << 12) | (reg_map[SLJIT_R0] << 8) | reg_map[SLJIT_R1]); #else FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, RM(SLJIT_R1)))); - return push_inst(compiler, (op == SLJIT_LUMUL ? UMULL : SMULL) + return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL) | (reg_map[SLJIT_R1] << 16) | (reg_map[SLJIT_R0] << 12) | (reg_map[SLJIT_R0] << 8) | reg_map[TMP_REG1]); #endif - case SLJIT_UDIVMOD: - case SLJIT_SDIVMOD: - case SLJIT_UDIVI: - case SLJIT_SDIVI: - SLJIT_COMPILE_ASSERT((SLJIT_UDIVMOD & 0x2) == 0 && SLJIT_UDIVI - 0x2 == SLJIT_UDIVMOD, bad_div_opcode_assignments); + case SLJIT_DIVMOD_UW: + case SLJIT_DIVMOD_SW: + case SLJIT_DIV_UW: + case SLJIT_DIV_SW: + SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments); SLJIT_COMPILE_ASSERT(reg_map[2] == 1 && reg_map[3] == 2, bad_register_mapping); - if ((op >= SLJIT_UDIVI) && (compiler->scratches >= 3)) { + if ((op >= SLJIT_DIV_UW) && (compiler->scratches >= 3)) { FAIL_IF(push_inst(compiler, 0xe52d2008 /* str r2, [sp, #-8]! 
*/)); FAIL_IF(push_inst(compiler, 0xe58d1004 /* str r1, [sp, #4] */)); } - else if ((op >= SLJIT_UDIVI) || (compiler->scratches >= 3)) - FAIL_IF(push_inst(compiler, 0xe52d0008 | (op >= SLJIT_UDIVI ? 0x1000 : 0x2000) /* str r1/r2, [sp, #-8]! */)); + else if ((op >= SLJIT_DIV_UW) || (compiler->scratches >= 3)) + FAIL_IF(push_inst(compiler, 0xe52d0008 | (op >= SLJIT_DIV_UW ? 0x1000 : 0x2000) /* str r1/r2, [sp, #-8]! */)); #if defined(__GNUC__) FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, - ((op | 0x2) == SLJIT_UDIVI ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); + ((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); #else #error "Software divmod functions are needed" #endif - if ((op >= SLJIT_UDIVI) && (compiler->scratches >= 3)) { + if ((op >= SLJIT_DIV_UW) && (compiler->scratches >= 3)) { FAIL_IF(push_inst(compiler, 0xe59d1004 /* ldr r1, [sp, #4] */)); FAIL_IF(push_inst(compiler, 0xe49d2008 /* ldr r2, [sp], #8 */)); } - else if ((op >= SLJIT_UDIVI) || (compiler->scratches >= 3)) - return push_inst(compiler, 0xe49d0008 | (op >= SLJIT_UDIVI ? 0x1000 : 0x2000) /* ldr r1/r2, [sp], #8 */); + else if ((op >= SLJIT_DIV_UW) || (compiler->scratches >= 3)) + return push_inst(compiler, 0xe49d0008 | (op >= SLJIT_DIV_UW ? 
0x1000 : 0x2000) /* ldr r1/r2, [sp], #8 */); return SLJIT_SUCCESS; } return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { CHECK_ERROR(); CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); @@ -1877,40 +1894,40 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler switch (GET_OPCODE(op)) { case SLJIT_MOV: - case SLJIT_MOV_UI: - case SLJIT_MOV_SI: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: case SLJIT_MOV_P: return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_MOV_UB: - return emit_op(compiler, SLJIT_MOV_UB, ALLOW_ANY_IMM | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub)srcw : srcw); + case SLJIT_MOV_U8: + return emit_op(compiler, SLJIT_MOV_U8, ALLOW_ANY_IMM | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw); - case SLJIT_MOV_SB: - return emit_op(compiler, SLJIT_MOV_SB, ALLOW_ANY_IMM | SIGNED_DATA | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw); + case SLJIT_MOV_S8: + return emit_op(compiler, SLJIT_MOV_S8, ALLOW_ANY_IMM | SIGNED_DATA | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw); - case SLJIT_MOV_UH: - return emit_op(compiler, SLJIT_MOV_UH, ALLOW_ANY_IMM | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw); + case SLJIT_MOV_U16: + return emit_op(compiler, SLJIT_MOV_U16, ALLOW_ANY_IMM | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw); - case SLJIT_MOV_SH: - return emit_op(compiler, SLJIT_MOV_SH, ALLOW_ANY_IMM | SIGNED_DATA | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
(sljit_sh)srcw : srcw); + case SLJIT_MOV_S16: + return emit_op(compiler, SLJIT_MOV_S16, ALLOW_ANY_IMM | SIGNED_DATA | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw); case SLJIT_MOVU: - case SLJIT_MOVU_UI: - case SLJIT_MOVU_SI: + case SLJIT_MOVU_U32: + case SLJIT_MOVU_S32: case SLJIT_MOVU_P: return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_MOVU_UB: - return emit_op(compiler, SLJIT_MOV_UB, ALLOW_ANY_IMM | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub)srcw : srcw); + case SLJIT_MOVU_U8: + return emit_op(compiler, SLJIT_MOV_U8, ALLOW_ANY_IMM | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw); - case SLJIT_MOVU_SB: - return emit_op(compiler, SLJIT_MOV_SB, ALLOW_ANY_IMM | SIGNED_DATA | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw); + case SLJIT_MOVU_S8: + return emit_op(compiler, SLJIT_MOV_S8, ALLOW_ANY_IMM | SIGNED_DATA | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw); - case SLJIT_MOVU_UH: - return emit_op(compiler, SLJIT_MOV_UH, ALLOW_ANY_IMM | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw); + case SLJIT_MOVU_U16: + return emit_op(compiler, SLJIT_MOV_U16, ALLOW_ANY_IMM | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw); - case SLJIT_MOVU_SH: - return emit_op(compiler, SLJIT_MOV_SH, ALLOW_ANY_IMM | SIGNED_DATA | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw); + case SLJIT_MOVU_S16: + return emit_op(compiler, SLJIT_MOV_S16, ALLOW_ANY_IMM | SIGNED_DATA | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
(sljit_s16)srcw : srcw); case SLJIT_NOT: return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw); @@ -1929,10 +1946,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { CHECK_ERROR(); CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); @@ -1971,20 +1988,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) { CHECK_REG_INDEX(check_sljit_get_register_index(reg)); return reg_map[reg]; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) { CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); return reg << 1; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_si size) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_s32 size) { CHECK_ERROR(); CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); @@ -2000,7 +2017,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *co /* 0 - no fpu 1 - vfp */ -static sljit_si arm_fpu_type = -1; +static sljit_s32 arm_fpu_type = -1; static void init_compiler(void) { @@ -2011,7 +2028,7 @@ static void init_compiler(void) arm_fpu_type = 1; } 
-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) { #ifdef SLJIT_IS_FPU_AVAILABLE return SLJIT_IS_FPU_AVAILABLE; @@ -2026,7 +2043,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) #define arm_fpu_type 1 -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) { /* Always available. */ return 1; @@ -2040,11 +2057,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) #define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \ ((opcode) | (mode) | ((dst) << 12) | (src1) | ((src2) << 16)) -static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw) +static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) { sljit_sw tmp; sljit_uw imm; - sljit_sw inst = VSTR_F32 | (flags & (SLJIT_SINGLE_OP | FPU_LOAD)); + sljit_sw inst = VSTR_F32 | (flags & (SLJIT_F32_OP | FPU_LOAD)); SLJIT_ASSERT(arg & SLJIT_MEM); if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { @@ -2104,16 +2121,16 @@ static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sl return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, 0)); } -static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { if (src & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src, srcw)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src, srcw)); src = TMP_FREG1; } - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_SINGLE_OP, 
TMP_FREG1, src, 0))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_F32_OP, TMP_FREG1, src, 0))); if (dst == SLJIT_UNUSED) return SLJIT_SUCCESS; @@ -2125,11 +2142,11 @@ static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler * return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw); } -static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { - sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (FAST_IS_REG(src)) FAIL_IF(push_inst(compiler, VMOV | RD(src) | (TMP_FREG1 << 16))); @@ -2142,85 +2159,85 @@ static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler * FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | (TMP_FREG1 << 16))); } - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F32_S32, op & SLJIT_SINGLE_OP, dst_r, TMP_FREG1, 0))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F32_S32, op & SLJIT_F32_OP, dst_r, TMP_FREG1, 0))); if (dst & SLJIT_MEM) - return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw); + return emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw); return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { if (src1 & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, 
src1, src1w)); src1 = TMP_FREG1; } if (src2 & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w)); src2 = TMP_FREG2; } - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_SINGLE_OP, src1, src2, 0))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_F32_OP, src1, src2, 0))); return push_inst(compiler, VMRS); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { - sljit_si dst_r; + sljit_s32 dst_r; CHECK_ERROR(); compiler->cache_arg = 0; compiler->cache_argw = 0; - if (GET_OPCODE(op) != SLJIT_CONVD_FROMS) - op ^= SLJIT_SINGLE_OP; + if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32) + op ^= SLJIT_F32_OP; - SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error); + SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100), float_transfer_bit_error); SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; if (src & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, dst_r, src, srcw)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, dst_r, src, srcw)); src = dst_r; } switch (GET_OPCODE(op)) { - case SLJIT_DMOV: + case SLJIT_MOV_F64: if (src != dst_r) { if (dst_r != TMP_FREG1) - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_F32_OP, dst_r, src, 0))); else dst_r = src; } break; - case SLJIT_DNEG: - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0))); + case SLJIT_NEG_F64: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_F32_OP, dst_r, src, 0))); break; - case SLJIT_DABS: - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0))); + case SLJIT_ABS_F64: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_F32_OP, dst_r, src, 0))); break; - case SLJIT_CONVD_FROMS: - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0))); - op ^= SLJIT_SINGLE_OP; + case SLJIT_CONV_F64_FROM_F32: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_F32_OP, dst_r, src, 0))); + op ^= SLJIT_F32_OP; break; } if (dst & SLJIT_MEM) - return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), dst_r, dst, dstw); + return emit_fop_mem(compiler, (op & SLJIT_F32_OP), dst_r, dst, dstw); return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { - sljit_si dst_r; + sljit_s32 dst_r; 
CHECK_ERROR(); CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); @@ -2230,40 +2247,40 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile compiler->cache_arg = 0; compiler->cache_argw = 0; - op ^= SLJIT_SINGLE_OP; + op ^= SLJIT_F32_OP; dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (src2 & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w)); src2 = TMP_FREG2; } if (src1 & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w)); src1 = TMP_FREG1; } switch (GET_OPCODE(op)) { - case SLJIT_DADD: - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1))); + case SLJIT_ADD_F64: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_F32_OP, dst_r, src2, src1))); break; - case SLJIT_DSUB: - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1))); + case SLJIT_SUB_F64: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_F32_OP, dst_r, src2, src1))); break; - case SLJIT_DMUL: - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1))); + case SLJIT_MUL_F64: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_F32_OP, dst_r, src2, src1))); break; - case SLJIT_DDIV: - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1))); + case SLJIT_DIV_F64: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_F32_OP, dst_r, src2, src1))); break; } if (dst_r == TMP_FREG1) - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP), 
TMP_FREG1, dst, dstw)); return SLJIT_SUCCESS; } @@ -2276,7 +2293,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile /* Other instructions */ /* --------------------------------------------------------------------- */ -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { CHECK_ERROR(); CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); @@ -2299,7 +2316,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c return getput_arg(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) { CHECK_ERROR(); CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); @@ -2326,33 +2343,33 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler * /* Conditional instructions */ /* --------------------------------------------------------------------- */ -static sljit_uw get_cc(sljit_si type) +static sljit_uw get_cc(sljit_s32 type) { switch (type) { case SLJIT_EQUAL: case SLJIT_MUL_NOT_OVERFLOW: - case SLJIT_D_EQUAL: + case SLJIT_EQUAL_F64: return 0x00000000; case SLJIT_NOT_EQUAL: case SLJIT_MUL_OVERFLOW: - case SLJIT_D_NOT_EQUAL: + case SLJIT_NOT_EQUAL_F64: return 0x10000000; case SLJIT_LESS: - case SLJIT_D_LESS: + case SLJIT_LESS_F64: return 0x30000000; case SLJIT_GREATER_EQUAL: - case SLJIT_D_GREATER_EQUAL: + case SLJIT_GREATER_EQUAL_F64: return 0x20000000; case SLJIT_GREATER: - case SLJIT_D_GREATER: + case SLJIT_GREATER_F64: return 0x80000000; case SLJIT_LESS_EQUAL: - case SLJIT_D_LESS_EQUAL: + case SLJIT_LESS_EQUAL_F64: return 0x90000000; case SLJIT_SIG_LESS: @@ 
-2368,11 +2385,11 @@ static sljit_uw get_cc(sljit_si type) return 0xd0000000; case SLJIT_OVERFLOW: - case SLJIT_D_UNORDERED: + case SLJIT_UNORDERED_F64: return 0x60000000; case SLJIT_NOT_OVERFLOW: - case SLJIT_D_ORDERED: + case SLJIT_ORDERED_F64: return 0x70000000; default: @@ -2397,7 +2414,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi return label; } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) { struct sljit_jump *jump; @@ -2438,7 +2455,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile return jump; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) { struct sljit_jump *jump; @@ -2475,12 +2492,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw, - sljit_si type) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw, + sljit_s32 type) { - sljit_si dst_r, flags = GET_ALL_FLAGS(op); + sljit_s32 dst_r, flags = GET_ALL_FLAGS(op); sljit_uw cc, ins; CHECK_ERROR(); @@ -2528,10 +2545,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com return (flags & SLJIT_SET_E) ? 
push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst_r))) : SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) { struct sljit_const *const_; - sljit_si reg; + sljit_s32 reg; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); @@ -2555,12 +2572,12 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi return const_; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) { - inline_set_jump_addr(addr, new_addr, 1); + inline_set_jump_addr(addr, executable_offset, new_target, 1); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - inline_set_const(addr, new_constant, 1); + inline_set_const(addr, executable_offset, new_constant, 1); } diff --git a/pcre2/src/sljit/sljitNativeARM_64.c b/pcre2/src/sljit/sljitNativeARM_64.c index 044a675ee..2062d80b0 100644 --- a/pcre2/src/sljit/sljitNativeARM_64.c +++ b/pcre2/src/sljit/sljitNativeARM_64.c @@ -24,13 +24,13 @@ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) { return "ARM-64" SLJIT_CPUINFO; } /* Length of an instruction word */ -typedef sljit_ui sljit_ins; +typedef sljit_u32 sljit_ins; #define TMP_ZERO (0) @@ -43,7 +43,7 @@ typedef sljit_ui sljit_ins; #define TMP_FREG1 (0) #define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) -static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = { +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = { 31, 0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 29, 9, 10, 11, 30, 31 }; @@ -124,7 +124,7 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = { /* dest_reg is the absolute name of the register Useful for reordering instructions in the delay slot. */ -static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins) +static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) { sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); FAIL_IF(!ptr); @@ -133,7 +133,7 @@ static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins) return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_si emit_imm64_const(struct sljit_compiler *compiler, sljit_si dst, sljit_uw imm) +static SLJIT_INLINE sljit_s32 emit_imm64_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm) { FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5))); FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((imm >> 16) & 0xffff) << 5) | (1 << 21))); @@ -143,7 +143,7 @@ static SLJIT_INLINE sljit_si emit_imm64_const(struct sljit_compiler *compiler, s static SLJIT_INLINE void modify_imm64_const(sljit_ins* inst, sljit_uw new_imm) { - sljit_si dst = inst[0] & 0x1f; + sljit_s32 dst = inst[0] & 0x1f; SLJIT_ASSERT((inst[0] & 0xffe00000) == MOVZ && (inst[1] & 0xffe00000) == (MOVK | (1 << 21))); inst[0] = MOVZ | dst | ((new_imm & 0xffff) << 
5); inst[1] = MOVK | dst | (((new_imm >> 16) & 0xffff) << 5) | (1 << 21); @@ -151,7 +151,7 @@ static SLJIT_INLINE void modify_imm64_const(sljit_ins* inst, sljit_uw new_imm) inst[3] = MOVK | dst | ((new_imm >> 48) << 5) | (3 << 21); } -static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code) +static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) { sljit_sw diff; sljit_uw target_addr; @@ -165,9 +165,10 @@ static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_ins target_addr = jump->u.target; else { SLJIT_ASSERT(jump->flags & JUMP_LABEL); - target_addr = (sljit_uw)(code + jump->u.label->size); + target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; } - diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr + 4); + + diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr + 4) - executable_offset; if (jump->flags & IS_COND) { diff += sizeof(sljit_ins); @@ -211,8 +212,9 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_ins *buf_ptr; sljit_ins *buf_end; sljit_uw word_count; + sljit_sw executable_offset; sljit_uw addr; - sljit_si dst; + sljit_s32 dst; struct sljit_label *label; struct sljit_jump *jump; @@ -228,6 +230,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil code_ptr = code; word_count = 0; + executable_offset = SLJIT_EXEC_OFFSET(code); + label = compiler->labels; jump = compiler->jumps; const_ = compiler->consts; @@ -242,13 +246,13 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_ASSERT(!jump || jump->addr >= word_count); SLJIT_ASSERT(!const_ || const_->addr >= word_count); if (label && label->size == word_count) { - label->addr = (sljit_uw)code_ptr; + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = code_ptr - code; label = 
label->next; } if (jump && jump->addr == word_count) { jump->addr = (sljit_uw)(code_ptr - 4); - code_ptr -= detect_jump_type(jump, code_ptr, code); + code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset); jump = jump->next; } if (const_ && const_->addr == word_count) { @@ -263,7 +267,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } while (buf); if (label && label->size == word_count) { - label->addr = (sljit_uw)code_ptr; + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = code_ptr - code; label = label->next; } @@ -277,9 +281,10 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil while (jump) { do { addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; - buf_ptr = (sljit_ins*)jump->addr; + buf_ptr = (sljit_ins *)jump->addr; + if (jump->flags & PATCH_B) { - addr = (sljit_sw)(addr - jump->addr) >> 2; + addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; SLJIT_ASSERT((sljit_sw)addr <= 0x1ffffff && (sljit_sw)addr >= -0x2000000); buf_ptr[0] = ((jump->flags & IS_BL) ? 
BL : B) | (addr & 0x3ffffff); if (jump->flags & IS_COND) @@ -287,7 +292,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil break; } if (jump->flags & PATCH_COND) { - addr = (sljit_sw)(addr - jump->addr) >> 2; + addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; SLJIT_ASSERT((sljit_sw)addr <= 0x3ffff && (sljit_sw)addr >= -0x40000); buf_ptr[0] = (buf_ptr[0] & ~0xffffe0) | ((addr & 0x7ffff) << 5); break; @@ -308,7 +313,12 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); + + code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + SLJIT_CACHE_FLUSH(code, code_ptr); return code; } @@ -346,9 +356,9 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil #define LOGICAL_IMM_CHECK 0x100 -static sljit_ins logical_imm(sljit_sw imm, sljit_si len) +static sljit_ins logical_imm(sljit_sw imm, sljit_s32 len) { - sljit_si negated, ones, right; + sljit_s32 negated, ones, right; sljit_uw mask, uimm; sljit_ins ins; @@ -356,12 +366,12 @@ static sljit_ins logical_imm(sljit_sw imm, sljit_si len) len &= ~LOGICAL_IMM_CHECK; if (len == 32 && (imm == 0 || imm == -1)) return 0; - if (len == 16 && ((sljit_si)imm == 0 || (sljit_si)imm == -1)) + if (len == 16 && ((sljit_s32)imm == 0 || (sljit_s32)imm == -1)) return 0; } SLJIT_ASSERT((len == 32 && imm != 0 && imm != -1) - || (len == 16 && (sljit_si)imm != 0 && (sljit_si)imm != -1)); + || (len == 16 && (sljit_s32)imm != 0 && (sljit_s32)imm != -1)); uimm = (sljit_uw)imm; while (1) { if (len <= 0) { @@ -410,10 +420,10 @@ static sljit_ins logical_imm(sljit_sw imm, sljit_si len) #undef COUNT_TRAILING_ZERO -static sljit_si load_immediate(struct sljit_compiler *compiler, 
sljit_si dst, sljit_sw simm) +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw simm) { sljit_uw imm = (sljit_uw)simm; - sljit_si i, zeros, ones, first; + sljit_s32 i, zeros, ones, first; sljit_ins bitmask; if (imm <= 0xffff) @@ -512,15 +522,15 @@ static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst, sl dst = TMP_ZERO; \ } -static sljit_si emit_op_imm(struct sljit_compiler *compiler, sljit_si flags, sljit_si dst, sljit_sw arg1, sljit_sw arg2) +static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 dst, sljit_sw arg1, sljit_sw arg2) { /* dst must be register, TMP_REG1 arg1 must be register, TMP_REG1, imm arg2 must be register, TMP_REG2, imm */ sljit_ins inv_bits = (flags & INT_OP) ? (1 << 31) : 0; sljit_ins inst_bits; - sljit_si op = (flags & 0xffff); - sljit_si reg; + sljit_s32 op = (flags & 0xffff); + sljit_s32 reg; sljit_sw imm, nimm; if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) { @@ -667,34 +677,34 @@ static sljit_si emit_op_imm(struct sljit_compiler *compiler, sljit_si flags, slj if (dst == arg2) return SLJIT_SUCCESS; return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(arg2)); - case SLJIT_MOV_UB: - case SLJIT_MOVU_UB: + case SLJIT_MOV_U8: + case SLJIT_MOVU_U8: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); return push_inst(compiler, (UBFM ^ (1 << 31)) | RD(dst) | RN(arg2) | (7 << 10)); - case SLJIT_MOV_SB: - case SLJIT_MOVU_SB: + case SLJIT_MOV_S8: + case SLJIT_MOVU_S8: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); if (!(flags & INT_OP)) inv_bits |= 1 << 22; return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (7 << 10)); - case SLJIT_MOV_UH: - case SLJIT_MOVU_UH: + case SLJIT_MOV_U16: + case SLJIT_MOVU_U16: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); return push_inst(compiler, (UBFM ^ (1 << 31)) | RD(dst) | RN(arg2) | (15 << 10)); - case SLJIT_MOV_SH: - case SLJIT_MOVU_SH: + 
case SLJIT_MOV_S16: + case SLJIT_MOVU_S16: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); if (!(flags & INT_OP)) inv_bits |= 1 << 22; return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10)); - case SLJIT_MOV_UI: - case SLJIT_MOVU_UI: + case SLJIT_MOV_U32: + case SLJIT_MOVU_U32: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); if ((flags & INT_OP) && dst == arg2) return SLJIT_SUCCESS; return push_inst(compiler, (ORR ^ (1 << 31)) | RD(dst) | RN(TMP_ZERO) | RM(arg2)); - case SLJIT_MOV_SI: - case SLJIT_MOVU_SI: + case SLJIT_MOV_S32: + case SLJIT_MOVU_S32: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); if ((flags & INT_OP) && dst == arg2) return SLJIT_SUCCESS; @@ -777,28 +787,28 @@ set_flags: #define MEM_SIZE_SHIFT(flags) ((flags) >> 8) -static SLJIT_CONST sljit_ins sljit_mem_imm[4] = { +static const sljit_ins sljit_mem_imm[4] = { /* u l */ 0x39400000 /* ldrb [reg,imm] */, /* u s */ 0x39000000 /* strb [reg,imm] */, /* s l */ 0x39800000 /* ldrsb [reg,imm] */, /* s s */ 0x39000000 /* strb [reg,imm] */, }; -static SLJIT_CONST sljit_ins sljit_mem_simm[4] = { +static const sljit_ins sljit_mem_simm[4] = { /* u l */ 0x38400000 /* ldurb [reg,imm] */, /* u s */ 0x38000000 /* sturb [reg,imm] */, /* s l */ 0x38800000 /* ldursb [reg,imm] */, /* s s */ 0x38000000 /* sturb [reg,imm] */, }; -static SLJIT_CONST sljit_ins sljit_mem_pre_simm[4] = { +static const sljit_ins sljit_mem_pre_simm[4] = { /* u l */ 0x38400c00 /* ldrb [reg,imm]! */, /* u s */ 0x38000c00 /* strb [reg,imm]! */, /* s l */ 0x38800c00 /* ldrsb [reg,imm]! */, /* s s */ 0x38000c00 /* strb [reg,imm]! */, }; -static SLJIT_CONST sljit_ins sljit_mem_reg[4] = { +static const sljit_ins sljit_mem_reg[4] = { /* u l */ 0x38606800 /* ldrb [reg,reg] */, /* u s */ 0x38206800 /* strb [reg,reg] */, /* s l */ 0x38a06800 /* ldrsb [reg,reg] */, @@ -806,7 +816,7 @@ static SLJIT_CONST sljit_ins sljit_mem_reg[4] = { }; /* Helper function. 
Dst should be reg + value, using at most 1 instruction, flags does not set. */ -static sljit_si emit_set_delta(struct sljit_compiler *compiler, sljit_si dst, sljit_si reg, sljit_sw value) +static sljit_s32 emit_set_delta(struct sljit_compiler *compiler, sljit_s32 dst, sljit_s32 reg, sljit_sw value) { if (value >= 0) { if (value <= 0xfff) @@ -825,9 +835,9 @@ static sljit_si emit_set_delta(struct sljit_compiler *compiler, sljit_si dst, sl } /* Can perform an operation using at most 1 instruction. */ -static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw) +static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) { - sljit_ui shift = MEM_SIZE_SHIFT(flags); + sljit_u32 shift = MEM_SIZE_SHIFT(flags); SLJIT_ASSERT(arg & SLJIT_MEM); @@ -882,7 +892,7 @@ static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, /* see getput_arg below. Note: can_cache is called only for binary operators. Those operators always uses word arguments without write back. */ -static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) +static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) { sljit_sw diff; if ((arg & OFFS_REG_MASK) || !(next_arg & SLJIT_MEM)) @@ -906,11 +916,11 @@ static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_ } /* Emit the necessary instructions. See can_cache above. 
*/ -static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, - sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) +static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, + sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) { - sljit_ui shift = MEM_SIZE_SHIFT(flags); - sljit_si tmp_r, other_r; + sljit_u32 shift = MEM_SIZE_SHIFT(flags); + sljit_s32 tmp_r, other_r; sljit_sw diff; SLJIT_ASSERT(arg & SLJIT_MEM); @@ -1040,7 +1050,7 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, slji return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(TMP_REG3)); } -static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw) +static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) { if (getput_arg_fast(compiler, flags, reg, arg, argw)) return compiler->error; @@ -1049,7 +1059,7 @@ static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_ return getput_arg(compiler, flags, reg, arg, argw, 0, 0); } -static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w) +static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) { if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) return compiler->error; @@ -1060,11 +1070,11 @@ static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit /* Entry, exit */ /* --------------------------------------------------------------------- */ -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, - sljit_si options, sljit_si args, sljit_si scratches, 
sljit_si saveds, - sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_si i, tmp, offs, prev, saved_regs_size; + sljit_s32 i, tmp, offs, prev, saved_regs_size; CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); @@ -1148,9 +1158,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, - sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, - sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { CHECK_ERROR(); CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); @@ -1162,10 +1172,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compi return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { - sljit_si local_size; - sljit_si i, tmp, offs, prev, saved_regs_size; + sljit_s32 local_size; + sljit_s32 i, tmp, offs, prev, saved_regs_size; CHECK_ERROR(); CHECK(check_sljit_emit_return(compiler, op, src, srcw)); @@ -1243,9 +1253,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi /* Operators */ /* 
--------------------------------------------------------------------- */ -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { - sljit_ins inv_bits = (op & SLJIT_INT_OP) ? (1 << 31) : 0; + sljit_ins inv_bits = (op & SLJIT_I32_OP) ? (1 << 31) : 0; CHECK_ERROR(); CHECK(check_sljit_emit_op0(compiler, op)); @@ -1256,31 +1266,31 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler return push_inst(compiler, BRK); case SLJIT_NOP: return push_inst(compiler, NOP); - case SLJIT_LUMUL: - case SLJIT_LSMUL: + case SLJIT_LMUL_UW: + case SLJIT_LMUL_SW: FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0))); FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO))); - return push_inst(compiler, (op == SLJIT_LUMUL ? UMULH : SMULH) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1)); - case SLJIT_UDIVMOD: - case SLJIT_SDIVMOD: + return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULH : SMULH) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1)); + case SLJIT_DIVMOD_UW: + case SLJIT_DIVMOD_SW: FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0))); - FAIL_IF(push_inst(compiler, ((op == SLJIT_UDIVMOD ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1))); + FAIL_IF(push_inst(compiler, ((op == SLJIT_DIVMOD_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1))); FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_R1) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO))); return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1)); - case SLJIT_UDIVI: - case SLJIT_SDIVI: - return push_inst(compiler, ((op == SLJIT_UDIVI ? 
UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1)); + case SLJIT_DIV_UW: + case SLJIT_DIV_SW: + return push_inst(compiler, ((op == SLJIT_DIV_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1)); } return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { - sljit_si dst_r, flags, mem_flags; - sljit_si op_flags = GET_ALL_FLAGS(op); + sljit_s32 dst_r, flags, mem_flags; + sljit_s32 op_flags = GET_ALL_FLAGS(op); CHECK_ERROR(); CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); @@ -1299,69 +1309,69 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler case SLJIT_MOV_P: flags = WORD_SIZE; break; - case SLJIT_MOV_UB: + case SLJIT_MOV_U8: flags = BYTE_SIZE; if (src & SLJIT_IMM) - srcw = (sljit_ub)srcw; + srcw = (sljit_u8)srcw; break; - case SLJIT_MOV_SB: + case SLJIT_MOV_S8: flags = BYTE_SIZE | SIGNED; if (src & SLJIT_IMM) - srcw = (sljit_sb)srcw; + srcw = (sljit_s8)srcw; break; - case SLJIT_MOV_UH: + case SLJIT_MOV_U16: flags = HALF_SIZE; if (src & SLJIT_IMM) - srcw = (sljit_uh)srcw; + srcw = (sljit_u16)srcw; break; - case SLJIT_MOV_SH: + case SLJIT_MOV_S16: flags = HALF_SIZE | SIGNED; if (src & SLJIT_IMM) - srcw = (sljit_sh)srcw; + srcw = (sljit_s16)srcw; break; - case SLJIT_MOV_UI: + case SLJIT_MOV_U32: flags = INT_SIZE; if (src & SLJIT_IMM) - srcw = (sljit_ui)srcw; + srcw = (sljit_u32)srcw; break; - case SLJIT_MOV_SI: + case SLJIT_MOV_S32: flags = INT_SIZE | SIGNED; if (src & SLJIT_IMM) - srcw = (sljit_si)srcw; + srcw = (sljit_s32)srcw; break; case SLJIT_MOVU: case SLJIT_MOVU_P: flags = WORD_SIZE | UPDATE; break; - case SLJIT_MOVU_UB: + case SLJIT_MOVU_U8: flags = BYTE_SIZE | UPDATE; if (src & 
SLJIT_IMM) - srcw = (sljit_ub)srcw; + srcw = (sljit_u8)srcw; break; - case SLJIT_MOVU_SB: + case SLJIT_MOVU_S8: flags = BYTE_SIZE | SIGNED | UPDATE; if (src & SLJIT_IMM) - srcw = (sljit_sb)srcw; + srcw = (sljit_s8)srcw; break; - case SLJIT_MOVU_UH: + case SLJIT_MOVU_U16: flags = HALF_SIZE | UPDATE; if (src & SLJIT_IMM) - srcw = (sljit_uh)srcw; + srcw = (sljit_u16)srcw; break; - case SLJIT_MOVU_SH: + case SLJIT_MOVU_S16: flags = HALF_SIZE | SIGNED | UPDATE; if (src & SLJIT_IMM) - srcw = (sljit_sh)srcw; + srcw = (sljit_s16)srcw; break; - case SLJIT_MOVU_UI: + case SLJIT_MOVU_U32: flags = INT_SIZE | UPDATE; if (src & SLJIT_IMM) - srcw = (sljit_ui)srcw; + srcw = (sljit_u32)srcw; break; - case SLJIT_MOVU_SI: + case SLJIT_MOVU_S32: flags = INT_SIZE | SIGNED | UPDATE; if (src & SLJIT_IMM) - srcw = (sljit_si)srcw; + srcw = (sljit_s32)srcw; break; default: SLJIT_ASSERT_STOP(); @@ -1378,7 +1388,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler FAIL_IF(getput_arg(compiler, flags, dst_r, src, srcw, dst, dstw)); } else { if (dst_r != TMP_REG1) - return emit_op_imm(compiler, op | ((op_flags & SLJIT_INT_OP) ? INT_OP : 0), dst_r, TMP_REG1, src); + return emit_op_imm(compiler, op | ((op_flags & SLJIT_I32_OP) ? INT_OP : 0), dst_r, TMP_REG1, src); dst_r = src; } @@ -1393,7 +1403,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler flags = GET_FLAGS(op_flags) ? 
SET_FLAGS : 0; mem_flags = WORD_SIZE; - if (op_flags & SLJIT_INT_OP) { + if (op_flags & SLJIT_I32_OP) { flags |= INT_OP; mem_flags = INT_SIZE; } @@ -1411,8 +1421,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler if (src & SLJIT_IMM) { flags |= ARG2_IMM; - if (op_flags & SLJIT_INT_OP) - srcw = (sljit_si)srcw; + if (op_flags & SLJIT_I32_OP) + srcw = (sljit_s32)srcw; } else srcw = src; @@ -1427,12 +1437,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { - sljit_si dst_r, flags, mem_flags; + sljit_s32 dst_r, flags, mem_flags; CHECK_ERROR(); CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); @@ -1446,7 +1456,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; flags = GET_FLAGS(op) ? 
SET_FLAGS : 0; mem_flags = WORD_SIZE; - if (op & SLJIT_INT_OP) { + if (op & SLJIT_I32_OP) { flags |= INT_OP; mem_flags = INT_SIZE; } @@ -1512,20 +1522,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) { CHECK_REG_INDEX(check_sljit_get_register_index(reg)); return reg_map[reg]; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) { CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); return reg; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_si size) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_s32 size) { CHECK_ERROR(); CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); @@ -1537,7 +1547,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *co /* Floating point operators */ /* --------------------------------------------------------------------- */ -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) { #ifdef SLJIT_IS_FPU_AVAILABLE return SLJIT_IS_FPU_AVAILABLE; @@ -1547,11 +1557,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) #endif } -static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw) +static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) { - sljit_ui shift = MEM_SIZE_SHIFT(flags); + sljit_u32 shift = MEM_SIZE_SHIFT(flags); sljit_ins ins_bits = (shift << 30); - sljit_si other_r; + sljit_s32 other_r; sljit_sw diff; 
SLJIT_ASSERT(arg & SLJIT_MEM); @@ -1600,45 +1610,45 @@ static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sl return push_inst(compiler, STR_FI | ins_bits | VT(reg) | RN(TMP_REG3)); } -static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { - sljit_si dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; - sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0; + sljit_s32 dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; + sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; - if (GET_OPCODE(op) == SLJIT_CONVI_FROMD) + if (GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) inv_bits |= (1 << 31); if (src & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw); + emit_fop_mem(compiler, (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw); src = TMP_FREG1; } FAIL_IF(push_inst(compiler, (FCVTZS ^ inv_bits) | RD(dst_r) | VN(src))); if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED) - return emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONVI_FROMD) ? INT_SIZE : WORD_SIZE) | STORE, TMP_REG1, dst, dstw); + return emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) ? INT_SIZE : WORD_SIZE) | STORE, TMP_REG1, dst, dstw); return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { - sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; - sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? 
(1 << 22) : 0; + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; - if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) inv_bits |= (1 << 31); if (src & SLJIT_MEM) { - emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONVD_FROMI) ? INT_SIZE : WORD_SIZE), TMP_REG1, src, srcw); + emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) ? INT_SIZE : WORD_SIZE), TMP_REG1, src, srcw); src = TMP_REG1; } else if (src & SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) - srcw = (sljit_si)srcw; + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) + srcw = (sljit_s32)srcw; #endif FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); src = TMP_REG1; @@ -1647,16 +1657,16 @@ static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler * FAIL_IF(push_inst(compiler, (SCVTF ^ inv_bits) | VD(dst_r) | RN(src))); if (dst & SLJIT_MEM) - return emit_fop_mem(compiler, ((op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE) | STORE, TMP_FREG1, dst, dstw); + return emit_fop_mem(compiler, ((op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE) | STORE, TMP_FREG1, dst, dstw); return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { - sljit_si mem_flags = (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE; - sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0; + sljit_s32 mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE; + sljit_ins inv_bits = (op & SLJIT_F32_OP) ? 
(1 << 22) : 0; if (src1 & SLJIT_MEM) { emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w); @@ -1671,11 +1681,11 @@ static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler return push_inst(compiler, (FCMP ^ inv_bits) | VN(src1) | VM(src2)); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { - sljit_si dst_r, mem_flags = (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE; + sljit_s32 dst_r, mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE; sljit_ins inv_bits; CHECK_ERROR(); @@ -1685,16 +1695,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile SLJIT_COMPILE_ASSERT((INT_SIZE ^ 0x100) == WORD_SIZE, must_be_one_bit_difference); SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); - inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0; + inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (src & SLJIT_MEM) { - emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONVD_FROMS) ? (mem_flags ^ 0x100) : mem_flags, dst_r, src, srcw); + emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) ? 
(mem_flags ^ 0x100) : mem_flags, dst_r, src, srcw); src = dst_r; } switch (GET_OPCODE(op)) { - case SLJIT_DMOV: + case SLJIT_MOV_F64: if (src != dst_r) { if (dst_r != TMP_FREG1) FAIL_IF(push_inst(compiler, (FMOV ^ inv_bits) | VD(dst_r) | VN(src))); @@ -1702,14 +1712,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile dst_r = src; } break; - case SLJIT_DNEG: + case SLJIT_NEG_F64: FAIL_IF(push_inst(compiler, (FNEG ^ inv_bits) | VD(dst_r) | VN(src))); break; - case SLJIT_DABS: + case SLJIT_ABS_F64: FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src))); break; - case SLJIT_CONVD_FROMS: - FAIL_IF(push_inst(compiler, FCVT | ((op & SLJIT_SINGLE_OP) ? (1 << 22) : (1 << 15)) | VD(dst_r) | VN(src))); + case SLJIT_CONV_F64_FROM_F32: + FAIL_IF(push_inst(compiler, FCVT | ((op & SLJIT_F32_OP) ? (1 << 22) : (1 << 15)) | VD(dst_r) | VN(src))); break; } @@ -1718,13 +1728,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { - sljit_si dst_r, mem_flags = (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE; - sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0; + sljit_s32 dst_r, mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE; + sljit_ins inv_bits = (op & SLJIT_F32_OP) ? 
(1 << 22) : 0; CHECK_ERROR(); CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); @@ -1746,16 +1756,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile } switch (GET_OPCODE(op)) { - case SLJIT_DADD: + case SLJIT_ADD_F64: FAIL_IF(push_inst(compiler, (FADD ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); break; - case SLJIT_DSUB: + case SLJIT_SUB_F64: FAIL_IF(push_inst(compiler, (FSUB ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); break; - case SLJIT_DMUL: + case SLJIT_MUL_F64: FAIL_IF(push_inst(compiler, (FMUL ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); break; - case SLJIT_DDIV: + case SLJIT_DIV_F64: FAIL_IF(push_inst(compiler, (FDIV ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); break; } @@ -1769,7 +1779,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile /* Other instructions */ /* --------------------------------------------------------------------- */ -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { CHECK_ERROR(); CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); @@ -1786,7 +1796,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_LR, dst, dstw); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) { CHECK_ERROR(); CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); @@ -1806,33 +1816,33 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler * /* Conditional instructions */ /* --------------------------------------------------------------------- */ -static 
sljit_uw get_cc(sljit_si type) +static sljit_uw get_cc(sljit_s32 type) { switch (type) { case SLJIT_EQUAL: case SLJIT_MUL_NOT_OVERFLOW: - case SLJIT_D_EQUAL: + case SLJIT_EQUAL_F64: return 0x1; case SLJIT_NOT_EQUAL: case SLJIT_MUL_OVERFLOW: - case SLJIT_D_NOT_EQUAL: + case SLJIT_NOT_EQUAL_F64: return 0x0; case SLJIT_LESS: - case SLJIT_D_LESS: + case SLJIT_LESS_F64: return 0x2; case SLJIT_GREATER_EQUAL: - case SLJIT_D_GREATER_EQUAL: + case SLJIT_GREATER_EQUAL_F64: return 0x3; case SLJIT_GREATER: - case SLJIT_D_GREATER: + case SLJIT_GREATER_F64: return 0x9; case SLJIT_LESS_EQUAL: - case SLJIT_D_LESS_EQUAL: + case SLJIT_LESS_EQUAL_F64: return 0x8; case SLJIT_SIG_LESS: @@ -1848,11 +1858,11 @@ static sljit_uw get_cc(sljit_si type) return 0xc; case SLJIT_OVERFLOW: - case SLJIT_D_UNORDERED: + case SLJIT_UNORDERED_F64: return 0x7; case SLJIT_NOT_OVERFLOW: - case SLJIT_D_ORDERED: + case SLJIT_ORDERED_F64: return 0x6; default: @@ -1877,7 +1887,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi return label; } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) { struct sljit_jump *jump; @@ -1903,11 +1913,11 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile return jump; } -static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compiler, sljit_si type, - sljit_si src, sljit_sw srcw) +static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src, sljit_sw srcw) { struct sljit_jump *jump; - sljit_ins inv_bits = (type & SLJIT_INT_OP) ? (1 << 31) : 0; + sljit_ins inv_bits = (type & SLJIT_I32_OP) ? 
(1 << 31) : 0; SLJIT_ASSERT((type & 0xff) == SLJIT_EQUAL || (type & 0xff) == SLJIT_NOT_EQUAL); ADJUST_LOCAL_OFFSET(src, srcw); @@ -1937,7 +1947,7 @@ static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compi return jump; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) { struct sljit_jump *jump; @@ -1964,12 +1974,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1)); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw, - sljit_si type) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw, + sljit_s32 type) { - sljit_si dst_r, flags, mem_flags; + sljit_s32 dst_r, flags, mem_flags; sljit_ins cc; CHECK_ERROR(); @@ -1994,7 +2004,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com compiler->cache_argw = 0; flags = GET_FLAGS(op) ? 
SET_FLAGS : 0; mem_flags = WORD_SIZE; - if (op & SLJIT_INT_OP) { + if (op & SLJIT_I32_OP) { flags |= INT_OP; mem_flags = INT_SIZE; } @@ -2014,10 +2024,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com return emit_op_mem2(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, 0, 0); } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) { struct sljit_const *const_; - sljit_si dst_r; + sljit_s32 dst_r; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); @@ -2035,16 +2045,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi return const_; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) { sljit_ins* inst = (sljit_ins*)addr; - modify_imm64_const(inst, new_addr); + modify_imm64_const(inst, new_target); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 4); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { sljit_ins* inst = (sljit_ins*)addr; modify_imm64_const(inst, new_constant); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 4); } diff --git a/pcre2/src/sljit/sljitNativeARM_T2_32.c b/pcre2/src/sljit/sljitNativeARM_T2_32.c index f9803f5d4..95afc5231 100644 --- a/pcre2/src/sljit/sljitNativeARM_T2_32.c +++ b/pcre2/src/sljit/sljitNativeARM_T2_32.c @@ -24,13 +24,13 @@ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
POSSIBILITY OF SUCH DAMAGE. */ -SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) { return "ARM-Thumb2" SLJIT_CPUINFO; } /* Length of an instruction word. */ -typedef sljit_ui sljit_ins; +typedef sljit_u32 sljit_ins; /* Last register + 1. */ #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) @@ -42,7 +42,7 @@ typedef sljit_ui sljit_ins; #define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) /* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */ -static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { 0, 0, 1, 2, 12, 11, 10, 9, 8, 7, 6, 5, 13, 3, 4, 14, 15 }; @@ -181,21 +181,21 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { #define VSTR_F32 0xed000a00 #define VSUB_F32 0xee300a40 -static sljit_si push_inst16(struct sljit_compiler *compiler, sljit_ins inst) +static sljit_s32 push_inst16(struct sljit_compiler *compiler, sljit_ins inst) { - sljit_uh *ptr; + sljit_u16 *ptr; SLJIT_ASSERT(!(inst & 0xffff0000)); - ptr = (sljit_uh*)ensure_buf(compiler, sizeof(sljit_uh)); + ptr = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_u16)); FAIL_IF(!ptr); *ptr = inst; compiler->size++; return SLJIT_SUCCESS; } -static sljit_si push_inst32(struct sljit_compiler *compiler, sljit_ins inst) +static sljit_s32 push_inst32(struct sljit_compiler *compiler, sljit_ins inst) { - sljit_uh *ptr = (sljit_uh*)ensure_buf(compiler, sizeof(sljit_ins)); + sljit_u16 *ptr = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_ins)); FAIL_IF(!ptr); *ptr++ = inst >> 16; *ptr = inst; @@ -203,7 +203,7 @@ static sljit_si push_inst32(struct sljit_compiler *compiler, sljit_ins inst) return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_si emit_imm32_const(struct sljit_compiler *compiler, sljit_si dst, sljit_uw imm) +static SLJIT_INLINE sljit_s32 emit_imm32_const(struct sljit_compiler *compiler, sljit_s32 dst, 
sljit_uw imm) { FAIL_IF(push_inst32(compiler, MOVW | RD4(dst) | COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff))); @@ -211,9 +211,9 @@ static SLJIT_INLINE sljit_si emit_imm32_const(struct sljit_compiler *compiler, s COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16)); } -static SLJIT_INLINE void modify_imm32_const(sljit_uh *inst, sljit_uw new_imm) +static SLJIT_INLINE void modify_imm32_const(sljit_u16 *inst, sljit_uw new_imm) { - sljit_si dst = inst[1] & 0x0f00; + sljit_s32 dst = inst[1] & 0x0f00; SLJIT_ASSERT(((inst[0] & 0xfbf0) == (MOVW >> 16)) && ((inst[2] & 0xfbf0) == (MOVT >> 16)) && dst == (inst[3] & 0x0f00)); inst[0] = (MOVW >> 16) | COPY_BITS(new_imm, 12, 0, 4) | COPY_BITS(new_imm, 11, 10, 1); inst[1] = dst | COPY_BITS(new_imm, 8, 12, 3) | (new_imm & 0xff); @@ -221,7 +221,7 @@ static SLJIT_INLINE void modify_imm32_const(sljit_uh *inst, sljit_uw new_imm) inst[3] = dst | COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16); } -static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_uh *code_ptr, sljit_uh *code) +static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code, sljit_sw executable_offset) { sljit_sw diff; @@ -232,7 +232,7 @@ static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_uh /* Branch to ARM code is not optimized yet. 
*/ if (!(jump->u.target & 0x1)) return 0; - diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2)) >> 1; + diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset) >> 1; } else { SLJIT_ASSERT(jump->flags & JUMP_LABEL); @@ -276,25 +276,27 @@ static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_uh return 0; } -static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump) +static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump, sljit_sw executable_offset) { - sljit_si type = (jump->flags >> 4) & 0xf; + sljit_s32 type = (jump->flags >> 4) & 0xf; sljit_sw diff; - sljit_uh *jump_inst; - sljit_si s, j1, j2; + sljit_u16 *jump_inst; + sljit_s32 s, j1, j2; if (SLJIT_UNLIKELY(type == 0)) { - modify_imm32_const((sljit_uh*)jump->addr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target); + modify_imm32_const((sljit_u16*)jump->addr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target); return; } if (jump->flags & JUMP_ADDR) { SLJIT_ASSERT(jump->u.target & 0x1); - diff = ((sljit_sw)jump->u.target - (sljit_sw)(jump->addr + 4)) >> 1; + diff = ((sljit_sw)jump->u.target - (sljit_sw)(jump->addr + sizeof(sljit_u32)) - executable_offset) >> 1; } - else - diff = ((sljit_sw)(jump->u.label->addr) - (sljit_sw)(jump->addr + 4)) >> 1; - jump_inst = (sljit_uh*)jump->addr; + else { + SLJIT_ASSERT(jump->u.label->addr & 0x1); + diff = ((sljit_sw)(jump->u.label->addr) - (sljit_sw)(jump->addr + sizeof(sljit_u32)) - executable_offset) >> 1; + } + jump_inst = (sljit_u16*)jump->addr; switch (type) { case 1: @@ -342,11 +344,12 @@ static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump) SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) { struct sljit_memory_fragment *buf; - sljit_uh *code; - sljit_uh *code_ptr; - sljit_uh *buf_ptr; - sljit_uh *buf_end; + sljit_u16 *code; + sljit_u16 *code_ptr; + sljit_u16 *buf_ptr; + sljit_u16 *buf_end; sljit_uw 
half_count; + sljit_sw executable_offset; struct sljit_label *label; struct sljit_jump *jump; @@ -356,18 +359,20 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil CHECK_PTR(check_sljit_generate_code(compiler)); reverse_buf(compiler); - code = (sljit_uh*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_uh)); + code = (sljit_u16*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_u16)); PTR_FAIL_WITH_EXEC_IF(code); buf = compiler->buf; code_ptr = code; half_count = 0; + executable_offset = SLJIT_EXEC_OFFSET(code); + label = compiler->labels; jump = compiler->jumps; const_ = compiler->consts; do { - buf_ptr = (sljit_uh*)buf->memory; + buf_ptr = (sljit_u16*)buf->memory; buf_end = buf_ptr + (buf->used_size >> 1); do { *code_ptr = *buf_ptr++; @@ -376,13 +381,13 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_ASSERT(!jump || jump->addr >= half_count); SLJIT_ASSERT(!const_ || const_->addr >= half_count); if (label && label->size == half_count) { - label->addr = ((sljit_uw)code_ptr) | 0x1; + label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1; label->size = code_ptr - code; label = label->next; } if (jump && jump->addr == half_count) { jump->addr = (sljit_uw)code_ptr - ((jump->flags & IS_COND) ? 
10 : 8); - code_ptr -= detect_jump_type(jump, code_ptr, code); + code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset); jump = jump->next; } if (const_ && const_->addr == half_count) { @@ -397,7 +402,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } while (buf); if (label && label->size == half_count) { - label->addr = ((sljit_uw)code_ptr) | 0x1; + label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1; label->size = code_ptr - code; label = label->next; } @@ -409,12 +414,17 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil jump = compiler->jumps; while (jump) { - set_jump_instruction(jump); + set_jump_instruction(jump, executable_offset); jump = jump->next; } compiler->error = SLJIT_ERR_COMPILED; - compiler->executable_size = (code_ptr - code) * sizeof(sljit_uh); + compiler->executable_offset = executable_offset; + compiler->executable_size = (code_ptr - code) * sizeof(sljit_u16); + + code = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + SLJIT_CACHE_FLUSH(code, code_ptr); /* Set thumb mode flag. */ return (void*)((sljit_uw)code | 0x1); @@ -428,7 +438,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil static sljit_uw get_imm(sljit_uw imm) { /* Thumb immediate form. 
*/ - sljit_si counter; + sljit_s32 counter; if (imm <= 0xff) return imm; @@ -474,7 +484,7 @@ static sljit_uw get_imm(sljit_uw imm) return ((imm >> 24) & 0x7f) | COPY_BITS(counter, 4, 26, 1) | COPY_BITS(counter, 1, 12, 3) | COPY_BITS(counter, 0, 7, 1); } -static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst, sljit_uw imm) +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm) { sljit_uw tmp; @@ -508,12 +518,12 @@ static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst, sl #define SLOW_SRC1 0x0800000 #define SLOW_SRC2 0x1000000 -static sljit_si emit_op_imm(struct sljit_compiler *compiler, sljit_si flags, sljit_si dst, sljit_uw arg1, sljit_uw arg2) +static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 dst, sljit_uw arg1, sljit_uw arg2) { /* dst must be register, TMP_REG1 arg1 must be register, TMP_REG1, imm arg2 must be register, TMP_REG2, imm */ - sljit_si reg; + sljit_s32 reg; sljit_uw imm, nimm; if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) { @@ -677,37 +687,37 @@ static sljit_si emit_op_imm(struct sljit_compiler *compiler, sljit_si flags, slj /* Both arguments are registers. 
*/ switch (flags & 0xffff) { case SLJIT_MOV: - case SLJIT_MOV_UI: - case SLJIT_MOV_SI: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: case SLJIT_MOV_P: case SLJIT_MOVU: - case SLJIT_MOVU_UI: - case SLJIT_MOVU_SI: + case SLJIT_MOVU_U32: + case SLJIT_MOVU_S32: case SLJIT_MOVU_P: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); if (dst == arg2) return SLJIT_SUCCESS; return push_inst16(compiler, MOV | SET_REGS44(dst, arg2)); - case SLJIT_MOV_UB: - case SLJIT_MOVU_UB: + case SLJIT_MOV_U8: + case SLJIT_MOVU_U8: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); if (IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, UXTB | RD3(dst) | RN3(arg2)); return push_inst32(compiler, UXTB_W | RD4(dst) | RM4(arg2)); - case SLJIT_MOV_SB: - case SLJIT_MOVU_SB: + case SLJIT_MOV_S8: + case SLJIT_MOVU_S8: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); if (IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, SXTB | RD3(dst) | RN3(arg2)); return push_inst32(compiler, SXTB_W | RD4(dst) | RM4(arg2)); - case SLJIT_MOV_UH: - case SLJIT_MOVU_UH: + case SLJIT_MOV_U16: + case SLJIT_MOVU_U16: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); if (IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, UXTH | RD3(dst) | RN3(arg2)); return push_inst32(compiler, UXTH_W | RD4(dst) | RM4(arg2)); - case SLJIT_MOV_SH: - case SLJIT_MOVU_SH: + case SLJIT_MOV_S16: + case SLJIT_MOVU_S16: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); if (IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, SXTH | RD3(dst) | RN3(arg2)); @@ -813,7 +823,7 @@ static sljit_si emit_op_imm(struct sljit_compiler *compiler, sljit_si flags, slj s = store */ -static SLJIT_CONST sljit_ins sljit_mem16[12] = { +static const sljit_ins sljit_mem16[12] = { /* w u l */ 0x5800 /* ldr */, /* w u s */ 0x5000 /* str */, /* w s l */ 0x5800 /* ldr */, @@ -830,7 +840,7 @@ static SLJIT_CONST sljit_ins sljit_mem16[12] = { /* h s s */ 0x5200 /* strh */, }; -static SLJIT_CONST sljit_ins sljit_mem16_imm5[12] = { +static 
const sljit_ins sljit_mem16_imm5[12] = { /* w u l */ 0x6800 /* ldr imm5 */, /* w u s */ 0x6000 /* str imm5 */, /* w s l */ 0x6800 /* ldr imm5 */, @@ -849,7 +859,7 @@ static SLJIT_CONST sljit_ins sljit_mem16_imm5[12] = { #define MEM_IMM8 0xc00 #define MEM_IMM12 0x800000 -static SLJIT_CONST sljit_ins sljit_mem32[12] = { +static const sljit_ins sljit_mem32[12] = { /* w u l */ 0xf8500000 /* ldr.w */, /* w u s */ 0xf8400000 /* str.w */, /* w s l */ 0xf8500000 /* ldr.w */, @@ -867,7 +877,7 @@ static SLJIT_CONST sljit_ins sljit_mem32[12] = { }; /* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */ -static sljit_si emit_set_delta(struct sljit_compiler *compiler, sljit_si dst, sljit_si reg, sljit_sw value) +static sljit_s32 emit_set_delta(struct sljit_compiler *compiler, sljit_s32 dst, sljit_s32 reg, sljit_sw value) { if (value >= 0) { if (value <= 0xfff) @@ -888,9 +898,9 @@ static sljit_si emit_set_delta(struct sljit_compiler *compiler, sljit_si dst, sl } /* Can perform an operation using at most 1 instruction. */ -static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw) +static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) { - sljit_si other_r, shift; + sljit_s32 other_r, shift; SLJIT_ASSERT(arg & SLJIT_MEM); @@ -975,7 +985,7 @@ static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, /* see getput_arg below. Note: can_cache is called only for binary operators. Those operators always uses word arguments without write back. 
*/ -static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) +static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) { sljit_sw diff; if ((arg & OFFS_REG_MASK) || !(next_arg & SLJIT_MEM)) @@ -999,10 +1009,10 @@ static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_ } /* Emit the necessary instructions. See can_cache above. */ -static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, - sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) +static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, + sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) { - sljit_si tmp_r, other_r; + sljit_s32 tmp_r, other_r; sljit_sw diff; SLJIT_ASSERT(arg & SLJIT_MEM); @@ -1107,7 +1117,7 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, slji return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | 0); } -static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw) +static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) { if (getput_arg_fast(compiler, flags, reg, arg, argw)) return compiler->error; @@ -1116,7 +1126,7 @@ static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_ return getput_arg(compiler, flags, reg, arg, argw, 0, 0); } -static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w) +static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) { if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) return compiler->error; 
@@ -1127,11 +1137,11 @@ static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit /* Entry, exit */ /* --------------------------------------------------------------------- */ -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, - sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, - sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_si size, i, tmp; + sljit_s32 size, i, tmp; sljit_ins push; CHECK_ERROR(); @@ -1172,11 +1182,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, - sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, - sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_si size; + sljit_s32 size; CHECK_ERROR(); CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); @@ -1187,9 +1197,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compi return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { - sljit_si i, tmp; + sljit_s32 i, tmp; sljit_ins pop; CHECK_ERROR(); @@ -1237,7 +1247,7 @@ extern int __aeabi_idivmod(int numerator, int 
denominator); } #endif -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { sljit_sw saved_reg_list[3]; sljit_sw saved_reg_count; @@ -1251,18 +1261,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler return push_inst16(compiler, BKPT); case SLJIT_NOP: return push_inst16(compiler, NOP); - case SLJIT_LUMUL: - case SLJIT_LSMUL: - return push_inst32(compiler, (op == SLJIT_LUMUL ? UMULL : SMULL) + case SLJIT_LMUL_UW: + case SLJIT_LMUL_SW: + return push_inst32(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL) | (reg_map[SLJIT_R1] << 8) | (reg_map[SLJIT_R0] << 12) | (reg_map[SLJIT_R0] << 16) | reg_map[SLJIT_R1]); - case SLJIT_UDIVMOD: - case SLJIT_SDIVMOD: - case SLJIT_UDIVI: - case SLJIT_SDIVI: - SLJIT_COMPILE_ASSERT((SLJIT_UDIVMOD & 0x2) == 0 && SLJIT_UDIVI - 0x2 == SLJIT_UDIVMOD, bad_div_opcode_assignments); + case SLJIT_DIVMOD_UW: + case SLJIT_DIVMOD_SW: + case SLJIT_DIV_UW: + case SLJIT_DIV_SW: + SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments); SLJIT_COMPILE_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 12, bad_register_mapping); saved_reg_count = 0; @@ -1270,7 +1280,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler saved_reg_list[saved_reg_count++] = 12; if (compiler->scratches >= 3) saved_reg_list[saved_reg_count++] = 2; - if (op >= SLJIT_UDIVI) + if (op >= SLJIT_DIV_UW) saved_reg_list[saved_reg_count++] = 1; if (saved_reg_count > 0) { @@ -1288,7 +1298,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler #if defined(__GNUC__) FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, - ((op | 0x2) == SLJIT_UDIVI ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); + ((op | 0x2) == SLJIT_DIV_UW ? 
SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); #else #error "Software divmod functions are needed" #endif @@ -1311,12 +1321,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { - sljit_si dst_r, flags; - sljit_si op_flags = GET_ALL_FLAGS(op); + sljit_s32 dst_r, flags; + sljit_s32 op_flags = GET_ALL_FLAGS(op); CHECK_ERROR(); CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); @@ -1332,56 +1342,56 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) { switch (op) { case SLJIT_MOV: - case SLJIT_MOV_UI: - case SLJIT_MOV_SI: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: case SLJIT_MOV_P: flags = WORD_SIZE; break; - case SLJIT_MOV_UB: + case SLJIT_MOV_U8: flags = BYTE_SIZE; if (src & SLJIT_IMM) - srcw = (sljit_ub)srcw; + srcw = (sljit_u8)srcw; break; - case SLJIT_MOV_SB: + case SLJIT_MOV_S8: flags = BYTE_SIZE | SIGNED; if (src & SLJIT_IMM) - srcw = (sljit_sb)srcw; + srcw = (sljit_s8)srcw; break; - case SLJIT_MOV_UH: + case SLJIT_MOV_U16: flags = HALF_SIZE; if (src & SLJIT_IMM) - srcw = (sljit_uh)srcw; + srcw = (sljit_u16)srcw; break; - case SLJIT_MOV_SH: + case SLJIT_MOV_S16: flags = HALF_SIZE | SIGNED; if (src & SLJIT_IMM) - srcw = (sljit_sh)srcw; + srcw = (sljit_s16)srcw; break; case SLJIT_MOVU: - case SLJIT_MOVU_UI: - case SLJIT_MOVU_SI: + case SLJIT_MOVU_U32: + case SLJIT_MOVU_S32: case SLJIT_MOVU_P: flags = WORD_SIZE | UPDATE; break; - case SLJIT_MOVU_UB: + case SLJIT_MOVU_U8: flags = BYTE_SIZE | UPDATE; if (src & SLJIT_IMM) - srcw = (sljit_ub)srcw; + srcw = (sljit_u8)srcw; break; 
- case SLJIT_MOVU_SB: + case SLJIT_MOVU_S8: flags = BYTE_SIZE | SIGNED | UPDATE; if (src & SLJIT_IMM) - srcw = (sljit_sb)srcw; + srcw = (sljit_s8)srcw; break; - case SLJIT_MOVU_UH: + case SLJIT_MOVU_U16: flags = HALF_SIZE | UPDATE; if (src & SLJIT_IMM) - srcw = (sljit_uh)srcw; + srcw = (sljit_u16)srcw; break; - case SLJIT_MOVU_SH: + case SLJIT_MOVU_S16: flags = HALF_SIZE | SIGNED | UPDATE; if (src & SLJIT_IMM) - srcw = (sljit_sh)srcw; + srcw = (sljit_s16)srcw; break; default: SLJIT_ASSERT_STOP(); @@ -1444,12 +1454,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { - sljit_si dst_r, flags; + sljit_s32 dst_r, flags; CHECK_ERROR(); CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); @@ -1523,26 +1533,26 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) { CHECK_REG_INDEX(check_sljit_get_register_index(reg)); return reg_map[reg]; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) { CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); return reg << 1; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_si size) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void 
*instruction, sljit_s32 size) { CHECK_ERROR(); CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); if (size == 2) - return push_inst16(compiler, *(sljit_uh*)instruction); + return push_inst16(compiler, *(sljit_u16*)instruction); return push_inst32(compiler, *(sljit_ins*)instruction); } @@ -1550,7 +1560,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *co /* Floating point operators */ /* --------------------------------------------------------------------- */ -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) { #ifdef SLJIT_IS_FPU_AVAILABLE return SLJIT_IS_FPU_AVAILABLE; @@ -1562,11 +1572,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) #define FPU_LOAD (1 << 20) -static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw) +static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) { sljit_sw tmp; sljit_uw imm; - sljit_sw inst = VSTR_F32 | (flags & (SLJIT_SINGLE_OP | FPU_LOAD)); + sljit_sw inst = VSTR_F32 | (flags & (SLJIT_F32_OP | FPU_LOAD)); SLJIT_ASSERT(arg & SLJIT_MEM); @@ -1626,16 +1636,16 @@ static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sl return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg)); } -static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { if (src & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src, srcw)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src, srcw)); src = 
TMP_FREG1; } - FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_SINGLE_OP) | DD4(TMP_FREG1) | DM4(src))); + FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_F32_OP) | DD4(TMP_FREG1) | DM4(src))); if (dst == SLJIT_UNUSED) return SLJIT_SUCCESS; @@ -1647,11 +1657,11 @@ static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler * return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw); } -static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { - sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (FAST_IS_REG(src)) FAIL_IF(push_inst32(compiler, VMOV | RT4(src) | DN4(TMP_FREG1))); @@ -1664,85 +1674,85 @@ static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler * FAIL_IF(push_inst32(compiler, VMOV | RT4(TMP_REG1) | DN4(TMP_FREG1))); } - FAIL_IF(push_inst32(compiler, VCVT_F32_S32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(TMP_FREG1))); + FAIL_IF(push_inst32(compiler, VCVT_F32_S32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(TMP_FREG1))); if (dst & SLJIT_MEM) - return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw); + return emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw); return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { if (src1 & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w); + 
emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w); src1 = TMP_FREG1; } if (src2 & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w); + emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w); src2 = TMP_FREG2; } - FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_SINGLE_OP) | DD4(src1) | DM4(src2))); + FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_F32_OP) | DD4(src1) | DM4(src2))); return push_inst32(compiler, VMRS); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { - sljit_si dst_r; + sljit_s32 dst_r; CHECK_ERROR(); compiler->cache_arg = 0; compiler->cache_argw = 0; - if (GET_OPCODE(op) != SLJIT_CONVD_FROMS) - op ^= SLJIT_SINGLE_OP; + if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32) + op ^= SLJIT_F32_OP; - SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error); + SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100), float_transfer_bit_error); SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; if (src & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, dst_r, src, srcw); + emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, dst_r, src, srcw); src = dst_r; } switch (GET_OPCODE(op)) { - case SLJIT_DMOV: + case SLJIT_MOV_F64: if (src != dst_r) { if (dst_r != TMP_FREG1) - FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src))); + FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(src))); else dst_r = src; } break; - case SLJIT_DNEG: - FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src))); + case SLJIT_NEG_F64: + FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(src))); break; - case SLJIT_DABS: - FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src))); + case SLJIT_ABS_F64: + FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(src))); break; - case SLJIT_CONVD_FROMS: - FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src))); - op ^= SLJIT_SINGLE_OP; + case SLJIT_CONV_F64_FROM_F32: + FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(src))); + op ^= SLJIT_F32_OP; break; } if (dst & SLJIT_MEM) - return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), dst_r, dst, dstw); + return emit_fop_mem(compiler, (op & SLJIT_F32_OP), dst_r, dst, dstw); return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { - sljit_si dst_r; + sljit_s32 dst_r; CHECK_ERROR(); CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, 
src1w, src2, src2w)); @@ -1752,36 +1762,36 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile compiler->cache_arg = 0; compiler->cache_argw = 0; - op ^= SLJIT_SINGLE_OP; + op ^= SLJIT_F32_OP; dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (src1 & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w); + emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w); src1 = TMP_FREG1; } if (src2 & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w); + emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w); src2 = TMP_FREG2; } switch (GET_OPCODE(op)) { - case SLJIT_DADD: - FAIL_IF(push_inst32(compiler, VADD_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); + case SLJIT_ADD_F64: + FAIL_IF(push_inst32(compiler, VADD_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); break; - case SLJIT_DSUB: - FAIL_IF(push_inst32(compiler, VSUB_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); + case SLJIT_SUB_F64: + FAIL_IF(push_inst32(compiler, VSUB_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); break; - case SLJIT_DMUL: - FAIL_IF(push_inst32(compiler, VMUL_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); + case SLJIT_MUL_F64: + FAIL_IF(push_inst32(compiler, VMUL_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); break; - case SLJIT_DDIV: - FAIL_IF(push_inst32(compiler, VDIV_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); + case SLJIT_DIV_F64: + FAIL_IF(push_inst32(compiler, VDIV_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); break; } if (!(dst & SLJIT_MEM)) return SLJIT_SUCCESS; - return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw); + return emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw); } #undef FPU_LOAD @@ -1790,7 +1800,7 @@ 
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile /* Other instructions */ /* --------------------------------------------------------------------- */ -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { CHECK_ERROR(); CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); @@ -1813,7 +1823,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c return getput_arg(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, 0, 0); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) { CHECK_ERROR(); CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); @@ -1840,33 +1850,33 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler * /* Conditional instructions */ /* --------------------------------------------------------------------- */ -static sljit_uw get_cc(sljit_si type) +static sljit_uw get_cc(sljit_s32 type) { switch (type) { case SLJIT_EQUAL: case SLJIT_MUL_NOT_OVERFLOW: - case SLJIT_D_EQUAL: + case SLJIT_EQUAL_F64: return 0x0; case SLJIT_NOT_EQUAL: case SLJIT_MUL_OVERFLOW: - case SLJIT_D_NOT_EQUAL: + case SLJIT_NOT_EQUAL_F64: return 0x1; case SLJIT_LESS: - case SLJIT_D_LESS: + case SLJIT_LESS_F64: return 0x3; case SLJIT_GREATER_EQUAL: - case SLJIT_D_GREATER_EQUAL: + case SLJIT_GREATER_EQUAL_F64: return 0x2; case SLJIT_GREATER: - case SLJIT_D_GREATER: + case SLJIT_GREATER_F64: return 0x8; case SLJIT_LESS_EQUAL: - case SLJIT_D_LESS_EQUAL: + case SLJIT_LESS_EQUAL_F64: return 0x9; case SLJIT_SIG_LESS: @@ -1882,11 +1892,11 @@ static sljit_uw get_cc(sljit_si type) return 0xd; case SLJIT_OVERFLOW: - case 
SLJIT_D_UNORDERED: + case SLJIT_UNORDERED_F64: return 0x6; case SLJIT_NOT_OVERFLOW: - case SLJIT_D_ORDERED: + case SLJIT_ORDERED_F64: return 0x7; default: /* SLJIT_JUMP */ @@ -1911,7 +1921,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi return label; } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) { struct sljit_jump *jump; sljit_ins cc; @@ -1944,7 +1954,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile return jump; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) { struct sljit_jump *jump; @@ -1972,12 +1982,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil return push_inst16(compiler, (type <= SLJIT_JUMP ? 
BX : BLX) | RN3(TMP_REG1)); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw, - sljit_si type) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw, + sljit_s32 type) { - sljit_si dst_r, flags = GET_ALL_FLAGS(op); + sljit_s32 dst_r, flags = GET_ALL_FLAGS(op); sljit_ins cc, ins; CHECK_ERROR(); @@ -2054,10 +2064,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) { struct sljit_const *const_; - sljit_si dst_r; + sljit_s32 dst_r; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); @@ -2075,16 +2085,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi return const_; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) { - sljit_uh *inst = (sljit_uh*)addr; - modify_imm32_const(inst, new_addr); + sljit_u16 *inst = (sljit_u16*)addr; + modify_imm32_const(inst, new_target); + inst = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 4); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_uh *inst = (sljit_uh*)addr; + sljit_u16 *inst = (sljit_u16*)addr; modify_imm32_const(inst, new_constant); + 
inst = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 4); } diff --git a/pcre2/src/sljit/sljitNativeMIPS_32.c b/pcre2/src/sljit/sljitNativeMIPS_32.c index b2b60d7a4..b15a57dfd 100644 --- a/pcre2/src/sljit/sljitNativeMIPS_32.c +++ b/pcre2/src/sljit/sljitNativeMIPS_32.c @@ -26,7 +26,7 @@ /* mips 32-bit arch dependent functions. */ -static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm) +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm) { if (!(imm & ~0xffff)) return push_inst(compiler, ORI | SA(0) | TA(dst_ar) | IMM(imm), dst_ar); @@ -66,24 +66,24 @@ static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst_ar, FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | D(dst), DR(dst))); \ } -static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, - sljit_si dst, sljit_si src1, sljit_sw src2) +static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_s32 src1, sljit_sw src2) { switch (GET_OPCODE(op)) { case SLJIT_MOV: - case SLJIT_MOV_UI: - case SLJIT_MOV_SI: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: case SLJIT_MOV_P: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if (dst != src2) return push_inst(compiler, ADDU | S(src2) | TA(0) | D(dst), DR(dst)); return SLJIT_SUCCESS; - case SLJIT_MOV_UB: - case SLJIT_MOV_SB: + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_SB) { + if (op == SLJIT_MOV_S8) { #if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst)); #else @@ -97,11 +97,11 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj SLJIT_ASSERT_STOP(); return SLJIT_SUCCESS; - case 
SLJIT_MOV_UH: - case SLJIT_MOV_SH: + case SLJIT_MOV_U16: + case SLJIT_MOV_S16: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_SH) { + if (op == SLJIT_MOV_S16) { #if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst)); #else @@ -341,26 +341,28 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_si emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw init_value) +static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value) { FAIL_IF(push_inst(compiler, LUI | T(dst) | IMM(init_value >> 16), DR(dst))); return push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value), DR(dst)); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) { - sljit_ins *inst = (sljit_ins*)addr; + sljit_ins *inst = (sljit_ins *)addr; - inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 16) & 0xffff); - inst[1] = (inst[1] & 0xffff0000) | (new_addr & 0xffff); + inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 16) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | (new_target & 0xffff); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 2); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_ins *inst = (sljit_ins*)addr; + sljit_ins *inst = (sljit_ins *)addr; inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 16) & 0xffff); inst[1] = (inst[1] & 0xffff0000) | (new_constant & 0xffff); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); 
SLJIT_CACHE_FLUSH(inst, inst + 2); } diff --git a/pcre2/src/sljit/sljitNativeMIPS_64.c b/pcre2/src/sljit/sljitNativeMIPS_64.c index 185fb5768..8b96d5b73 100644 --- a/pcre2/src/sljit/sljitNativeMIPS_64.c +++ b/pcre2/src/sljit/sljitNativeMIPS_64.c @@ -26,11 +26,11 @@ /* mips 64-bit arch dependent functions. */ -static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm) +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm) { - sljit_si shift = 32; - sljit_si shift2; - sljit_si inv = 0; + sljit_s32 shift = 32; + sljit_s32 shift2; + sljit_s32 inv = 0; sljit_ins ins; sljit_uw uimm; @@ -119,7 +119,7 @@ static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst_ar, } #define SELECT_OP(a, b) \ - (!(op & SLJIT_INT_OP) ? a : b) + (!(op & SLJIT_I32_OP) ? a : b) #define EMIT_LOGICAL(op_imm, op_norm) \ if (flags & SRC2_IMM) { \ @@ -138,27 +138,27 @@ static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst_ar, #define EMIT_SHIFT(op_dimm, op_dimm32, op_imm, op_dv, op_v) \ if (flags & SRC2_IMM) { \ if (src2 >= 32) { \ - SLJIT_ASSERT(!(op & SLJIT_INT_OP)); \ + SLJIT_ASSERT(!(op & SLJIT_I32_OP)); \ ins = op_dimm32; \ src2 -= 32; \ } \ else \ - ins = (op & SLJIT_INT_OP) ? op_imm : op_dimm; \ + ins = (op & SLJIT_I32_OP) ? op_imm : op_dimm; \ if (op & SLJIT_SET_E) \ FAIL_IF(push_inst(compiler, ins | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \ if (CHECK_FLAGS(SLJIT_SET_E)) \ FAIL_IF(push_inst(compiler, ins | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \ } \ else { \ - ins = (op & SLJIT_INT_OP) ? op_v : op_dv; \ + ins = (op & SLJIT_I32_OP) ? 
op_v : op_dv; \ if (op & SLJIT_SET_E) \ FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \ if (CHECK_FLAGS(SLJIT_SET_E)) \ FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | D(dst), DR(dst))); \ } -static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, - sljit_si dst, sljit_si src1, sljit_sw src2) +static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_s32 src1, sljit_sw src2) { sljit_ins ins; @@ -170,11 +170,11 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(dst), DR(dst)); return SLJIT_SUCCESS; - case SLJIT_MOV_UB: - case SLJIT_MOV_SB: + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_SB) { + if (op == SLJIT_MOV_S8) { FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(24), DR(dst))); return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(24), DR(dst)); } @@ -184,11 +184,11 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj SLJIT_ASSERT_STOP(); return SLJIT_SUCCESS; - case SLJIT_MOV_UH: - case SLJIT_MOV_SH: + case SLJIT_MOV_U16: + case SLJIT_MOV_S16: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_SH) { + if (op == SLJIT_MOV_S16) { FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(16), DR(dst))); return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(16), DR(dst)); } @@ -198,12 +198,12 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj SLJIT_ASSERT_STOP(); return SLJIT_SUCCESS; - case SLJIT_MOV_UI: - SLJIT_ASSERT(!(op & SLJIT_INT_OP)); + case SLJIT_MOV_U32: + 
SLJIT_ASSERT(!(op & SLJIT_I32_OP)); FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(0), DR(dst))); return push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst)); - case SLJIT_MOV_SI: + case SLJIT_MOV_S32: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); return push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(0), DR(dst)); @@ -231,7 +231,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); /* Check zero. */ FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(5), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM((op & SLJIT_INT_OP) ? 32 : 64), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM((op & SLJIT_I32_OP) ? 32 : 64), UNMOVABLE_INS)); FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | T(dst) | IMM(-1), DR(dst))); /* Loop for searching the highest bit. 
*/ FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(dst) | T(dst) | IMM(1), DR(dst))); @@ -392,7 +392,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj SLJIT_ASSERT(!(flags & SRC2_IMM)); if (!(op & SLJIT_SET_O)) { #if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) - if (op & SLJIT_INT_OP) + if (op & SLJIT_I32_OP) return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)); FAIL_IF(push_inst(compiler, DMULT | S(src1) | T(src2), MOVABLE_INS)); return push_inst(compiler, MFLO | D(dst), DR(dst)); @@ -436,7 +436,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_si emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw init_value) +static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value) { FAIL_IF(push_inst(compiler, LUI | T(dst) | IMM(init_value >> 48), DR(dst))); FAIL_IF(push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value >> 32), DR(dst))); @@ -446,24 +446,26 @@ static SLJIT_INLINE sljit_si emit_const(struct sljit_compiler *compiler, sljit_s return push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value), DR(dst)); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) { - sljit_ins *inst = (sljit_ins*)addr; + sljit_ins *inst = (sljit_ins *)addr; - inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 48) & 0xffff); - inst[1] = (inst[1] & 0xffff0000) | ((new_addr >> 32) & 0xffff); - inst[3] = (inst[3] & 0xffff0000) | ((new_addr >> 16) & 0xffff); - inst[5] = (inst[5] & 0xffff0000) | (new_addr & 0xffff); + inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 48) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | ((new_target >> 32) & 0xffff); + inst[3] = (inst[3] & 0xffff0000) | ((new_target >> 16) & 0xffff); + inst[5] = (inst[5] 
& 0xffff0000) | (new_target & 0xffff); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 6); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_ins *inst = (sljit_ins*)addr; + sljit_ins *inst = (sljit_ins *)addr; inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 48) & 0xffff); inst[1] = (inst[1] & 0xffff0000) | ((new_constant >> 32) & 0xffff); inst[3] = (inst[3] & 0xffff0000) | ((new_constant >> 16) & 0xffff); inst[5] = (inst[5] & 0xffff0000) | (new_constant & 0xffff); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 6); } diff --git a/pcre2/src/sljit/sljitNativeMIPS_common.c b/pcre2/src/sljit/sljitNativeMIPS_common.c index cf3535f81..fe37e3ef0 100644 --- a/pcre2/src/sljit/sljitNativeMIPS_common.c +++ b/pcre2/src/sljit/sljitNativeMIPS_common.c @@ -27,7 +27,7 @@ /* Latest MIPS architecture. 
*/ /* Automatically detect SLJIT_MIPS_R1 */ -SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) { #if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) @@ -42,7 +42,7 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) /* Length of an instruction word Both for mips-32 and mips-64 */ -typedef sljit_ui sljit_ins; +typedef sljit_u32 sljit_ins; #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) @@ -68,7 +68,7 @@ typedef sljit_ui sljit_ins; #define TMP_FREG1 (0) #define TMP_FREG2 ((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) << 1) -static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { 0, 2, 5, 6, 7, 8, 9, 10, 11, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 3, 25, 4 }; @@ -201,7 +201,7 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { /* dest_reg is the absolute name of the register Useful for reordering instructions in the delay slot. */ -static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_si delay_slot) +static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 delay_slot) { SLJIT_ASSERT(delay_slot == MOVABLE_INS || delay_slot >= UNMOVABLE_INS || delay_slot == ((ins >> 11) & 0x1f) || delay_slot == ((ins >> 16) & 0x1f)); @@ -213,12 +213,12 @@ static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_ return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_ins invert_branch(sljit_si flags) +static SLJIT_INLINE sljit_ins invert_branch(sljit_s32 flags) { return (flags & IS_BIT26_COND) ? 
(1 << 26) : (1 << 16); } -static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code) +static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) { sljit_sw diff; sljit_uw target_addr; @@ -237,9 +237,10 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i target_addr = jump->u.target; else { SLJIT_ASSERT(jump->flags & JUMP_LABEL); - target_addr = (sljit_uw)(code + jump->u.label->size); + target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; } - inst = (sljit_ins*)jump->addr; + + inst = (sljit_ins *)jump->addr; if (jump->flags & IS_COND) inst--; @@ -250,7 +251,7 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i /* B instructions. */ if (jump->flags & IS_MOVABLE) { - diff = ((sljit_sw)target_addr - (sljit_sw)(inst)) >> 2; + diff = ((sljit_sw)target_addr - (sljit_sw)inst - executable_offset) >> 2; if (diff <= SIMM_MAX && diff >= SIMM_MIN) { jump->flags |= PATCH_B; @@ -268,7 +269,7 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i } } else { - diff = ((sljit_sw)target_addr - (sljit_sw)(inst + 1)) >> 2; + diff = ((sljit_sw)target_addr - (sljit_sw)(inst + 1) - executable_offset) >> 2; if (diff <= SIMM_MAX && diff >= SIMM_MIN) { jump->flags |= PATCH_B; @@ -364,6 +365,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_ins *buf_ptr; sljit_ins *buf_end; sljit_uw word_count; + sljit_sw executable_offset; sljit_uw addr; struct sljit_label *label; @@ -380,9 +382,12 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil code_ptr = code; word_count = 0; + executable_offset = SLJIT_EXEC_OFFSET(code); + label = compiler->labels; jump = compiler->jumps; const_ = compiler->consts; + do { buf_ptr = (sljit_ins*)buf->memory; buf_end = buf_ptr + (buf->used_size >> 
2); @@ -393,8 +398,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_ASSERT(!const_ || const_->addr >= word_count); /* These structures are ordered by their address. */ if (label && label->size == word_count) { - /* Just recording the address. */ - label->addr = (sljit_uw)code_ptr; + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = code_ptr - code; label = label->next; } @@ -404,7 +408,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil #else jump->addr = (sljit_uw)(code_ptr - 7); #endif - code_ptr = detect_jump_type(jump, code_ptr, code); + code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset); jump = jump->next; } if (const_ && const_->addr == word_count) { @@ -434,16 +438,16 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil while (jump) { do { addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; - buf_ptr = (sljit_ins*)jump->addr; + buf_ptr = (sljit_ins *)jump->addr; if (jump->flags & PATCH_B) { - addr = (sljit_sw)(addr - (jump->addr + sizeof(sljit_ins))) >> 2; + addr = (sljit_sw)(addr - ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) + sizeof(sljit_ins))) >> 2; SLJIT_ASSERT((sljit_sw)addr <= SIMM_MAX && (sljit_sw)addr >= SIMM_MIN); buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | (addr & 0xffff); break; } if (jump->flags & PATCH_J) { - SLJIT_ASSERT((addr & ~0xfffffff) == ((jump->addr + sizeof(sljit_ins)) & ~0xfffffff)); + SLJIT_ASSERT((addr & ~0xfffffff) == (((sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) + sizeof(sljit_ins)) & ~0xfffffff)); buf_ptr[0] |= (addr >> 2) & 0x03ffffff; break; } @@ -476,7 +480,12 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); + + code = 
(sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + #ifndef __GNUC__ SLJIT_CACHE_FLUSH(code, code_ptr); #else @@ -538,12 +547,12 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil #include "sljitNativeMIPS_64.c" #endif -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, - sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, - sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { sljit_ins base; - sljit_si i, tmp, offs; + sljit_s32 i, tmp, offs; CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); @@ -575,12 +584,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? 
(SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; for (i = SLJIT_S0; i >= tmp; i--) { - offs -= (sljit_si)(sizeof(sljit_sw)); + offs -= (sljit_s32)(sizeof(sljit_sw)); FAIL_IF(push_inst(compiler, STACK_STORE | base | T(i) | IMM(offs), MOVABLE_INS)); } for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { - offs -= (sljit_si)(sizeof(sljit_sw)); + offs -= (sljit_s32)(sizeof(sljit_sw)); FAIL_IF(push_inst(compiler, STACK_STORE | base | T(i) | IMM(offs), MOVABLE_INS)); } @@ -594,9 +603,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, - sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, - sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { CHECK_ERROR(); CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); @@ -611,9 +620,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compi return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { - sljit_si local_size, i, tmp, offs; + sljit_s32 local_size, i, tmp, offs; sljit_ins base; CHECK_ERROR(); @@ -631,19 +640,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi local_size = 0; } - FAIL_IF(push_inst(compiler, STACK_LOAD | base | TA(RETURN_ADDR_REG) | IMM(local_size - (sljit_si)sizeof(sljit_sw)), RETURN_ADDR_REG)); - offs = local_size - (sljit_si)GET_SAVED_REGISTERS_SIZE(compiler->scratches, 
compiler->saveds, 1); + FAIL_IF(push_inst(compiler, STACK_LOAD | base | TA(RETURN_ADDR_REG) | IMM(local_size - (sljit_s32)sizeof(sljit_sw)), RETURN_ADDR_REG)); + offs = local_size - (sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1); tmp = compiler->scratches; for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) { FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(i) | IMM(offs), DR(i))); - offs += (sljit_si)(sizeof(sljit_sw)); + offs += (sljit_s32)(sizeof(sljit_sw)); } tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; for (i = tmp; i <= SLJIT_S0; i++) { FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(i) | IMM(offs), DR(i))); - offs += (sljit_si)(sizeof(sljit_sw)); + offs += (sljit_s32)(sizeof(sljit_sw)); } SLJIT_ASSERT(offs == local_size - (sljit_sw)(sizeof(sljit_sw))); @@ -668,7 +677,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi #define ARCH_32_64(a, b) b #endif -static SLJIT_CONST sljit_ins data_transfer_insts[16 + 4] = { +static const sljit_ins data_transfer_insts[16 + 4] = { /* u w s */ ARCH_32_64(HI(43) /* sw */, HI(63) /* sd */), /* u w l */ ARCH_32_64(HI(35) /* lw */, HI(55) /* ld */), /* u b s */ HI(40) /* sb */, @@ -698,7 +707,7 @@ static SLJIT_CONST sljit_ins data_transfer_insts[16 + 4] = { /* reg_ar is an absoulute register! */ /* Can perform an operation using at most 1 instruction. */ -static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw) +static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw) { SLJIT_ASSERT(arg & SLJIT_MEM); @@ -716,7 +725,7 @@ static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, /* See getput_arg below. Note: can_cache is called only for binary operators. Those operators always uses word arguments without write back. 
*/ -static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) +static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) { SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM)); @@ -739,9 +748,9 @@ static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_ } /* Emit the necessary instructions. See can_cache above. */ -static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) +static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) { - sljit_si tmp_ar, base, delay_slot; + sljit_s32 tmp_ar, base, delay_slot; SLJIT_ASSERT(arg & SLJIT_MEM); if (!(next_arg & SLJIT_MEM)) { @@ -878,7 +887,7 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, slji return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); } -static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw) +static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw) { if (getput_arg_fast(compiler, flags, reg_ar, arg, argw)) return compiler->error; @@ -887,26 +896,26 @@ static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_ return getput_arg(compiler, flags, reg_ar, arg, argw, 0, 0); } -static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w) +static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) { if (getput_arg_fast(compiler, 
flags, reg, arg1, arg1w)) return compiler->error; return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); } -static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { /* arg1 goes to TMP_REG1 or src reg arg2 goes to TMP_REG2, imm or src reg TMP_REG3 can be used for caching result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */ - sljit_si dst_r = TMP_REG2; - sljit_si src1_r; + sljit_s32 dst_r = TMP_REG2; + sljit_s32 src1_r; sljit_sw src2_r = 0; - sljit_si sugg_src2_r = TMP_REG2; + sljit_s32 sugg_src2_r = TMP_REG2; if (!(flags & ALT_KEEP_CACHE)) { compiler->cache_arg = 0; @@ -914,7 +923,7 @@ static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si f } if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { - if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM)) + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM)) return SLJIT_SUCCESS; if (GET_FLAGS(op)) flags |= UNUSED_DEST; @@ -922,7 +931,7 @@ static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si f else if (FAST_IS_REG(dst)) { dst_r = dst; flags |= REG_DEST; - if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) sugg_src2_r = dst_r; } else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, DR(TMP_REG1), dst, dstw)) @@ -976,7 +985,7 @@ static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si f if (FAST_IS_REG(src2)) { src2_r = src2; flags |= REG2_SOURCE; - if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) + if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) dst_r = src2_r; } else if (src2 & SLJIT_IMM) { 
@@ -987,7 +996,7 @@ static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si f } else { src2_r = 0; - if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) && (dst & SLJIT_MEM)) + if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) && (dst & SLJIT_MEM)) dst_r = 0; } } @@ -1029,10 +1038,10 @@ static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si f return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) - sljit_si int_op = op & SLJIT_INT_OP; + sljit_s32 int_op = op & SLJIT_I32_OP; #endif CHECK_ERROR(); @@ -1044,20 +1053,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler return push_inst(compiler, BREAK, UNMOVABLE_INS); case SLJIT_NOP: return push_inst(compiler, NOP, UNMOVABLE_INS); - case SLJIT_LUMUL: - case SLJIT_LSMUL: + case SLJIT_LMUL_UW: + case SLJIT_LMUL_SW: #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) - FAIL_IF(push_inst(compiler, (op == SLJIT_LUMUL ? DMULTU : DMULT) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? DMULTU : DMULT) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); #else - FAIL_IF(push_inst(compiler, (op == SLJIT_LUMUL ? MULTU : MULT) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? 
MULTU : MULT) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); #endif FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0))); return push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1)); - case SLJIT_UDIVMOD: - case SLJIT_SDIVMOD: - case SLJIT_UDIVI: - case SLJIT_SDIVI: - SLJIT_COMPILE_ASSERT((SLJIT_UDIVMOD & 0x2) == 0 && SLJIT_UDIVI - 0x2 == SLJIT_UDIVMOD, bad_div_opcode_assignments); + case SLJIT_DIVMOD_UW: + case SLJIT_DIVMOD_SW: + case SLJIT_DIV_UW: + case SLJIT_DIV_SW: + SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments); #if !(defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); @@ -1065,28 +1074,28 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) if (int_op) - FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_UDIVI ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); else - FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_UDIVI ? DDIVU : DDIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DDIVU : DDIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); #else - FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_UDIVI ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); #endif FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0))); - return (op >= SLJIT_UDIVI) ? SLJIT_SUCCESS : push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1)); + return (op >= SLJIT_DIV_UW) ? 
SLJIT_SUCCESS : push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1)); } return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) # define flags 0 #else - sljit_si flags = 0; + sljit_s32 flags = 0; #endif CHECK_ERROR(); @@ -1095,10 +1104,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler ADJUST_LOCAL_OFFSET(src, srcw); #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) - if ((op & SLJIT_INT_OP) && GET_OPCODE(op) >= SLJIT_NOT) { + if ((op & SLJIT_I32_OP) && GET_OPCODE(op) >= SLJIT_NOT) { flags |= INT_DATA | SIGNED_DATA; if (src & SLJIT_IMM) - srcw = (sljit_si)srcw; + srcw = (sljit_s32)srcw; } #endif @@ -1107,61 +1116,61 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler case SLJIT_MOV_P: return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_MOV_UI: + case SLJIT_MOV_U32: #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - return emit_op(compiler, SLJIT_MOV_UI, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, SLJIT_MOV_U32, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); #else - return emit_op(compiler, SLJIT_MOV_UI, INT_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ui)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_U32, INT_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
(sljit_u32)srcw : srcw); #endif - case SLJIT_MOV_SI: + case SLJIT_MOV_S32: #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw); #else - return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_si)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s32)srcw : srcw); #endif - case SLJIT_MOV_UB: - return emit_op(compiler, SLJIT_MOV_UB, BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub)srcw : srcw); + case SLJIT_MOV_U8: + return emit_op(compiler, SLJIT_MOV_U8, BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw); - case SLJIT_MOV_SB: - return emit_op(compiler, SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw); + case SLJIT_MOV_S8: + return emit_op(compiler, SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw); - case SLJIT_MOV_UH: - return emit_op(compiler, SLJIT_MOV_UH, HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw); + case SLJIT_MOV_U16: + return emit_op(compiler, SLJIT_MOV_U16, HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw); - case SLJIT_MOV_SH: - return emit_op(compiler, SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw); + case SLJIT_MOV_S16: + return emit_op(compiler, SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
(sljit_s16)srcw : srcw); case SLJIT_MOVU: case SLJIT_MOVU_P: return emit_op(compiler, SLJIT_MOV, WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_MOVU_UI: + case SLJIT_MOVU_U32: #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - return emit_op(compiler, SLJIT_MOV_UI, INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); #else - return emit_op(compiler, SLJIT_MOV_UI, INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ui)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u32)srcw : srcw); #endif - case SLJIT_MOVU_SI: + case SLJIT_MOVU_S32: #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); #else - return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_si)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s32)srcw : srcw); #endif - case SLJIT_MOVU_UB: - return emit_op(compiler, SLJIT_MOV_UB, BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub)srcw : srcw); + case SLJIT_MOVU_U8: + return emit_op(compiler, SLJIT_MOV_U8, BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw); - case SLJIT_MOVU_SB: - return emit_op(compiler, SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
(sljit_sb)srcw : srcw); + case SLJIT_MOVU_S8: + return emit_op(compiler, SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw); - case SLJIT_MOVU_UH: - return emit_op(compiler, SLJIT_MOV_UH, HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw); + case SLJIT_MOVU_U16: + return emit_op(compiler, SLJIT_MOV_U16, HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw); - case SLJIT_MOVU_SH: - return emit_op(compiler, SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw); + case SLJIT_MOVU_S16: + return emit_op(compiler, SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw); case SLJIT_NOT: return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); @@ -1180,15 +1189,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler #endif } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) # define flags 0 #else - sljit_si flags = 0; + sljit_s32 flags = 0; #endif CHECK_ERROR(); @@ -1198,12 +1207,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler ADJUST_LOCAL_OFFSET(src2, src2w); #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) - if (op & SLJIT_INT_OP) { + if (op & SLJIT_I32_OP) { flags |= INT_DATA | SIGNED_DATA; if (src1 & SLJIT_IMM) - src1w = (sljit_si)src1w; + src1w = (sljit_s32)src1w; if (src2 & SLJIT_IMM) 
- src2w = (sljit_si)src2w; + src2w = (sljit_s32)src2w; } #endif @@ -1232,7 +1241,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler src2w &= 0x1f; #else if (src2 & SLJIT_IMM) { - if (op & SLJIT_INT_OP) + if (op & SLJIT_I32_OP) src2w &= 0x1f; else src2w &= 0x3f; @@ -1248,20 +1257,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler #endif } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) { CHECK_REG_INDEX(check_sljit_get_register_index(reg)); return reg_map[reg]; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) { CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); return reg << 1; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_si size) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_s32 size) { CHECK_ERROR(); CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); @@ -1273,7 +1282,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *co /* Floating point operators */ /* --------------------------------------------------------------------- */ -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) { #ifdef SLJIT_IS_FPU_AVAILABLE return SLJIT_IS_FPU_AVAILABLE; @@ -1286,17 +1295,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) #endif } -#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 7)) -#define FMT(op) (((op & SLJIT_SINGLE_OP) ^ SLJIT_SINGLE_OP) << (21 - 8)) +#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 7)) +#define FMT(op) (((op & SLJIT_F32_OP) ^ SLJIT_F32_OP) << (21 - 8)) 
-static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) # define flags 0 #else - sljit_si flags = (GET_OPCODE(op) == SLJIT_CONVW_FROMD) << 21; + sljit_s32 flags = (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64) << 21; #endif if (src & SLJIT_MEM) { @@ -1322,17 +1331,17 @@ static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler * #endif } -static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) # define flags 0 #else - sljit_si flags = (GET_OPCODE(op) == SLJIT_CONVD_FROMW) << 21; + sljit_s32 flags = (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW) << 21; #endif - sljit_si dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1; + sljit_s32 dst_r = FAST_IS_REG(dst) ? 
(dst << 1) : TMP_FREG1; if (FAST_IS_REG(src)) FAIL_IF(push_inst(compiler, MTC1 | flags | T(src) | FS(TMP_FREG1), MOVABLE_INS)); @@ -1342,14 +1351,14 @@ static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler * } else { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) - srcw = (sljit_si)srcw; + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) + srcw = (sljit_s32)srcw; #endif FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw)); FAIL_IF(push_inst(compiler, MTC1 | flags | T(TMP_REG1) | FS(TMP_FREG1), MOVABLE_INS)); } - FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | (((op & SLJIT_SINGLE_OP) ^ SLJIT_SINGLE_OP) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | (((op & SLJIT_F32_OP) ^ SLJIT_F32_OP) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS)); if (dst & SLJIT_MEM) return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0); @@ -1360,9 +1369,9 @@ static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler * #endif } -static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { if (src1 & SLJIT_MEM) { FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); @@ -1399,21 +1408,21 @@ static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler return push_inst(compiler, C_UN_S | FMT(op) | FT(src2) | FS(src1), FCSR_FCC); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + 
sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { - sljit_si dst_r; + sljit_s32 dst_r; CHECK_ERROR(); compiler->cache_arg = 0; compiler->cache_argw = 0; - SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); + SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); - if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) - op ^= SLJIT_SINGLE_OP; + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) + op ^= SLJIT_F32_OP; dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1; @@ -1425,7 +1434,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile src <<= 1; switch (GET_OPCODE(op)) { - case SLJIT_DMOV: + case SLJIT_MOV_F64: if (src != dst_r) { if (dst_r != TMP_FREG1) FAIL_IF(push_inst(compiler, MOV_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS)); @@ -1433,15 +1442,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile dst_r = src; } break; - case SLJIT_DNEG: + case SLJIT_NEG_F64: FAIL_IF(push_inst(compiler, NEG_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS)); break; - case SLJIT_DABS: + case SLJIT_ABS_F64: FAIL_IF(push_inst(compiler, ABS_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS)); break; - case SLJIT_CONVD_FROMS: - FAIL_IF(push_inst(compiler, CVT_S_S | ((op & SLJIT_SINGLE_OP) ? 1 : (1 << 21)) | FS(src) | FD(dst_r), MOVABLE_INS)); - op ^= SLJIT_SINGLE_OP; + case SLJIT_CONV_F64_FROM_F32: + FAIL_IF(push_inst(compiler, CVT_S_S | ((op & SLJIT_F32_OP) ? 
1 : (1 << 21)) | FS(src) | FD(dst_r), MOVABLE_INS)); + op ^= SLJIT_F32_OP; break; } @@ -1450,12 +1459,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { - sljit_si dst_r, flags = 0; + sljit_s32 dst_r, flags = 0; CHECK_ERROR(); CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); @@ -1509,19 +1518,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile src2 = TMP_FREG2; switch (GET_OPCODE(op)) { - case SLJIT_DADD: + case SLJIT_ADD_F64: FAIL_IF(push_inst(compiler, ADD_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); break; - case SLJIT_DSUB: + case SLJIT_SUB_F64: FAIL_IF(push_inst(compiler, SUB_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); break; - case SLJIT_DMUL: + case SLJIT_MUL_F64: FAIL_IF(push_inst(compiler, MUL_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); break; - case SLJIT_DDIV: + case SLJIT_DIV_F64: FAIL_IF(push_inst(compiler, DIV_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); break; } @@ -1536,7 +1545,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile /* Other instructions */ /* --------------------------------------------------------------------- */ -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { CHECK_ERROR(); CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); 
@@ -1553,7 +1562,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c return emit_op_mem(compiler, WORD_DATA, RETURN_ADDR_REG, dst, dstw); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) { CHECK_ERROR(); CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); @@ -1617,12 +1626,12 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi flags = IS_BIT16_COND; \ delay_check = FCSR_FCC; -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) { struct sljit_jump *jump; sljit_ins inst; - sljit_si flags = 0; - sljit_si delay_check = UNMOVABLE_INS; + sljit_s32 flags = 0; + sljit_s32 delay_check = UNMOVABLE_INS; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_jump(compiler, type)); @@ -1634,27 +1643,27 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile switch (type) { case SLJIT_EQUAL: - case SLJIT_D_NOT_EQUAL: + case SLJIT_NOT_EQUAL_F64: BR_NZ(EQUAL_FLAG); break; case SLJIT_NOT_EQUAL: - case SLJIT_D_EQUAL: + case SLJIT_EQUAL_F64: BR_Z(EQUAL_FLAG); break; case SLJIT_LESS: - case SLJIT_D_LESS: + case SLJIT_LESS_F64: BR_Z(ULESS_FLAG); break; case SLJIT_GREATER_EQUAL: - case SLJIT_D_GREATER_EQUAL: + case SLJIT_GREATER_EQUAL_F64: BR_NZ(ULESS_FLAG); break; case SLJIT_GREATER: - case SLJIT_D_GREATER: + case SLJIT_GREATER_F64: BR_Z(UGREATER_FLAG); break; case SLJIT_LESS_EQUAL: - case SLJIT_D_LESS_EQUAL: + case SLJIT_LESS_EQUAL_F64: BR_NZ(UGREATER_FLAG); break; case SLJIT_SIG_LESS: @@ -1677,10 +1686,10 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile case SLJIT_MUL_NOT_OVERFLOW: BR_NZ(OVERFLOW_FLAG); 
break; - case SLJIT_D_UNORDERED: + case SLJIT_UNORDERED_F64: BR_F(); break; - case SLJIT_D_ORDERED: + case SLJIT_ORDERED_F64: BR_T(); break; default: @@ -1733,12 +1742,12 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile src2 = 0; \ } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { struct sljit_jump *jump; - sljit_si flags; + sljit_s32 flags; sljit_ins inst; CHECK_ERROR_PTR(); @@ -1748,7 +1757,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler compiler->cache_arg = 0; compiler->cache_argw = 0; - flags = ((type & SLJIT_INT_OP) ? INT_DATA : WORD_DATA) | LOAD_DATA; + flags = ((type & SLJIT_I32_OP) ? INT_DATA : WORD_DATA) | LOAD_DATA; if (src1 & SLJIT_MEM) { PTR_FAIL_IF(emit_op_mem2(compiler, flags, DR(TMP_REG1), src1, src1w, src2, src2w)); src1 = TMP_REG1; @@ -1854,13 +1863,13 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler #undef RESOLVE_IMM1 #undef RESOLVE_IMM2 -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_si type, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { struct sljit_jump *jump; sljit_ins inst; - sljit_si if_true; + sljit_s32 if_true; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_fcmp(compiler, type, src1, src1w, src2, src2w)); @@ -1888,37 +1897,37 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compile jump->flags |= IS_BIT16_COND; switch (type & 0xff) { - case SLJIT_D_EQUAL: + 
case SLJIT_EQUAL_F64: inst = C_UEQ_S; if_true = 1; break; - case SLJIT_D_NOT_EQUAL: + case SLJIT_NOT_EQUAL_F64: inst = C_UEQ_S; if_true = 0; break; - case SLJIT_D_LESS: + case SLJIT_LESS_F64: inst = C_ULT_S; if_true = 1; break; - case SLJIT_D_GREATER_EQUAL: + case SLJIT_GREATER_EQUAL_F64: inst = C_ULT_S; if_true = 0; break; - case SLJIT_D_GREATER: + case SLJIT_GREATER_F64: inst = C_ULE_S; if_true = 0; break; - case SLJIT_D_LESS_EQUAL: + case SLJIT_LESS_EQUAL_F64: inst = C_ULE_S; if_true = 1; break; - case SLJIT_D_UNORDERED: + case SLJIT_UNORDERED_F64: inst = C_UN_S; if_true = 1; break; default: /* Make compilers happy. */ SLJIT_ASSERT_STOP(); - case SLJIT_D_ORDERED: + case SLJIT_ORDERED_F64: inst = C_UN_S; if_true = 0; break; @@ -1943,9 +1952,9 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compile #undef FLOAT_DATA #undef FMT -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) { - sljit_si src_r = TMP_REG2; + sljit_s32 src_r = TMP_REG2; struct sljit_jump *jump = NULL; CHECK_ERROR(); @@ -2001,17 +2010,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw, - sljit_si type) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw, + sljit_s32 type) { - sljit_si sugg_dst_ar, dst_ar; - sljit_si flags = GET_ALL_FLAGS(op); + sljit_s32 sugg_dst_ar, dst_ar; + sljit_s32 flags = GET_ALL_FLAGS(op); #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) # define mem_type WORD_DATA #else - sljit_si mem_type = (op & 
SLJIT_INT_OP) ? (INT_DATA | SIGNED_DATA) : WORD_DATA; + sljit_s32 mem_type = (op & SLJIT_I32_OP) ? (INT_DATA | SIGNED_DATA) : WORD_DATA; #endif CHECK_ERROR(); @@ -2023,7 +2032,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com op = GET_OPCODE(op); #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) - if (op == SLJIT_MOV_SI || op == SLJIT_MOV_UI) + if (op == SLJIT_MOV_S32 || op == SLJIT_MOV_U32) mem_type = INT_DATA | SIGNED_DATA; #endif sugg_dst_ar = DR((op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2); @@ -2045,14 +2054,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com break; case SLJIT_LESS: case SLJIT_GREATER_EQUAL: - case SLJIT_D_LESS: - case SLJIT_D_GREATER_EQUAL: + case SLJIT_LESS_F64: + case SLJIT_GREATER_EQUAL_F64: dst_ar = ULESS_FLAG; break; case SLJIT_GREATER: case SLJIT_LESS_EQUAL: - case SLJIT_D_GREATER: - case SLJIT_D_LESS_EQUAL: + case SLJIT_GREATER_F64: + case SLJIT_LESS_EQUAL_F64: dst_ar = UGREATER_FLAG; break; case SLJIT_SIG_LESS: @@ -2073,13 +2082,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com dst_ar = sugg_dst_ar; type ^= 0x1; /* Flip type bit for the XORI below. 
*/ break; - case SLJIT_D_EQUAL: - case SLJIT_D_NOT_EQUAL: + case SLJIT_EQUAL_F64: + case SLJIT_NOT_EQUAL_F64: dst_ar = EQUAL_FLAG; break; - case SLJIT_D_UNORDERED: - case SLJIT_D_ORDERED: + case SLJIT_UNORDERED_F64: + case SLJIT_ORDERED_F64: FAIL_IF(push_inst(compiler, CFC1 | TA(sugg_dst_ar) | DA(FCSR_REG), sugg_dst_ar)); FAIL_IF(push_inst(compiler, SRL | TA(sugg_dst_ar) | DA(sugg_dst_ar) | SH_IMM(23), sugg_dst_ar)); FAIL_IF(push_inst(compiler, ANDI | SA(sugg_dst_ar) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar)); @@ -2115,10 +2124,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com #endif } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) { struct sljit_const *const_; - sljit_si reg; + sljit_s32 reg; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); diff --git a/pcre2/src/sljit/sljitNativePPC_32.c b/pcre2/src/sljit/sljitNativePPC_32.c index b14b75ceb..f696d1b8d 100644 --- a/pcre2/src/sljit/sljitNativePPC_32.c +++ b/pcre2/src/sljit/sljitNativePPC_32.c @@ -26,7 +26,7 @@ /* ppc 32-bit arch dependent functions. 
*/ -static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si reg, sljit_sw imm) +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm) { if (imm <= SIMM_MAX && imm >= SIMM_MIN) return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm)); @@ -41,39 +41,39 @@ static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si reg, sl #define INS_CLEAR_LEFT(dst, src, from) \ (RLWINM | S(src) | A(dst) | ((from) << 6) | (31 << 1)) -static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, - sljit_si dst, sljit_si src1, sljit_si src2) +static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_s32 src1, sljit_s32 src2) { switch (op) { case SLJIT_MOV: - case SLJIT_MOV_UI: - case SLJIT_MOV_SI: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: case SLJIT_MOV_P: SLJIT_ASSERT(src1 == TMP_REG1); if (dst != src2) return push_inst(compiler, OR | S(src2) | A(dst) | B(src2)); return SLJIT_SUCCESS; - case SLJIT_MOV_UB: - case SLJIT_MOV_SB: + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: SLJIT_ASSERT(src1 == TMP_REG1); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_SB) + if (op == SLJIT_MOV_S8) return push_inst(compiler, EXTSB | S(src2) | A(dst)); return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24)); } - else if ((flags & REG_DEST) && op == SLJIT_MOV_SB) + else if ((flags & REG_DEST) && op == SLJIT_MOV_S8) return push_inst(compiler, EXTSB | S(src2) | A(dst)); else { SLJIT_ASSERT(dst == src2); } return SLJIT_SUCCESS; - case SLJIT_MOV_UH: - case SLJIT_MOV_SH: + case SLJIT_MOV_U16: + case SLJIT_MOV_S16: SLJIT_ASSERT(src1 == TMP_REG1); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_SH) + if (op == SLJIT_MOV_S16) return push_inst(compiler, EXTSH | S(src2) | A(dst)); return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16)); } 
@@ -244,26 +244,28 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_si emit_const(struct sljit_compiler *compiler, sljit_si reg, sljit_sw init_value) +static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw init_value) { FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(init_value >> 16))); return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value)); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) { - sljit_ins *inst = (sljit_ins*)addr; + sljit_ins *inst = (sljit_ins *)addr; - inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 16) & 0xffff); - inst[1] = (inst[1] & 0xffff0000) | (new_addr & 0xffff); + inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 16) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | (new_target & 0xffff); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 2); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_ins *inst = (sljit_ins*)addr; + sljit_ins *inst = (sljit_ins *)addr; inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 16) & 0xffff); inst[1] = (inst[1] & 0xffff0000) | (new_constant & 0xffff); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 2); } diff --git a/pcre2/src/sljit/sljitNativePPC_64.c b/pcre2/src/sljit/sljitNativePPC_64.c index 182ac7b3d..386d247db 100644 --- a/pcre2/src/sljit/sljitNativePPC_64.c +++ b/pcre2/src/sljit/sljitNativePPC_64.c @@ -41,7 +41,7 @@ #define PUSH_RLDICR(reg, shift) \ push_inst(compiler, RLDI(reg, reg, 63 - shift, shift, 1)) -static sljit_si 
load_immediate(struct sljit_compiler *compiler, sljit_si reg, sljit_sw imm) +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm) { sljit_uw tmp; sljit_uw shift; @@ -145,8 +145,8 @@ static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si reg, sl src1 = TMP_REG1; \ } -static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, - sljit_si dst, sljit_si src1, sljit_si src2) +static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_s32 src1, sljit_s32 src2) { switch (op) { case SLJIT_MOV: @@ -156,11 +156,11 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj return push_inst(compiler, OR | S(src2) | A(dst) | B(src2)); return SLJIT_SUCCESS; - case SLJIT_MOV_UI: - case SLJIT_MOV_SI: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: SLJIT_ASSERT(src1 == TMP_REG1); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_SI) + if (op == SLJIT_MOV_S32) return push_inst(compiler, EXTSW | S(src2) | A(dst)); return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 0)); } @@ -169,26 +169,26 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj } return SLJIT_SUCCESS; - case SLJIT_MOV_UB: - case SLJIT_MOV_SB: + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: SLJIT_ASSERT(src1 == TMP_REG1); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_SB) + if (op == SLJIT_MOV_S8) return push_inst(compiler, EXTSB | S(src2) | A(dst)); return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24)); } - else if ((flags & REG_DEST) && op == SLJIT_MOV_SB) + else if ((flags & REG_DEST) && op == SLJIT_MOV_S8) return push_inst(compiler, EXTSB | S(src2) | A(dst)); else { SLJIT_ASSERT(dst == src2); } return SLJIT_SUCCESS; - case SLJIT_MOV_UH: - case SLJIT_MOV_SH: + case SLJIT_MOV_U16: + case SLJIT_MOV_S16: 
SLJIT_ASSERT(src1 == TMP_REG1); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_SH) + if (op == SLJIT_MOV_S16) return push_inst(compiler, EXTSH | S(src2) | A(dst)); return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16)); } @@ -389,7 +389,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_si emit_const(struct sljit_compiler *compiler, sljit_si reg, sljit_sw init_value) +static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw init_value) { FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(init_value >> 48))); FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value >> 32))); @@ -398,18 +398,19 @@ static SLJIT_INLINE sljit_si emit_const(struct sljit_compiler *compiler, sljit_s return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value)); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) { sljit_ins *inst = (sljit_ins*)addr; - inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 48) & 0xffff); - inst[1] = (inst[1] & 0xffff0000) | ((new_addr >> 32) & 0xffff); - inst[3] = (inst[3] & 0xffff0000) | ((new_addr >> 16) & 0xffff); - inst[4] = (inst[4] & 0xffff0000) | (new_addr & 0xffff); + inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 48) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | ((new_target >> 32) & 0xffff); + inst[3] = (inst[3] & 0xffff0000) | ((new_target >> 16) & 0xffff); + inst[4] = (inst[4] & 0xffff0000) | (new_target & 0xffff); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 5); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw 
executable_offset) { sljit_ins *inst = (sljit_ins*)addr; @@ -417,5 +418,6 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta inst[1] = (inst[1] & 0xffff0000) | ((new_constant >> 32) & 0xffff); inst[3] = (inst[3] & 0xffff0000) | ((new_constant >> 16) & 0xffff); inst[4] = (inst[4] & 0xffff0000) | (new_constant & 0xffff); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 5); } diff --git a/pcre2/src/sljit/sljitNativePPC_common.c b/pcre2/src/sljit/sljitNativePPC_common.c index b6a043f4e..150c0bf9f 100644 --- a/pcre2/src/sljit/sljitNativePPC_common.c +++ b/pcre2/src/sljit/sljitNativePPC_common.c @@ -24,14 +24,14 @@ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) { return "PowerPC" SLJIT_CPUINFO; } /* Length of an instruction word. Both for ppc-32 and ppc-64. 
*/ -typedef sljit_ui sljit_ins; +typedef sljit_u32 sljit_ins; #if ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && (defined _AIX)) \ || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) @@ -46,6 +46,8 @@ typedef sljit_ui sljit_ins; #define SLJIT_PASS_ENTRY_ADDR_TO_CALL 1 #endif +#if (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL) + static void ppc_cache_flush(sljit_ins *from, sljit_ins *to) { #ifdef _AIX @@ -87,6 +89,8 @@ static void ppc_cache_flush(sljit_ins *from, sljit_ins *to) #endif /* _AIX */ } +#endif /* (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL) */ + #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) #define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) @@ -101,7 +105,7 @@ static void ppc_cache_flush(sljit_ins *from, sljit_ins *to) #define TMP_FREG1 (0) #define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) -static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = { +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = { 0, 3, 4, 5, 6, 7, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 8, 9, 10, 31, 12 }; @@ -236,7 +240,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct } #endif -static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins) +static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) { sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); FAIL_IF(!ptr); @@ -245,7 +249,7 @@ static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins) return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code) +static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) { sljit_sw diff; sljit_uw target_addr; @@ -263,7 +267,7 @@ static SLJIT_INLINE sljit_si detect_jump_type(struct 
sljit_jump *jump, sljit_ins target_addr = jump->u.target; else { SLJIT_ASSERT(jump->flags & JUMP_LABEL); - target_addr = (sljit_uw)(code + jump->u.label->size); + target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; } #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) @@ -271,7 +275,7 @@ static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_ins goto keep_address; #endif - diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr)) & ~0x3l; + diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr) - executable_offset) & ~0x3l; extra_jump_flags = 0; if (jump->flags & IS_COND) { @@ -292,6 +296,7 @@ static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_ins jump->flags |= PATCH_B | extra_jump_flags; return 1; } + if (target_addr <= 0x03ffffff) { jump->flags |= PATCH_B | PATCH_ABS_B | extra_jump_flags; return 1; @@ -305,6 +310,7 @@ keep_address: jump->flags |= PATCH_ABS32; return 1; } + if (target_addr <= 0x7fffffffffffl) { jump->flags |= PATCH_ABS48; return 1; @@ -322,6 +328,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_ins *buf_ptr; sljit_ins *buf_end; sljit_uw word_count; + sljit_sw executable_offset; sljit_uw addr; struct sljit_label *label; @@ -345,9 +352,12 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil code_ptr = code; word_count = 0; + executable_offset = SLJIT_EXEC_OFFSET(code); + label = compiler->labels; jump = compiler->jumps; const_ = compiler->consts; + do { buf_ptr = (sljit_ins*)buf->memory; buf_end = buf_ptr + (buf->used_size >> 2); @@ -359,7 +369,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil /* These structures are ordered by their address. */ if (label && label->size == word_count) { /* Just recording the address. 
*/ - label->addr = (sljit_uw)code_ptr; + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = code_ptr - code; label = label->next; } @@ -369,7 +379,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil #else jump->addr = (sljit_uw)(code_ptr - 6); #endif - if (detect_jump_type(jump, code_ptr, code)) { + if (detect_jump_type(jump, code_ptr, code, executable_offset)) { #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) code_ptr[-3] = code_ptr[0]; code_ptr -= 3; @@ -416,7 +426,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } while (buf); if (label && label->size == word_count) { - label->addr = (sljit_uw)code_ptr; + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = code_ptr - code; label = label->next; } @@ -434,11 +444,12 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil while (jump) { do { addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; - buf_ptr = (sljit_ins*)jump->addr; + buf_ptr = (sljit_ins *)jump->addr; + if (jump->flags & PATCH_B) { if (jump->flags & IS_COND) { if (!(jump->flags & PATCH_ABS_B)) { - addr = addr - jump->addr; + addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset); SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000); *buf_ptr = BCx | (addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001); } @@ -449,7 +460,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } else { if (!(jump->flags & PATCH_ABS_B)) { - addr = addr - jump->addr; + addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset); SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000); *buf_ptr = Bx | (addr & 0x03fffffc) | ((*buf_ptr) & 0x1); } @@ -460,6 +471,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } break; } + /* Set the fields of immediate loads. 
*/ #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff); @@ -488,19 +500,25 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); - SLJIT_CACHE_FLUSH(code, code_ptr); + + code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) if (((sljit_sw)code_ptr) & 0x4) code_ptr++; - sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code); - return code_ptr; -#else - sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code); - return code_ptr; #endif + sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code); +#endif + + code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + SLJIT_CACHE_FLUSH(code, code_ptr); + +#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) + return code_ptr; #else return code; #endif @@ -571,32 +589,32 @@ ALT_FORM6 0x200000 */ #define STACK_LOAD LD #endif -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, - sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, - sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_si i, tmp, offs; + sljit_s32 i, tmp, offs; CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); 
set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); FAIL_IF(push_inst(compiler, MFLR | D(0))); - offs = -(sljit_si)(sizeof(sljit_sw)); + offs = -(sljit_s32)(sizeof(sljit_sw)); FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_SP) | IMM(offs))); tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; for (i = SLJIT_S0; i >= tmp; i--) { - offs -= (sljit_si)(sizeof(sljit_sw)); + offs -= (sljit_s32)(sizeof(sljit_sw)); FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs))); } for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { - offs -= (sljit_si)(sizeof(sljit_sw)); + offs -= (sljit_s32)(sizeof(sljit_sw)); FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs))); } - SLJIT_ASSERT(offs == -(sljit_si)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1)); + SLJIT_ASSERT(offs == -(sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1)); #if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2) FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw)))); @@ -635,9 +653,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, - sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, - sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { CHECK_ERROR(); CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); @@ -648,9 +666,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compi return SLJIT_SUCCESS; } 
-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { - sljit_si i, tmp, offs; + sljit_s32 i, tmp, offs; CHECK_ERROR(); CHECK(check_sljit_emit_return(compiler, op, src, srcw)); @@ -670,18 +688,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw)))); #endif - offs = -(sljit_si)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1); + offs = -(sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1); tmp = compiler->scratches; for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) { FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs))); - offs += (sljit_si)(sizeof(sljit_sw)); + offs += (sljit_s32)(sizeof(sljit_sw)); } tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; for (i = tmp; i <= SLJIT_S0; i++) { FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs))); - offs += (sljit_si)(sizeof(sljit_sw)); + offs += (sljit_s32)(sizeof(sljit_sw)); } FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_SP) | IMM(offs))); @@ -722,7 +740,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi (((inst) & ~(INT_ALIGNED | UPDATE_REQ)) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg))) #endif -static SLJIT_CONST sljit_ins data_transfer_insts[64 + 8] = { +static const sljit_ins data_transfer_insts[64 + 8] = { /* -------- Unsigned -------- */ @@ -841,7 +859,7 @@ static SLJIT_CONST sljit_ins data_transfer_insts[64 + 8] = { #undef ARCH_32_64 /* Simple cases, (no caching is required). 
*/ -static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw) +static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) { sljit_ins inst; @@ -891,7 +909,7 @@ static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_fl /* See getput_arg below. Note: can_cache is called only for binary operators. Those operator always uses word arguments without write back. */ -static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) +static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) { sljit_sw high_short, next_high_short; #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) @@ -940,9 +958,9 @@ static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_ #endif /* Emit the necessary instructions. See can_cache above. */ -static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) +static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) { - sljit_si tmp_r; + sljit_s32 tmp_r; sljit_ins inst; sljit_sw high_short, next_high_short; #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) @@ -992,7 +1010,7 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, #endif arg &= REG_MASK; - high_short = (sljit_si)(argw + ((argw & 0x8000) << 1)) & ~0xffff; + high_short = (sljit_s32)(argw + ((argw & 0x8000) << 1)) & ~0xffff; /* The getput_arg_fast should handle this otherwise. 
*/ #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) SLJIT_ASSERT(high_short && high_short <= 0x7fffffffl && high_short >= -0x80000000l); @@ -1010,7 +1028,7 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, } else if (compiler->cache_arg != (SLJIT_MEM | arg) || high_short != compiler->cache_argw) { if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK)) { - next_high_short = (sljit_si)(next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff; + next_high_short = (sljit_s32)(next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff; if (high_short == next_high_short) { compiler->cache_arg = SLJIT_MEM | arg; compiler->cache_argw = high_short; @@ -1107,27 +1125,27 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, #endif } -static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w) +static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) { if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) return compiler->error; return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); } -static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si input_flags, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 input_flags, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { /* arg1 goes to TMP_REG1 or src reg arg2 goes to TMP_REG2, imm or src reg TMP_REG3 can be used for caching result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. 
*/ - sljit_si dst_r; - sljit_si src1_r; - sljit_si src2_r; - sljit_si sugg_src2_r = TMP_REG2; - sljit_si flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_FORM6 | ALT_SIGN_EXT | ALT_SET_FLAGS); + sljit_s32 dst_r; + sljit_s32 src1_r; + sljit_s32 src2_r; + sljit_s32 sugg_src2_r = TMP_REG2; + sljit_s32 flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_FORM6 | ALT_SIGN_EXT | ALT_SET_FLAGS); if (!(input_flags & ALT_KEEP_CACHE)) { compiler->cache_arg = 0; @@ -1136,14 +1154,14 @@ static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si i /* Destination check. */ if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { - if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM)) + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM)) return SLJIT_SUCCESS; dst_r = TMP_REG2; } else if (FAST_IS_REG(dst)) { dst_r = dst; flags |= REG_DEST; - if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) sugg_src2_r = dst_r; } else { @@ -1178,7 +1196,7 @@ static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si i if (FAST_IS_REG(src2)) { src2_r = src2; flags |= REG2_SOURCE; - if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) + if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) dst_r = src2_r; } else if (src2 & SLJIT_IMM) { @@ -1243,10 +1261,10 @@ static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si i return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - sljit_si int_op = op & SLJIT_INT_OP; + sljit_s32 int_op = op & SLJIT_I32_OP; #endif CHECK_ERROR(); @@ -1257,33 +1275,33 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler 
*compiler case SLJIT_BREAKPOINT: case SLJIT_NOP: return push_inst(compiler, NOP); - case SLJIT_LUMUL: - case SLJIT_LSMUL: + case SLJIT_LMUL_UW: + case SLJIT_LMUL_SW: FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0))); #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1))); - return push_inst(compiler, (op == SLJIT_LUMUL ? MULHDU : MULHD) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1)); + return push_inst(compiler, (op == SLJIT_LMUL_UW ? MULHDU : MULHD) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1)); #else FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1))); - return push_inst(compiler, (op == SLJIT_LUMUL ? MULHWU : MULHW) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1)); + return push_inst(compiler, (op == SLJIT_LMUL_UW ? MULHWU : MULHW) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1)); #endif - case SLJIT_UDIVMOD: - case SLJIT_SDIVMOD: + case SLJIT_DIVMOD_UW: + case SLJIT_DIVMOD_SW: FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0))); #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - FAIL_IF(push_inst(compiler, (int_op ? (op == SLJIT_UDIVMOD ? DIVWU : DIVW) : (op == SLJIT_UDIVMOD ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1))); + FAIL_IF(push_inst(compiler, (int_op ? (op == SLJIT_DIVMOD_UW ? DIVWU : DIVW) : (op == SLJIT_DIVMOD_UW ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1))); FAIL_IF(push_inst(compiler, (int_op ? MULLW : MULLD) | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1))); #else - FAIL_IF(push_inst(compiler, (op == SLJIT_UDIVMOD ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1))); + FAIL_IF(push_inst(compiler, (op == SLJIT_DIVMOD_UW ? 
DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1))); FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1))); #endif return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1)); - case SLJIT_UDIVI: - case SLJIT_SDIVI: + case SLJIT_DIV_UW: + case SLJIT_DIV_SW: #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - return push_inst(compiler, (int_op ? (op == SLJIT_UDIVI ? DIVWU : DIVW) : (op == SLJIT_UDIVI ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)); + return push_inst(compiler, (int_op ? (op == SLJIT_DIV_UW ? DIVWU : DIVW) : (op == SLJIT_DIV_UW ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)); #else - return push_inst(compiler, (op == SLJIT_UDIVI ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)); + return push_inst(compiler, (op == SLJIT_DIV_UW ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)); #endif } @@ -1293,12 +1311,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler #define EMIT_MOV(type, type_flags, type_cast) \ emit_op(compiler, (src & SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? type_cast srcw : srcw) -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { - sljit_si flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0; - sljit_si op_flags = GET_ALL_FLAGS(op); + sljit_s32 flags = GET_FLAGS(op) ? 
ALT_SET_FLAGS : 0; + sljit_s32 op_flags = GET_ALL_FLAGS(op); CHECK_ERROR(); CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); @@ -1312,21 +1330,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler if (op_flags & SLJIT_SET_O) FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO))); - if (op_flags & SLJIT_INT_OP) { + if (op_flags & SLJIT_I32_OP) { if (op < SLJIT_NOT) { if (FAST_IS_REG(src) && src == dst) { if (!TYPE_CAST_NEEDED(op)) return SLJIT_SUCCESS; } #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if (op == SLJIT_MOV_SI && (src & SLJIT_MEM)) - op = SLJIT_MOV_UI; - if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM)) - op = SLJIT_MOVU_UI; - if (op == SLJIT_MOV_UI && (src & SLJIT_IMM)) - op = SLJIT_MOV_SI; - if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM)) - op = SLJIT_MOVU_SI; + if (op == SLJIT_MOV_S32 && (src & SLJIT_MEM)) + op = SLJIT_MOV_U32; + if (op == SLJIT_MOVU_S32 && (src & SLJIT_MEM)) + op = SLJIT_MOVU_U32; + if (op == SLJIT_MOV_U32 && (src & SLJIT_IMM)) + op = SLJIT_MOV_S32; + if (op == SLJIT_MOVU_U32 && (src & SLJIT_IMM)) + op = SLJIT_MOVU_S32; #endif } #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) @@ -1334,7 +1352,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler /* Most operations expect sign extended arguments. 
*/ flags |= INT_DATA | SIGNED_DATA; if (src & SLJIT_IMM) - srcw = (sljit_si)srcw; + srcw = (sljit_s32)srcw; } #endif } @@ -1343,58 +1361,58 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler case SLJIT_MOV: case SLJIT_MOV_P: #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - case SLJIT_MOV_UI: - case SLJIT_MOV_SI: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: #endif return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - case SLJIT_MOV_UI: - return EMIT_MOV(SLJIT_MOV_UI, INT_DATA, (sljit_ui)); + case SLJIT_MOV_U32: + return EMIT_MOV(SLJIT_MOV_U32, INT_DATA, (sljit_u32)); - case SLJIT_MOV_SI: - return EMIT_MOV(SLJIT_MOV_SI, INT_DATA | SIGNED_DATA, (sljit_si)); + case SLJIT_MOV_S32: + return EMIT_MOV(SLJIT_MOV_S32, INT_DATA | SIGNED_DATA, (sljit_s32)); #endif - case SLJIT_MOV_UB: - return EMIT_MOV(SLJIT_MOV_UB, BYTE_DATA, (sljit_ub)); + case SLJIT_MOV_U8: + return EMIT_MOV(SLJIT_MOV_U8, BYTE_DATA, (sljit_u8)); - case SLJIT_MOV_SB: - return EMIT_MOV(SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA, (sljit_sb)); + case SLJIT_MOV_S8: + return EMIT_MOV(SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA, (sljit_s8)); - case SLJIT_MOV_UH: - return EMIT_MOV(SLJIT_MOV_UH, HALF_DATA, (sljit_uh)); + case SLJIT_MOV_U16: + return EMIT_MOV(SLJIT_MOV_U16, HALF_DATA, (sljit_u16)); - case SLJIT_MOV_SH: - return EMIT_MOV(SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA, (sljit_sh)); + case SLJIT_MOV_S16: + return EMIT_MOV(SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA, (sljit_s16)); case SLJIT_MOVU: case SLJIT_MOVU_P: #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - case SLJIT_MOVU_UI: - case SLJIT_MOVU_SI: + case SLJIT_MOVU_U32: + case SLJIT_MOVU_S32: #endif return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - case SLJIT_MOVU_UI: - return EMIT_MOV(SLJIT_MOV_UI, INT_DATA | 
WRITE_BACK, (sljit_ui)); + case SLJIT_MOVU_U32: + return EMIT_MOV(SLJIT_MOV_U32, INT_DATA | WRITE_BACK, (sljit_u32)); - case SLJIT_MOVU_SI: - return EMIT_MOV(SLJIT_MOV_SI, INT_DATA | SIGNED_DATA | WRITE_BACK, (sljit_si)); + case SLJIT_MOVU_S32: + return EMIT_MOV(SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | WRITE_BACK, (sljit_s32)); #endif - case SLJIT_MOVU_UB: - return EMIT_MOV(SLJIT_MOV_UB, BYTE_DATA | WRITE_BACK, (sljit_ub)); + case SLJIT_MOVU_U8: + return EMIT_MOV(SLJIT_MOV_U8, BYTE_DATA | WRITE_BACK, (sljit_u8)); - case SLJIT_MOVU_SB: - return EMIT_MOV(SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA | WRITE_BACK, (sljit_sb)); + case SLJIT_MOVU_S8: + return EMIT_MOV(SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA | WRITE_BACK, (sljit_s8)); - case SLJIT_MOVU_UH: - return EMIT_MOV(SLJIT_MOV_UH, HALF_DATA | WRITE_BACK, (sljit_uh)); + case SLJIT_MOVU_U16: + return EMIT_MOV(SLJIT_MOV_U16, HALF_DATA | WRITE_BACK, (sljit_u16)); - case SLJIT_MOVU_SH: - return EMIT_MOV(SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA | WRITE_BACK, (sljit_sh)); + case SLJIT_MOVU_S16: + return EMIT_MOV(SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA | WRITE_BACK, (sljit_s16)); case SLJIT_NOT: return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw); @@ -1404,7 +1422,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler case SLJIT_CLZ: #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_INT_OP) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_I32_OP) ? 
0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw); #else return emit_op(compiler, SLJIT_CLZ, flags, dst, dstw, TMP_REG1, 0, src, srcw); #endif @@ -1448,12 +1466,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler ((src) & SLJIT_IMM) #endif -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { - sljit_si flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0; + sljit_s32 flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0; CHECK_ERROR(); CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); @@ -1467,13 +1485,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler src2 = TMP_ZERO; #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if (op & SLJIT_INT_OP) { + if (op & SLJIT_I32_OP) { /* Most operations expect sign extended arguments. */ flags |= INT_DATA | SIGNED_DATA; if (src1 & SLJIT_IMM) - src1w = (sljit_si)(src1w); + src1w = (sljit_s32)(src1w); if (src2 & SLJIT_IMM) - src2w = (sljit_si)(src2w); + src2w = (sljit_s32)(src2w); if (GET_FLAGS(op)) flags |= ALT_SIGN_EXT; } @@ -1549,7 +1567,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler } if (dst == SLJIT_UNUSED && (op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S)) && !(op & (SLJIT_SET_O | SLJIT_SET_C))) { if (!(op & SLJIT_SET_U)) { - /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */ + /* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. 
*/ if (TEST_SL_IMM(src2, src2w)) { compiler->imm = src2w & 0xffff; return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); @@ -1560,7 +1578,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler } } if (!(op & (SLJIT_SET_E | SLJIT_SET_S))) { - /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */ + /* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */ if (TEST_UL_IMM(src2, src2w)) { compiler->imm = src2w & 0xffff; return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); @@ -1579,7 +1597,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); } } - /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */ + /* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */ return emit_op(compiler, SLJIT_SUB, flags | (!(op & SLJIT_SET_U) ? 
0 : ALT_FORM6), dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUBC: @@ -1587,7 +1605,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler case SLJIT_MUL: #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if (op & SLJIT_INT_OP) + if (op & SLJIT_I32_OP) flags |= ALT_FORM2; #endif if (!GET_FLAGS(op)) { @@ -1643,7 +1661,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler case SLJIT_SHL: case SLJIT_LSHR: #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if (op & SLJIT_INT_OP) + if (op & SLJIT_I32_OP) flags |= ALT_FORM2; #endif if (src2 & SLJIT_IMM) { @@ -1656,20 +1674,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) { CHECK_REG_INDEX(check_sljit_get_register_index(reg)); return reg_map[reg]; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) { CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); return reg; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_si size) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_s32 size) { CHECK_ERROR(); CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); @@ -1681,7 +1699,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *co /* Floating point operators */ /* --------------------------------------------------------------------- */ -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) { #ifdef SLJIT_IS_FPU_AVAILABLE return SLJIT_IS_FPU_AVAILABLE; @@ -1691,8 +1709,8 @@ 
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) #endif } -#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 6)) -#define SELECT_FOP(op, single, double) ((op & SLJIT_SINGLE_OP) ? single : double) +#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 6)) +#define SELECT_FOP(op, single, double) ((op & SLJIT_F32_OP) ? single : double) #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) #define FLOAT_TMP_MEM_OFFSET (6 * sizeof(sljit_sw)) @@ -1709,9 +1727,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) #endif /* SLJIT_CONFIG_PPC_64 */ -static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { if (src & SLJIT_MEM) { /* We can ignore the temporary data store on the stack from caching point of view. */ @@ -1721,12 +1739,12 @@ static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler * #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) op = GET_OPCODE(op); - FAIL_IF(push_inst(compiler, (op == SLJIT_CONVI_FROMD ? FCTIWZ : FCTIDZ) | FD(TMP_FREG1) | FB(src))); + FAIL_IF(push_inst(compiler, (op == SLJIT_CONV_S32_FROM_F64 ? 
FCTIWZ : FCTIDZ) | FD(TMP_FREG1) | FB(src))); if (dst == SLJIT_UNUSED) return SLJIT_SUCCESS; - if (op == SLJIT_CONVW_FROMD) { + if (op == SLJIT_CONV_SW_FROM_F64) { if (FAST_IS_REG(dst)) { FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0)); return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0); @@ -1777,21 +1795,21 @@ static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler * return push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(dst & REG_MASK) | B(dstw)); } -static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (src & SLJIT_IMM) { - if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) - srcw = (sljit_si)srcw; + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) + srcw = (sljit_s32)srcw; FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); src = TMP_REG1; } - else if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) { + else if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) { if (FAST_IS_REG(src)) FAIL_IF(push_inst(compiler, EXTSW | S(src) | A(TMP_REG1))); else @@ -1810,14 +1828,14 @@ static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler * if (dst & SLJIT_MEM) return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0); - if (op & SLJIT_SINGLE_OP) + if (op & SLJIT_F32_OP) return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r)); return SLJIT_SUCCESS; #else - sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; - sljit_si invert_sign = 1; + sljit_s32 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; + sljit_s32 invert_sign = 1; if (src & SLJIT_IMM) { FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ 0x80000000)); @@ -1848,16 +1866,16 @@ static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler * if (dst & SLJIT_MEM) return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0); - if (op & SLJIT_SINGLE_OP) + if (op & SLJIT_F32_OP) return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r)); return SLJIT_SUCCESS; #endif } -static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { if (src1 & SLJIT_MEM) { FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); @@ -1872,21 +1890,21 @@ static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler return push_inst(compiler, FCMPU | CRD(4) | FA(src1) | FB(src2)); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { - sljit_si dst_r; + sljit_s32 dst_r; CHECK_ERROR(); compiler->cache_arg = 0; compiler->cache_argw = 0; - SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error); + SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error); SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); - if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) - op ^= SLJIT_SINGLE_OP; + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) + op ^= SLJIT_F32_OP; dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; @@ -1896,14 +1914,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile } switch (GET_OPCODE(op)) { - case SLJIT_CONVD_FROMS: - op ^= SLJIT_SINGLE_OP; - if (op & SLJIT_SINGLE_OP) { + case SLJIT_CONV_F64_FROM_F32: + op ^= SLJIT_F32_OP; + if (op & SLJIT_F32_OP) { FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(src))); break; } /* Fall through. */ - case SLJIT_DMOV: + case SLJIT_MOV_F64: if (src != dst_r) { if (dst_r != TMP_FREG1) FAIL_IF(push_inst(compiler, FMR | FD(dst_r) | FB(src))); @@ -1911,10 +1929,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile dst_r = src; } break; - case SLJIT_DNEG: + case SLJIT_NEG_F64: FAIL_IF(push_inst(compiler, FNEG | FD(dst_r) | FB(src))); break; - case SLJIT_DABS: + case SLJIT_ABS_F64: FAIL_IF(push_inst(compiler, FABS | FD(dst_r) | FB(src))); break; } @@ -1924,12 +1942,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { - sljit_si dst_r, flags = 0; + sljit_s32 dst_r, flags = 0; CHECK_ERROR(); CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); @@ -1979,19 +1997,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile src2 = TMP_FREG2; switch (GET_OPCODE(op)) { - case SLJIT_DADD: + case SLJIT_ADD_F64: FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_r) | FA(src1) | FB(src2))); break; - case SLJIT_DSUB: + case SLJIT_SUB_F64: FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_r) | FA(src1) | FB(src2))); break; - case 
SLJIT_DMUL: + case SLJIT_MUL_F64: FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_r) | FA(src1) | FC(src2) /* FMUL use FC as src2 */)); break; - case SLJIT_DDIV: + case SLJIT_DIV_F64: FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_r) | FA(src1) | FB(src2))); break; } @@ -2009,7 +2027,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile /* Other instructions */ /* --------------------------------------------------------------------- */ -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { CHECK_ERROR(); CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); @@ -2027,7 +2045,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) { CHECK_ERROR(); CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); @@ -2065,7 +2083,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi return label; } -static sljit_ins get_bo_bi_flags(sljit_si type) +static sljit_ins get_bo_bi_flags(sljit_s32 type) { switch (type) { case SLJIT_EQUAL: @@ -2075,19 +2093,19 @@ static sljit_ins get_bo_bi_flags(sljit_si type) return (4 << 21) | (2 << 16); case SLJIT_LESS: - case SLJIT_D_LESS: + case SLJIT_LESS_F64: return (12 << 21) | ((4 + 0) << 16); case SLJIT_GREATER_EQUAL: - case SLJIT_D_GREATER_EQUAL: + case SLJIT_GREATER_EQUAL_F64: return (4 << 21) | ((4 + 0) << 16); case SLJIT_GREATER: - case SLJIT_D_GREATER: + case SLJIT_GREATER_F64: return (12 << 21) | ((4 
+ 1) << 16); case SLJIT_LESS_EQUAL: - case SLJIT_D_LESS_EQUAL: + case SLJIT_LESS_EQUAL_F64: return (4 << 21) | ((4 + 1) << 16); case SLJIT_SIG_LESS: @@ -2110,16 +2128,16 @@ static sljit_ins get_bo_bi_flags(sljit_si type) case SLJIT_MUL_NOT_OVERFLOW: return (4 << 21) | (3 << 16); - case SLJIT_D_EQUAL: + case SLJIT_EQUAL_F64: return (12 << 21) | ((4 + 2) << 16); - case SLJIT_D_NOT_EQUAL: + case SLJIT_NOT_EQUAL_F64: return (4 << 21) | ((4 + 2) << 16); - case SLJIT_D_UNORDERED: + case SLJIT_UNORDERED_F64: return (12 << 21) | ((4 + 3) << 16); - case SLJIT_D_ORDERED: + case SLJIT_ORDERED_F64: return (4 << 21) | ((4 + 3) << 16); default: @@ -2128,7 +2146,7 @@ static sljit_ins get_bo_bi_flags(sljit_si type) } } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) { struct sljit_jump *jump; sljit_ins bo_bi_flags; @@ -2160,10 +2178,10 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile return jump; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) { struct sljit_jump *jump = NULL; - sljit_si src_r; + sljit_s32 src_r; CHECK_ERROR(); CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); @@ -2211,13 +2229,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil #define INVERT_BIT(dst) \ FAIL_IF(push_inst(compiler, XORI | S(dst) | A(dst) | 0x1)); -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw, - sljit_si type) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, 
sljit_sw dstw, + sljit_s32 src, sljit_sw srcw, + sljit_s32 type) { - sljit_si reg, input_flags; - sljit_si flags = GET_ALL_FLAGS(op); + sljit_s32 reg, input_flags; + sljit_s32 flags = GET_ALL_FLAGS(op); sljit_sw original_dstw = dstw; CHECK_ERROR(); @@ -2235,7 +2253,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com if (op >= SLJIT_ADD && (src & SLJIT_MEM)) { ADJUST_LOCAL_OFFSET(src, srcw); #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - input_flags = (flags & SLJIT_INT_OP) ? INT_DATA : WORD_DATA; + input_flags = (flags & SLJIT_I32_OP) ? INT_DATA : WORD_DATA; #else input_flags = WORD_DATA; #endif @@ -2255,23 +2273,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com break; case SLJIT_LESS: - case SLJIT_D_LESS: + case SLJIT_LESS_F64: GET_CR_BIT(4 + 0, reg); break; case SLJIT_GREATER_EQUAL: - case SLJIT_D_GREATER_EQUAL: + case SLJIT_GREATER_EQUAL_F64: GET_CR_BIT(4 + 0, reg); INVERT_BIT(reg); break; case SLJIT_GREATER: - case SLJIT_D_GREATER: + case SLJIT_GREATER_F64: GET_CR_BIT(4 + 1, reg); break; case SLJIT_LESS_EQUAL: - case SLJIT_D_LESS_EQUAL: + case SLJIT_LESS_EQUAL_F64: GET_CR_BIT(4 + 1, reg); INVERT_BIT(reg); break; @@ -2305,20 +2323,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com INVERT_BIT(reg); break; - case SLJIT_D_EQUAL: + case SLJIT_EQUAL_F64: GET_CR_BIT(4 + 2, reg); break; - case SLJIT_D_NOT_EQUAL: + case SLJIT_NOT_EQUAL_F64: GET_CR_BIT(4 + 2, reg); INVERT_BIT(reg); break; - case SLJIT_D_UNORDERED: + case SLJIT_UNORDERED_F64: GET_CR_BIT(4 + 3, reg); break; - case SLJIT_D_ORDERED: + case SLJIT_ORDERED_F64: GET_CR_BIT(4 + 3, reg); INVERT_BIT(reg); break; @@ -2333,7 +2351,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com if (op == SLJIT_MOV) input_flags = WORD_DATA; else { - op = SLJIT_MOV_UI; + op = SLJIT_MOV_U32; input_flags = INT_DATA; } #else @@ -2352,10 +2370,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si 
sljit_emit_op_flags(struct sljit_compiler *com return sljit_emit_op2(compiler, op | flags, dst, original_dstw, src, srcw, TMP_REG2, 0); } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) { struct sljit_const *const_; - sljit_si reg; + sljit_s32 reg; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); diff --git a/pcre2/src/sljit/sljitNativeSPARC_32.c b/pcre2/src/sljit/sljitNativeSPARC_32.c index 4a2e6293d..4a206f11d 100644 --- a/pcre2/src/sljit/sljitNativeSPARC_32.c +++ b/pcre2/src/sljit/sljitNativeSPARC_32.c @@ -24,7 +24,7 @@ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst, sljit_sw imm) +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw imm) { if (imm <= SIMM_MAX && imm >= SIMM_MIN) return push_inst(compiler, OR | D(dst) | S1(0) | IMM(imm), DR(dst)); @@ -35,26 +35,26 @@ static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst, sl #define ARG2(flags, src2) ((flags & SRC2_IMM) ? 
IMM(src2) : S2(src2)) -static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, - sljit_si dst, sljit_si src1, sljit_sw src2) +static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_s32 src1, sljit_sw src2) { SLJIT_COMPILE_ASSERT(ICC_IS_SET == SET_FLAGS, icc_is_set_and_set_flags_must_be_the_same); switch (op) { case SLJIT_MOV: - case SLJIT_MOV_UI: - case SLJIT_MOV_SI: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: case SLJIT_MOV_P: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if (dst != src2) return push_inst(compiler, OR | D(dst) | S1(0) | S2(src2), DR(dst)); return SLJIT_SUCCESS; - case SLJIT_MOV_UB: - case SLJIT_MOV_SB: + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_UB) + if (op == SLJIT_MOV_U8) return push_inst(compiler, AND | D(dst) | S1(src2) | IMM(0xff), DR(dst)); FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(24), DR(dst))); return push_inst(compiler, SRA | D(dst) | S1(dst) | IMM(24), DR(dst)); @@ -63,12 +63,12 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj SLJIT_ASSERT_STOP(); return SLJIT_SUCCESS; - case SLJIT_MOV_UH: - case SLJIT_MOV_SH: + case SLJIT_MOV_U16: + case SLJIT_MOV_S16: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(16), DR(dst))); - return push_inst(compiler, (op == SLJIT_MOV_SH ? SRA : SRL) | D(dst) | S1(dst) | IMM(16), DR(dst)); + return push_inst(compiler, (op == SLJIT_MOV_S16 ? 
SRA : SRL) | D(dst) | S1(dst) | IMM(16), DR(dst)); } else if (dst != src2) SLJIT_ASSERT_STOP(); @@ -139,26 +139,28 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_si emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw init_value) +static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value) { FAIL_IF(push_inst(compiler, SETHI | D(dst) | ((init_value >> 10) & 0x3fffff), DR(dst))); return push_inst(compiler, OR | D(dst) | S1(dst) | IMM_ARG | (init_value & 0x3ff), DR(dst)); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) { - sljit_ins *inst = (sljit_ins*)addr; + sljit_ins *inst = (sljit_ins *)addr; - inst[0] = (inst[0] & 0xffc00000) | ((new_addr >> 10) & 0x3fffff); - inst[1] = (inst[1] & 0xfffffc00) | (new_addr & 0x3ff); + inst[0] = (inst[0] & 0xffc00000) | ((new_target >> 10) & 0x3fffff); + inst[1] = (inst[1] & 0xfffffc00) | (new_target & 0x3ff); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 2); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_ins *inst = (sljit_ins*)addr; + sljit_ins *inst = (sljit_ins *)addr; inst[0] = (inst[0] & 0xffc00000) | ((new_constant >> 10) & 0x3fffff); inst[1] = (inst[1] & 0xfffffc00) | (new_constant & 0x3ff); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 2); } diff --git a/pcre2/src/sljit/sljitNativeSPARC_common.c b/pcre2/src/sljit/sljitNativeSPARC_common.c index 327c4267b..7445fc472 100644 --- a/pcre2/src/sljit/sljitNativeSPARC_common.c +++ 
b/pcre2/src/sljit/sljitNativeSPARC_common.c @@ -24,14 +24,16 @@ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) { return "SPARC" SLJIT_CPUINFO; } /* Length of an instruction word Both for sparc-32 and sparc-64 */ -typedef sljit_ui sljit_ins; +typedef sljit_u32 sljit_ins; + +#if (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL) static void sparc_cache_flush(sljit_ins *from, sljit_ins *to) { @@ -82,6 +84,8 @@ static void sparc_cache_flush(sljit_ins *from, sljit_ins *to) #endif } +#endif /* (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL) */ + /* TMP_REG2 is not used by getput_arg */ #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) @@ -91,7 +95,7 @@ static void sparc_cache_flush(sljit_ins *from, sljit_ins *to) #define TMP_FREG1 (0) #define TMP_FREG2 ((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) << 1) -static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { 0, 8, 9, 10, 13, 29, 28, 27, 23, 22, 21, 20, 19, 18, 17, 16, 26, 25, 24, 14, 1, 11, 12, 15 }; @@ -181,7 +185,7 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { /* dest_reg is the absolute name of the register Useful for reordering instructions in the delay slot. 
*/ -static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_si delay_slot) +static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 delay_slot) { sljit_ins *ptr; SLJIT_ASSERT((delay_slot & DST_INS_MASK) == UNMOVABLE_INS @@ -195,7 +199,7 @@ static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_ return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code) +static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) { sljit_sw diff; sljit_uw target_addr; @@ -209,7 +213,7 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i target_addr = jump->u.target; else { SLJIT_ASSERT(jump->flags & JUMP_LABEL); - target_addr = (sljit_uw)(code + jump->u.label->size); + target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; } inst = (sljit_ins*)jump->addr; @@ -235,8 +239,9 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i if (jump->flags & IS_COND) inst--; + diff = ((sljit_sw)target_addr - (sljit_sw)(inst - 1) - executable_offset) >> 2; + if (jump->flags & IS_MOVABLE) { - diff = ((sljit_sw)target_addr - (sljit_sw)(inst - 1)) >> 2; if (diff <= MAX_DISP && diff >= MIN_DISP) { jump->flags |= PATCH_B; inst--; @@ -253,7 +258,8 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i } } - diff = ((sljit_sw)target_addr - (sljit_sw)(inst)) >> 2; + diff += sizeof(sljit_ins); + if (diff <= MAX_DISP && diff >= MIN_DISP) { jump->flags |= PATCH_B; if (jump->flags & IS_COND) @@ -276,6 +282,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_ins *buf_ptr; sljit_ins *buf_end; sljit_uw word_count; + sljit_sw executable_offset; sljit_uw addr; struct sljit_label *label; @@ -292,9 +299,12 @@ 
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil code_ptr = code; word_count = 0; + executable_offset = SLJIT_EXEC_OFFSET(code); + label = compiler->labels; jump = compiler->jumps; const_ = compiler->consts; + do { buf_ptr = (sljit_ins*)buf->memory; buf_end = buf_ptr + (buf->used_size >> 2); @@ -306,7 +316,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil /* These structures are ordered by their address. */ if (label && label->size == word_count) { /* Just recording the address. */ - label->addr = (sljit_uw)code_ptr; + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = code_ptr - code; label = label->next; } @@ -316,7 +326,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil #else jump->addr = (sljit_uw)(code_ptr - 6); #endif - code_ptr = detect_jump_type(jump, code_ptr, code); + code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset); jump = jump->next; } if (const_ && const_->addr == word_count) { @@ -332,7 +342,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } while (buf); if (label && label->size == word_count) { - label->addr = (sljit_uw)code_ptr; + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = code_ptr - code; label = label->next; } @@ -340,22 +350,22 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_ASSERT(!label); SLJIT_ASSERT(!jump); SLJIT_ASSERT(!const_); - SLJIT_ASSERT(code_ptr - code <= (sljit_si)compiler->size); + SLJIT_ASSERT(code_ptr - code <= (sljit_s32)compiler->size); jump = compiler->jumps; while (jump) { do { addr = (jump->flags & JUMP_LABEL) ? 
jump->u.label->addr : jump->u.target; - buf_ptr = (sljit_ins*)jump->addr; + buf_ptr = (sljit_ins *)jump->addr; if (jump->flags & PATCH_CALL) { - addr = (sljit_sw)(addr - jump->addr) >> 2; + addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; SLJIT_ASSERT((sljit_sw)addr <= 0x1fffffff && (sljit_sw)addr >= -0x20000000); buf_ptr[0] = CALL | (addr & 0x3fffffff); break; } if (jump->flags & PATCH_B) { - addr = (sljit_sw)(addr - jump->addr) >> 2; + addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; SLJIT_ASSERT((sljit_sw)addr <= MAX_DISP && (sljit_sw)addr >= MIN_DISP); buf_ptr[0] = (buf_ptr[0] & ~DISP_MASK) | (addr & DISP_MASK); break; @@ -374,7 +384,12 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); + + code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + SLJIT_CACHE_FLUSH(code, code_ptr); return code; } @@ -418,9 +433,9 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil #include "sljitNativeSPARC_64.c" #endif -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, - sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, - sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); @@ -442,9 +457,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil return 
SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, - sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, - sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { CHECK_ERROR(); CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); @@ -454,7 +469,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compi return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { CHECK_ERROR(); CHECK(check_sljit_emit_return(compiler, op, src, srcw)); @@ -478,7 +493,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi #define ARCH_32_64(a, b) b #endif -static SLJIT_CONST sljit_ins data_transfer_insts[16 + 4] = { +static const sljit_ins data_transfer_insts[16 + 4] = { /* u w s */ ARCH_32_64(OPC1(3) | OPC3(0x04) /* stw */, OPC1(3) | OPC3(0x0e) /* stx */), /* u w l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x0b) /* ldx */), /* u b s */ OPC1(3) | OPC3(0x05) /* stb */, @@ -506,7 +521,7 @@ static SLJIT_CONST sljit_ins data_transfer_insts[16 + 4] = { #undef ARCH_32_64 /* Can perform an operation using at most 1 instruction. 
*/ -static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw) +static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) { SLJIT_ASSERT(arg & SLJIT_MEM); @@ -529,7 +544,7 @@ static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, /* See getput_arg below. Note: can_cache is called only for binary operators. Those operators always uses word arguments without write back. */ -static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) +static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) { SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM)); @@ -549,9 +564,9 @@ static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_ } /* Emit the necessary instructions. See can_cache above. */ -static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) +static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) { - sljit_si base, arg2, delay_slot; + sljit_s32 base, arg2, delay_slot; sljit_ins dest; SLJIT_ASSERT(arg & SLJIT_MEM); @@ -613,7 +628,7 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, slji return push_inst(compiler, ADD | D(base) | S1(base) | S2(arg2), DR(base)); } -static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw) +static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) { if (getput_arg_fast(compiler, flags, reg, arg, argw)) return compiler->error; @@ -622,26 +637,26 @@ static SLJIT_INLINE sljit_si emit_op_mem(struct 
sljit_compiler *compiler, sljit_ return getput_arg(compiler, flags, reg, arg, argw, 0, 0); } -static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w) +static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) { if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) return compiler->error; return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); } -static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { /* arg1 goes to TMP_REG1 or src reg arg2 goes to TMP_REG2, imm or src reg TMP_REG3 can be used for caching result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. 
*/ - sljit_si dst_r = TMP_REG2; - sljit_si src1_r; + sljit_s32 dst_r = TMP_REG2; + sljit_s32 src1_r; sljit_sw src2_r = 0; - sljit_si sugg_src2_r = TMP_REG2; + sljit_s32 sugg_src2_r = TMP_REG2; if (!(flags & ALT_KEEP_CACHE)) { compiler->cache_arg = 0; @@ -649,13 +664,13 @@ static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si f } if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { - if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM)) + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM)) return SLJIT_SUCCESS; } else if (FAST_IS_REG(dst)) { dst_r = dst; flags |= REG_DEST; - if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) sugg_src2_r = dst_r; } else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw)) @@ -705,7 +720,7 @@ static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si f if (FAST_IS_REG(src2)) { src2_r = src2; flags |= REG2_SOURCE; - if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) + if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) dst_r = src2_r; } else if (src2 & SLJIT_IMM) { @@ -716,7 +731,7 @@ static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si f } else { src2_r = 0; - if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) && (dst & SLJIT_MEM)) + if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) && (dst & SLJIT_MEM)) dst_r = 0; } } @@ -758,7 +773,7 @@ static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si f return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { CHECK_ERROR(); CHECK(check_sljit_emit_op0(compiler, op)); @@ -769,30 +784,30 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler return push_inst(compiler, TA, UNMOVABLE_INS); case 
SLJIT_NOP: return push_inst(compiler, NOP, UNMOVABLE_INS); - case SLJIT_LUMUL: - case SLJIT_LSMUL: + case SLJIT_LMUL_UW: + case SLJIT_LMUL_SW: #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - FAIL_IF(push_inst(compiler, (op == SLJIT_LUMUL ? UMUL : SMUL) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0))); + FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? UMUL : SMUL) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0))); return push_inst(compiler, RDY | D(SLJIT_R1), DR(SLJIT_R1)); #else #error "Implementation required" #endif - case SLJIT_UDIVMOD: - case SLJIT_SDIVMOD: - case SLJIT_UDIVI: - case SLJIT_SDIVI: - SLJIT_COMPILE_ASSERT((SLJIT_UDIVMOD & 0x2) == 0 && SLJIT_UDIVI - 0x2 == SLJIT_UDIVMOD, bad_div_opcode_assignments); + case SLJIT_DIVMOD_UW: + case SLJIT_DIVMOD_SW: + case SLJIT_DIV_UW: + case SLJIT_DIV_SW: + SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments); #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - if ((op | 0x2) == SLJIT_UDIVI) + if ((op | 0x2) == SLJIT_DIV_UW) FAIL_IF(push_inst(compiler, WRY | S1(0), MOVABLE_INS)); else { FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(SLJIT_R0) | IMM(31), DR(TMP_REG1))); FAIL_IF(push_inst(compiler, WRY | S1(TMP_REG1), MOVABLE_INS)); } - if (op <= SLJIT_SDIVMOD) + if (op <= SLJIT_DIVMOD_SW) FAIL_IF(push_inst(compiler, OR | D(TMP_REG2) | S1(0) | S2(SLJIT_R0), DR(TMP_REG2))); - FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_UDIVI ? UDIV : SDIV) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0))); - if (op >= SLJIT_UDIVI) + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? 
UDIV : SDIV) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0))); + if (op >= SLJIT_DIV_UW) return SLJIT_SUCCESS; FAIL_IF(push_inst(compiler, SMUL | D(SLJIT_R1) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R1))); return push_inst(compiler, SUB | D(SLJIT_R1) | S1(TMP_REG2) | S2(SLJIT_R1), DR(SLJIT_R1)); @@ -804,11 +819,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { - sljit_si flags = GET_FLAGS(op) ? SET_FLAGS : 0; + sljit_s32 flags = GET_FLAGS(op) ? SET_FLAGS : 0; CHECK_ERROR(); CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); @@ -821,45 +836,45 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler case SLJIT_MOV_P: return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_MOV_UI: - return emit_op(compiler, SLJIT_MOV_UI, flags | INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + case SLJIT_MOV_U32: + return emit_op(compiler, SLJIT_MOV_U32, flags | INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_MOV_SI: - return emit_op(compiler, SLJIT_MOV_SI, flags | INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + case SLJIT_MOV_S32: + return emit_op(compiler, SLJIT_MOV_S32, flags | INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_MOV_UB: - return emit_op(compiler, SLJIT_MOV_UB, flags | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub)srcw : srcw); + case SLJIT_MOV_U8: + return emit_op(compiler, SLJIT_MOV_U8, flags | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
(sljit_u8)srcw : srcw); - case SLJIT_MOV_SB: - return emit_op(compiler, SLJIT_MOV_SB, flags | BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw); + case SLJIT_MOV_S8: + return emit_op(compiler, SLJIT_MOV_S8, flags | BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw); - case SLJIT_MOV_UH: - return emit_op(compiler, SLJIT_MOV_UH, flags | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw); + case SLJIT_MOV_U16: + return emit_op(compiler, SLJIT_MOV_U16, flags | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw); - case SLJIT_MOV_SH: - return emit_op(compiler, SLJIT_MOV_SH, flags | HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw); + case SLJIT_MOV_S16: + return emit_op(compiler, SLJIT_MOV_S16, flags | HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw); case SLJIT_MOVU: case SLJIT_MOVU_P: return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_MOVU_UI: - return emit_op(compiler, SLJIT_MOV_UI, flags | INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + case SLJIT_MOVU_U32: + return emit_op(compiler, SLJIT_MOV_U32, flags | INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_MOVU_SI: - return emit_op(compiler, SLJIT_MOV_SI, flags | INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + case SLJIT_MOVU_S32: + return emit_op(compiler, SLJIT_MOV_S32, flags | INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_MOVU_UB: - return emit_op(compiler, SLJIT_MOV_UB, flags | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
(sljit_ub)srcw : srcw); + case SLJIT_MOVU_U8: + return emit_op(compiler, SLJIT_MOV_U8, flags | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw); - case SLJIT_MOVU_SB: - return emit_op(compiler, SLJIT_MOV_SB, flags | BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw); + case SLJIT_MOVU_S8: + return emit_op(compiler, SLJIT_MOV_S8, flags | BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw); - case SLJIT_MOVU_UH: - return emit_op(compiler, SLJIT_MOV_UH, flags | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw); + case SLJIT_MOVU_U16: + return emit_op(compiler, SLJIT_MOV_U16, flags | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw); - case SLJIT_MOVU_SH: - return emit_op(compiler, SLJIT_MOV_SH, flags | HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw); + case SLJIT_MOVU_S16: + return emit_op(compiler, SLJIT_MOV_S16, flags | HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw); case SLJIT_NOT: case SLJIT_CLZ: @@ -872,12 +887,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { - sljit_si flags = GET_FLAGS(op) ? SET_FLAGS : 0; + sljit_s32 flags = GET_FLAGS(op) ? 
SET_FLAGS : 0; CHECK_ERROR(); CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); @@ -914,20 +929,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) { CHECK_REG_INDEX(check_sljit_get_register_index(reg)); return reg_map[reg]; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) { CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); return reg << 1; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_si size) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_s32 size) { CHECK_ERROR(); CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); @@ -939,7 +954,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *co /* Floating point operators */ /* --------------------------------------------------------------------- */ -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) { #ifdef SLJIT_IS_FPU_AVAILABLE return SLJIT_IS_FPU_AVAILABLE; @@ -949,13 +964,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) #endif } -#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 7)) -#define SELECT_FOP(op, single, double) ((op & SLJIT_SINGLE_OP) ? single : double) +#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 7)) +#define SELECT_FOP(op, single, double) ((op & SLJIT_F32_OP) ? 
single : double) #define FLOAT_TMP_MEM_OFFSET (22 * sizeof(sljit_sw)) -static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { if (src & SLJIT_MEM) { FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); @@ -978,16 +993,16 @@ static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler * return emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, dst, dstw, 0, 0); } -static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { - sljit_si dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1; + sljit_s32 dst_r = FAST_IS_REG(dst) ? 
(dst << 1) : TMP_FREG1; if (src & SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) - srcw = (sljit_si)srcw; + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) + srcw = (sljit_s32)srcw; #endif FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); src = TMP_REG1; @@ -1008,9 +1023,9 @@ static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler * return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { if (src1 & SLJIT_MEM) { FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); @@ -1029,21 +1044,21 @@ static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler return push_inst(compiler, SELECT_FOP(op, FCMPS, FCMPD) | S1A(src1) | S2A(src2), FCC_IS_SET | MOVABLE_INS); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { - sljit_si dst_r; + sljit_s32 dst_r; CHECK_ERROR(); compiler->cache_arg = 0; compiler->cache_argw = 0; - SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); + SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); - if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) - op ^= SLJIT_SINGLE_OP; + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) + op ^= SLJIT_F32_OP; dst_r = FAST_IS_REG(dst) ? 
(dst << 1) : TMP_FREG1; @@ -1055,30 +1070,30 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile src <<= 1; switch (GET_OPCODE(op)) { - case SLJIT_DMOV: + case SLJIT_MOV_F64: if (src != dst_r) { if (dst_r != TMP_FREG1) { FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r) | S2A(src), MOVABLE_INS)); - if (!(op & SLJIT_SINGLE_OP)) + if (!(op & SLJIT_F32_OP)) FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS)); } else dst_r = src; } break; - case SLJIT_DNEG: + case SLJIT_NEG_F64: FAIL_IF(push_inst(compiler, FNEGS | DA(dst_r) | S2A(src), MOVABLE_INS)); - if (dst_r != src && !(op & SLJIT_SINGLE_OP)) + if (dst_r != src && !(op & SLJIT_F32_OP)) FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS)); break; - case SLJIT_DABS: + case SLJIT_ABS_F64: FAIL_IF(push_inst(compiler, FABSS | DA(dst_r) | S2A(src), MOVABLE_INS)); - if (dst_r != src && !(op & SLJIT_SINGLE_OP)) + if (dst_r != src && !(op & SLJIT_F32_OP)) FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS)); break; - case SLJIT_CONVD_FROMS: + case SLJIT_CONV_F64_FROM_F32: FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOD, FDTOS) | DA(dst_r) | S2A(src), MOVABLE_INS)); - op ^= SLJIT_SINGLE_OP; + op ^= SLJIT_F32_OP; break; } @@ -1087,12 +1102,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { - sljit_si dst_r, flags = 0; + sljit_s32 dst_r, flags = 0; CHECK_ERROR(); CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); @@ -1146,19 +1161,19 @@ 
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile src2 = TMP_FREG2; switch (GET_OPCODE(op)) { - case SLJIT_DADD: + case SLJIT_ADD_F64: FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADDD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS)); break; - case SLJIT_DSUB: + case SLJIT_SUB_F64: FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUBD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS)); break; - case SLJIT_DMUL: + case SLJIT_MUL_F64: FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMULD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS)); break; - case SLJIT_DDIV: + case SLJIT_DIV_F64: FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIVD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS)); break; } @@ -1176,7 +1191,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile /* Other instructions */ /* --------------------------------------------------------------------- */ -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { CHECK_ERROR(); CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); @@ -1193,7 +1208,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c return emit_op_mem(compiler, WORD_DATA, TMP_LINK, dst, dstw); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) { CHECK_ERROR(); CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); @@ -1231,33 +1246,33 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi return label; } -static sljit_ins get_cc(sljit_si type) +static sljit_ins get_cc(sljit_s32 type) { switch (type) { case SLJIT_EQUAL: case 
SLJIT_MUL_NOT_OVERFLOW: - case SLJIT_D_NOT_EQUAL: /* Unordered. */ + case SLJIT_NOT_EQUAL_F64: /* Unordered. */ return DA(0x1); case SLJIT_NOT_EQUAL: case SLJIT_MUL_OVERFLOW: - case SLJIT_D_EQUAL: + case SLJIT_EQUAL_F64: return DA(0x9); case SLJIT_LESS: - case SLJIT_D_GREATER: /* Unordered. */ + case SLJIT_GREATER_F64: /* Unordered. */ return DA(0x5); case SLJIT_GREATER_EQUAL: - case SLJIT_D_LESS_EQUAL: + case SLJIT_LESS_EQUAL_F64: return DA(0xd); case SLJIT_GREATER: - case SLJIT_D_GREATER_EQUAL: /* Unordered. */ + case SLJIT_GREATER_EQUAL_F64: /* Unordered. */ return DA(0xc); case SLJIT_LESS_EQUAL: - case SLJIT_D_LESS: + case SLJIT_LESS_F64: return DA(0x4); case SLJIT_SIG_LESS: @@ -1273,11 +1288,11 @@ static sljit_ins get_cc(sljit_si type) return DA(0x2); case SLJIT_OVERFLOW: - case SLJIT_D_UNORDERED: + case SLJIT_UNORDERED_F64: return DA(0x7); case SLJIT_NOT_OVERFLOW: - case SLJIT_D_ORDERED: + case SLJIT_ORDERED_F64: return DA(0xf); default: @@ -1286,7 +1301,7 @@ static sljit_ins get_cc(sljit_si type) } } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) { struct sljit_jump *jump; @@ -1298,7 +1313,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); type &= 0xff; - if (type < SLJIT_D_EQUAL) { + if (type < SLJIT_EQUAL_F64) { jump->flags |= IS_COND; if (((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) && !(compiler->delay_slot & ICC_IS_SET)) jump->flags |= IS_MOVABLE; @@ -1332,10 +1347,10 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile return jump; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, 
sljit_s32 type, sljit_s32 src, sljit_sw srcw) { struct sljit_jump *jump = NULL; - sljit_si src_r; + sljit_s32 src_r; CHECK_ERROR(); CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); @@ -1367,12 +1382,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil return push_inst(compiler, NOP, UNMOVABLE_INS); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw, - sljit_si type) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw, + sljit_s32 type) { - sljit_si reg, flags = (GET_FLAGS(op) ? SET_FLAGS : 0); + sljit_s32 reg, flags = (GET_FLAGS(op) ? SET_FLAGS : 0); CHECK_ERROR(); CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); @@ -1395,7 +1410,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com } type &= 0xff; - if (type < SLJIT_D_EQUAL) + if (type < SLJIT_EQUAL_F64) FAIL_IF(push_inst(compiler, BICC | get_cc(type) | 3, UNMOVABLE_INS)); else FAIL_IF(push_inst(compiler, FBFCC | get_cc(type) | 3, UNMOVABLE_INS)); @@ -1412,9 +1427,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com #endif } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) { - sljit_si reg; + sljit_s32 reg; struct sljit_const *const_; CHECK_ERROR_PTR(); diff --git a/pcre2/src/sljit/sljitNativeTILEGX_64.c b/pcre2/src/sljit/sljitNativeTILEGX_64.c index 1d6aa5a11..177a65b00 100644 --- a/pcre2/src/sljit/sljitNativeTILEGX_64.c +++ b/pcre2/src/sljit/sljitNativeTILEGX_64.c @@ -35,21 +35,21 @@ #define SIMM_16BIT_MIN (-0x8000) 
#define SIMM_17BIT_MAX (0xffff) #define SIMM_17BIT_MIN (-0x10000) -#define SIMM_32BIT_MIN (-0x80000000) #define SIMM_32BIT_MAX (0x7fffffff) -#define SIMM_48BIT_MIN (0x800000000000L) +#define SIMM_32BIT_MIN (-0x7fffffff - 1) #define SIMM_48BIT_MAX (0x7fffffff0000L) +#define SIMM_48BIT_MIN (-0x800000000000L) #define IMM16(imm) ((imm) & 0xffff) #define UIMM_16BIT_MAX (0xffff) -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) -#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) -#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) -#define ADDR_TMP (SLJIT_NO_REGISTERS + 4) +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) +#define ADDR_TMP (SLJIT_NUMBER_OF_REGISTERS + 5) #define PIC_ADDR_REG TMP_REG2 -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { 63, 0, 1, 2, 3, 4, 30, 31, 32, 33, 34, 54, 5, 16, 6, 7 }; @@ -58,11 +58,6 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { #define TMP_REG2_mapped 16 #define TMP_REG3_mapped 6 #define ADDR_TMP_mapped 7 -#define SLJIT_SAVED_REG1_mapped 30 -#define SLJIT_SAVED_REG2_mapped 31 -#define SLJIT_SAVED_REG3_mapped 32 -#define SLJIT_SAVED_EREG1_mapped 33 -#define SLJIT_SAVED_EREG2_mapped 34 /* Flags are keept in volatile registers. 
*/ #define EQUAL_FLAG 8 @@ -111,7 +106,7 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { */ #define CHECK_FLAGS(list) (!(flags & UNUSED_DEST) || (op & GET_FLAGS(~(list)))) -SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char *sljit_get_platform_name(void) +SLJIT_API_FUNC_ATTRIBUTE const char *sljit_get_platform_name(void) { return "TileGX" SLJIT_CPUINFO; } @@ -312,7 +307,7 @@ struct jit_instr { #define JOFF_X1(x) create_JumpOff_X1(x) #define BOFF_X1(x) create_BrOff_X1(x) -static SLJIT_CONST tilegx_mnemonic data_transfer_insts[16] = { +static const tilegx_mnemonic data_transfer_insts[16] = { /* u w s */ TILEGX_OPC_ST /* st */, /* u w l */ TILEGX_OPC_LD /* ld */, /* u b s */ TILEGX_OPC_ST1 /* st1 */, @@ -332,7 +327,7 @@ static SLJIT_CONST tilegx_mnemonic data_transfer_insts[16] = { }; #ifdef TILEGX_JIT_DEBUG -static sljit_si push_inst_debug(struct sljit_compiler *compiler, sljit_ins ins, int line) +static sljit_s32 push_inst_debug(struct sljit_compiler *compiler, sljit_ins ins, int line) { sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins)); FAIL_IF(!ptr); @@ -343,7 +338,7 @@ static sljit_si push_inst_debug(struct sljit_compiler *compiler, sljit_ins ins, return SLJIT_SUCCESS; } -static sljit_si push_inst_nodebug(struct sljit_compiler *compiler, sljit_ins ins) +static sljit_s32 push_inst_nodebug(struct sljit_compiler *compiler, sljit_ins ins) { sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins)); FAIL_IF(!ptr); @@ -354,7 +349,7 @@ static sljit_si push_inst_nodebug(struct sljit_compiler *compiler, sljit_ins ins #define push_inst(a, b) push_inst_debug(a, b, __LINE__) #else -static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins) +static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) { sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins)); FAIL_IF(!ptr); @@ -399,6 +394,9 @@ static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins) #define SUB(dst, srca, 
srcb) \ push_3_buffer(compiler, TILEGX_OPC_SUB, dst, srca, srcb, __LINE__) +#define MUL(dst, srca, srcb) \ + push_3_buffer(compiler, TILEGX_OPC_MULX, dst, srca, srcb, __LINE__) + #define NOR(dst, srca, srcb) \ push_3_buffer(compiler, TILEGX_OPC_NOR, dst, srca, srcb, __LINE__) @@ -547,8 +545,8 @@ const struct Format* compute_format() const struct Format* match = NULL; const struct Format *b = NULL; - unsigned int i = 0; - for (i; i < sizeof formats / sizeof formats[0]; i++) { + unsigned int i; + for (i = 0; i < sizeof formats / sizeof formats[0]; i++) { b = &formats[i]; if ((b->pipe_mask & compatible_pipes) == b->pipe_mask) { match = b; @@ -559,7 +557,7 @@ const struct Format* compute_format() return match; } -sljit_si assign_pipes() +sljit_s32 assign_pipes() { unsigned long output_registers = 0; unsigned int i = 0; @@ -623,9 +621,8 @@ tilegx_bundle_bits get_bundle_bit(struct jit_instr *inst) return bits; } -static sljit_si update_buffer(struct sljit_compiler *compiler) +static sljit_s32 update_buffer(struct sljit_compiler *compiler) { - int count; int i; int orig_index = inst_buf_index; struct jit_instr inst0 = inst_buf[0]; @@ -736,13 +733,15 @@ static sljit_si update_buffer(struct sljit_compiler *compiler) SLJIT_ASSERT_STOP(); } -static sljit_si flush_buffer(struct sljit_compiler *compiler) +static sljit_s32 flush_buffer(struct sljit_compiler *compiler) { - while (inst_buf_index != 0) - update_buffer(compiler); + while (inst_buf_index != 0) { + FAIL_IF(update_buffer(compiler)); + } + return SLJIT_SUCCESS; } -static sljit_si push_4_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int op3, int line) +static sljit_s32 push_4_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int op3, int line) { if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE) FAIL_IF(update_buffer(compiler)); @@ -762,7 +761,7 @@ static sljit_si push_4_buffer(struct sljit_compiler *compiler, tilegx_mnemonic o return 
SLJIT_SUCCESS; } -static sljit_si push_3_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int line) +static sljit_s32 push_3_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int line) { if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE) FAIL_IF(update_buffer(compiler)); @@ -787,6 +786,7 @@ static sljit_si push_3_buffer(struct sljit_compiler *compiler, tilegx_mnemonic o case TILEGX_OPC_ADD: case TILEGX_OPC_AND: case TILEGX_OPC_SUB: + case TILEGX_OPC_MULX: case TILEGX_OPC_OR: case TILEGX_OPC_XOR: case TILEGX_OPC_NOR: @@ -822,7 +822,7 @@ static sljit_si push_3_buffer(struct sljit_compiler *compiler, tilegx_mnemonic o return SLJIT_SUCCESS; } -static sljit_si push_2_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int line) +static sljit_s32 push_2_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int line) { if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE) FAIL_IF(update_buffer(compiler)); @@ -867,7 +867,7 @@ static sljit_si push_2_buffer(struct sljit_compiler *compiler, tilegx_mnemonic o return SLJIT_SUCCESS; } -static sljit_si push_0_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int line) +static sljit_s32 push_0_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int line) { if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE) FAIL_IF(update_buffer(compiler)); @@ -883,7 +883,7 @@ static sljit_si push_0_buffer(struct sljit_compiler *compiler, tilegx_mnemonic o return SLJIT_SUCCESS; } -static sljit_si push_jr_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int line) +static sljit_s32 push_jr_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int line) { if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE) FAIL_IF(update_buffer(compiler)); @@ -905,7 +905,6 @@ static SLJIT_INLINE sljit_ins * detect_jump_type(struct sljit_jump *jump, sljit_ 
sljit_sw diff; sljit_uw target_addr; sljit_ins *inst; - sljit_ins saved_inst; if (jump->flags & SLJIT_REWRITABLE_JUMP) return code_ptr; @@ -1009,7 +1008,7 @@ SLJIT_API_FUNC_ATTRIBUTE void * sljit_generate_code(struct sljit_compiler *compi struct sljit_const *const_; CHECK_ERROR_PTR(); - check_sljit_generate_code(compiler); + CHECK_PTR(check_sljit_generate_code(compiler)); reverse_buf(compiler); code = (sljit_ins *)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins)); @@ -1118,7 +1117,7 @@ SLJIT_API_FUNC_ATTRIBUTE void * sljit_generate_code(struct sljit_compiler *compi return code; } -static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm) +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm) { if (imm <= SIMM_16BIT_MAX && imm >= SIMM_16BIT_MIN) @@ -1141,7 +1140,7 @@ static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst_ar, return SHL16INSLI(dst_ar, dst_ar, imm); } -static sljit_si emit_const(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm, int flush) +static sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm, int flush) { /* Should *not* be optimized as load_immediate, as pcre relocation mechanism will match this fixed 4-instruction pattern. */ @@ -1156,7 +1155,7 @@ static sljit_si emit_const(struct sljit_compiler *compiler, sljit_si dst_ar, slj return SHL16INSLI(dst_ar, dst_ar, imm); } -static sljit_si emit_const_64(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm, int flush) +static sljit_s32 emit_const_64(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm, int flush) { /* Should *not* be optimized as load_immediate, as pcre relocation mechanism will match this fixed 4-instruction pattern. 
*/ @@ -1173,18 +1172,18 @@ static sljit_si emit_const_64(struct sljit_compiler *compiler, sljit_si dst_ar, return SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, - sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, - sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { sljit_ins base; - sljit_ins bundle = 0; - + sljit_s32 i, tmp; + CHECK_ERROR(); - check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); - local_size += (saveds + 1) * sizeof(sljit_sw); + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); local_size = (local_size + 7) & ~7; compiler->local_size = local_size; @@ -1200,56 +1199,52 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil local_size = 0; } + /* Save the return address. */ FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8)); FAIL_IF(ST_ADD(ADDR_TMP_mapped, RA, -8)); - if (saveds >= 1) - FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG1_mapped, -8)); + /* Save the S registers. */ + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) { + FAIL_IF(ST_ADD(ADDR_TMP_mapped, reg_map[i], -8)); + } - if (saveds >= 2) - FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG2_mapped, -8)); + /* Save the R registers that need to be reserved. 
*/ + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + FAIL_IF(ST_ADD(ADDR_TMP_mapped, reg_map[i], -8)); + } - if (saveds >= 3) - FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG3_mapped, -8)); - - if (saveds >= 4) - FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_EREG1_mapped, -8)); - - if (saveds >= 5) - FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_EREG2_mapped, -8)); - - if (args >= 1) - FAIL_IF(ADD(SLJIT_SAVED_REG1_mapped, 0, ZERO)); - - if (args >= 2) - FAIL_IF(ADD(SLJIT_SAVED_REG2_mapped, 1, ZERO)); - - if (args >= 3) - FAIL_IF(ADD(SLJIT_SAVED_REG3_mapped, 2, ZERO)); + /* Move the arguments to S registers. */ + for (i = 0; i < args; i++) { + FAIL_IF(ADD(reg_map[SLJIT_S0 - i], i, ZERO)); + } return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, - sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, - sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); - local_size += (saveds + 1) * sizeof(sljit_sw); + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); compiler->local_size = (local_size + 7) & ~7; + + return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { - sljit_si local_size; + 
sljit_s32 local_size; sljit_ins base; - int addr_initialized = 0; + sljit_s32 i, tmp; + sljit_s32 saveds; CHECK_ERROR(); - check_sljit_emit_return(compiler, op, src, srcw); + CHECK(check_sljit_emit_return(compiler, op, src, srcw)); FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); @@ -1263,50 +1258,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi local_size = 0; } + /* Restore the return address. */ FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8)); - FAIL_IF(LD(RA, ADDR_TMP_mapped)); + FAIL_IF(LD_ADD(RA, ADDR_TMP_mapped, -8)); - if (compiler->saveds >= 5) { - FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 48)); - addr_initialized = 1; - - FAIL_IF(LD_ADD(SLJIT_SAVED_EREG2_mapped, ADDR_TMP_mapped, 8)); + /* Restore the S registers. */ + saveds = compiler->saveds; + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) { + FAIL_IF(LD_ADD(reg_map[i], ADDR_TMP_mapped, -8)); } - if (compiler->saveds >= 4) { - if (addr_initialized == 0) { - FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 40)); - addr_initialized = 1; - } - - FAIL_IF(LD_ADD(SLJIT_SAVED_EREG1_mapped, ADDR_TMP_mapped, 8)); - } - - if (compiler->saveds >= 3) { - if (addr_initialized == 0) { - FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 32)); - addr_initialized = 1; - } - - FAIL_IF(LD_ADD(SLJIT_SAVED_REG3_mapped, ADDR_TMP_mapped, 8)); - } - - if (compiler->saveds >= 2) { - if (addr_initialized == 0) { - FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 24)); - addr_initialized = 1; - } - - FAIL_IF(LD_ADD(SLJIT_SAVED_REG2_mapped, ADDR_TMP_mapped, 8)); - } - - if (compiler->saveds >= 1) { - if (addr_initialized == 0) { - FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 16)); - /* addr_initialized = 1; no need to initialize as it's the last one. */ - } - - FAIL_IF(LD_ADD(SLJIT_SAVED_REG1_mapped, ADDR_TMP_mapped, 8)); + /* Restore the R registers that need to be reserved. 
*/ + for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + FAIL_IF(LD_ADD(reg_map[i], ADDR_TMP_mapped, -8)); } if (compiler->local_size <= SIMM_16BIT_MAX) @@ -1320,7 +1285,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi /* reg_ar is an absoulute register! */ /* Can perform an operation using at most 1 instruction. */ -static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw) +static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw) { SLJIT_ASSERT(arg & SLJIT_MEM); @@ -1346,7 +1311,7 @@ static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, /* See getput_arg below. Note: can_cache is called only for binary operators. Those operators always uses word arguments without write back. */ -static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) +static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) { SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM)); @@ -1372,9 +1337,9 @@ static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_ } /* Emit the necessary instructions. See can_cache above. 
*/ -static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) +static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) { - sljit_si tmp_ar, base; + sljit_s32 tmp_ar, base; SLJIT_ASSERT(arg & SLJIT_MEM); if (!(next_arg & SLJIT_MEM)) { @@ -1565,7 +1530,7 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, slji return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar); } -static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw) +static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw) { if (getput_arg_fast(compiler, flags, reg_ar, arg, argw)) return compiler->error; @@ -1575,17 +1540,17 @@ static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_ return getput_arg(compiler, flags, reg_ar, arg, argw, 0, 0); } -static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w) +static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) { if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) return compiler->error; return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { CHECK_ERROR(); - check_sljit_emit_fast_enter(compiler, dst, dstw); + CHECK(check_sljit_emit_fast_enter(compiler, dst, 
dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); /* For UNUSED dst. Uncommon, but possible. */ @@ -1599,10 +1564,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c return emit_op_mem(compiler, WORD_DATA, RA, dst, dstw); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) { CHECK_ERROR(); - check_sljit_emit_fast_return(compiler, src, srcw); + CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); if (FAST_IS_REG(src)) @@ -1617,9 +1582,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler * return JR(RA); } -static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, sljit_si dst, sljit_si src1, sljit_sw src2) +static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, sljit_s32 dst, sljit_s32 src1, sljit_sw src2) { - sljit_si overflow_ra = 0; + sljit_s32 overflow_ra = 0; switch (GET_OPCODE(op)) { case SLJIT_MOV: @@ -1629,42 +1594,48 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj return ADD(reg_map[dst], reg_map[src2], ZERO); return SLJIT_SUCCESS; - case SLJIT_MOV_UI: - case SLJIT_MOV_SI: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_SI) + if (op == SLJIT_MOV_S32) return BFEXTS(reg_map[dst], reg_map[src2], 0, 31); - return BFEXTU(reg_map[dst], reg_map[src2], 0, 31); - } else if (dst != src2) - SLJIT_ASSERT_STOP(); + return BFEXTU(reg_map[dst], reg_map[src2], 0, 31); + } else if (dst != src2) { + SLJIT_ASSERT(src2 == 0); + return ADD(reg_map[dst], reg_map[src2], ZERO); + } return SLJIT_SUCCESS; - case SLJIT_MOV_UB: - 
case SLJIT_MOV_SB: + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_SB) + if (op == SLJIT_MOV_S8) return BFEXTS(reg_map[dst], reg_map[src2], 0, 7); return BFEXTU(reg_map[dst], reg_map[src2], 0, 7); - } else if (dst != src2) - SLJIT_ASSERT_STOP(); + } else if (dst != src2) { + SLJIT_ASSERT(src2 == 0); + return ADD(reg_map[dst], reg_map[src2], ZERO); + } return SLJIT_SUCCESS; - case SLJIT_MOV_UH: - case SLJIT_MOV_SH: + case SLJIT_MOV_U16: + case SLJIT_MOV_S16: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_SH) + if (op == SLJIT_MOV_S16) return BFEXTS(reg_map[dst], reg_map[src2], 0, 15); return BFEXTU(reg_map[dst], reg_map[src2], 0, 15); - } else if (dst != src2) - SLJIT_ASSERT_STOP(); + } else if (dst != src2) { + SLJIT_ASSERT(src2 == 0); + return ADD(reg_map[dst], reg_map[src2], ZERO); + } return SLJIT_SUCCESS; @@ -1811,7 +1782,6 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj else { /* Rare ocasion. 
*/ FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO)); - overflow_ra = TMP_EREG2; } } @@ -1903,6 +1873,17 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj return SLJIT_SUCCESS; + case SLJIT_MUL: + if (flags & SRC2_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG2_mapped, src2)); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + FAIL_IF(MUL(reg_map[dst], reg_map[src1], reg_map[src2])); + + return SLJIT_SUCCESS; + #define EMIT_LOGICAL(op_imm, op_norm) \ if (flags & SRC2_IMM) { \ FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, src2)); \ @@ -1950,8 +1931,8 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj } else { \ if (op & SLJIT_SET_E) \ FAIL_IF(push_3_buffer( \ - compiler, op_imm, reg_map[dst], reg_map[src1], \ - src2 & 0x3F, __LINE__)); \ + compiler, op_norm, EQUAL_FLAG, reg_map[src1], \ + reg_map[src2], __LINE__)); \ if (CHECK_FLAGS(SLJIT_SET_E)) \ FAIL_IF(push_3_buffer( \ compiler, op_norm, reg_map[dst], reg_map[src1], \ @@ -1975,16 +1956,16 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj return SLJIT_SUCCESS; } -static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, sljit_si dst, sljit_sw dstw, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w) +static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { /* arg1 goes to TMP_REG1 or src reg. arg2 goes to TMP_REG2, imm or src reg. TMP_REG3 can be used for caching. result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. 
*/ - sljit_si dst_r = TMP_REG2; - sljit_si src1_r; + sljit_s32 dst_r = TMP_REG2; + sljit_s32 src1_r; sljit_sw src2_r = 0; - sljit_si sugg_src2_r = TMP_REG2; + sljit_s32 sugg_src2_r = TMP_REG2; if (!(flags & ALT_KEEP_CACHE)) { compiler->cache_arg = 0; @@ -1992,14 +1973,14 @@ static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si f } if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { - if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM)) + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM)) return SLJIT_SUCCESS; if (GET_FLAGS(op)) flags |= UNUSED_DEST; } else if (FAST_IS_REG(dst)) { dst_r = dst; flags |= REG_DEST; - if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) sugg_src2_r = dst_r; } else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1_mapped, dst, dstw)) flags |= SLOW_DEST; @@ -2052,7 +2033,7 @@ static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si f if (FAST_IS_REG(src2)) { src2_r = src2; flags |= REG2_SOURCE; - if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) + if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) dst_r = src2_r; } else if (src2 & SLJIT_IMM) { if (!(flags & SRC2_IMM)) { @@ -2061,7 +2042,7 @@ static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si f src2_r = sugg_src2_r; } else { src2_r = 0; - if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) && (dst & SLJIT_MEM)) + if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) && (dst & SLJIT_MEM)) dst_r = 0; } } @@ -2101,70 +2082,65 @@ static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si f return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw, sljit_si type) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 
dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw, sljit_s32 type) { - sljit_si sugg_dst_ar, dst_ar; - sljit_si flags = GET_ALL_FLAGS(op); + sljit_s32 sugg_dst_ar, dst_ar; + sljit_s32 flags = GET_ALL_FLAGS(op); + sljit_s32 mem_type = (op & SLJIT_I32_OP) ? (INT_DATA | SIGNED_DATA) : WORD_DATA; CHECK_ERROR(); - check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); ADJUST_LOCAL_OFFSET(dst, dstw); if (dst == SLJIT_UNUSED) return SLJIT_SUCCESS; op = GET_OPCODE(op); + if (op == SLJIT_MOV_S32 || op == SLJIT_MOV_U32) + mem_type = INT_DATA | SIGNED_DATA; sugg_dst_ar = reg_map[(op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2]; compiler->cache_arg = 0; compiler->cache_argw = 0; if (op >= SLJIT_ADD && (src & SLJIT_MEM)) { ADJUST_LOCAL_OFFSET(src, srcw); - FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1_mapped, src, srcw, dst, dstw)); + FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, TMP_REG1_mapped, src, srcw, dst, dstw)); src = TMP_REG1; srcw = 0; } - switch (type) { - case SLJIT_C_EQUAL: - case SLJIT_C_NOT_EQUAL: + switch (type & 0xff) { + case SLJIT_EQUAL: + case SLJIT_NOT_EQUAL: FAIL_IF(CMPLTUI(sugg_dst_ar, EQUAL_FLAG, 1)); dst_ar = sugg_dst_ar; break; - case SLJIT_C_LESS: - case SLJIT_C_GREATER_EQUAL: - case SLJIT_C_FLOAT_LESS: - case SLJIT_C_FLOAT_GREATER_EQUAL: + case SLJIT_LESS: + case SLJIT_GREATER_EQUAL: dst_ar = ULESS_FLAG; break; - case SLJIT_C_GREATER: - case SLJIT_C_LESS_EQUAL: - case SLJIT_C_FLOAT_GREATER: - case SLJIT_C_FLOAT_LESS_EQUAL: + case SLJIT_GREATER: + case SLJIT_LESS_EQUAL: dst_ar = UGREATER_FLAG; break; - case SLJIT_C_SIG_LESS: - case SLJIT_C_SIG_GREATER_EQUAL: + case SLJIT_SIG_LESS: + case SLJIT_SIG_GREATER_EQUAL: dst_ar = LESS_FLAG; break; - case SLJIT_C_SIG_GREATER: - case SLJIT_C_SIG_LESS_EQUAL: + case SLJIT_SIG_GREATER: + case SLJIT_SIG_LESS_EQUAL: dst_ar = GREATER_FLAG; break; - case SLJIT_C_OVERFLOW: - case SLJIT_C_NOT_OVERFLOW: 
+ case SLJIT_OVERFLOW: + case SLJIT_NOT_OVERFLOW: dst_ar = OVERFLOW_FLAG; break; - case SLJIT_C_MUL_OVERFLOW: - case SLJIT_C_MUL_NOT_OVERFLOW: + case SLJIT_MUL_OVERFLOW: + case SLJIT_MUL_NOT_OVERFLOW: FAIL_IF(CMPLTUI(sugg_dst_ar, OVERFLOW_FLAG, 1)); dst_ar = sugg_dst_ar; type ^= 0x1; /* Flip type bit for the XORI below. */ break; - case SLJIT_C_FLOAT_EQUAL: - case SLJIT_C_FLOAT_NOT_EQUAL: - dst_ar = EQUAL_FLAG; - break; default: SLJIT_ASSERT_STOP(); @@ -2180,11 +2156,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com if (op >= SLJIT_ADD) { if (TMP_REG2_mapped != dst_ar) FAIL_IF(ADD(TMP_REG2_mapped, dst_ar, ZERO)); - return emit_op(compiler, op | flags, CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0); + return emit_op(compiler, op | flags, mem_type | CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0); } if (dst & SLJIT_MEM) - return emit_op_mem(compiler, WORD_DATA, dst_ar, dst, dstw); + return emit_op_mem(compiler, mem_type, dst_ar, dst, dstw); if (sugg_dst_ar != dst_ar) return ADD(sugg_dst_ar, dst_ar, ZERO); @@ -2192,9 +2168,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) { +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { CHECK_ERROR(); - check_sljit_emit_op0(compiler, op); + CHECK(check_sljit_emit_op0(compiler, op)); op = GET_OPCODE(op); switch (op) { @@ -2204,20 +2180,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler case SLJIT_BREAKPOINT: return PI(BPT); - case SLJIT_UMUL: - case SLJIT_SMUL: - case SLJIT_UDIV: - case SLJIT_SDIV: + case SLJIT_LMUL_UW: + case SLJIT_LMUL_SW: + case SLJIT_DIVMOD_UW: + case SLJIT_DIVMOD_SW: + case SLJIT_DIV_UW: + case SLJIT_DIV_SW: SLJIT_ASSERT_STOP(); } return SLJIT_SUCCESS; } 
-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { CHECK_ERROR(); - check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src, srcw); @@ -2226,45 +2204,45 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler case SLJIT_MOV_P: return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_MOV_UI: - return emit_op(compiler, SLJIT_MOV_UI, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + case SLJIT_MOV_U32: + return emit_op(compiler, SLJIT_MOV_U32, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_MOV_SI: - return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + case SLJIT_MOV_S32: + return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_MOV_UB: - return emit_op(compiler, SLJIT_MOV_UB, BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub) srcw : srcw); + case SLJIT_MOV_U8: + return emit_op(compiler, SLJIT_MOV_U8, BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8) srcw : srcw); - case SLJIT_MOV_SB: - return emit_op(compiler, SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb) srcw : srcw); + case SLJIT_MOV_S8: + return emit_op(compiler, SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8) srcw : srcw); - case SLJIT_MOV_UH: - return emit_op(compiler, SLJIT_MOV_UH, HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
(sljit_uh) srcw : srcw); + case SLJIT_MOV_U16: + return emit_op(compiler, SLJIT_MOV_U16, HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16) srcw : srcw); - case SLJIT_MOV_SH: - return emit_op(compiler, SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh) srcw : srcw); + case SLJIT_MOV_S16: + return emit_op(compiler, SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16) srcw : srcw); case SLJIT_MOVU: case SLJIT_MOVU_P: return emit_op(compiler, SLJIT_MOV, WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_MOVU_UI: - return emit_op(compiler, SLJIT_MOV_UI, INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + case SLJIT_MOVU_U32: + return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_MOVU_SI: - return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + case SLJIT_MOVU_S32: + return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_MOVU_UB: - return emit_op(compiler, SLJIT_MOV_UB, BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub) srcw : srcw); + case SLJIT_MOVU_U8: + return emit_op(compiler, SLJIT_MOV_U8, BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8) srcw : srcw); - case SLJIT_MOVU_SB: - return emit_op(compiler, SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb) srcw : srcw); + case SLJIT_MOVU_S8: + return emit_op(compiler, SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8) srcw : srcw); - case SLJIT_MOVU_UH: - return emit_op(compiler, SLJIT_MOV_UH, HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
(sljit_uh) srcw : srcw); + case SLJIT_MOVU_U16: + return emit_op(compiler, SLJIT_MOV_U16, HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16) srcw : srcw); - case SLJIT_MOVU_SH: - return emit_op(compiler, SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh) srcw : srcw); + case SLJIT_MOVU_S16: + return emit_op(compiler, SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16) srcw : srcw); case SLJIT_NOT: return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw); @@ -2273,16 +2251,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw); case SLJIT_CLZ: - return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, op, (op & SLJIT_I32_OP) ? INT_DATA : WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); } return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { CHECK_ERROR(); - check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); @@ -2309,7 +2287,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler case SLJIT_ASHR: if (src2 & SLJIT_IMM) src2w &= 0x3f; - if (op & SLJIT_INT_OP) + if (op & SLJIT_I32_OP) src2w &= 0x1f; return emit_op(compiler, op, IMM_OP, dst, dstw, src1, src1w, src2, 
src2w); @@ -2325,7 +2303,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label * sljit_emit_label(struct sljit_comp flush_buffer(compiler); CHECK_ERROR_PTR(); - check_sljit_emit_label(compiler); + CHECK_PTR(check_sljit_emit_label(compiler)); if (compiler->last_label && compiler->last_label->size == compiler->size) return compiler->last_label; @@ -2336,15 +2314,15 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label * sljit_emit_label(struct sljit_comp return label; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) { - sljit_si src_r = TMP_REG2; + sljit_s32 src_r = TMP_REG2; struct sljit_jump *jump = NULL; flush_buffer(compiler); CHECK_ERROR(); - check_sljit_emit_ijump(compiler, type, src, srcw); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); if (FAST_IS_REG(src)) { @@ -2404,8 +2382,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil return SLJIT_SUCCESS; - } else if (src & SLJIT_MEM) + } else if (src & SLJIT_MEM) { FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); + flush_buffer(compiler); + } FAIL_IF(JR_SOLO(reg_map[src_r])); @@ -2423,16 +2403,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil inst = BNEZ_X1 | SRCA_X1(src); \ flags = IS_COND; -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump * sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump * sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) { struct sljit_jump *jump; sljit_ins inst; - sljit_si flags = 0; + sljit_s32 flags = 0; flush_buffer(compiler); CHECK_ERROR_PTR(); - check_sljit_emit_jump(compiler, type); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); jump = (struct sljit_jump 
*)ensure_abuf(compiler, sizeof(struct sljit_jump)); PTR_FAIL_IF(!jump); @@ -2440,48 +2420,42 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump * sljit_emit_jump(struct sljit_compil type &= 0xff; switch (type) { - case SLJIT_C_EQUAL: - case SLJIT_C_FLOAT_NOT_EQUAL: + case SLJIT_EQUAL: BR_NZ(EQUAL_FLAG); break; - case SLJIT_C_NOT_EQUAL: - case SLJIT_C_FLOAT_EQUAL: + case SLJIT_NOT_EQUAL: BR_Z(EQUAL_FLAG); break; - case SLJIT_C_LESS: - case SLJIT_C_FLOAT_LESS: + case SLJIT_LESS: BR_Z(ULESS_FLAG); break; - case SLJIT_C_GREATER_EQUAL: - case SLJIT_C_FLOAT_GREATER_EQUAL: + case SLJIT_GREATER_EQUAL: BR_NZ(ULESS_FLAG); break; - case SLJIT_C_GREATER: - case SLJIT_C_FLOAT_GREATER: + case SLJIT_GREATER: BR_Z(UGREATER_FLAG); break; - case SLJIT_C_LESS_EQUAL: - case SLJIT_C_FLOAT_LESS_EQUAL: + case SLJIT_LESS_EQUAL: BR_NZ(UGREATER_FLAG); break; - case SLJIT_C_SIG_LESS: + case SLJIT_SIG_LESS: BR_Z(LESS_FLAG); break; - case SLJIT_C_SIG_GREATER_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: BR_NZ(LESS_FLAG); break; - case SLJIT_C_SIG_GREATER: + case SLJIT_SIG_GREATER: BR_Z(GREATER_FLAG); break; - case SLJIT_C_SIG_LESS_EQUAL: + case SLJIT_SIG_LESS_EQUAL: BR_NZ(GREATER_FLAG); break; - case SLJIT_C_OVERFLOW: - case SLJIT_C_MUL_OVERFLOW: + case SLJIT_OVERFLOW: + case SLJIT_MUL_OVERFLOW: BR_Z(OVERFLOW_FLAG); break; - case SLJIT_C_NOT_OVERFLOW: - case SLJIT_C_MUL_NOT_OVERFLOW: + case SLJIT_NOT_OVERFLOW: + case SLJIT_MUL_NOT_OVERFLOW: BR_NZ(OVERFLOW_FLAG); break; default: @@ -2513,30 +2487,30 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump * sljit_emit_jump(struct sljit_compil return jump; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) { return 0; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, 
sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { SLJIT_ASSERT_STOP(); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { SLJIT_ASSERT_STOP(); } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_const * sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const * sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) { struct sljit_const *const_; - sljit_si reg; + sljit_s32 reg; flush_buffer(compiler); CHECK_ERROR_PTR(); - check_sljit_emit_const(compiler, dst, dstw, init_value); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); ADJUST_LOCAL_OFFSET(dst, dstw); const_ = (struct sljit_const *)ensure_abuf(compiler, sizeof(struct sljit_const)); @@ -2552,13 +2526,13 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const * sljit_emit_const(struct sljit_comp return const_; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target) { sljit_ins *inst = (sljit_ins *)addr; - inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_addr >> 32) & 0xffff) << 43); - inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_addr >> 16) & 0xffff) << 43); - inst[2] = (inst[2] & ~(0xFFFFL << 43)) | ((new_addr & 0xffff) << 43); + inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_target >> 32) & 0xffff) << 43); + inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_target >> 16) & 0xffff) << 43); + inst[2] = (inst[2] & ~(0xFFFFL << 43)) | ((new_target & 0xffff) << 43); SLJIT_CACHE_FLUSH(inst, inst + 3); } @@ -2572,3 +2546,18 @@ 
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta inst[3] = (inst[3] & ~(0xFFFFL << 43)) | ((new_constant & 0xFFFFL) << 43); SLJIT_CACHE_FLUSH(inst, inst + 4); } + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); + return reg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_s32 size) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + return SLJIT_ERR_UNSUPPORTED; +} + diff --git a/pcre2/src/sljit/sljitNativeX86_32.c b/pcre2/src/sljit/sljitNativeX86_32.c index d7129c8e2..00333f6b3 100644 --- a/pcre2/src/sljit/sljitNativeX86_32.c +++ b/pcre2/src/sljit/sljitNativeX86_32.c @@ -26,19 +26,19 @@ /* x86 32-bit arch dependent functions. */ -static sljit_si emit_do_imm(struct sljit_compiler *compiler, sljit_ub opcode, sljit_sw imm) +static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm) { - sljit_ub *inst; + sljit_u8 *inst; - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw)); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw)); FAIL_IF(!inst); INC_SIZE(1 + sizeof(sljit_sw)); *inst++ = opcode; - *(sljit_sw*)inst = imm; + sljit_unaligned_store_sw(inst, imm); return SLJIT_SUCCESS; } -static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type) +static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset) { if (type == SLJIT_JUMP) { *code_ptr++ = JMP_i32; @@ -57,18 +57,18 @@ static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ if (jump->flags & JUMP_LABEL) jump->flags |= PATCH_MW; else - *(sljit_sw*)code_ptr = jump->u.target - (jump->addr + 4); + sljit_unaligned_store_sw(code_ptr, jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset); 
code_ptr += 4; return code_ptr; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, - sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, - sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_si size; - sljit_ub *inst; + sljit_s32 size; + sljit_u8 *inst; CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); @@ -83,7 +83,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil #else size += (args > 0 ? (2 + args * 3) : 0); #endif - inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); FAIL_IF(!inst); INC_SIZE(size); @@ -143,7 +143,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil if (options & SLJIT_DOUBLE_ALIGNMENT) { local_size = SLJIT_LOCALS_OFFSET + ((local_size + 7) & ~7); - inst = (sljit_ub*)ensure_buf(compiler, 1 + 17); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 17); FAIL_IF(!inst); INC_SIZE(17); @@ -151,12 +151,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil inst[1] = MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[SLJIT_SP]; inst[2] = GROUP_F7; inst[3] = MOD_REG | (0 << 3) | reg_map[SLJIT_SP]; - *(sljit_sw*)(inst + 4) = 0x4; + sljit_unaligned_store_sw(inst + 4, 0x4); inst[8] = JNE_i8; inst[9] = 6; inst[10] = GROUP_BINARY_81; inst[11] = MOD_REG | (5 << 3) | reg_map[SLJIT_SP]; - *(sljit_sw*)(inst + 12) = 0x4; + sljit_unaligned_store_sw(inst + 12, 0x4); inst[16] = PUSH_r + reg_map[TMP_REG1]; } else @@ -183,9 +183,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size); } 
-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, - sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, - sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { CHECK_ERROR(); CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); @@ -205,10 +205,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compi return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { - sljit_si size; - sljit_ub *inst; + sljit_s32 size; + sljit_u8 *inst; CHECK_ERROR(); CHECK(check_sljit_emit_return(compiler, op, src, srcw)); @@ -223,7 +223,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi #if !defined(__APPLE__) if (compiler->options & SLJIT_DOUBLE_ALIGNMENT) { - inst = (sljit_ub*)ensure_buf(compiler, 1 + 3); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 3); FAIL_IF(!inst); INC_SIZE(3); @@ -242,7 +242,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi if (compiler->args > 0) size += 2; #endif - inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); FAIL_IF(!inst); INC_SIZE(size); @@ -271,16 +271,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi /* --------------------------------------------------------------------- */ /* Size contains the flags as well. 
*/ -static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si size, +static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 size, /* The register or immediate operand. */ - sljit_si a, sljit_sw imma, + sljit_s32 a, sljit_sw imma, /* The general operand (not immediate). */ - sljit_si b, sljit_sw immb) + sljit_s32 b, sljit_sw immb) { - sljit_ub *inst; - sljit_ub *buf_ptr; - sljit_si flags = size & ~0xf; - sljit_si inst_size; + sljit_u8 *inst; + sljit_u8 *buf_ptr; + sljit_s32 flags = size & ~0xf; + sljit_s32 inst_size; /* Both cannot be switched on. */ SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS)); @@ -310,7 +310,7 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si else if (immb != 0 && !(b & OFFS_REG_MASK)) { /* Immediate operand. */ if (immb <= 127 && immb >= -128) - inst_size += sizeof(sljit_sb); + inst_size += sizeof(sljit_s8); else inst_size += sizeof(sljit_sw); } @@ -347,7 +347,7 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si else SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG); - inst = (sljit_ub*)ensure_buf(compiler, 1 + inst_size); + inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size); PTR_FAIL_IF(!inst); /* Encoding the byte. */ @@ -406,7 +406,7 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si if (immb <= 127 && immb >= -128) *buf_ptr++ = immb; /* 8 bit displacement. */ else { - *(sljit_sw*)buf_ptr = immb; /* 32 bit displacement. */ + sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */ buf_ptr += sizeof(sljit_sw); } } @@ -418,7 +418,7 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si } else { *buf_ptr++ |= 0x05; - *(sljit_sw*)buf_ptr = immb; /* 32 bit displacement. */ + sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. 
*/ buf_ptr += sizeof(sljit_sw); } @@ -426,9 +426,9 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si if (flags & EX86_BYTE_ARG) *buf_ptr = imma; else if (flags & EX86_HALF_ARG) - *(short*)buf_ptr = imma; + sljit_unaligned_store_s16(buf_ptr, imma); else if (!(flags & EX86_SHIFT_INS)) - *(sljit_sw*)buf_ptr = imma; + sljit_unaligned_store_sw(buf_ptr, imma); } return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1); @@ -438,12 +438,12 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si /* Call / return instructions */ /* --------------------------------------------------------------------- */ -static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, sljit_si type) +static SLJIT_INLINE sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 type) { - sljit_ub *inst; + sljit_u8 *inst; #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - inst = (sljit_ub*)ensure_buf(compiler, type >= SLJIT_CALL3 ? 1 + 2 + 1 : 1 + 2); + inst = (sljit_u8*)ensure_buf(compiler, type >= SLJIT_CALL3 ? 1 + 2 + 1 : 1 + 2); FAIL_IF(!inst); INC_SIZE(type >= SLJIT_CALL3 ? 
2 + 1 : 2); @@ -452,7 +452,7 @@ static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, slj *inst++ = MOV_r_rm; *inst++ = MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0]; #else - inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 * (type - SLJIT_CALL0)); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 * (type - SLJIT_CALL0)); FAIL_IF(!inst); INC_SIZE(4 * (type - SLJIT_CALL0)); @@ -476,9 +476,9 @@ static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, slj return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { - sljit_ub *inst; + sljit_u8 *inst; CHECK_ERROR(); CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); @@ -492,7 +492,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c if (FAST_IS_REG(dst)) { /* Unused dest is possible here. 
*/ - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); FAIL_IF(!inst); INC_SIZE(1); @@ -507,9 +507,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) { - sljit_ub *inst; + sljit_u8 *inst; CHECK_ERROR(); CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); @@ -518,7 +518,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler * CHECK_EXTRA_REGS(src, srcw, (void)0); if (FAST_IS_REG(src)) { - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 1); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1); FAIL_IF(!inst); INC_SIZE(1 + 1); @@ -530,18 +530,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler * *inst++ = GROUP_FF; *inst |= PUSH_rm; - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); FAIL_IF(!inst); INC_SIZE(1); } else { /* SLJIT_IMM. */ - inst = (sljit_ub*)ensure_buf(compiler, 1 + 5 + 1); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 5 + 1); FAIL_IF(!inst); INC_SIZE(5 + 1); *inst++ = PUSH_i32; - *(sljit_sw*)inst = srcw; + sljit_unaligned_store_sw(inst, srcw); inst += sizeof(sljit_sw); } diff --git a/pcre2/src/sljit/sljitNativeX86_64.c b/pcre2/src/sljit/sljitNativeX86_64.c index 1790d8a4d..bc92d4568 100644 --- a/pcre2/src/sljit/sljitNativeX86_64.c +++ b/pcre2/src/sljit/sljitNativeX86_64.c @@ -26,20 +26,20 @@ /* x86 64-bit arch dependent functions. 
*/ -static sljit_si emit_load_imm64(struct sljit_compiler *compiler, sljit_si reg, sljit_sw imm) +static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm) { - sljit_ub *inst; + sljit_u8 *inst; - inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw)); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw)); FAIL_IF(!inst); INC_SIZE(2 + sizeof(sljit_sw)); *inst++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B); *inst++ = MOV_r_i32 + (reg_map[reg] & 0x7); - *(sljit_sw*)inst = imm; + sljit_unaligned_store_sw(inst, imm); return SLJIT_SUCCESS; } -static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type) +static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type) { if (type < SLJIT_JUMP) { /* Invert type. */ @@ -55,7 +55,7 @@ static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ if (jump->flags & JUMP_LABEL) jump->flags |= PATCH_MD; else - *(sljit_sw*)code_ptr = jump->u.target; + sljit_unaligned_store_sw(code_ptr, jump->u.target); code_ptr += sizeof(sljit_sw); *code_ptr++ = REX_B; @@ -65,34 +65,12 @@ static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ return code_ptr; } -static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_sw delta = addr - ((sljit_sw)code_ptr + 1 + sizeof(sljit_si)); - - if (delta <= HALFWORD_MAX && delta >= HALFWORD_MIN) { - *code_ptr++ = (type == 2) ? 
CALL_i32 : JMP_i32; - *(sljit_sw*)code_ptr = delta; - } - else { - SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_second); - *code_ptr++ = REX_W | REX_B; - *code_ptr++ = MOV_r_i32 + 1; - *(sljit_sw*)code_ptr = addr; - code_ptr += sizeof(sljit_sw); - *code_ptr++ = REX_B; - *code_ptr++ = GROUP_FF; - *code_ptr++ = (type == 2) ? (MOD_REG | CALL_rm | 1) : (MOD_REG | JMP_rm | 1); - } - - return code_ptr; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, - sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, - sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) -{ - sljit_si i, tmp, size, saved_register_size; - sljit_ub *inst; + sljit_s32 i, tmp, size, saved_register_size; + sljit_u8 *inst; CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); @@ -106,7 +84,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; for (i = SLJIT_S0; i >= tmp; i--) { size = reg_map[i] >= 8 ? 2 : 1; - inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); FAIL_IF(!inst); INC_SIZE(size); if (reg_map[i] >= 8) @@ -116,7 +94,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { size = reg_map[i] >= 8 ? 
2 : 1; - inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); FAIL_IF(!inst); INC_SIZE(size); if (reg_map[i] >= 8) @@ -126,7 +104,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil if (args > 0) { size = args * 3; - inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); FAIL_IF(!inst); INC_SIZE(size); @@ -172,9 +150,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil #ifdef _WIN64 if (local_size > 1024) { /* Allocate stack for the callback, which grows the stack. */ - inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + (3 + sizeof(sljit_si))); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + (3 + sizeof(sljit_s32))); FAIL_IF(!inst); - INC_SIZE(4 + (3 + sizeof(sljit_si))); + INC_SIZE(4 + (3 + sizeof(sljit_s32))); *inst++ = REX_W; *inst++ = GROUP_BINARY_83; *inst++ = MOD_REG | SUB | 4; @@ -193,7 +171,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil *inst++ = REX_W; *inst++ = MOV_rm_i32; *inst++ = MOD_REG | reg_lmap[SLJIT_R0]; - *(sljit_si*)inst = local_size; + sljit_unaligned_store_s32(inst, local_size); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; @@ -204,7 +182,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil SLJIT_ASSERT(local_size > 0); if (local_size <= 127) { - inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); FAIL_IF(!inst); INC_SIZE(4); *inst++ = REX_W; @@ -213,35 +191,35 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil *inst++ = local_size; } else { - inst = (sljit_ub*)ensure_buf(compiler, 1 + 7); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 7); FAIL_IF(!inst); INC_SIZE(7); *inst++ = REX_W; *inst++ = GROUP_BINARY_81; *inst++ = MOD_REG | SUB | 4; - 
*(sljit_si*)inst = local_size; - inst += sizeof(sljit_si); + sljit_unaligned_store_s32(inst, local_size); + inst += sizeof(sljit_s32); } #ifdef _WIN64 /* Save xmm6 register: movaps [rsp + 0x20], xmm6 */ if (fscratches >= 6 || fsaveds >= 1) { - inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 5); FAIL_IF(!inst); INC_SIZE(5); *inst++ = GROUP_0F; - *(sljit_si*)inst = 0x20247429; + sljit_unaligned_store_s32(inst, 0x20247429); } #endif return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, - sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, - sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_si saved_register_size; + sljit_s32 saved_register_size; CHECK_ERROR(); CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); @@ -253,10 +231,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compi return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { - sljit_si i, tmp, size; - sljit_ub *inst; + sljit_s32 i, tmp, size; + sljit_u8 *inst; CHECK_ERROR(); CHECK(check_sljit_emit_return(compiler, op, src, srcw)); @@ -267,17 +245,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi #ifdef _WIN64 /* Restore xmm6 register: movaps xmm6, [rsp + 0x20] */ if (compiler->fscratches >= 6 || compiler->fsaveds >= 1) { - inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); + inst = 
(sljit_u8*)ensure_buf(compiler, 1 + 5); FAIL_IF(!inst); INC_SIZE(5); *inst++ = GROUP_0F; - *(sljit_si*)inst = 0x20247428; + sljit_unaligned_store_s32(inst, 0x20247428); } #endif SLJIT_ASSERT(compiler->local_size > 0); if (compiler->local_size <= 127) { - inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); FAIL_IF(!inst); INC_SIZE(4); *inst++ = REX_W; @@ -286,19 +264,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi *inst = compiler->local_size; } else { - inst = (sljit_ub*)ensure_buf(compiler, 1 + 7); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 7); FAIL_IF(!inst); INC_SIZE(7); *inst++ = REX_W; *inst++ = GROUP_BINARY_81; *inst++ = MOD_REG | ADD | 4; - *(sljit_si*)inst = compiler->local_size; + sljit_unaligned_store_s32(inst, compiler->local_size); } tmp = compiler->scratches; for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) { size = reg_map[i] >= 8 ? 2 : 1; - inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); FAIL_IF(!inst); INC_SIZE(size); if (reg_map[i] >= 8) @@ -309,7 +287,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; for (i = tmp; i <= SLJIT_S0; i++) { size = reg_map[i] >= 8 ? 
2 : 1; - inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); FAIL_IF(!inst); INC_SIZE(size); if (reg_map[i] >= 8) @@ -317,7 +295,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi POP_REG(reg_lmap[i]); } - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); FAIL_IF(!inst); INC_SIZE(1); RET(); @@ -328,32 +306,32 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi /* Operators */ /* --------------------------------------------------------------------- */ -static sljit_si emit_do_imm32(struct sljit_compiler *compiler, sljit_ub rex, sljit_ub opcode, sljit_sw imm) +static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sljit_u8 opcode, sljit_sw imm) { - sljit_ub *inst; - sljit_si length = 1 + (rex ? 1 : 0) + sizeof(sljit_si); + sljit_u8 *inst; + sljit_s32 length = 1 + (rex ? 1 : 0) + sizeof(sljit_s32); - inst = (sljit_ub*)ensure_buf(compiler, 1 + length); + inst = (sljit_u8*)ensure_buf(compiler, 1 + length); FAIL_IF(!inst); INC_SIZE(length); if (rex) *inst++ = rex; *inst++ = opcode; - *(sljit_si*)inst = imm; + sljit_unaligned_store_s32(inst, imm); return SLJIT_SUCCESS; } -static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si size, +static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 size, /* The register or immediate operand. */ - sljit_si a, sljit_sw imma, + sljit_s32 a, sljit_sw imma, /* The general operand (not immediate). */ - sljit_si b, sljit_sw immb) + sljit_s32 b, sljit_sw immb) { - sljit_ub *inst; - sljit_ub *buf_ptr; - sljit_ub rex = 0; - sljit_si flags = size & ~0xf; - sljit_si inst_size; + sljit_u8 *inst; + sljit_u8 *buf_ptr; + sljit_u8 rex = 0; + sljit_s32 flags = size & ~0xf; + sljit_s32 inst_size; /* The immediate operand must be 32 bit. 
*/ SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma)); @@ -400,7 +378,7 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si } if ((b & REG_MASK) == SLJIT_UNUSED) - inst_size += 1 + sizeof(sljit_si); /* SIB byte required to avoid RIP based addressing. */ + inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */ else { if (reg_map[b & REG_MASK] >= 8) rex |= REX_B; @@ -408,12 +386,12 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP))) { /* Immediate operand. */ if (immb <= 127 && immb >= -128) - inst_size += sizeof(sljit_sb); + inst_size += sizeof(sljit_s8); else - inst_size += sizeof(sljit_si); + inst_size += sizeof(sljit_s32); } else if (reg_lmap[b & REG_MASK] == 5) - inst_size += sizeof(sljit_sb); + inst_size += sizeof(sljit_s8); if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) { inst_size += 1; /* SIB byte. */ @@ -444,7 +422,7 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si else if (flags & EX86_HALF_ARG) inst_size += sizeof(short); else - inst_size += sizeof(sljit_si); + inst_size += sizeof(sljit_s32); } else { SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG); @@ -456,7 +434,7 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si if (rex) inst_size++; - inst = (sljit_ub*)ensure_buf(compiler, 1 + inst_size); + inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size); PTR_FAIL_IF(!inst); /* Encoding the byte. */ @@ -516,8 +494,8 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si if (immb <= 127 && immb >= -128) *buf_ptr++ = immb; /* 8 bit displacement. */ else { - *(sljit_si*)buf_ptr = immb; /* 32 bit displacement. */ - buf_ptr += sizeof(sljit_si); + sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. 
*/ + buf_ptr += sizeof(sljit_s32); } } } @@ -533,17 +511,17 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si else { *buf_ptr++ |= 0x04; *buf_ptr++ = 0x25; - *(sljit_si*)buf_ptr = immb; /* 32 bit displacement. */ - buf_ptr += sizeof(sljit_si); + sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */ + buf_ptr += sizeof(sljit_s32); } if (a & SLJIT_IMM) { if (flags & EX86_BYTE_ARG) *buf_ptr = imma; else if (flags & EX86_HALF_ARG) - *(short*)buf_ptr = imma; + sljit_unaligned_store_s16(buf_ptr, imma); else if (!(flags & EX86_SHIFT_INS)) - *(sljit_si*)buf_ptr = imma; + sljit_unaligned_store_s32(buf_ptr, imma); } return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1); @@ -553,14 +531,14 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si /* Call / return instructions */ /* --------------------------------------------------------------------- */ -static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, sljit_si type) +static SLJIT_INLINE sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 type) { - sljit_ub *inst; + sljit_u8 *inst; #ifndef _WIN64 SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8, args_registers); - inst = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6)); + inst = (sljit_u8*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6)); FAIL_IF(!inst); INC_SIZE((type < SLJIT_CALL3) ? 3 : 6); if (type >= SLJIT_CALL3) { @@ -574,7 +552,7 @@ static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, slj #else SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8, args_registers); - inst = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6)); + inst = (sljit_u8*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6)); FAIL_IF(!inst); INC_SIZE((type < SLJIT_CALL3) ? 
3 : 6); if (type >= SLJIT_CALL3) { @@ -589,9 +567,9 @@ static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, slj return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { - sljit_ub *inst; + sljit_u8 *inst; CHECK_ERROR(); CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); @@ -603,14 +581,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c if (FAST_IS_REG(dst)) { if (reg_map[dst] < 8) { - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); FAIL_IF(!inst); INC_SIZE(1); POP_REG(reg_lmap[dst]); return SLJIT_SUCCESS; } - inst = (sljit_ub*)ensure_buf(compiler, 1 + 2); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); FAIL_IF(!inst); INC_SIZE(2); *inst++ = REX_B; @@ -626,9 +604,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) { - sljit_ub *inst; + sljit_u8 *inst; CHECK_ERROR(); CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); @@ -641,14 +619,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler * if (FAST_IS_REG(src)) { if (reg_map[src] < 8) { - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 1); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1); FAIL_IF(!inst); INC_SIZE(1 + 1); PUSH_REG(reg_lmap[src]); } else { - inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 1); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 1); FAIL_IF(!inst); INC_SIZE(2 + 1); @@ -664,20 +642,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si 
sljit_emit_fast_return(struct sljit_compiler * *inst++ = GROUP_FF; *inst |= PUSH_rm; - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); FAIL_IF(!inst); INC_SIZE(1); } else { SLJIT_ASSERT(IS_HALFWORD(srcw)); /* SLJIT_IMM. */ - inst = (sljit_ub*)ensure_buf(compiler, 1 + 5 + 1); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 5 + 1); FAIL_IF(!inst); INC_SIZE(5 + 1); *inst++ = PUSH_i32; - *(sljit_si*)inst = srcw; - inst += sizeof(sljit_si); + sljit_unaligned_store_s32(inst, srcw); + inst += sizeof(sljit_s32); } RET(); @@ -689,12 +667,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler * /* Extend input */ /* --------------------------------------------------------------------- */ -static sljit_si emit_mov_int(struct sljit_compiler *compiler, sljit_si sign, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { - sljit_ub* inst; - sljit_si dst_r; + sljit_u8* inst; + sljit_s32 dst_r; compiler->mode32 = 0; @@ -704,7 +682,7 @@ static sljit_si emit_mov_int(struct sljit_compiler *compiler, sljit_si sign, if (src & SLJIT_IMM) { if (FAST_IS_REG(dst)) { if (sign || ((sljit_uw)srcw <= 0x7fffffff)) { - inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_si)srcw, dst, dstw); + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw); FAIL_IF(!inst); *inst = MOV_rm_i32; return SLJIT_SUCCESS; @@ -712,7 +690,7 @@ static sljit_si emit_mov_int(struct sljit_compiler *compiler, sljit_si sign, return emit_load_imm64(compiler, dst, srcw); } compiler->mode32 = 1; - inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_si)srcw, dst, dstw); + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw); FAIL_IF(!inst); *inst = MOV_rm_i32; compiler->mode32 = 0; diff --git 
a/pcre2/src/sljit/sljitNativeX86_common.c b/pcre2/src/sljit/sljitNativeX86_common.c index b7bbb0384..12a0e272a 100644 --- a/pcre2/src/sljit/sljitNativeX86_common.c +++ b/pcre2/src/sljit/sljitNativeX86_common.c @@ -24,7 +24,7 @@ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) { return "x86" SLJIT_CPUINFO; } @@ -66,7 +66,7 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) /* Last register + 1. */ #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) -static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = { +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = { 0, 0, 2, 1, 0, 0, 0, 0, 7, 6, 3, 4, 5 }; @@ -89,20 +89,20 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = { therefore r12 is better for SAVED_EREG than SAVED_REG. */ #ifndef _WIN64 /* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */ -static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { 0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9 }; /* low-map. reg_map & 0x7. */ -static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = { +static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = { 0, 0, 6, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 7, 1 }; #else /* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */ -static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { 0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9 }; /* low-map. reg_map & 0x7. 
*/ -static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = { +static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = { 0, 0, 2, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 2, 0, 1 }; #endif @@ -269,23 +269,48 @@ static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = { built-in CPU features. Therefore they can be overwritten by different threads if they detect the CPU features in the same time. */ #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) -static sljit_si cpu_has_sse2 = -1; +static sljit_s32 cpu_has_sse2 = -1; #endif -static sljit_si cpu_has_cmov = -1; +static sljit_s32 cpu_has_cmov = -1; -#if defined(_MSC_VER) && _MSC_VER >= 1400 +#ifdef _WIN32_WCE +#include +#elif defined(_MSC_VER) && _MSC_VER >= 1400 #include #endif +/******************************************************/ +/* Unaligned-store functions */ +/******************************************************/ + +static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value) +{ + SLJIT_MEMCPY(addr, &value, sizeof(value)); +} + +static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value) +{ + SLJIT_MEMCPY(addr, &value, sizeof(value)); +} + +static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value) +{ + SLJIT_MEMCPY(addr, &value, sizeof(value)); +} + +/******************************************************/ +/* Utility functions */ +/******************************************************/ + static void get_cpu_features(void) { - sljit_ui features; + sljit_u32 features; #if defined(_MSC_VER) && _MSC_VER >= 1400 int CPUInfo[4]; __cpuid(CPUInfo, 1); - features = (sljit_ui)CPUInfo[3]; + features = (sljit_u32)CPUInfo[3]; #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C) @@ -328,31 +353,31 @@ static void get_cpu_features(void) cpu_has_cmov = (features >> 15) & 0x1; } -static sljit_ub get_jump_code(sljit_si type) +static sljit_u8 get_jump_code(sljit_s32 type) { switch (type) { case SLJIT_EQUAL: - 
case SLJIT_D_EQUAL: + case SLJIT_EQUAL_F64: return 0x84 /* je */; case SLJIT_NOT_EQUAL: - case SLJIT_D_NOT_EQUAL: + case SLJIT_NOT_EQUAL_F64: return 0x85 /* jne */; case SLJIT_LESS: - case SLJIT_D_LESS: + case SLJIT_LESS_F64: return 0x82 /* jc */; case SLJIT_GREATER_EQUAL: - case SLJIT_D_GREATER_EQUAL: + case SLJIT_GREATER_EQUAL_F64: return 0x83 /* jae */; case SLJIT_GREATER: - case SLJIT_D_GREATER: + case SLJIT_GREATER_F64: return 0x87 /* jnbe */; case SLJIT_LESS_EQUAL: - case SLJIT_D_LESS_EQUAL: + case SLJIT_LESS_EQUAL_F64: return 0x86 /* jbe */; case SLJIT_SIG_LESS: @@ -375,30 +400,31 @@ static sljit_ub get_jump_code(sljit_si type) case SLJIT_MUL_NOT_OVERFLOW: return 0x81 /* jno */; - case SLJIT_D_UNORDERED: + case SLJIT_UNORDERED_F64: return 0x8a /* jp */; - case SLJIT_D_ORDERED: + case SLJIT_ORDERED_F64: return 0x8b /* jpo */; } return 0; } -static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type); - -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) -static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type); +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset); +#else +static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type); #endif -static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, sljit_si type) +static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type, sljit_sw executable_offset) { - sljit_si short_jump; + sljit_s32 short_jump; sljit_uw label_addr; if (jump->flags & JUMP_LABEL) label_addr = (sljit_uw)(code + jump->u.label->size); else - label_addr = jump->u.target; + label_addr = jump->u.target - executable_offset; + short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && 
(sljit_sw)(label_addr - (jump->addr + 2)) <= 127; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) @@ -430,13 +456,13 @@ static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code if (short_jump) { jump->flags |= PATCH_MB; - code_ptr += sizeof(sljit_sb); + code_ptr += sizeof(sljit_s8); } else { jump->flags |= PATCH_MW; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) code_ptr += sizeof(sljit_sw); #else - code_ptr += sizeof(sljit_si); + code_ptr += sizeof(sljit_s32); #endif } @@ -446,11 +472,13 @@ static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) { struct sljit_memory_fragment *buf; - sljit_ub *code; - sljit_ub *code_ptr; - sljit_ub *buf_ptr; - sljit_ub *buf_end; - sljit_ub len; + sljit_u8 *code; + sljit_u8 *code_ptr; + sljit_u8 *buf_ptr; + sljit_u8 *buf_end; + sljit_u8 len; + sljit_sw executable_offset; + sljit_sw jump_addr; struct sljit_label *label; struct sljit_jump *jump; @@ -461,7 +489,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil reverse_buf(compiler); /* Second code generation pass. */ - code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size); + code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size); PTR_FAIL_WITH_EXEC_IF(code); buf = compiler->buf; @@ -469,6 +497,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil label = compiler->labels; jump = compiler->jumps; const_ = compiler->consts; + executable_offset = SLJIT_EXEC_OFFSET(code); + do { buf_ptr = buf->memory; buf_end = buf_ptr + buf->used_size; @@ -476,40 +506,33 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil len = *buf_ptr++; if (len > 0) { /* The code is already generated. 
*/ - SLJIT_MEMMOVE(code_ptr, buf_ptr, len); + SLJIT_MEMCPY(code_ptr, buf_ptr, len); code_ptr += len; buf_ptr += len; } else { - if (*buf_ptr >= 4) { + if (*buf_ptr >= 2) { jump->addr = (sljit_uw)code_ptr; if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) - code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4); - else - code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4); + code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 2, executable_offset); + else { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2, executable_offset); +#else + code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2); +#endif + } jump = jump->next; } else if (*buf_ptr == 0) { - label->addr = (sljit_uw)code_ptr; + label->addr = ((sljit_uw)code_ptr) + executable_offset; label->size = code_ptr - code; label = label->next; } - else if (*buf_ptr == 1) { + else { /* *buf_ptr is 1 */ const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw); const_ = const_->next; } - else { -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - *code_ptr++ = (*buf_ptr == 2) ? 
CALL_i32 : JMP_i32; - buf_ptr++; - *(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw)); - code_ptr += sizeof(sljit_sw); - buf_ptr += sizeof(sljit_sw) - 1; -#else - code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr); - buf_ptr += sizeof(sljit_sw); -#endif - } buf_ptr++; } } while (buf_ptr < buf_end); @@ -523,72 +546,75 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil jump = compiler->jumps; while (jump) { + jump_addr = jump->addr + executable_offset; + if (jump->flags & PATCH_MB) { - SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) <= 127); - *(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))); + SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127); + *(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))); } else if (jump->flags & PATCH_MW) { if (jump->flags & JUMP_LABEL) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw))); + sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_sw)))); #else - SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX); - *(sljit_si*)jump->addr = (sljit_si)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))); + SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX); + sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump_addr + 
sizeof(sljit_s32)))); #endif } else { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw))); + sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_sw)))); #else - SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX); - *(sljit_si*)jump->addr = (sljit_si)(jump->u.target - (jump->addr + sizeof(sljit_si))); + SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX); + sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump_addr + sizeof(sljit_s32)))); #endif } } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) else if (jump->flags & PATCH_MD) - *(sljit_sw*)jump->addr = jump->u.label->addr; + sljit_unaligned_store_sw((void*)jump->addr, jump->u.label->addr); #endif jump = jump->next; } - /* Maybe we waste some space because of short jumps. */ + /* Some space may be wasted because of short jumps. 
*/ SLJIT_ASSERT(code_ptr <= code + compiler->size); compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; compiler->executable_size = code_ptr - code; - return (void*)code; + return (void*)(code + executable_offset); } /* --------------------------------------------------------------------- */ /* Operators */ /* --------------------------------------------------------------------- */ -static sljit_si emit_cum_binary(struct sljit_compiler *compiler, - sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w); +static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, + sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w); -static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler, - sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w); +static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler, + sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w); -static sljit_si emit_mov(struct sljit_compiler *compiler, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw); +static sljit_s32 emit_mov(struct sljit_compiler *compiler, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw); -static SLJIT_INLINE sljit_si emit_save_flags(struct sljit_compiler *compiler) +static SLJIT_INLINE sljit_s32 emit_save_flags(struct sljit_compiler *compiler) { - sljit_ub *inst; + sljit_u8 *inst; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 5); FAIL_IF(!inst); 
INC_SIZE(5); #else - inst = (sljit_ub*)ensure_buf(compiler, 1 + 6); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 6); FAIL_IF(!inst); INC_SIZE(6); *inst++ = REX_W; @@ -596,23 +622,23 @@ static SLJIT_INLINE sljit_si emit_save_flags(struct sljit_compiler *compiler) *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */ *inst++ = 0x64; *inst++ = 0x24; - *inst++ = (sljit_ub)sizeof(sljit_sw); + *inst++ = (sljit_u8)sizeof(sljit_sw); *inst++ = PUSHF; compiler->flags_saved = 1; return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_si emit_restore_flags(struct sljit_compiler *compiler, sljit_si keep_flags) +static SLJIT_INLINE sljit_s32 emit_restore_flags(struct sljit_compiler *compiler, sljit_s32 keep_flags) { - sljit_ub *inst; + sljit_u8 *inst; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 5); FAIL_IF(!inst); INC_SIZE(5); *inst++ = POPF; #else - inst = (sljit_ub*)ensure_buf(compiler, 1 + 6); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 6); FAIL_IF(!inst); INC_SIZE(6); *inst++ = POPF; @@ -621,7 +647,7 @@ static SLJIT_INLINE sljit_si emit_restore_flags(struct sljit_compiler *compiler, *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */ *inst++ = 0x64; *inst++ = 0x24; - *inst++ = (sljit_ub)-(sljit_sb)sizeof(sljit_sw); + *inst++ = (sljit_u8)(-(sljit_s8)sizeof(sljit_sw)); compiler->flags_saved = keep_flags; return SLJIT_SUCCESS; } @@ -638,7 +664,7 @@ static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size) CPU cycles if the stack is large enough. However, you don't know it in advance, so it must always be called. I think this is a bad design in general even if it has some reasons. 
*/ - *(volatile sljit_si*)alloca(local_size) = 0; + *(volatile sljit_s32*)alloca(local_size) = 0; } #endif @@ -649,11 +675,11 @@ static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size) #include "sljitNativeX86_64.c" #endif -static sljit_si emit_mov(struct sljit_compiler *compiler, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +static sljit_s32 emit_mov(struct sljit_compiler *compiler, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { - sljit_ub* inst; + sljit_u8* inst; if (dst == SLJIT_UNUSED) { /* No destination, doesn't need to setup flags. */ @@ -717,11 +743,11 @@ static sljit_si emit_mov(struct sljit_compiler *compiler, #define EMIT_MOV(compiler, dst, dstw, src, srcw) \ FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { - sljit_ub *inst; + sljit_u8 *inst; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - sljit_si size; + sljit_s32 size; #endif CHECK_ERROR(); @@ -729,23 +755,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler switch (GET_OPCODE(op)) { case SLJIT_BREAKPOINT: - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); FAIL_IF(!inst); INC_SIZE(1); *inst = INT3; break; case SLJIT_NOP: - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); FAIL_IF(!inst); INC_SIZE(1); *inst = NOP; break; - case SLJIT_LUMUL: - case SLJIT_LSMUL: - case SLJIT_UDIVMOD: - case SLJIT_SDIVMOD: - case SLJIT_UDIVI: - case SLJIT_SDIVI: + case SLJIT_LMUL_UW: + case SLJIT_LMUL_SW: + case SLJIT_DIVMOD_UW: + case SLJIT_DIVMOD_SW: + case SLJIT_DIV_UW: + case SLJIT_DIV_SW: compiler->flags_saved = 0; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) #ifdef _WIN64 @@ -761,12 +787,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si 
sljit_emit_op0(struct sljit_compiler *compiler && reg_map[TMP_REG1] == 2, invalid_register_assignment_for_div_mul); #endif - compiler->mode32 = op & SLJIT_INT_OP; + compiler->mode32 = op & SLJIT_I32_OP; #endif - SLJIT_COMPILE_ASSERT((SLJIT_UDIVMOD & 0x2) == 0 && SLJIT_UDIVI - 0x2 == SLJIT_UDIVMOD, bad_div_opcode_assignments); + SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments); op = GET_OPCODE(op); - if ((op | 0x2) == SLJIT_UDIVI) { + if ((op | 0x2) == SLJIT_DIV_UW) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0); inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0); @@ -777,24 +803,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler *inst = XOR_r_rm; } - if ((op | 0x2) == SLJIT_SDIVI) { + if ((op | 0x2) == SLJIT_DIV_SW) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0); #endif #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); FAIL_IF(!inst); INC_SIZE(1); *inst = CDQ; #else if (compiler->mode32) { - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); FAIL_IF(!inst); INC_SIZE(1); *inst = CDQ; } else { - inst = (sljit_ub*)ensure_buf(compiler, 1 + 2); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); FAIL_IF(!inst); INC_SIZE(2); *inst++ = REX_W; @@ -804,27 +830,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler } #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - inst = (sljit_ub*)ensure_buf(compiler, 1 + 2); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); FAIL_IF(!inst); INC_SIZE(2); *inst++ = GROUP_F7; - *inst = MOD_REG | ((op >= SLJIT_UDIVMOD) ? 
reg_map[TMP_REG1] : reg_map[SLJIT_R1]); + *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]); #else #ifdef _WIN64 - size = (!compiler->mode32 || op >= SLJIT_UDIVMOD) ? 3 : 2; + size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2; #else size = (!compiler->mode32) ? 3 : 2; #endif - inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); FAIL_IF(!inst); INC_SIZE(size); #ifdef _WIN64 if (!compiler->mode32) - *inst++ = REX_W | ((op >= SLJIT_UDIVMOD) ? REX_B : 0); - else if (op >= SLJIT_UDIVMOD) + *inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0); + else if (op >= SLJIT_DIVMOD_UW) *inst++ = REX_B; *inst++ = GROUP_F7; - *inst = MOD_REG | ((op >= SLJIT_UDIVMOD) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]); + *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]); #else if (!compiler->mode32) *inst++ = REX_W; @@ -833,26 +859,26 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler #endif #endif switch (op) { - case SLJIT_LUMUL: + case SLJIT_LMUL_UW: *inst |= MUL; break; - case SLJIT_LSMUL: + case SLJIT_LMUL_SW: *inst |= IMUL; break; - case SLJIT_UDIVMOD: - case SLJIT_UDIVI: + case SLJIT_DIVMOD_UW: + case SLJIT_DIV_UW: *inst |= DIV; break; - case SLJIT_SDIVMOD: - case SLJIT_SDIVI: + case SLJIT_DIVMOD_SW: + case SLJIT_DIV_SW: *inst |= IDIV; break; } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64) - if (op <= SLJIT_SDIVMOD) + if (op <= SLJIT_DIVMOD_SW) EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0); #else - if (op >= SLJIT_UDIVI) + if (op >= SLJIT_DIV_UW) EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0); #endif break; @@ -863,20 +889,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler #define ENCODE_PREFIX(prefix) \ do { \ - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); \ + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \ FAIL_IF(!inst); \ INC_SIZE(1); \ *inst = 
(prefix); \ } while (0) -static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { - sljit_ub* inst; - sljit_si dst_r; + sljit_u8* inst; + sljit_s32 dst_r; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - sljit_si work_r; + sljit_s32 work_r; #endif #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) @@ -1014,12 +1040,12 @@ static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign, return SLJIT_SUCCESS; } -static sljit_si emit_mov_half(struct sljit_compiler *compiler, sljit_si sign, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { - sljit_ub* inst; - sljit_si dst_r; + sljit_u8* inst; + sljit_s32 dst_r; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) compiler->mode32 = 0; @@ -1065,11 +1091,11 @@ static sljit_si emit_mov_half(struct sljit_compiler *compiler, sljit_si sign, return SLJIT_SUCCESS; } -static sljit_si emit_unary(struct sljit_compiler *compiler, sljit_ub opcode, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { - sljit_ub* inst; + sljit_u8* inst; if (dst == SLJIT_UNUSED) { EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); @@ -1104,11 +1130,11 @@ static sljit_si emit_unary(struct sljit_compiler *compiler, sljit_ub opcode, return SLJIT_SUCCESS; } -static sljit_si emit_not_with_flags(struct sljit_compiler *compiler, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler, + sljit_s32 dst, sljit_sw dstw, + 
sljit_s32 src, sljit_sw srcw) { - sljit_ub* inst; + sljit_u8* inst; if (dst == SLJIT_UNUSED) { EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); @@ -1144,12 +1170,12 @@ static sljit_si emit_not_with_flags(struct sljit_compiler *compiler, return SLJIT_SUCCESS; } -static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { - sljit_ub* inst; - sljit_si dst_r; + sljit_u8* inst; + sljit_s32 dst_r; SLJIT_UNUSED_ARG(op_flags); if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { @@ -1162,7 +1188,7 @@ static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags, #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0); #else - inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, TMP_REG1, 0); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, TMP_REG1, 0); #endif FAIL_IF(!inst); *inst |= SHR; @@ -1197,8 +1223,8 @@ static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags, #else dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; compiler->mode32 = 0; - EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 64 + 63 : 32 + 31); - compiler->mode32 = op_flags & SLJIT_INT_OP; + EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 
64 + 63 : 32 + 31); + compiler->mode32 = op_flags & SLJIT_I32_OP; #endif if (cpu_has_cmov == -1) @@ -1211,7 +1237,7 @@ static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags, *inst = CMOVNE_r_rm; } else { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); FAIL_IF(!inst); INC_SIZE(4); @@ -1220,7 +1246,7 @@ static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags, *inst++ = MOV_r_rm; *inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1]; #else - inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 5); FAIL_IF(!inst); INC_SIZE(5); @@ -1235,7 +1261,7 @@ static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags, #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0); #else - inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, dst_r, 0); + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 
63 : 31, dst_r, 0); #endif FAIL_IF(!inst); *(inst + 1) |= XOR; @@ -1253,16 +1279,16 @@ static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags, return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { - sljit_ub* inst; - sljit_si update = 0; - sljit_si op_flags = GET_ALL_FLAGS(op); + sljit_u8* inst; + sljit_s32 update = 0; + sljit_s32 op_flags = GET_ALL_FLAGS(op); #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - sljit_si dst_is_ereg = 0; - sljit_si src_is_ereg = 0; + sljit_s32 dst_is_ereg = 0; + sljit_s32 src_is_ereg = 0; #else # define src_is_ereg 0 #endif @@ -1275,7 +1301,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1); CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - compiler->mode32 = op_flags & SLJIT_INT_OP; + compiler->mode32 = op_flags & SLJIT_I32_OP; #endif op = GET_OPCODE(op); @@ -1284,20 +1310,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler compiler->mode32 = 0; #endif - if (op_flags & SLJIT_INT_OP) { + if (op_flags & SLJIT_I32_OP) { if (FAST_IS_REG(src) && src == dst) { if (!TYPE_CAST_NEEDED(op)) return SLJIT_SUCCESS; } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (op == SLJIT_MOV_SI && (src & SLJIT_MEM)) - op = SLJIT_MOV_UI; - if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM)) - op = SLJIT_MOVU_UI; - if (op == SLJIT_MOV_UI && (src & SLJIT_IMM)) - op = SLJIT_MOV_SI; - if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM)) - op = SLJIT_MOVU_SI; + if (op == SLJIT_MOV_S32 && (src & SLJIT_MEM)) + op = SLJIT_MOV_U32; + if (op == SLJIT_MOVU_S32 && (src & SLJIT_MEM)) + 
op = SLJIT_MOVU_U32; + if (op == SLJIT_MOV_U32 && (src & SLJIT_IMM)) + op = SLJIT_MOV_S32; + if (op == SLJIT_MOVU_U32 && (src & SLJIT_IMM)) + op = SLJIT_MOVU_S32; #endif } @@ -1309,24 +1335,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler if (src & SLJIT_IMM) { switch (op) { - case SLJIT_MOV_UB: - srcw = (sljit_ub)srcw; + case SLJIT_MOV_U8: + srcw = (sljit_u8)srcw; break; - case SLJIT_MOV_SB: - srcw = (sljit_sb)srcw; + case SLJIT_MOV_S8: + srcw = (sljit_s8)srcw; break; - case SLJIT_MOV_UH: - srcw = (sljit_uh)srcw; + case SLJIT_MOV_U16: + srcw = (sljit_u16)srcw; break; - case SLJIT_MOV_SH: - srcw = (sljit_sh)srcw; + case SLJIT_MOV_S16: + srcw = (sljit_s16)srcw; break; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - case SLJIT_MOV_UI: - srcw = (sljit_ui)srcw; + case SLJIT_MOV_U32: + srcw = (sljit_u32)srcw; break; - case SLJIT_MOV_SI: - srcw = (sljit_si)srcw; + case SLJIT_MOV_S32: + srcw = (sljit_s32)srcw; break; #endif } @@ -1345,7 +1371,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler } #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) { + if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) { SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP)); dst = TMP_REG1; } @@ -1355,28 +1381,28 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler case SLJIT_MOV: case SLJIT_MOV_P: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - case SLJIT_MOV_UI: - case SLJIT_MOV_SI: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: #endif FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); break; - case SLJIT_MOV_UB: + case SLJIT_MOV_U8: FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw)); break; - case SLJIT_MOV_SB: + case SLJIT_MOV_S8: 
FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw)); break; - case SLJIT_MOV_UH: + case SLJIT_MOV_U16: FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw)); break; - case SLJIT_MOV_SH: + case SLJIT_MOV_S16: FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw)); break; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - case SLJIT_MOV_UI: + case SLJIT_MOV_U32: FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw)); break; - case SLJIT_MOV_SI: + case SLJIT_MOV_S32: FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw)); break; #endif @@ -1452,13 +1478,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler #endif -static sljit_si emit_cum_binary(struct sljit_compiler *compiler, - sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, + sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { - sljit_ub* inst; + sljit_u8* inst; if (dst == SLJIT_UNUSED) { EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); @@ -1568,13 +1594,13 @@ static sljit_si emit_cum_binary(struct sljit_compiler *compiler, return SLJIT_SUCCESS; } -static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler, - sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler, + sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { - sljit_ub* inst; + sljit_u8* inst; if (dst == SLJIT_UNUSED) { EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); @@ -1650,13 +1676,13 @@ static 
sljit_si emit_non_cum_binary(struct sljit_compiler *compiler, return SLJIT_SUCCESS; } -static sljit_si emit_mul(struct sljit_compiler *compiler, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +static sljit_s32 emit_mul(struct sljit_compiler *compiler, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { - sljit_ub* inst; - sljit_si dst_r; + sljit_u8* inst; + sljit_s32 dst_r; dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; @@ -1684,30 +1710,30 @@ static sljit_si emit_mul(struct sljit_compiler *compiler, inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); FAIL_IF(!inst); *inst = IMUL_r_rm_i8; - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); FAIL_IF(!inst); INC_SIZE(1); - *inst = (sljit_sb)src1w; + *inst = (sljit_s8)src1w; } #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) else { inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); FAIL_IF(!inst); *inst = IMUL_r_rm_i32; - inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); FAIL_IF(!inst); INC_SIZE(4); - *(sljit_sw*)inst = src1w; + sljit_unaligned_store_sw(inst, src1w); } #else else if (IS_HALFWORD(src1w)) { inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); FAIL_IF(!inst); *inst = IMUL_r_rm_i32; - inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); FAIL_IF(!inst); INC_SIZE(4); - *(sljit_si*)inst = (sljit_si)src1w; + sljit_unaligned_store_s32(inst, (sljit_s32)src1w); } else { EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w); @@ -1727,30 +1753,30 @@ static sljit_si emit_mul(struct sljit_compiler *compiler, inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); FAIL_IF(!inst); *inst = IMUL_r_rm_i8; - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); FAIL_IF(!inst); INC_SIZE(1); - *inst = 
(sljit_sb)src2w; + *inst = (sljit_s8)src2w; } #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) else { inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); FAIL_IF(!inst); *inst = IMUL_r_rm_i32; - inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); FAIL_IF(!inst); INC_SIZE(4); - *(sljit_sw*)inst = src2w; + sljit_unaligned_store_sw(inst, src2w); } #else else if (IS_HALFWORD(src2w)) { inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); FAIL_IF(!inst); *inst = IMUL_r_rm_i32; - inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); FAIL_IF(!inst); INC_SIZE(4); - *(sljit_si*)inst = (sljit_si)src2w; + sljit_unaligned_store_s32(inst, (sljit_s32)src2w); } else { EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w); @@ -1780,13 +1806,13 @@ static sljit_si emit_mul(struct sljit_compiler *compiler, return SLJIT_SUCCESS; } -static sljit_si emit_lea_binary(struct sljit_compiler *compiler, sljit_si keep_flags, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler, sljit_s32 keep_flags, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { - sljit_ub* inst; - sljit_si dst_r, done = 0; + sljit_u8* inst; + sljit_s32 dst_r, done = 0; /* These cases better be left to handled by normal way. 
*/ if (!keep_flags) { @@ -1807,7 +1833,7 @@ static sljit_si emit_lea_binary(struct sljit_compiler *compiler, sljit_si keep_f } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) { - inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_si)src2w); + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w); #else if (src2 & SLJIT_IMM) { inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w); @@ -1820,7 +1846,7 @@ static sljit_si emit_lea_binary(struct sljit_compiler *compiler, sljit_si keep_f else if (FAST_IS_REG(src2)) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) { - inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_si)src1w); + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w); #else if (src1 & SLJIT_IMM) { inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w); @@ -1839,11 +1865,11 @@ static sljit_si emit_lea_binary(struct sljit_compiler *compiler, sljit_si keep_f return SLJIT_ERR_UNSUPPORTED; } -static sljit_si emit_cmp_binary(struct sljit_compiler *compiler, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { - sljit_ub* inst; + sljit_u8* inst; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { @@ -1890,11 +1916,11 @@ static sljit_si emit_cmp_binary(struct sljit_compiler *compiler, return SLJIT_SUCCESS; } -static sljit_si emit_test_binary(struct sljit_compiler *compiler, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +static sljit_s32 emit_test_binary(struct 
sljit_compiler *compiler, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { - sljit_ub* inst; + sljit_u8* inst; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { @@ -2000,13 +2026,13 @@ static sljit_si emit_test_binary(struct sljit_compiler *compiler, return SLJIT_SUCCESS; } -static sljit_si emit_shift(struct sljit_compiler *compiler, - sljit_ub mode, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +static sljit_s32 emit_shift(struct sljit_compiler *compiler, + sljit_u8 mode, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { - sljit_ub* inst; + sljit_u8* inst; if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) { if (dst == src1 && dstw == src1w) { @@ -2089,11 +2115,11 @@ static sljit_si emit_shift(struct sljit_compiler *compiler, return SLJIT_SUCCESS; } -static sljit_si emit_shift_with_flags(struct sljit_compiler *compiler, - sljit_ub mode, sljit_si set_flags, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler, + sljit_u8 mode, sljit_s32 set_flags, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { /* The CPU does not set flags if the shift count is 0. 
*/ if (src2 & SLJIT_IMM) { @@ -2124,10 +2150,10 @@ static sljit_si emit_shift_with_flags(struct sljit_compiler *compiler, return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { CHECK_ERROR(); CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); @@ -2139,7 +2165,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler CHECK_EXTRA_REGS(src1, src1w, (void)0); CHECK_EXTRA_REGS(src2, src2w, (void)0); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - compiler->mode32 = op & SLJIT_INT_OP; + compiler->mode32 = op & SLJIT_I32_OP; #endif if (GET_OPCODE(op) >= SLJIT_MUL) { @@ -2219,7 +2245,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) { CHECK_REG_INDEX(check_sljit_get_register_index(reg)); #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) @@ -2229,24 +2255,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) return reg_map[reg]; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) { CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); return reg; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_si size) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_s32 size) { - 
sljit_ub *inst; + sljit_u8 *inst; CHECK_ERROR(); CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); - inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); FAIL_IF(!inst); INC_SIZE(size); - SLJIT_MEMMOVE(inst, instruction, size); + SLJIT_MEMCPY(inst, instruction, size); return SLJIT_SUCCESS; } @@ -2255,12 +2281,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *co /* --------------------------------------------------------------------- */ /* Alignment + 2 * 16 bytes. */ -static sljit_si sse2_data[3 + (4 + 4) * 2]; -static sljit_si *sse2_buffer; +static sljit_s32 sse2_data[3 + (4 + 4) * 2]; +static sljit_s32 *sse2_buffer; static void init_compiler(void) { - sse2_buffer = (sljit_si*)(((sljit_uw)sse2_data + 15) & ~0xf); + sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf); /* Single precision constants. */ sse2_buffer[0] = 0x80000000; sse2_buffer[4] = 0x7fffffff; @@ -2271,7 +2297,7 @@ static void init_compiler(void) sse2_buffer[13] = 0x7fffffff; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) { #ifdef SLJIT_IS_FPU_AVAILABLE return SLJIT_IS_FPU_AVAILABLE; @@ -2284,10 +2310,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) #endif /* SLJIT_DETECT_SSE2 */ } -static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode, - sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w) +static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode, + sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w) { - sljit_ub *inst; + sljit_u8 *inst; inst = emit_x86_instruction(compiler, 2 | (single ? 
EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w); FAIL_IF(!inst); @@ -2296,10 +2322,10 @@ static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode, return SLJIT_SUCCESS; } -static sljit_si emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode, - sljit_si pref66, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w) +static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode, + sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w) { - sljit_ub *inst; + sljit_u8 *inst; inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w); FAIL_IF(!inst); @@ -2308,31 +2334,31 @@ static sljit_si emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_si emit_sse2_load(struct sljit_compiler *compiler, - sljit_si single, sljit_si dst, sljit_si src, sljit_sw srcw) +static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler, + sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw) { return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw); } -static SLJIT_INLINE sljit_si emit_sse2_store(struct sljit_compiler *compiler, - sljit_si single, sljit_si dst, sljit_sw dstw, sljit_si src) +static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler, + sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src) { return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw); } -static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { - sljit_si dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; - sljit_ub *inst; + sljit_s32 dst_r = SLOW_IS_REG(dst) ? 
dst : TMP_REG1; + sljit_u8 *inst; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (GET_OPCODE(op) == SLJIT_CONVW_FROMD) + if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64) compiler->mode32 = 0; #endif - inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw); + inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw); FAIL_IF(!inst); *inst++ = GROUP_0F; *inst = CVTTSD2SI_r_xm; @@ -2342,29 +2368,29 @@ static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler * return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { - sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG; - sljit_ub *inst; + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG; + sljit_u8 *inst; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (GET_OPCODE(op) == SLJIT_CONVD_FROMW) + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW) compiler->mode32 = 0; #endif if (src & SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) - srcw = (sljit_si)srcw; + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) + srcw = (sljit_s32)srcw; #endif EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); src = TMP_REG1; srcw = 0; } - inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw); + inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? 
EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw); FAIL_IF(!inst); *inst++ = GROUP_0F; *inst = CVTSI2SD_x_rm; @@ -2373,27 +2399,27 @@ static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler * compiler->mode32 = 1; #endif if (dst_r == TMP_FREG) - return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG); + return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { compiler->flags_saved = 0; if (!FAST_IS_REG(src1)) { - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w)); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w)); src1 = TMP_FREG; } - return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), src1, src2, src2w); + return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_F32_OP), src1, src2, src2w); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) { - sljit_si dst_r; + sljit_s32 dst_r; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) compiler->mode32 = 1; @@ -2402,65 +2428,65 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile CHECK_ERROR(); SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); - if (GET_OPCODE(op) == SLJIT_DMOV) { + if (GET_OPCODE(op) == SLJIT_MOV_F64) { if (FAST_IS_REG(dst)) - return emit_sse2_load(compiler, op & 
SLJIT_SINGLE_OP, dst, src, srcw); + return emit_sse2_load(compiler, op & SLJIT_F32_OP, dst, src, srcw); if (FAST_IS_REG(src)) - return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, src); - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src, srcw)); - return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG); + return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, src); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src, srcw)); + return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); } - if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) { + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) { dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG; if (FAST_IS_REG(src)) { /* We overwrite the high bits of source. From SLJIT point of view, this is not an issue. Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */ - FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_SINGLE_OP, src, src, 0)); + FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_F32_OP, src, src, 0)); } else { - FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_SINGLE_OP), TMP_FREG, src, srcw)); + FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_F32_OP), TMP_FREG, src, srcw)); src = TMP_FREG; } - FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_SINGLE_OP, dst_r, src, 0)); + FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_F32_OP, dst_r, src, 0)); if (dst_r == TMP_FREG) - return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG); + return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); return SLJIT_SUCCESS; } if (SLOW_IS_REG(dst)) { dst_r = dst; if (dst != src) - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw)); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw)); } else { dst_r = TMP_FREG; - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw)); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, 
dst_r, src, srcw)); } switch (GET_OPCODE(op)) { - case SLJIT_DNEG: - FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer : sse2_buffer + 8))); + case SLJIT_NEG_F64: + FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer : sse2_buffer + 8))); break; - case SLJIT_DABS: - FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer + 4 : sse2_buffer + 12))); + case SLJIT_ABS_F64: + FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer + 4 : sse2_buffer + 12))); break; } if (dst_r == TMP_FREG) - return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG); + return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) { - sljit_si dst_r; + sljit_s32 dst_r; CHECK_ERROR(); CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); @@ -2476,43 +2502,43 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile dst_r = dst; if (dst == src1) ; /* Do nothing here. */ - else if (dst == src2 && (op == SLJIT_DADD || op == SLJIT_DMUL)) { + else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) { /* Swap arguments. 
*/ src2 = src1; src2w = src1w; } else if (dst != src2) - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src1, src1w)); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src1, src1w)); else { dst_r = TMP_FREG; - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w)); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w)); } } else { dst_r = TMP_FREG; - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w)); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w)); } switch (GET_OPCODE(op)) { - case SLJIT_DADD: - FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); + case SLJIT_ADD_F64: + FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w)); break; - case SLJIT_DSUB: - FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); + case SLJIT_SUB_F64: + FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w)); break; - case SLJIT_DMUL: - FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); + case SLJIT_MUL_F64: + FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w)); break; - case SLJIT_DDIV: - FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); + case SLJIT_DIV_F64: + FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w)); break; } if (dst_r == TMP_FREG) - return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG); + return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); return SLJIT_SUCCESS; } @@ -2522,7 +2548,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) { - sljit_ub *inst; + sljit_u8 *inst; struct sljit_label *label; CHECK_ERROR_PTR(); @@ -2540,7 +2566,7 @@ 
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi PTR_FAIL_IF(!label); set_label(label, compiler); - inst = (sljit_ub*)ensure_buf(compiler, 2); + inst = (sljit_u8*)ensure_buf(compiler, 2); PTR_FAIL_IF(!inst); *inst++ = 0; @@ -2549,9 +2575,9 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi return label; } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) { - sljit_ub *inst; + sljit_u8 *inst; struct sljit_jump *jump; CHECK_ERROR_PTR(); @@ -2578,17 +2604,17 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3); #endif - inst = (sljit_ub*)ensure_buf(compiler, 2); + inst = (sljit_u8*)ensure_buf(compiler, 2); PTR_FAIL_IF_NULL(inst); *inst++ = 0; - *inst++ = type + 4; + *inst++ = type + 2; return jump; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) { - sljit_ub *inst; + sljit_u8 *inst; struct sljit_jump *jump; CHECK_ERROR(); @@ -2636,11 +2662,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil compiler->size += 10 + 3; #endif - inst = (sljit_ub*)ensure_buf(compiler, 2); + inst = (sljit_u8*)ensure_buf(compiler, 2); FAIL_IF_NULL(inst); *inst++ = 0; - *inst++ = type + 4; + *inst++ = type + 2; } else { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) @@ -2655,18 +2681,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, 
sljit_sw dstw, - sljit_si src, sljit_sw srcw, - sljit_si type) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw, + sljit_s32 type) { - sljit_ub *inst; - sljit_ub cond_set = 0; + sljit_u8 *inst; + sljit_u8 cond_set = 0; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - sljit_si reg; + sljit_s32 reg; #else /* CHECK_EXTRA_REGS migh overwrite these values. */ - sljit_si dst_save = dst; + sljit_s32 dst_save = dst; sljit_sw dstw_save = dstw; #endif @@ -2688,7 +2714,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) { - inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 3); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3); FAIL_IF(!inst); INC_SIZE(4 + 3); /* Set low register to conditional flag. */ @@ -2704,7 +2730,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1; - inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4); FAIL_IF(!inst); INC_SIZE(4 + 4); /* Set low register to conditional flag. */ @@ -2733,7 +2759,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) { if (reg_map[dst] <= 4) { /* Low byte is accessible. */ - inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3); FAIL_IF(!inst); INC_SIZE(3 + 3); /* Set low byte to conditional flag. */ @@ -2756,7 +2782,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com /* a xor reg, reg operation would overwrite the flags. 
*/ EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0); - inst = (sljit_ub*)ensure_buf(compiler, 1 + 3); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 3); FAIL_IF(!inst); INC_SIZE(3); @@ -2767,7 +2793,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com return SLJIT_SUCCESS; } - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1); FAIL_IF(!inst); INC_SIZE(1 + 3 + 3 + 1); *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; @@ -2786,7 +2812,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) { SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] == 0, scratch_reg1_must_be_eax); if (dst != SLJIT_R0) { - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1); FAIL_IF(!inst); INC_SIZE(1 + 3 + 2 + 1); /* Set low register to conditional flag. */ @@ -2799,7 +2825,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; } else { - inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2); FAIL_IF(!inst); INC_SIZE(2 + 3 + 2 + 2); /* Set low register to conditional flag. */ @@ -2817,7 +2843,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com } /* Set TMP_REG1 to the bit. 
*/ - inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1); FAIL_IF(!inst); INC_SIZE(1 + 3 + 3 + 1); *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; @@ -2843,7 +2869,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com #endif /* SLJIT_CONFIG_X86_64 */ } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) { CHECK_ERROR(); CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset)); @@ -2874,12 +2900,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *co return emit_mov(compiler, dst, dstw, SLJIT_SP, 0); } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) { - sljit_ub *inst; + sljit_u8 *inst; struct sljit_const *const_; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - sljit_si reg; + sljit_s32 reg; #endif CHECK_ERROR_PTR(); @@ -2906,7 +2932,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi return NULL; #endif - inst = (sljit_ub*)ensure_buf(compiler, 2); + inst = (sljit_u8*)ensure_buf(compiler, 2); PTR_FAIL_IF(!inst); *inst++ = 0; @@ -2921,16 +2947,84 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi return const_; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) { + SLJIT_UNUSED_ARG(executable_offset); #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - 
*(sljit_sw*)addr = new_addr - (addr + 4); + sljit_unaligned_store_sw((void*)addr, new_target - (addr + 4) - (sljit_uw)executable_offset); #else - *(sljit_uw*)addr = new_addr; + sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_target); #endif } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - *(sljit_sw*)addr = new_constant; + SLJIT_UNUSED_ARG(executable_offset); + sljit_unaligned_store_sw((void*)addr, new_constant); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_sse2_available(void) +{ +#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) + if (cpu_has_sse2 == -1) + get_cpu_features(); + return cpu_has_sse2; +#else + return 1; +#endif +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_cmov_available(void) +{ + if (cpu_has_cmov == -1) + get_cpu_features(); + return cpu_has_cmov; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_emit_cmov(struct sljit_compiler *compiler, + sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + + CHECK_ERROR(); +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_x86_is_cmov_available()); + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP))); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_I32_OP)); + FUNCTION_CHECK_SRC(src, srcw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " x86_cmov%s %s%s, ", + !(dst_reg & SLJIT_I32_OP) ? 
"" : ".i", + jump_names[type & 0xff], JUMP_POSTFIX(type)); + sljit_verbose_reg(compiler, dst_reg & ~SLJIT_I32_OP); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + + ADJUST_LOCAL_OFFSET(src, srcw); + CHECK_EXTRA_REGS(src, srcw, (void)0); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = dst_reg & SLJIT_I32_OP; +#endif + dst_reg &= ~SLJIT_I32_OP; + + if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw); + src = TMP_REG1; + srcw = 0; + } + + inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = get_jump_code(type & 0xff) - 0x40; + return SLJIT_SUCCESS; } diff --git a/pcre2/src/sljit/sljitUtils.c b/pcre2/src/sljit/sljitUtils.c index 5294b5f3f..ec5c32119 100644 --- a/pcre2/src/sljit/sljitUtils.c +++ b/pcre2/src/sljit/sljitUtils.c @@ -163,11 +163,11 @@ SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void) #include /* Some old systems does not have MAP_ANON. */ -static sljit_si dev_zero = -1; +static sljit_s32 dev_zero = -1; #if (defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED) -static SLJIT_INLINE sljit_si open_dev_zero(void) +static SLJIT_INLINE sljit_s32 open_dev_zero(void) { dev_zero = open("/dev/zero", O_RDWR); return dev_zero < 0; @@ -179,10 +179,13 @@ static SLJIT_INLINE sljit_si open_dev_zero(void) static pthread_mutex_t dev_zero_mutex = PTHREAD_MUTEX_INITIALIZER; -static SLJIT_INLINE sljit_si open_dev_zero(void) +static SLJIT_INLINE sljit_s32 open_dev_zero(void) { pthread_mutex_lock(&dev_zero_mutex); - dev_zero = open("/dev/zero", O_RDWR); + /* The dev_zero might be initialized by another thread during the waiting. 
*/ + if (dev_zero < 0) { + dev_zero = open("/dev/zero", O_RDWR); + } pthread_mutex_unlock(&dev_zero_mutex); return dev_zero < 0; } diff --git a/pcre2/testdata/grepinput b/pcre2/testdata/grepinput index 0f00edd93..b01643dbc 100644 --- a/pcre2/testdata/grepinput +++ b/pcre2/testdata/grepinput @@ -604,6 +604,19 @@ AB.VE the turtle 010203040506 +match 1: + a +match 2: + b +match 3: + c +match 4: + d +match 5: + e +Rhubarb +Custard Tart + PUT NEW DATA ABOVE THIS LINE. ============================= diff --git a/pcre2/testdata/grepoutput b/pcre2/testdata/grepoutput index 883bad7f7..87fe428dc 100644 --- a/pcre2/testdata/grepoutput +++ b/pcre2/testdata/grepoutput @@ -10,7 +10,7 @@ RC=0 7:PATTERN at the start of a line. 8:In the middle of a line, PATTERN appears. 10:This pattern is in lower case. -610:Check up on PATTERN near the end. +623:Check up on PATTERN near the end. RC=0 ---------------------------- Test 4 ------------------------------ 4 @@ -19,7 +19,7 @@ RC=0 ./testdata/grepinput:7:PATTERN at the start of a line. ./testdata/grepinput:8:In the middle of a line, PATTERN appears. ./testdata/grepinput:10:This pattern is in lower case. -./testdata/grepinput:610:Check up on PATTERN near the end. +./testdata/grepinput:623:Check up on PATTERN near the end. ./testdata/grepinputx:3:Here is the pattern again. ./testdata/grepinputx:5:Pattern ./testdata/grepinputx:42:This line contains pattern not on a line by itself. @@ -28,7 +28,7 @@ RC=0 7:PATTERN at the start of a line. 8:In the middle of a line, PATTERN appears. 10:This pattern is in lower case. -610:Check up on PATTERN near the end. +623:Check up on PATTERN near the end. 3:Here is the pattern again. 5:Pattern 42:This line contains pattern not on a line by itself. @@ -324,10 +324,10 @@ RC=0 ./testdata/grepinput-9- ./testdata/grepinput:10:This pattern is in lower case. -- -./testdata/grepinput-607-PUT NEW DATA ABOVE THIS LINE. 
-./testdata/grepinput-608-============================= -./testdata/grepinput-609- -./testdata/grepinput:610:Check up on PATTERN near the end. +./testdata/grepinput-620-PUT NEW DATA ABOVE THIS LINE. +./testdata/grepinput-621-============================= +./testdata/grepinput-622- +./testdata/grepinput:623:Check up on PATTERN near the end. -- ./testdata/grepinputx-1-This is a second file of input for the pcregrep tests. ./testdata/grepinputx-2- @@ -349,8 +349,8 @@ RC=0 ./testdata/grepinput-12-Here follows a whole lot of stuff that makes the file over 24K long. ./testdata/grepinput-13- -- -./testdata/grepinput:610:Check up on PATTERN near the end. -./testdata/grepinput-611-This is the last line of this file. +./testdata/grepinput:623:Check up on PATTERN near the end. +./testdata/grepinput-624-This is the last line of this file. -- ./testdata/grepinputx:3:Here is the pattern again. ./testdata/grepinputx-4- @@ -456,8 +456,8 @@ over the lazy dog. This time it jumps and jumps and jumps. RC=0 ---------------------------- Test 52 ------------------------------ -fox jumps -This time it jumps and jumps and jumps. +fox jumps +This time it jumps and jumps and jumps. RC=0 ---------------------------- Test 53 ------------------------------ 36972,6 @@ -474,9 +474,9 @@ RC=0 597:32,4 RC=0 ---------------------------- Test 55 ----------------------------- -Here is the pattern again. -That time it was on a line by itself. -This line contains pattern not on a line by itself. +Here is the pattern again. +That time it was on a line by itself. +This line contains pattern not on a line by itself. 
RC=0 ---------------------------- Test 56 ----------------------------- ./testdata/grepinput:456 @@ -588,56 +588,57 @@ RC=0 ---------------------------- Test 70 ----------------------------- triple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt -triple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt +triple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt -triple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt +triple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt -triple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt +triple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt -RC=0 +RC=0 ---------------------------- Test 71 ----------------------------- 01 RC=0 ---------------------------- Test 72 ----------------------------- -010203040506 +010203040506 RC=0 ---------------------------- Test 73 ----------------------------- -01 +01 RC=0 ---------------------------- Test 74 ----------------------------- 01 02 RC=0 ---------------------------- Test 75 ----------------------------- -010203040506 +010203040506 RC=0 ---------------------------- Test 76 ----------------------------- -01 -02 +01 +02 RC=0 ---------------------------- Test 77 ----------------------------- 01 03 RC=0 ---------------------------- Test 78 ----------------------------- -010203040506 +010203040506 RC=0 ---------------------------- Test 79 ----------------------------- -01 -03 +01 +03 RC=0 ---------------------------- Test 80 ----------------------------- 01 RC=0 ---------------------------- Test 81 ----------------------------- -010203040506 +010203040506 RC=0 ---------------------------- Test 82 ----------------------------- -01 +01 RC=0 ---------------------------- Test 83 ----------------------------- pcre2grep: line 4 of file ./testdata/grepinput3 is too long for the internal buffer -pcre2grep: check the --buffer-size option +pcre2grep: the maximum buffer size is 100 +pcre2grep: use the --max-buffer-size option to change it RC=2 ---------------------------- Test 84 ----------------------------- 
testdata/grepinputv:fox jumps @@ -701,9 +702,9 @@ RC=0 ./testdata/grepinput:zerothe. RC=0 ---------------------------- Test 101 ------------------------------ -./testdata/grepinput:.|zero|the|. -./testdata/grepinput:zero|a -./testdata/grepinput:.|zero|the|. +./testdata/grepinput:.|zero|the|. +./testdata/grepinput:zero|a +./testdata/grepinput:.|zero|the|. RC=0 ---------------------------- Test 102 ----------------------------- 2: @@ -724,21 +725,21 @@ RC=0 14: RC=0 ---------------------------- Test 105 ----------------------------- -triple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt - -triple: t2_txt s1_tag s_txt p_tag p_txt o_tag -Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. - -triple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt - -triple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt - -triple: t5_txt s1_tag s_txt p_tag p_txt o_tag -o_txt - -triple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt - -triple: t7_txt s1_tag s_txt p_tag p_txt o_tag o_txt +triple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt + +triple: t2_txt s1_tag s_txt p_tag p_txt o_tag +Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. 
+ +triple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt + +triple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt + +triple: t5_txt s1_tag s_txt p_tag p_txt o_tag +o_txt + +triple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt + +triple: t7_txt s1_tag s_txt p_tag p_txt o_tag o_txt RC=0 ---------------------------- Test 106 ----------------------------- a @@ -751,3 +752,80 @@ RC=0 2:3,1 2:4,1 RC=0 +---------------------------- Test 108 ------------------------------ +RC=0 +---------------------------- Test 109 ----------------------------- +RC=0 +---------------------------- Test 110 ----------------------------- +match 1: + a +/1/a +match 2: + b +/2/b +match 3: + c +/3/c +match 4: + d +/4/d +match 5: + e +/5/e +RC=0 +---------------------------- Test 111 ----------------------------- +607:0,12 +609:0,12 +611:0,12 +613:0,12 +615:0,12 +RC=0 +---------------------------- Test 112 ----------------------------- +37168,12 +37180,12 +37192,12 +37204,12 +37216,12 +RC=0 +---------------------------- Test 113 ----------------------------- +476 +RC=0 +---------------------------- Test 114 ----------------------------- +testdata/grepinput:469 +testdata/grepinput3:0 +testdata/grepinput8:0 +testdata/grepinputv:1 +testdata/grepinputx:6 +TOTAL:476 +RC=0 +---------------------------- Test 115 ----------------------------- +testdata/grepinput:469 +testdata/grepinputv:1 +testdata/grepinputx:6 +TOTAL:476 +RC=0 +---------------------------- Test 116 ----------------------------- +476 +RC=0 +---------------------------- Test 117 ----------------------------- +469 +0 +0 +1 +6 +476 +RC=0 +---------------------------- Test 118 ----------------------------- +testdata/grepinput3 +testdata/grepinput8 +RC=0 +---------------------------- Test 119 ----------------------------- +123 +456 +789 +--- +abc +def +xyz +--- +RC=0 diff --git a/pcre2/testdata/grepoutputC b/pcre2/testdata/grepoutputC new file mode 100644 index 000000000..0116645d6 --- /dev/null +++ b/pcre2/testdata/grepoutputC @@ -0,0 +1,8 @@ 
+Arg1: [T] [he ] [ ] Arg2: |T| () () (0) +Arg1: [T] [his] [s] Arg2: |T| () () (0) +The quick brown +This time it jumps and jumps and jumps. +Arg1: [qu] [qu] +Arg1: [ t] [ t] +The quick brown +This time it jumps and jumps and jumps. diff --git a/pcre2/testdata/testinput1 b/pcre2/testdata/testinput1 index 884faf1c7..2de772844 100644 --- a/pcre2/testdata/testinput1 +++ b/pcre2/testdata/testinput1 @@ -3,12 +3,14 @@ # 32-bit PCRE libraries, and also using the perltest.pl script. #forbid_utf +#newline_default lf any anycrlf #perltest /the quick brown fox/ the quick brown fox - The quick brown FOX What do you know about the quick brown fox? +\= Expect no match + The quick brown FOX What do you know about THE QUICK BROWN FOX? /The quick brown fox/i @@ -51,7 +53,7 @@ >>>aaabxyzpqrrrabbxyyyypqAzz >aaaabxyzpqrrrabbxyyyypqAzz >>>>abcxyzpqrrrabbxyyyypqAzz - *** Failers +\= Expect no match abxyzpqrrabbxyyyypqAzz abxyzpqrrrrabbxyyyypqAzz abxyzpqrrrabxyyyypqAzz @@ -62,7 +64,7 @@ /^(abc){1,2}zz/ abczz abcabczz - *** Failers +\= Expect no match zz abcabcabczz >>abczz @@ -76,7 +78,7 @@ aac abbbbbbbbbbbc bbbbbbbbbbbac - *** Failers +\= Expect no match aaac abbbbbbbbbbbac @@ -89,7 +91,7 @@ aac abbbbbbbbbbbc bbbbbbbbbbbac - *** Failers +\= Expect no match aaac abbbbbbbbbbbac @@ -100,7 +102,7 @@ babc bbabc bababc - *** Failers +\= Expect no match bababbc babababc @@ -108,7 +110,7 @@ babc bbabc bababc - *** Failers +\= Expect no match bababbc babababc @@ -122,7 +124,7 @@ cthing dthing ething - *** Failers +\= Expect no match fthing [thing \\thing @@ -132,7 +134,7 @@ cthing dthing ething - *** Failers +\= Expect no match athing fthing @@ -140,7 +142,7 @@ fthing [thing \\thing - *** Failers +\= Expect no match athing bthing ]thing @@ -151,7 +153,7 @@ /^[^]cde]/ athing fthing - *** Failers +\= Expect no match ]thing cthing dthing @@ -176,7 +178,7 @@ 9 10 100 - *** Failers +\= Expect no match abc /^.*nter/ @@ -187,28 +189,28 @@ /^xxx[0-9]+$/ xxx0 xxx1234 - *** Failers +\= Expect no match xxx 
/^.+[0-9][0-9][0-9]$/ x123 + x1234 xx123 123456 - *** Failers +\= Expect no match 123 - x1234 /^.+?[0-9][0-9][0-9]$/ x123 + x1234 xx123 123456 - *** Failers +\= Expect no match 123 - x1234 /^([^!]+)!(.+)=apquxz\.ixr\.zzz\.ac\.uk$/ abc!pqr=apquxz.ixr.zzz.ac.uk - *** Failers +\= Expect no match !pqr=apquxz.ixr.zzz.ac.uk abc!=apquxz.ixr.zzz.ac.uk abc!pqr=apquxz:ixr.zzz.ac.uk @@ -216,7 +218,8 @@ /:/ Well, we need a colon: somewhere - *** Fail if we don't +\= Expect no match + Fail without a colon /([\da-f:]+)$/i 0abc @@ -227,7 +230,7 @@ 5f03:12C0::932e fed def Any old stuff - *** Failers +\= Expect no match 0zzz gzzz fed\x20 @@ -236,7 +239,7 @@ /^.*\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/ .1.2.3 A.12.123.0 - *** Failers +\= Expect no match .1.2.3333 1.2.3 1234.2.3 @@ -244,7 +247,7 @@ /^(\d+)\s+IN\s+SOA\s+(\S+)\s+(\S+)\s*\(\s*$/ 1 IN SOA non-sp1 non-sp2( 1 IN SOA non-sp1 non-sp2 ( - *** Failers +\= Expect no match 1IN SOA non-sp1 non-sp2( /^[a-zA-Z\d][a-zA-Z\d\-]*(\.[a-zA-Z\d][a-zA-z\d\-]*)*\.$/ @@ -254,7 +257,7 @@ ab-c.pq-r. sxk.zzz.ac.uk. x-.y-. - *** Failers +\= Expect no match -abc.peq. 
/^\*\.[a-z]([a-z\-\d]*[a-z\d]+)?(\.[a-z]([a-z\-\d]*[a-z\d]+)?)*$/ @@ -262,7 +265,7 @@ *.b0-a *.c3-b.c *.c-a.b-c - *** Failers +\= Expect no match *.0 *.a- *.a-b.c- @@ -286,29 +289,30 @@ \"1234\" \"abcd\" ; \"\" ; rhubarb - *** Failers +\= Expect no match \"1234\" : things /^$/ \ - *** Failers +\= Expect no match + A non-empty line / ^ a (?# begins with a) b\sc (?# then b c) $ (?# then end)/x ab c - *** Failers +\= Expect no match abc ab cde /(?x) ^ a (?# begins with a) b\sc (?# then b c) $ (?# then end)/ ab c - *** Failers +\= Expect no match abc ab cde /^ a\ b[c ]d $/x a bcd a b d - *** Failers +\= Expect no match abcd ab d @@ -362,7 +366,7 @@ 1234567890 12345678ab 12345678__ - *** Failers +\= Expect no match 1234567 /^[aeiou\d]{4,5}$/ @@ -370,7 +374,7 @@ 1234 12345 aaaaa - *** Failers +\= Expect no match 123456 /^[aeiou\d]{4,5}?/ @@ -383,7 +387,7 @@ /\A(abc|def)=(\1){2,3}\Z/ abc=abcabc def=defdefdef - *** Failers +\= Expect no match abc=defdef /^(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\11*(\3\4)\1(?#)2$/ @@ -402,7 +406,7 @@ /^From\s+\S+\s+([a-zA-Z]{3}\s+){2}\d{1,2}\s+\d\d:\d\d/ From abcd Mon Sep 01 12:33:02 1997 From abcd Mon Sep 1 12:33:02 1997 - *** Failers +\= Expect no match From abcd Sep 01 12:33:02 1997 /^12.34/s @@ -423,7 +427,7 @@ /^(\D*)(?=\d)(?!123)/ abc456 - *** Failers +\= Expect no match abc123 /^1234(?# test newlines @@ -449,12 +453,12 @@ /(?!^)abc/ the abc - *** Failers +\= Expect no match abc /(?=^)abc/ abc - *** Failers +\= Expect no match the abc /^[ab]{1,3}(ab*|b)/ @@ -670,7 +674,7 @@ A. 
Other (a comment) \"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"\@x400-re.lay A missing angle (a comment) \"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"\@x400-re.lay A missing angle .*/)foo" - /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/it/you/see/ - "(?>.*/)foo" /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo +\= Expect no match + /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/it/you/see/ /(?>(\.\d\d[1-9]?))\d+/ 1.230003938 1.875000282 - *** Failers +\= Expect no match 1.235 /^((?>\w+)|(?>\s+))*$/ now is the time for all good men to come to the aid of the party - *** Failers +\= Expect no match this is not a line with only words and spaces! /(\d+)(\w)/ @@ -1999,7 +1995,7 @@ /((?>\d+))(\w)/ 12345a - *** Failers +\= Expect no match 12345+ /(?>a+)b/ @@ -2023,32 +2019,32 @@ /\(((?>[^()]+)|\([^()]+\))+\)/ (abc) (abc(def)xyz) - *** Failers +\= Expect no match ((()aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa /a(?-i)b/i ab Ab - *** Failers +\= Expect no match aB AB /(a (?x)b c)d e/ a bcd e - *** Failers +\= Expect no match a b cd e abcd e a bcde /(a b(?x)c d (?-x)e f)/ a bcde f - *** Failers +\= Expect no match abcdef /(a(?i)b)c/ abc aBc - *** Failers +\= Expect no match abC aBC Abc @@ -2059,7 +2055,7 @@ /a(?i:b)c/ abc aBc - *** Failers +\= Expect no match ABC abC aBC @@ -2067,14 +2063,14 @@ /a(?i:b)*c/ aBc aBBc - *** Failers +\= Expect no match aBC aBBC /a(?=b(?i)c)\w\wd/ abcd abCd - *** Failers +\= Expect no match aBCd abcD @@ -2082,7 +2078,7 @@ more than million more than MILLION more \n than Million - *** Failers +\= Expect no match MORE THAN MILLION more \n than \n million @@ -2090,7 +2086,7 @@ more than million more than MILLION more \n than Million - *** Failers +\= Expect no match MORE THAN MILLION more \n than \n million @@ -2098,7 +2094,7 @@ abc aBbc aBBc - *** Failers +\= Expect no match Abc abAb abbC @@ -2106,7 +2102,7 @@ /(?=a(?i)b)\w\wc/ abc aBc - *** Failers +\= Expect no match Ab abC aBC @@ -2114,7 
+2110,7 @@ /(?<=a(?i)b)(\w\w)c/ abxxc aBxxc - *** Failers +\= Expect no match Abxxc ABxxc abxxC @@ -2122,7 +2118,7 @@ /(?:(a)|b)(?(1)A|B)/ aA bB - *** Failers +\= Expect no match aB bA @@ -2130,7 +2126,7 @@ aa b bb - *** Failers +\= Expect no match ab # Perl gets this next one wrong if the pattern ends with $; in that case it @@ -2140,13 +2136,13 @@ abc: 12 123 - *** Failers +\= Expect no match xyz /^(?(?!abc)\d\d|\w{3}:)$/ abc: 12 - *** Failers +\= Expect no match 123 xyz @@ -2155,7 +2151,7 @@ cat fcat focat - *** Failers +\= Expect no match foocat /(?(?(?(1)\.|())[^\W_](?>[a-z0-9-]*[^\W_])?)+$/ a @@ -3561,7 +3532,7 @@ the.quick.brown.fox a100.b200.300c 12-ab.1245 - *** Failers +\= Expect no match \ .a -a @@ -3579,38 +3550,40 @@ /(?>.*)(?<=(abcd|wxyz))/ alphabetabcd endingwxyz - *** Failers +\= Expect no match a rather long string that doesn't end with one of them /word (?>(?:(?!otherword)[a-zA-Z0-9]+ ){0,30})otherword/ word cat dog elephant mussel cow horse canary baboon snake shark otherword +\= Expect no match word cat dog elephant mussel cow horse canary baboon snake shark /word (?>[a-zA-Z0-9]+ ){0,30}otherword/ +\= Expect no match word cat dog elephant mussel cow horse canary baboon snake shark the quick brown fox and the lazy dog and several other words getting close to thirty by now I hope /(?<=\d{3}(?!999))foo/ 999foo 123999foo - *** Failers +\= Expect no match 123abcfoo /(?<=(?!...999)\d{3})foo/ 999foo 123999foo - *** Failers +\= Expect no match 123abcfoo /(?<=\d{3}(?!999)...)foo/ 123abcfoo 123456foo - *** Failers +\= Expect no match 123999foo /(?<=\d{3}...)(?a|)*\d/ - aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 +\= Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa /(?:a|)*\d/ - aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 +\= Expect no match + 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa /\Z/g abc\n /^(?s)(?>.*)(?a+|ab)+?c/ +\= Expect no match aabc /(?>a+|ab)+c/ +\= Expect no match aabc /(?:a+|ab)+c/ @@ -4170,9 +4157,11 @@ ab /^(?:a|ab)++c/ +\= Expect no match aaaabc /^(?>a|ab)++c/ +\= Expect no match aaaabc /^(?:a|ab)+c/ @@ -4180,24 +4169,24 @@ /(?=abc){3}abc/aftertext abcabcabc - ** Failers +\= Expect no match xyz /(?=abc)+abc/aftertext abcabcabc - ** Failers +\= Expect no match xyz /(?=abc)++abc/aftertext abcabcabc - ** Failers +\= Expect no match xyz /(?=abc){0}xyz/ xyz /(?=abc){1}xyz/ - ** Failers +\= Expect no match xyz /(?=(a))?./ @@ -4221,7 +4210,7 @@ /^[\g]+/ ggg<<>> - ** Failers +\= Expect no match \\ga /^[\ga]+/ @@ -4238,12 +4227,12 @@ /(?<=a{2})b/i xaabc - ** Failers +\= Expect no match xabc /(?XNNNYZ > X NYQZ - ** Failers +\= Expect no match >XYZ > X NY Z @@ -4348,10 +4331,10 @@ /^abc\K/aftertext abcdef - ** Failers +\= Expect no match defabcxyz -/^(a(b))\1\g1\g{1}\g-1\g{-1}\g{-02}Z/ +/^(a(b))\1\g1\g{1}\g-1\g{-1}\g{-2}Z/ ababababbbabZXXXX /(?tom|bon)-\g{A}/ @@ -4359,19 +4342,20 @@ bon-bon /(^(a|b\g{-1}))/ +\= Expect no match bacxxx /(?|(abc)|(xyz))\1/ abcabc xyzxyz - ** Failers +\= Expect no match abcxyz xyzabc /(?|(abc)|(xyz))(?1)/ abcabc xyzabc - ** Failers +\= Expect no match xyzxyz /^X(?5)(a)(?|(b)|(q))(c)(d)(Y)/ @@ -4386,14 +4370,14 @@ /(?'abc'\w+):\k{2}/ a:aaxyz ab:ababxyz - ** Failers +\= Expect no match a:axyz ab:abxyz /(?'abc'\w+):\g{abc}{2}/ a:aaxyz ab:ababxyz - ** Failers +\= Expect no match a:axyz ab:abxyz @@ -4421,7 +4405,7 @@ 1.2.3.4 131.111.10.206 10.0.0.0 - ** Failers +\= Expect no match 10.6 455.3.4.5 @@ -4429,18 +4413,18 @@ 1.2.3.4 131.111.10.206 10.0.0.0 - ** Failers +\= Expect no match 10.6 455.3.4.5 /^(\w++|\s++)*$/ now is the time for all good men to come to the aid of the party - *** Failers +\= Expect no match this is not a line with only words and spaces! 
/(\d++)(\w)/ 12345a - *** Failers +\= Expect no match 12345+ /a++b/ @@ -4458,14 +4442,14 @@ /\(([^()]++|\([^()]+\))+\)/ (abc) (abc(def)xyz) - *** Failers +\= Expect no match ((()aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa /^([^()]|\((?1)*\))*$/ abc a(b)c a(b(c))d - *** Failers) +\= Expect no match) a(b(c)d /^>abc>([^()]|\((?1)*\))* hij> def> - *** Failers +\= Expect no match a)(?<=b(?&X))/ @@ -4583,33 +4574,33 @@ /^(?|(abc)|(def))\1/ abcabc defdef - ** Failers +\= Expect no match abcdef defabc /^(?|(abc)|(def))(?1)/ abcabc defabc - ** Failers +\= Expect no match defdef abcdef /(?:a(? (?')|(?")) |b(? (?')|(?")) ) (?('quote')[a-z]+|[0-9]+)/x,dupnames a\"aaaaa b\"aaaaa - ** Failers +\= Expect no match b\"11111 /(?:(?1)|B)(A(*F)|C)/ ABCD CCD - ** Failers +\= Expect no match CAD /^(?:(?1)|B)(A(*F)|C)/ CCD BCD - ** Failers +\= Expect no match ABCD CAD BAD @@ -4620,7 +4611,7 @@ BAD BCD BAX - ** Failers +\= Expect no match ACX ABC @@ -4634,12 +4625,12 @@ (ab(cd)ef) /^(?=a(*SKIP)b|ac)/ - ** Failers +\= Expect no match ac /^(?=a(*PRUNE)b)/ ab - ** Failers +\= Expect no match ac /^(?=a(*ACCEPT)b)/ @@ -4682,27 +4673,31 @@ # I think is a Perl bug. /A(*COMMIT)(B|D)/ +\= Expect no match ACABX # Check the use of names for failure /^(A(*PRUNE:A)B|C(*PRUNE:B)D)/mark - ** Failers +\= Expect no match AC CB /(*MARK:A)(*SKIP:B)(C|X)/mark C +\= Expect no match D /^(A(*THEN:A)B|C(*THEN:B)D)/mark - ** Failers +\= Expect no match CB /^(?:A(*THEN:A)B|C(*THEN:B)D)/mark +\= Expect no match CB /^(?>A(*THEN:A)B|C(*THEN:B)D)/mark +\= Expect no match CB # This should succeed, as the skip causes bump to offset 1 (the mark). Note @@ -4724,6 +4719,7 @@ AAAC /(*:A)A+(*SKIP:A)(B|Z)/mark +\= Expect no match AAAC # This should succeed, as a non-existent skip name disables the skip. @@ -4739,7 +4735,7 @@ /(*COMMIT)(A|P)(B|P)(C|P)/ ABCDEFG - ** Failers +\= Expect no match DEFGABC # COMMIT inside an atomic group can't stop backtracking over the group. 
@@ -4748,6 +4744,7 @@ abbb /(\w+)b(*COMMIT)\w{2}/ +\= Expect no match abbb # Check opening parens in comment when seeking forward reference. @@ -4758,9 +4755,11 @@ # COMMIT should override THEN. /(?>(*COMMIT)(?>yes|no)(*THEN)(*F))?/ +\= Expect no match yes /(?>(*COMMIT)(yes|no)(*THEN)(*F))?/ +\= Expect no match yes /b?(*SKIP)c/ @@ -4768,9 +4767,11 @@ abc /(*SKIP)bc/ +\= Expect no match a /(*SKIP)b/ +\= Expect no match a /(?P(?P=abn)xxx|)+/ @@ -4779,7 +4780,7 @@ /(?i:([^b]))(?1)/ aa aA - ** Failers +\= Expect no match ab aB Ba @@ -4787,7 +4788,7 @@ /^(?&t)*+(?(DEFINE)(?a))\w$/ aaaaaaX - ** Failers +\= Expect no match aaaaaa /^(?&t)*(?(DEFINE)(?a))\w$/ @@ -4797,24 +4798,24 @@ /^(a)*+(\w)/ aaaaX YZ - ** Failers +\= Expect no match aaaa /^(?:a)*+(\w)/ aaaaX YZ - ** Failers +\= Expect no match aaaa /^(a)++(\w)/ aaaaX - ** Failers +\= Expect no match aaaa YZ /^(?:a)++(\w)/ aaaaX - ** Failers +\= Expect no match aaaa YZ @@ -4828,13 +4829,13 @@ /^(a){2,}+(\w)/ aaaaX - ** Failers +\= Expect no match aaa YZ /^(?:a){2,}+(\w)/ aaaaX - ** Failers +\= Expect no match aaa YZ @@ -4844,12 +4845,12 @@ aab /(a)++(?1)b/ - ** Failers +\= Expect no match ab aab /(a)*+(?1)b/ - ** Failers +\= Expect no match ab aab @@ -4884,6 +4885,7 @@ aaaab /^(a)(?1)++ab/ +\= Expect no match aaaab /^(?=a(*:M))aZ/mark @@ -4933,6 +4935,7 @@ name)/mark # Capture /^.*? (a(*THEN)b) c/x +\= Expect no match aabc /^.*? (a(*THEN)b|(*F)) c/x @@ -4942,11 +4945,13 @@ name)/mark aabc /^.*? ( (a(*THEN)b) ) c/x +\= Expect no match aabc # Non-capture /^.*? (?:a(*THEN)b) c/x +\= Expect no match aabc /^.*? (?:a(*THEN)b|(*F)) c/x @@ -4956,11 +4961,13 @@ name)/mark aabc /^.*? (?: (?:a(*THEN)b) ) c/x +\= Expect no match aabc # Atomic /^.*? (?>a(*THEN)b) c/x +\= Expect no match aabc /^.*? (?>a(*THEN)b|(*F)) c/x @@ -4970,11 +4977,13 @@ name)/mark aabc /^.*? (?> (?>a(*THEN)b) ) c/x +\= Expect no match aabc # Possessive capture /^.*? (a(*THEN)b)++ c/x +\= Expect no match aabc /^.*? 
(a(*THEN)b|(*F))++ c/x @@ -4984,11 +4993,13 @@ name)/mark aabc /^.*? ( (a(*THEN)b)++ )++ c/x +\= Expect no match aabc # Possessive non-capture /^.*? (?:a(*THEN)b)++ c/x +\= Expect no match aabc /^.*? (?:a(*THEN)b|(*F))++ c/x @@ -4998,6 +5009,7 @@ name)/mark aabc /^.*? (?: (?:a(*THEN)b)++ )++ c/x +\= Expect no match aabc # Condition assertion @@ -5008,12 +5020,14 @@ name)/mark # Condition /^.*?(?(?=a)a|b(*THEN)c)/ +\= Expect no match ba /^.*?(?:(?(?=a)a|b(*THEN)c)|d)/ ba /^.*?(?(?=a)a(*THEN)b|c)/ +\= Expect no match ac # Assertion @@ -5037,7 +5051,7 @@ name)/mark /(?<=(a(*COMMIT)b))c/ xabcd - ** Failers +\= Expect no match xacd /(?.*?a)b/s aab @@ -5235,6 +5247,7 @@ name)/mark aab /(?>^a)b/s +\= Expect no match aab /(?>.*?)(?<=(abcd)|(wxyz))/ @@ -5246,6 +5259,7 @@ name)/mark endingwxyz "(?>.*)foo" +\= Expect no match abcdfooxyz "(?>.*?)foo" @@ -5258,6 +5272,7 @@ name)/mark ac /(?<=(*SKIP)ac)a/ +\= Expect no match aa /A(*MARK:A)A+(*SKIP:B)(B|Z) | AC/x,mark @@ -5331,6 +5346,7 @@ name)/mark # This gives "no match", as expected. /aaaaa(*COMMIT)b|a+c/ +\= Expect no match aaaaaac # ---- Tests using THEN ---- @@ -5389,6 +5405,7 @@ name)/mark /(a(*COMMIT)b)c|abd/ abc +\= Expect no match abd /(?=a(*COMMIT)b)abc|abd/ @@ -5400,14 +5417,16 @@ name)/mark abd /a(?=b(*COMMIT)c)[^d]|abd/ + abc +\= Expect no match abd - abc /a(?=bc).|abd/ abd abc /a(?>b(*COMMIT)c)d|abd/ +\= Expect no match abceabd /a(?>bc)d|abd/ @@ -5417,6 +5436,7 @@ name)/mark abd /(?>a(*COMMIT)c)d|abd/ +\= Expect no match abd /((?=a(*COMMIT)b)ab|ac){0}(?:(?1)|a(c))/ @@ -5426,7 +5446,7 @@ name)/mark # made them compatible. 
/^(a)?(?(1)a|b)+$/ - *** Failers +\= Expect no match a /(?=a\Kb)ab/ @@ -5441,58 +5461,73 @@ name)/mark /^abc(?foo)|(?bar))\k/dupnames @@ -5590,7 +5631,7 @@ name)/mark /(?A)(?:(?foo)|(?bar))\k/dupnames AfooA AbarA - ** Failers +\= Expect no match Afoofoo Abarbar @@ -5641,6 +5682,7 @@ name)/mark ababc /(?:x|(?:(xx|yy)+|x|x|x|x|x)|a|a|a)bc/ +\= Expect no match acb '\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++' @@ -5671,6 +5713,7 @@ name)/mark bbbaaabaabb /(?:(?P=same)?(?:(?P=same)(?Pa)(?P=same)|(?P=same)?(?Pb)(?P=same)){2}(?P=same)(?Pc)(?P=same)){2}(?Pz)?/g,dupnames +\= Expect no match bbbaaaccccaaabbbcc /(?Pa)?(?Pb)?(?()c|d)*l/ @@ -5727,4 +5770,65 @@ name)/mark "(?|(\k'Pm')|(?'Pm'))" abcd +/(?|(aaa)|(b))\g{1}/ + aaaaaa + bb + +/(?|(aaa)|(b))(?1)/ + aaaaaa + baaa +\= Expect no match + bb + +/(?|(aaa)|(b))/ + xaaa + xbc + +/(?|(?'a'aaa)|(?'a'b))\k'a'/ + aaaaaa + bb + +/(?|(?'a'aaa)|(?'a'b))(?'a'cccc)\k'a'/dupnames + aaaccccaaa + bccccb + +# /x does not apply to MARK labels + +/x (*MARK:ab cd # comment +ef) x/x,mark + axxz + +/(?<=a(B){0}c)X/ + acX + +/(?b)(?(DEFINE)(a+))(?&DEFINE)/ + bbbb +\= Expect no match + baaab + +/(?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])(?=.*[,;:])(?=.{8,16})(?!.*[\s])/ + \ Fred:099 + +/(?=.*X)X$/ + \ X + +/(?s)(?=.*?)b/ + aabc + +/(Z)(a)\2{1,2}?(?-i)\1X/i + ZaAAZX + +/(?'c')XX(?'YYYYYYYYYYYYYYYYYYYYYYYCl')/ + +/[s[:digit:]\E-H]+/ + s09-H + +/[s[:digit:]\Q\E-H]+/ + s09-H + +/a+(?:|b)a/ + aaaa + +/X?(R||){3335}/ + # End of testinput1 diff --git a/pcre2/testdata/testinput10 b/pcre2/testdata/testinput10 index a1fdd928a..85d2005d7 100644 --- a/pcre2/testdata/testinput10 +++ b/pcre2/testdata/testinput10 @@ -1,45 +1,7 @@ # This set of tests is for UTF-8 support and Unicode property support, with # relevance only for the 8-bit library. 
-/X(\C{3})/utf - X\x{1234} - -/X(\C{4})/utf - X\x{1234}YZ - -/X\C*/utf - XYZabcdce - -/X\C*?/utf - XYZabcde - -/X\C{3,5}/utf - Xabcdefg - X\x{1234} - X\x{1234}YZ - X\x{1234}\x{512} - X\x{1234}\x{512}YZ - -/X\C{3,5}?/utf - Xabcdefg - X\x{1234} - X\x{1234}YZ - X\x{1234}\x{512} - -/a\Cb/utf - aXb - a\nb - -/a\C\Cb/utf - a\x{100}b - -/ab\Cde/utf - abXde - -/a\C\Cb/utf - a\x{100}b - ** Failers - a\x{12257}b +# The next 4 patterns have UTF-8 errors /[Ã]/utf @@ -47,7 +9,12 @@ /ÃÃÃxxx/utf +/‚‚‚‚‚‚‚Ã/utf + +# Now test subjects + /badutf/utf +\= Expect UTF-8 errors X\xdf XX\xef XXX\xef\x80 @@ -89,11 +56,13 @@ \xff /badutf/utf +\= Expect UTF-8 errors XX\xfb\x80\x80\x80\x80 XX\xfd\x80\x80\x80\x80\x80 XX\xf7\xbf\xbf\xbf /shortutf/utf +\= Expect UTF-8 errors XX\xdf\=ph XX\xef\=ph XX\xef\x80\=ph @@ -111,6 +80,7 @@ \xfd\x80\x80\x80\x80\=ph /anything/utf +\= Expect UTF-8 errors X\xc0\x80 XX\xc1\x8f XXX\xe0\x9f\x80 @@ -119,20 +89,57 @@ \xfc\x83\x80\x80\x80\x80 \xfe\x80\x80\x80\x80\x80 \xff\x80\x80\x80\x80\x80 + \xf8\x88\x80\x80\x80 + \xf9\x87\x80\x80\x80 + \xfc\x84\x80\x80\x80\x80 + \xfd\x83\x80\x80\x80\x80 +\= Expect no match \xc3\x8f \xe0\xaf\x80 \xe1\x80\x80 \xf0\x9f\x80\x80 \xf1\x8f\x80\x80 - \xf8\x88\x80\x80\x80 - \xf9\x87\x80\x80\x80 - \xfc\x84\x80\x80\x80\x80 - \xfd\x83\x80\x80\x80\x80 \xf8\x88\x80\x80\x80\=no_utf_check \xf9\x87\x80\x80\x80\=no_utf_check \xfc\x84\x80\x80\x80\x80\=no_utf_check \xfd\x83\x80\x80\x80\x80\=no_utf_check + +# Similar tests with offsets +/badutf/utf +\= Expect UTF-8 errors + X\xdfabcd + X\xdfabcd\=offset=1 +\= Expect no match + X\xdfabcd\=offset=2 + +/(?<=x)badutf/utf +\= Expect UTF-8 errors + X\xdfabcd + X\xdfabcd\=offset=1 + X\xdfabcd\=offset=2 + X\xdfabcd\xdf\=offset=3 +\= Expect no match + X\xdfabcd\=offset=3 + +/(?<=xx)badutf/utf +\= Expect UTF-8 errors + X\xdfabcd + X\xdfabcd\=offset=1 + X\xdfabcd\=offset=2 + X\xdfabcd\=offset=3 + +/(?<=xxxx)badutf/utf +\= Expect UTF-8 errors + X\xdfabcd + X\xdfabcd\=offset=1 + X\xdfabcd\=offset=2 + 
X\xdfabcd\=offset=3 + X\xdfabc\xdf\=offset=6 + X\xdfabc\xdf\=offset=7 +\= Expect no match + X\xdfabcd\=offset=6 + /\x{100}/IB,utf /\x{1000}/IB,utf @@ -167,27 +174,12 @@ /\x{212ab}/IB,utf -# This one is here not because it's different to Perl, but because the way -# the captured single-byte is displayed. (In Perl it becomes a character, and you -# can't tell the difference.) - -/X(\C)(.*)/utf - X\x{1234} - X\nabc - -# This one is here because Perl gives out a grumbly error message (quite -# correctly, but that messes up comparisons). - -/a\Cb/utf - *** Failers - a\x{100}b - /[^ab\xC0-\xF0]/IB,utf \x{f1} \x{bf} \x{100} \x{1000} - *** Failers +\= Expect no match \x{c0} \x{f0} @@ -214,7 +206,6 @@ \x{100} Z\x{100} \x{100}Z - *** Failers /[\xff]/IB,utf >\x{ff}< @@ -236,21 +227,23 @@ # This tests the stricter UTF-8 check according to RFC 3629. /X/utf +\= Expect UTF-8 errors \x{d800} - \x{d800}\=no_utf_check \x{da00} - \x{da00}\=no_utf_check \x{dfff} - \x{dfff}\=no_utf_check \x{110000} - \x{110000}\=no_utf_check \x{2000000} - \x{2000000}\=no_utf_check \x{7fffffff} +\= Expect no match + \x{d800}\=no_utf_check + \x{da00}\=no_utf_check + \x{dfff}\=no_utf_check + \x{110000}\=no_utf_check + \x{2000000}\=no_utf_check \x{7fffffff}\=no_utf_check /(*UTF8)\x{1234}/ - abcd\x{1234}pqr + abcd\x{1234}pqr /(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I @@ -290,11 +283,14 @@ /a+/utf a\x{123}aa\=offset=1 - a\x{123}aa\=offset=2 a\x{123}aa\=offset=3 a\x{123}aa\=offset=4 - a\x{123}aa\=offset=5 +\= Expect bad offset value a\x{123}aa\=offset=6 +\= Expect bad UTF-8 offset + a\x{123}aa\=offset=2 +\= Expect no match + a\x{123}aa\=offset=5 /\x{1234}+/Ii,utf @@ -395,7 +391,6 @@ Z\x{100} \x{100} \x{100}Z - *** Failers /[z-\x{100}]/IB,utf @@ -421,7 +416,7 @@ \x{104} \x{105} \x{109} - ** Failers +\= Expect no match \x{100} \x{10a} @@ -435,7 +430,7 @@ \x{ff} \x{100} \x{101} - ** Failers +\= Expect no match \x{102} Y y @@ -445,6 +440,22 @@ /\x{3a3}B/IBi,utf /abc/utf,replace=à - abc + abc + +/(?<=(a)(?-1))x/I,utf + 
a\x80zx\=offset=3 + +/[\W\p{Any}]/B + abc + 123 + +/[\W\pL]/B + abc +\= Expect no match + 123 + +/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':Æ¿)/utf + +/[\s[:^ascii:]]/B,ucp # End of testinput10 diff --git a/pcre2/testdata/testinput11 b/pcre2/testdata/testinput11 index 43d89e10e..2d267d65f 100644 --- a/pcre2/testdata/testinput11 +++ b/pcre2/testdata/testinput11 @@ -4,11 +4,8 @@ # different, so they have separate output files. #forbid_utf +#newline_default LF ANY ANYCRLF -/a\Cb/ - aXb - a\nb - /[^\x{c4}]/IB /\x{100}/I @@ -343,7 +340,7 @@ # Non-UTF characters -/\C{2,3}/ +/.{2,3}/ \x{400000}\x{400001}\x{400002}\x{400003} /\x{400000}\x{800000}/IBi @@ -354,4 +351,21 @@ /[\V]/IB +/(*THEN:\[A]{65501})/expand + +# We can use pcre2test's utf8_input modifier to create wide pattern characters, +# even though this test is run when UTF is not supported. 
+ +/abý¿¿¿¿¿z/utf8_input + abý¿¿¿¿¿z + ab\x{7fffffff}z + +/abÿý¿¿¿¿¿z/utf8_input + abÿý¿¿¿¿¿z + ab\x{ffffffff}z + +/abÿAz/utf8_input + abÿAz + ab\x{80000041}z + # End of testinput11 diff --git a/pcre2/testdata/testinput12 b/pcre2/testdata/testinput12 index 1cba4af6e..c3b2bfc97 100644 --- a/pcre2/testdata/testinput12 +++ b/pcre2/testdata/testinput12 @@ -7,49 +7,6 @@ /abc/utf Ã] -/X(\C{3})/utf - X\x{11234}Y - X\x{11234}YZ - -/X(\C{4})/utf - X\x{11234}YZ - X\x{11234}YZW - -/X\C*/utf - XYZabcdce - -/X\C*?/utf - XYZabcde - -/X\C{3,5}/utf - Xabcdefg - X\x{11234}Y - X\x{11234}YZ - X\x{11234}\x{512} - X\x{11234}\x{512}YZ - X\x{11234}\x{512}\x{11234}Z - -/X\C{3,5}?/utf - Xabcdefg - X\x{11234}Y - X\x{11234}YZ - X\x{11234}\x{512}YZ - *** Failers - X\x{11234} - -/a\Cb/utf - aXb - a\nb - -/a\C\Cb/utf - a\x{12257}b - a\x{12257}\x{11234}b - ** Failers - a\x{100}b - -/ab\Cde/utf - abXde - # Check maximum character size /\x{ffff}/IB,utf @@ -90,27 +47,12 @@ /\x{212ab}/IB,utf -# This one is here not because it's different to Perl, but because the way -# the captured single-byte is displayed. (In Perl it becomes a character, and you -# can't tell the difference.) - -/X(\C)(.*)/utf - X\x{1234} - X\nabc - -# This one is here because Perl gives out a grumbly error message (quite -# correctly, but that messes up comparisons). 
- -/a\Cb/utf - *** Failers - a\x{100}b - /[^ab\xC0-\xF0]/IB,utf \x{f1} \x{bf} \x{100} \x{1000} - *** Failers +\= Expect no match \x{c0} \x{f0} @@ -137,7 +79,6 @@ \x{100} Z\x{100} \x{100}Z - *** Failers /[\xff]/IB,utf >\x{ff}< @@ -157,18 +98,24 @@ /^[\QÄ€\E-\QÅ\E/B,utf /X/utf - XX\x{d800} XX\x{d800}\=no_utf_check - XX\x{da00} XX\x{da00}\=no_utf_check - XX\x{dc00} XX\x{dc00}\=no_utf_check - XX\x{de00} XX\x{de00}\=no_utf_check - XX\x{dfff} XX\x{dfff}\=no_utf_check +\= Expect UTF error + XX\x{d800} + XX\x{da00} + XX\x{dc00} + XX\x{de00} + XX\x{dfff} XX\x{110000} XX\x{d800}\x{1234} +\= Expect no match + XX\x{d800}\=offset=3 + +/(?<=.)X/utf + XX\x{d800}\=offset=3 /(*UTF16)\x{11234}/ abcd\x{11234}pqr @@ -229,7 +176,9 @@ a\x{123}aa\=offset=1 a\x{123}aa\=offset=2 a\x{123}aa\=offset=3 +\= Expect no match a\x{123}aa\=offset=4 +\= Expect bad offset error a\x{123}aa\=offset=5 a\x{123}aa\=offset=6 @@ -250,11 +199,16 @@ # Check bad offset /a/utf +\= Expect bad UTF-16 offset, or no match in 32-bit \x{10000}\=offset=1 \x{10000}ab\=offset=1 +\= Expect 16-bit match, 32-bit no match \x{10000}ab\=offset=2 +\= Expect no match \x{10000}ab\=offset=3 +\= Expect no match in 16-bit, bad offset in 32-bit \x{10000}ab\=offset=4 +\= Expect bad offset \x{10000}ab\=offset=5 /í¼€/utf @@ -329,9 +283,6 @@ /\o{4200000}/utf -/\C/utf - \x{110000} - /\x{100}*A/IB,utf A @@ -341,7 +292,6 @@ Z\x{100} \x{100} \x{100}Z - *** Failers /[z-\x{100}]/IB,utf @@ -367,7 +317,7 @@ \x{104} \x{105} \x{109} - ** Failers +\= Expect no match \x{100} \x{10a} @@ -381,7 +331,7 @@ \x{ff} \x{100} \x{101} - ** Failers +\= Expect no match \x{102} Y y @@ -390,4 +340,24 @@ /\x{3a3}B/IBi,utf +/./utf + \x{110000} + +/(*UTF)abý¿¿¿¿¿z/B + +/abý¿¿¿¿¿z/utf + +/[\W\p{Any}]/B + abc + 123 + +/[\W\pL]/B + abc + \x{100} + \x{308} +\= Expect no match + 123 + +/[\s[:^ascii:]]/B,ucp + # End of testinput12 diff --git a/pcre2/testdata/testinput14 b/pcre2/testdata/testinput14 index 69ca2b6a8..f97f3ec2c 100644 --- a/pcre2/testdata/testinput14 +++ 
b/pcre2/testdata/testinput14 @@ -1,112 +1,37 @@ -# These are: -# -# (1) Tests of the match-limiting features. The results are different for -# interpretive or JIT matching, so this test should not be run with JIT. The -# same tests are run using JIT in test 16. +# These test special (mostly error) UTF features of DFA matching. They are a +# selection of the more comprehensive tests that are run for non-DFA matching. +# The output is different for the different widths. -# (2) Other tests that must not be run with JIT. +#subject dfa -/(a+)*zz/I - aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits - aaaaaaaaaaaaaz\=find_limits +/X/utf + XX\x{d800} + XX\x{d800}\=offset=3 + XX\x{d800}\=no_utf_check + XX\x{da00} + XX\x{da00}\=no_utf_check + XX\x{dc00} + XX\x{dc00}\=no_utf_check + XX\x{de00} + XX\x{de00}\=no_utf_check + XX\x{dfff} + XX\x{dfff}\=no_utf_check + XX\x{110000} + XX\x{d800}\x{1234} + +/badutf/utf + X\xdf + XX\xef + XXX\xef\x80 + X\xf7 + XX\xf7\x80 + XXX\xf7\x80\x80 -!((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I - /* this is a C style comment */\=find_limits - -/^(?>a)++/ - aa\=find_limits - aaaaaaaaa\=find_limits - -/(a)(?1)++/ - aa\=find_limits - aaaaaaaaa\=find_limits - -/a(?:.)*?a/ims - abbbbbbbbbbbbbbbbbbbbba\=find_limits - -/a(?:.(*THEN))*?a/ims - abbbbbbbbbbbbbbbbbbbbba\=find_limits - -/a(?:.(*THEN:ABC))*?a/ims - abbbbbbbbbbbbbbbbbbbbba\=find_limits - -/^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/ - aabbccddee\=find_limits - -/^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/ - aabbccddee\=find_limits - -/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/ - aabbccddee\=find_limits - -/(*LIMIT_MATCH=12bc)abc/ - -/(*LIMIT_MATCH=4294967290)abc/ - -/(*LIMIT_RECURSION=4294967280)abc/I - -/(a+)*zz/ - aaaaaaaaaaaaaz - aaaaaaaaaaaaaz\=match_limit=3000 - -/(a+)*zz/ - aaaaaaaaaaaaaz\=recursion_limit=10 - -/(*LIMIT_MATCH=3000)(a+)*zz/I - aaaaaaaaaaaaaz - aaaaaaaaaaaaaz\=match_limit=60000 - -/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I - aaaaaaaaaaaaaz - 
-/(*LIMIT_MATCH=60000)(a+)*zz/I - aaaaaaaaaaaaaz - aaaaaaaaaaaaaz\=match_limit=3000 - -/(*LIMIT_RECURSION=10)(a+)*zz/I - aaaaaaaaaaaaaz - aaaaaaaaaaaaaz\=recursion_limit=1000 - -/(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/I - aaaaaaaaaaaaaz - -/(*LIMIT_RECURSION=1000)(a+)*zz/I - aaaaaaaaaaaaaz - aaaaaaaaaaaaaz\=recursion_limit=10 - -# These three have infinitely nested recursions. - -/((?2))((?1))/ - abc - -/((?(R2)a+|(?1)b))/ - aaaabcde - -/(?(R)a*(?1)|((?R))b)/ - aaaabcde - -# The allusedtext modifier does not work with JIT, which does not maintain -# the leftchar/rightchar data. - -/abc(?=xyz)/allusedtext - abcxyzpqr - abcxyzpqr\=aftertext - -/(?<=pqr)abc(?=xyz)/allusedtext - xyzpqrabcxyzpqr - xyzpqrabcxyzpqr\=aftertext - -/a\b/ - a.\=allusedtext - a\=allusedtext - -/abc\Kxyz/ - abcxyz\=allusedtext - -/abc(?=xyz(*ACCEPT))/ - abcxyz\=allusedtext - -/abc(?=abcde)(?=ab)/allusedtext - abcabcdefg +/shortutf/utf + XX\xdf\=ph + XX\xef\=ph + XX\xef\x80\=ph + \xf7\=ph + \xf7\x80\=ph # End of testinput14 diff --git a/pcre2/testdata/testinput15 b/pcre2/testdata/testinput15 index d23b12725..b1aaf781c 100644 --- a/pcre2/testdata/testinput15 +++ b/pcre2/testdata/testinput15 @@ -1,9 +1,168 @@ -# This test is run only when JIT support is not available. It checks that an -# attempt to use it has the expected behaviour. It also tests things that -# are different without JIT. +# These are: +# +# (1) Tests of the match-limiting features. The results are different for +# interpretive or JIT matching, so this test should not be run with JIT. The +# same tests are run using JIT in test 17. -/abc/I,jit,jitverify +# (2) Other tests that must not be run with JIT. 
-/a*/I +/(a+)*zz/I + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits + aaaaaaaaaaaaaz\=find_limits + +!((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I + /* this is a C style comment */\=find_limits + +/^(?>a)++/ + aa\=find_limits + aaaaaaaaa\=find_limits + +/(a)(?1)++/ + aa\=find_limits + aaaaaaaaa\=find_limits + +/a(?:.)*?a/ims + abbbbbbbbbbbbbbbbbbbbba\=find_limits + +/a(?:.(*THEN))*?a/ims + abbbbbbbbbbbbbbbbbbbbba\=find_limits + +/a(?:.(*THEN:ABC))*?a/ims + abbbbbbbbbbbbbbbbbbbbba\=find_limits + +/^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/ + aabbccddee\=find_limits + +/^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/ + aabbccddee\=find_limits + +/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/ + aabbccddee\=find_limits + +/(*LIMIT_MATCH=12bc)abc/ + +/(*LIMIT_MATCH=4294967290)abc/ + +/(*LIMIT_RECURSION=4294967280)abc/I + +/(a+)*zz/ + aaaaaaaaaaaaaz + aaaaaaaaaaaaaz\=match_limit=3000 + +/(a+)*zz/ + aaaaaaaaaaaaaz\=recursion_limit=10 + +/(*LIMIT_MATCH=3000)(a+)*zz/I + aaaaaaaaaaaaaz + aaaaaaaaaaaaaz\=match_limit=60000 + +/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I + aaaaaaaaaaaaaz + +/(*LIMIT_MATCH=60000)(a+)*zz/I + aaaaaaaaaaaaaz + aaaaaaaaaaaaaz\=match_limit=3000 + +/(*LIMIT_RECURSION=10)(a+)*zz/I + aaaaaaaaaaaaaz + aaaaaaaaaaaaaz\=recursion_limit=1000 + +/(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/I + aaaaaaaaaaaaaz + +/(*LIMIT_RECURSION=1000)(a+)*zz/I + aaaaaaaaaaaaaz + aaaaaaaaaaaaaz\=recursion_limit=10 + +# These three have infinitely nested recursions. + +/((?2))((?1))/ + abc + +/((?(R2)a+|(?1)b))()/ + aaaabcde + +/(?(R)a*(?1)|((?R))b)/ + aaaabcde + +# The allusedtext modifier does not work with JIT, which does not maintain +# the leftchar/rightchar data. 
+ +/abc(?=xyz)/allusedtext + abcxyzpqr + abcxyzpqr\=aftertext + +/(?<=pqr)abc(?=xyz)/allusedtext + xyzpqrabcxyzpqr + xyzpqrabcxyzpqr\=aftertext + +/a\b/ + a.\=allusedtext + a\=allusedtext + +/abc\Kxyz/ + abcxyz\=allusedtext + +/abc(?=xyz(*ACCEPT))/ + abcxyz\=allusedtext + +/abc(?=abcde)(?=ab)/allusedtext + abcabcdefg + +# These tests provoke recursion loops, which give a different error message +# when JIT is used. + +/(?R)/I + abcd + +/(a|(?R))/I + abcd + defg + +/(ab|(bc|(de|(?R))))/I + abcd + fghi + +/(ab|(bc|(de|(?1))))/I + abcd + fghi + +/x(ab|(bc|(de|(?1)x)x)x)/I + xab123 + xfghi + +/(?!\w)(?R)/ + abcd + =abc + +/(?=\w)(?R)/ + =abc + abcd + +/(?a)++/ - aa\=find_limits - aaaaaaaaa\=find_limits - -/(a)(?1)++/ - aa\=find_limits - aaaaaaaaa\=find_limits - -/a(?:.)*?a/ims - abbbbbbbbbbbbbbbbbbbbba\=find_limits - -/a(?:.(*THEN))*?a/ims - abbbbbbbbbbbbbbbbbbbbba\=find_limits - -/a(?:.(*THEN:ABC))*?a/ims - abbbbbbbbbbbbbbbbbbbbba\=find_limits - -/^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/ - aabbccddee\=find_limits - -/^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/ - aabbccddee\=find_limits - -/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/ - aabbccddee\=find_limits - -/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/jitfast - aabbccddee\=find_limits - aabbccddee\=jitstack=1 - -/(a+)*zz/ - aaaaaaaaaaaaaz - aaaaaaaaaaaaaz\=match_limit=3000 - -/(*LIMIT_MATCH=3000)(a+)*zz/I - aaaaaaaaaaaaaz - aaaaaaaaaaaaaz\=match_limit=60000 - -/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I - aaaaaaaaaaaaaz - -/(*LIMIT_MATCH=60000)(a+)*zz/I - aaaaaaaaaaaaaz - aaaaaaaaaaaaaz\=match_limit=3000 - -# These three have infinitely nested recursions. - -/((?2))((?1))/ - abc - -/((?(R2)a+|(?1)b))/ - aaaabcde - -/(?(R)a*(?1)|((?R))b)/ - aaaabcde - -# Invalid options disable JIT when called via pcre2_match(), causing the -# match to happen via the interpreter, but for fast JIT invalid options are -# ignored, so an unanchored match happens. 
- -/abcd/ - abcd\=anchored - fail abcd\=anchored - -/abcd/jitfast - abcd\=anchored - succeed abcd\=anchored - -# Push/pop does not lose the JIT information, though jitverify applies only to -# compilation, but serializing (save/load) discards JIT data completely. - -/^abc\Kdef/info,push -#pop jitverify - abcdef - -/^abc\Kdef/info,push -#save testsaved1 -#load testsaved1 -#pop jitverify - abcdef - -#load testsaved1 -#pop jit,jitverify - abcdef - -# Test pattern compilation - -/(?:a|b|c|d|e)(?R)/jit=1 - -/(?:a|b|c|d|e)(?R)(?R)/jit=1 - -/(a(?:a|b|c|d|e)b){8,16}/jit=1 +/a*/I # End of testinput16 diff --git a/pcre2/testdata/testinput17 b/pcre2/testdata/testinput17 index d636d38eb..9a73ef135 100644 --- a/pcre2/testdata/testinput17 +++ b/pcre2/testdata/testinput17 @@ -1,95 +1,300 @@ -# This set of tests is run only with the 8-bit library. It tests the POSIX -# interface, which is supported only with the 8-bit library. This test should -# not be run with JIT (which is not available for the POSIX interface). +# This test is run only when JIT support is available. It checks JIT complete +# and partial modes, and things that are different with JIT. + +#pattern jitverify + +# JIT does not support this pattern (callout at start of condition). + +/(?(?C1)(?=a)a)/I + +# The following pattern cannot be compiled by JIT. 
+ +/b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*
b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*
b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*
b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*
b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*
b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*
b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*
b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*
b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*
b*b*/I + +# Check that an infinite recursion loop is caught. + +/(?(R)a*(?1)|((?R))b)/ + aaaabcde + +/abcd/I + abcd +\= Expect no match + xyz + +/(*NO_JIT)abcd/I + abcd +\= Expect no match + xyz + +/abcd/ + abcd + ab\=ps + ab\=ph +\= Expect no match + xyz + +/abcd/jitfast + abcd + ab\=ps + ab\=ph +\= Expect no match + xyz + +/abcd/jit=1 + abcd + ab\=ps + ab\=ph +\= Expect no match + xyz + xyz\=ps + +/abcd/jit=1,jitfast + abcd + ab\=ps + ab\=ph + xyz\=ps +\= Expect no match + xyz + +/abcd/jit=2 + abcd + ab\=ps + ab\=ph +\= Expect no match + xyz + +/abcd/jit=2,jitfast + abcd + ab\=ps + ab\=ph + xyz + +/abcd/jit=3 + abcd + ab\=ps + ab\=ph +\= Expect no match + xyz + +/abcd/jit=4 + abcd + ab\=ps + ab\=ph +\= Expect no match + xyz + +/abcd/jit=5 + abcd + ab\=ps + ab\=ph +\= Expect no match + xyz + +/abcd/jit=6 + abcd + ab\=ps + ab\=ph +\= Expect no match + xyz + +/abcd/jit=7 + abcd + ab\=ps + ab\=ph +\= Expect no match + xyz + +/abcd/I,jit=2 + +/(*NO_START_OPT)a(*:m)b/mark +\= Expect no match + a + +/^12345678abcd/m + 12345678abcd -#forbid_utf -#pattern posix +# Limits tests that give different output with JIT. 
-# Test invalid options +/(a+)*zz/I + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits +\= Expect no match + aaaaaaaaaaaaaz\=find_limits -/abc/auto_callout +!((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I + /* this is a C style comment */\=find_limits -/abc/ - abc\=find_limits +/^(?>a)++/ + aa\=find_limits + aaaaaaaaa\=find_limits + +/(a)(?1)++/ + aa\=find_limits + aaaaaaaaa\=find_limits -/abc/ - abc\=partial_hard +/a(?:.)*?a/ims + abbbbbbbbbbbbbbbbbbbbba\=find_limits + +/a(?:.(*THEN))*?a/ims + abbbbbbbbbbbbbbbbbbbbba\=find_limits -# Real tests +/a(?:.(*THEN:ABC))*?a/ims + abbbbbbbbbbbbbbbbbbbbba\=find_limits -/abc/ +/^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/ + aabbccddee\=find_limits + +/^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/ + aabbccddee\=find_limits + +/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/ + aabbccddee\=find_limits + +/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/jitfast + aabbccddee\=find_limits + aabbccddee\=jitstack=1 + +/(a+)*zz/ +\= Expect no match + aaaaaaaaaaaaaz +\= Expect limit exceeded + aaaaaaaaaaaaaz\=match_limit=3000 + +/(*LIMIT_MATCH=3000)(a+)*zz/I + aaaaaaaaaaaaaz + aaaaaaaaaaaaaz\=match_limit=60000 + +/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I + aaaaaaaaaaaaaz + +/(*LIMIT_MATCH=60000)(a+)*zz/I +\= Expect no match + aaaaaaaaaaaaaz +\= Expect limit exceeded + aaaaaaaaaaaaaz\=match_limit=3000 + +# These three have infinitely nested recursions. + +/((?2))((?1))/ abc - *** Failers -/^abc|def/ +/((?(R2)a+|(?1)b))()/ + aaaabcde + +/(?(R)a*(?1)|((?R))b)/ + aaaabcde + +# Invalid options disable JIT when called via pcre2_match(), causing the +# match to happen via the interpreter, but for fast JIT invalid options are +# ignored, so an unanchored match happens. 
+ +/abcd/ + abcd\=anchored +\= Expect no match + fail abcd\=anchored + +/abcd/jitfast + abcd\=anchored + succeed abcd\=anchored + +# Push/pop does not lose the JIT information, though jitverify applies only to +# compilation, but serializing (save/load) discards JIT data completely. + +/^abc\Kdef/info,push +#pop jitverify abcdef - abcdef\=notbol -/.*((abc)$|(def))/ - defabc - defabc\=noteol +/^abc\Kdef/info,push +#save testsaved1 +#load testsaved1 +#pop jitverify + abcdef + +#load testsaved1 +#pop jit,jitverify + abcdef + +/abcd/pushcopy,jitverify + abcd + +#pop jitverify + abcd + +# Test pattern compilation -/the quick brown fox/ - the quick brown fox - *** Failers - The Quick Brown Fox +/(?:a|b|c|d|e)(?R)/jit=1 -/the quick brown fox/i - the quick brown fox - The Quick Brown Fox +/(?:a|b|c|d|e)(?R)(?R)/jit=1 -/abc.def/ - *** Failers - abc\ndef +/(a(?:a|b|c|d|e)b){8,16}/jit=1 -/abc$/ +/(?:|a|){100}x/jit=1 + +# These tests provoke recursion loops, which give a different error message +# when JIT is used. + +/(?R)/I + abcd + +/(a|(?R))/I + abcd + defg + +/(ab|(bc|(de|(?R))))/I + abcd + fghi + +/(ab|(bc|(de|(?1))))/I + abcd + fghi + +/x(ab|(bc|(de|(?1)x)x)x)/I + xab123 + xfghi + +/(?!\w)(?R)/ + abcd + =abc + +/(?=\w)(?R)/ + =abc + abcd + +/(?b)c/no_auto_capture - abc - -/a?|b?/ - abc - ** Failers - ddd\=notempty - -/\w+A/ - CDAAAAB - -/\w+A/ungreedy - CDAAAAB - -/\Biss\B/I,aftertext - Mississippi - -/abc/\ - -"(?(?C)" - -# End of testdata/testinput16 +# End of testinput17 diff --git a/pcre2/testdata/testinput18 b/pcre2/testdata/testinput18 index a51790594..bd1c6ad10 100644 --- a/pcre2/testdata/testinput18 +++ b/pcre2/testdata/testinput18 @@ -1,17 +1,112 @@ # This set of tests is run only with the 8-bit library. It tests the POSIX -# interface with UTF/UCP support, which is supported only with the 8-bit -# library. This test should not be run with JIT (which is not available for the -# POSIX interface). +# interface, which is supported only with the 8-bit library. 
This test should +# not be run with JIT (which is not available for the POSIX interface). +#forbid_utf #pattern posix -/a\x{1234}b/utf - a\x{1234}b +# Test invalid options -/\w/ - +++\x{c2} +/abc/auto_callout -/\w/ucp - +++\x{c2} - -# End of testdata/testinput17 +/abc/ + abc\=find_limits + +/abc/ + abc\=partial_hard + +# Real tests + +/abc/ + abc + +/^abc|def/ + abcdef + abcdef\=notbol + +/.*((abc)$|(def))/ + defabc + defabc\=noteol + +/the quick brown fox/ + the quick brown fox +\= Expect no match + The Quick Brown Fox + +/the quick brown fox/i + the quick brown fox + The Quick Brown Fox + +/(*LF)abc.def/ +\= Expect no match + abc\ndef + +/(*LF)abc$/ + abc + abc\n + +/(abc)\2/ + +/(abc\1)/ +\= Expect no match + abc + +/a*(b+)(z)(z)/ + aaaabbbbzzzz + aaaabbbbzzzz\=ovector=0 + aaaabbbbzzzz\=ovector=1 + aaaabbbbzzzz\=ovector=2 + +/(*ANY)ab.cd/ + ab-cd + ab=cd +\= Expect no match + ab\ncd + +/ab.cd/s + ab-cd + ab=cd + ab\ncd + +/a(b)c/posix_nosub + abc + +/a(?Pb)c/posix_nosub + abc + +/(a)\1/posix_nosub + zaay + +/a?|b?/ + abc +\= Expect no match + ddd\=notempty + +/\w+A/ + CDAAAAB + +/\w+A/ungreedy + CDAAAAB + +/\Biss\B/I,aftertext + Mississippi + +/abc/\ + +"(?(?C)" + +"(?(?C))" + +/abcd/substitute_extended + +/\[A]{1000000}**/expand,regerror_buffsize=31 + +/\[A]{1000000}**/expand,regerror_buffsize=32 + +//posix_nosub + \=offset=70000 + +/(?=(a\K))/ + a + +# End of testdata/testinput18 diff --git a/pcre2/testdata/testinput19 b/pcre2/testdata/testinput19 index 155fd1376..7a90f1aea 100644 --- a/pcre2/testdata/testinput19 +++ b/pcre2/testdata/testinput19 @@ -1,62 +1,18 @@ -# This set of tests exercises the serialization/deserialization functions in -# the library. It does not use UTF or JIT. - -#forbid_utf - -# Compile several patterns, push them onto the stack, and then write them -# all to a file. 
- -#pattern push - -/(?(?&NAME_PAT))\s+(?(?&ADDRESS_PAT)) - (?(DEFINE) - (?[a-z]+) - (?\d+) - )/x -/^(?:((.)(?1)\2|)|((.)(?3)\4|.))$/i - -#save testsaved1 - -# Do it again for some more patterns. - -/(*MARK:A)(*SKIP:B)(C|X)/mark -/(?:(?foo)|(?bar))\k/dupnames - -#save testsaved2 -#pattern -push - -# Reload the patterns, then pop them one by one and check them. - -#load testsaved1 -#load testsaved2 - -#pop info - foofoo - barbar +# This set of tests is run only with the 8-bit library. It tests the POSIX +# interface with UTF/UCP support, which is supported only with the 8-bit +# library. This test should not be run with JIT (which is not available for the +# POSIX interface). -#pop mark - C - D +#pattern posix + +/a\x{1234}b/utf + a\x{1234}b + +/\w/ +\= Expect no match + +++\x{c2} + +/\w/ucp + +++\x{c2} -#pop - AmanaplanacanalPanama - -#pop info - metcalfe 33 - -# Check for an error when different tables are used. - -/abc/push,tables=1 -/xyz/push,tables=2 -#save testsaved1 - -#pop - xyz - -#pop - abc - -#pop should give an error - pqr - -# End of testinput19 +# End of testdata/testinput19 diff --git a/pcre2/testdata/testinput2 b/pcre2/testdata/testinput2 index e0e149f42..a700d5ae2 100644 --- a/pcre2/testdata/testinput2 +++ b/pcre2/testdata/testinput2 @@ -9,6 +9,7 @@ # test 5. 
#forbid_utf +#newline_default lf any anycrlf # Test binary zeroes in the pattern @@ -30,14 +31,14 @@ abc defabc abc\=anchored - *** Failers +\= Expect no match defabc\=anchored ABC /^abc/I abc abc\=anchored - *** Failers +\= Expect no match defabc defabc\=anchored @@ -51,7 +52,7 @@ /^abc$/I abc - *** Failers +\= Expect no match def\nabc /ab\idef/ @@ -128,7 +129,7 @@ /abc$/I,dollar_endonly abc - *** Failers +\= Expect no match abc\n abc\ndef @@ -140,7 +141,7 @@ /the quick brown fox/I,anchored the quick brown fox - *** Failers +\= Expect no match this is a line with the quick brown fox /ab(?z)cd/ @@ -184,13 +185,13 @@ /(?^abc)/Im abc def\nabc - *** Failers +\= Expect no match defabc /(?<=ab(c+)d)ef/ @@ -294,7 +295,7 @@ /(?<=bullock|donkey)-cart/I the bullock-cart a donkey-cart race - *** Failers +\= Expect no match cart horse-and-cart @@ -311,7 +312,7 @@ zZZ bZZ BZZ - *** Failers +\= Expect no match ZZ abXYZZ zzz @@ -320,30 +321,31 @@ /(?[^()]+) | (?R) )? \) /Ix @@ -720,7 +725,7 @@ /((?-i)[[:lower:]])[[:lower:]]/Ii ab aB - *** Failers +\= Expect no match Ab AB @@ -796,13 +801,13 @@ /a(?i)b/IB ab aB - *** Failers +\= Expect no match AB /(a(?i)b)/IB ab aB - *** Failers +\= Expect no match AB / (?i)abc/IBx @@ -832,7 +837,7 @@ /\Q\Eabc/IB /x*+\w/IB - *** Failers +\= Expect no match xxxxx /x?+/IB @@ -851,12 +856,12 @@ /^(\w++|\s++)*$/I now is the time for all good men to come to the aid of the party - *** Failers +\= Expect no match this is not a line with only words and spaces! 
/(\d++)(\w)/I 12345a - *** Failers +\= Expect no match 12345+ /a++b/I @@ -874,7 +879,7 @@ /\(([^()]++|\([^()]+\))+\)/I (abc) (abc(def)xyz) - *** Failers +\= Expect no match ((()aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa /(abc){1,3}+/IB @@ -937,7 +942,7 @@ hij> def> - *** Failers +\= Expect no match iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b/IB @@ -1008,7 +1013,7 @@ /^a/Im abcde xy\nabc - *** Failers +\= Expect no match xyabc /c|abc/I @@ -1020,7 +1025,7 @@ /abc(?C)def/I abcdef 1234abcdef - *** Failers +\= Expect no match abcxyz abcxyzf @@ -1029,12 +1034,12 @@ /(?C1)\dabc(?C2)def/I 1234abcdef - *** Failers +\= Expect no match abcdef /(?C1)\dabc(?C2)def/I 1234abcdef - *** Failers +\= Expect no match abcdef /(?C255)ab/I @@ -1046,21 +1051,19 @@ /(?C12vr)x/I /abc(?C)def/I - *** Failers \x83\x0\x61bcdef /(abc)(?C)de(?C1)f/I 123abcdef 123abcdef\=callout_capture 123abcdefC-\=callout_none - *** Failers +\= Expect no match 123abcdef\=callout_fail=1 /(?C0)(abc(?C1))*/I abcabcabc - abcabc\=callout_fail=1:3 - *** Failers - abcabcabc\=callout_fail=1:3 + abcabc\=callout_fail=1:4 + abcabcabc\=callout_fail=1:4 /(\d{3}(?C))*/I 123\=callout_capture @@ -1083,9 +1086,11 @@ abcxyz\=callout_capture /a(b+)(c*)(?C1)/I +\= Expect no match abbbbbccc\=callout_data=1 /a(b+?)(c*?)(?C1)/I +\= Expect no match abbbbbccc\=callout_data=1 /(?C)abc/I @@ -1094,30 +1099,20 @@ /(?C)a|b/I -/(?R)/I - -/(a|(?R))/I - -/(ab|(bc|(de|(?R))))/I - /x(ab|(bc|(de|(?R))))/I xab xbc xde xxab xxxab - *** Failers +\= Expect no match xyab -/(ab|(bc|(de|(?1))))/I - -/x(ab|(bc|(de|(?1)x)x)x)/I - 
/^([^()]|\((?1)*\))*$/I abc a(b)c a(b(c))d - *** Failers) +\= Expect no match) a(b(c)d /^>abc>([^()]|\((?1)*\))* hij> def> - *** Failers +\= Expect no match cd)ef(?Pgh)/I @@ -1246,7 +1242,7 @@ "\[((?P\d+)(,(?P>elem))*)\]"I [10,20,30,5,5,4,4,2,43,23,4234] - *** Failers +\= Expect no match [] "\[((?P\d+)(,(?P>elem))*)?\]"I @@ -1259,7 +1255,11 @@ /(a(b(?2)c)){0,2}/IB -/[ab]{1}+/IB +/[ab]{1}+/B + +/()(?1){1}/B + +/()(?1)/B /((w\/|-|with)*(free|immediate)*.*?shipping\s*[!.-]*)/Ii Baby Bjorn Active Carrier - With free SHIPPING!! @@ -1275,6 +1275,7 @@ /abcde/IB,auto_callout abcde +\= Expect no match abcdfe /a*b/IB,auto_callout @@ -1290,18 +1291,19 @@ /a+b/IB,auto_callout ab aaaab +\= Expect no match aaaacb /(abc|def)x/IB,auto_callout abcx defx - ** Failers +\= Expect no match abcdefzx /(abc|def)x/IB,auto_callout abcx defx - ** Failers +\= Expect no match abcdefzx /(ab|cd){3,4}/I,auto_callout @@ -1310,9 +1312,11 @@ abcdcdcdcdcd /([ab]{,4}c|xy)/IB,auto_callout +\= Expect no match Note: that { does NOT introduce a quantifier /([ab]{,4}c|xy)/IB,auto_callout +\= Expect no match Note: that { does NOT introduce a quantifier /([ab]{1,4}c|xy){4,5}?123/IB,auto_callout @@ -1322,7 +1326,7 @@ ab cd\=offset=1 /\b.*/Is - ab cd\=offset=1 + ab cd\=startoffset=1 /(?!.bcd).*/I Xbcd12345 @@ -1333,7 +1337,7 @@ abcd\=ps abcde\=ps the quick brown abc\=ps - ** Failers\=ps +\= Expect no match\=ps the quick brown abxyz fox\=ps "^(0?[1-9]|[12][0-9]|3[01])/(0?[1-9]|1[012])/(20)?\d\d$"I @@ -1348,7 +1352,7 @@ 02/\=ps 02/0\=ps 02/1\=ps - ** Failers\=ps +\= Expect no match\=ps \=ps 123\=ps 33/4/04\=ps @@ -1382,7 +1386,7 @@ 1234X 12345\=ps 12345X - *** Failers +\= Expect no match 1X 123456\=ps @@ -1395,17 +1399,17 @@ /line\nbreak/I,firstline this is a line\nbreak - ** Failers +\= Expect no match line one\nthis is a line\nbreak in the second line /line\nbreak/Im,firstline this is a line\nbreak - ** Failers +\= Expect no match line one\nthis is a line\nbreak in the second line /(?i)(?-i)AbCd/I AbCd - ** 
Failers +\= Expect no match abcd /a{11111111111111111111}/I @@ -1460,7 +1464,6 @@ /^((?Pa1)|(?Pa2)b)/I,dupnames a1b\=copy=A a2b\=copy=A - ** Failers a1b\=copy=Z,copy=A /(?|(?)(?)(?)|(?)(?)(?))/I,dupnames @@ -1478,7 +1481,6 @@ /^((?Pa1)|(?Pa2)b)/I,dupnames a1b\=get=A a2b\=get=A - ** Failers a1b\=get=Z,get=A /^(?Pa)(?Pb)/I,dupnames @@ -1497,9 +1499,9 @@ /^(?Pa) (?J:(?Pb)(?Pc)) (?Pd)/I -/ In this next test, J is not set at the outer level; consequently it isn't -set in the pattern's options; consequently pcre_get_named_substring() produces -a random value. /Ix +# In this next test, J is not set at the outer level; consequently it isn't set +# in the pattern's options; consequently pcre2_substring_get_byname() produces +# a random value. /^(?Pa) (?J:(?Pb)(?Pc)) (?Pd)/I a bc d\=copy=A,copy=B,copy=C @@ -1507,7 +1509,7 @@ a random value. /Ix /^(?Pa)?(?(A)a|b)/I aabc bc - ** Failers +\= Expect no match abc /(?:(?(ZZ)a|b)(?PX))+/I @@ -1539,7 +1541,7 @@ a random value. /Ix /^abc/Im,newline=lf xyz\nabc xyz\r\nabc - ** Failers +\= Expect no match xyz\rabc xyzabc\r xyzabc\rpqr @@ -1548,13 +1550,13 @@ a random value. /Ix /^abc/Im,newline=crlf xyz\r\nabclf> - ** Failers +\= Expect no match xyz\nabclf xyz\rabclf /^abc/Im,newline=cr xyz\rabc - ** Failers +\= Expect no match xyz\nabc xyz\r\nabc @@ -1612,7 +1614,7 @@ a random value. /Ix /^a+A\d/IB aaaA5 - ** Failers +\= Expect no match aaaa5 /^a*A\d/IBi @@ -1784,7 +1786,7 @@ a random value. /Ix /^(?:(?(ZZ)a|b)(?X))+/ bXaX bXbX - ** Failers +\= Expect no match aXaX aXbX @@ -1805,14 +1807,14 @@ a random value. /Ix /^((?(abc)a|b)(?x|y))+/ bxay bxby - ** Failers +\= Expect no match axby /^(((?P=abc)|X)(?x|y))+/ XxXxxx XxXyyx XxXyxx - ** Failers +\= Expect no match x /^(?1)(abc)/ @@ -1845,7 +1847,7 @@ a random value. /Ix Satan, oscillate my metallic sonatas! A man, a plan, a canal: Panama! Able was I ere I saw Elba. - *** Failers +\= Expect no match The quick brown fox /(?=(\w+))\1:/I @@ -1856,14 +1858,14 @@ a random value. 
/Ix /(?'abc'a|b)(?d|e)\k{2}/dupnames adaa - ** Failers +\= Expect no match addd adbb /(?'abc'a|b)(?d|e)(?&abc){2}/dupnames bdaa bdab - ** Failers +\= Expect no match bddd /(?(tom|bon)-\k{A}/ tom-tom bon-bon - ** Failers +\= Expect no match tom-bon /\g{A/ @@ -2106,7 +2111,7 @@ a random value. /Ix xxyzx /\H++X/B - ** Failers +\= Expect no match XXXX /\H+\hY/B @@ -2162,21 +2167,27 @@ a random value. /Ix /A(*PRUNE)B(*SKIP)C(*THEN)D(*COMMIT)E(*F)F(*FAIL)G(?!)H(*ACCEPT)I/B /^a+(*FAIL)/auto_callout +\= Expect no match aaaaaa /a+b?c+(*FAIL)/auto_callout +\= Expect no match aaabccc /a+b?(*PRUNE)c+(*FAIL)/auto_callout +\= Expect no match aaabccc /a+b?(*COMMIT)c+(*FAIL)/auto_callout +\= Expect no match aaabccc /a+b?(*SKIP)c+(*FAIL)/auto_callout +\= Expect no match aaabcccaaabccc /a+b?(*THEN)c+(*FAIL)/auto_callout +\= Expect no match aaabccc /a(*MARK)b/ @@ -2190,6 +2201,7 @@ a random value. /Ix /(?1)\c[/ /.+A/newline=crlf +\= Expect no match \r\nA /\nA/newline=crlf @@ -2205,33 +2217,33 @@ a random value. /Ix /(*CR)a.b/ a\nb - ** Failers +\= Expect no match a\rb /(*CR)a.b/newline=lf a\nb - ** Failers +\= Expect no match a\rb /(*LF)a.b/newline=CRLF a\rb - ** Failers +\= Expect no match a\nb /(*CRLF)a.b/ a\rb a\nb - ** Failers +\= Expect no match a\r\nb /(*ANYCRLF)a.b/newline=CR - ** Failers +\= Expect no match a\rb a\nb a\r\nb /(*ANY)a.b/newline=cr - ** Failers +\= Expect no match a\rb a\nb a\r\nb @@ -2250,7 +2262,7 @@ a random value. /Ix a\rb a\nb a\r\nb - ** Failers +\= Expect no match a\x85b a\x0bb @@ -2265,7 +2277,7 @@ a random value. /Ix a\rb a\nb a\r\nb - ** Failers +\= Expect no match a\x85b a\x0bb @@ -2280,7 +2292,7 @@ a random value. /Ix a\r\n\nb a\n\r\rb a\r\n\r\n\r\n\r\nb - ** Failers +\= Expect no match a\x85\x85b a\x0b\x0bb @@ -2290,7 +2302,7 @@ a random value. /Ix a\r\n\n\r\rb a\x85\x85b a\x0b\x0bb - ** Failers +\= Expect no match a\r\r\r\r\rb /(*BSR_ANYCRLF)a\Rb/I @@ -2343,14 +2355,6 @@ a random value. 
/Ix /(?P>)/ -/(?!\w)(?R)/ - -/(?=\w)(?R)/ - -/(?x|y){0}z/ xzxx yzyy - ** Failers +\= Expect no match xxz /(\3)(\1)(a)/ +\= Expect no match cat /(\3)(\1)(a)/alt_bsux,allow_empty_class,match_unset_backref,dupnames @@ -2397,12 +2402,12 @@ a random value. /Ix /(?1)[]a()b](abc)/ abcbabc - ** Failers +\= Expect no match abcXabc /(?1)[^]a()b](abc)/ abcXabc - ** Failers +\= Expect no match abcbabc /(?2)[]a()b](abc)(xyz)/ @@ -2419,27 +2424,27 @@ a random value. /Ix /a[^]b/ /a[]b/alt_bsux,allow_empty_class,match_unset_backref,dupnames - ** Failers +\= Expect no match ab /a[]+b/alt_bsux,allow_empty_class,match_unset_backref,dupnames - ** Failers +\= Expect no match ab /a[]*+b/alt_bsux,allow_empty_class,match_unset_backref,dupnames - ** Failers +\= Expect no match ab /a[^]b/alt_bsux,allow_empty_class,match_unset_backref,dupnames aXb a\nb - ** Failers +\= Expect no match ab /a[^]+b/alt_bsux,allow_empty_class,match_unset_backref,dupnames aXb a\nX\nXb - ** Failers +\= Expect no match ab /a(?!)b/B @@ -2462,13 +2467,13 @@ a random value. /Ix /xyz/auto_callout xyz abcxyz - ** Failers +\= Expect no match abc abcxypqr /xyz/auto_callout,no_start_optimize abcxyz - ** Failers +\= Expect no match abc abcxypqr @@ -2624,10 +2629,12 @@ a random value. /Ix ++++Z1234\=ps /Z(*F)/ +\= Expect no match Z\=ps ZA\=ps /Z(?!)/ +\= Expect no match Z\=ps ZA\=ps @@ -2680,14 +2687,14 @@ a random value. /Ix abcdef abcdef\=notempty_atstart xyzabcdef\=notempty_atstart - ** Failers +\= Expect no match abcdef\=notempty xyzabcdef\=notempty /^(?:(?=abc)|abc\K)/aftertext,startchar abcdef abcdef\=notempty_atstart - ** Failers +\= Expect no match abcdef\=notempty /a?b?/aftertext @@ -2696,13 +2703,13 @@ a random value. /Ix xyzabc\=notempty xyzabc\=notempty_atstart xyz\=notempty_atstart - ** Failers +\= Expect no match xyz\=notempty /^a?b?/aftertext xyz xyzabc - ** Failers +\= Expect no match xyzabc\=notempty xyzabc\=notempty_atstart xyz\=notempty_atstart @@ -2750,7 +2757,7 @@ a random value. 
/Ix /(?i:\g)(?-i:(?a))/ XaaX - ** Failers +\= Expect no match XAAX /(?-i:\g<+1>)(?i:(a))/ @@ -2835,20 +2842,20 @@ a random value. /Ix (?('quote')[a-z]+|[0-9]+)/Ix,dupnames a"aaaaa b"aaaaa - ** Failers +\= Expect no match b"11111 a"11111 /^(?|(a)(b)(c)(?d)|(?e)) (?('D')X|Y)/IBx,dupnames abcdX eX - ** Failers +\= Expect no match abcdY ey /(?a) (b)(c) (?d (?(R&A)$ | (?4)) )/IBx,dupnames abcdd - ** Failers +\= Expect no match abcdde /abcd*/ @@ -2882,7 +2889,7 @@ a random value. /Ix XABD BAD ABXABD - ** Failers +\= Expect no match ABX BAXBAD @@ -2893,6 +2900,7 @@ a random value. /Ix cat /(\3)(\1)(a)/I +\= Expect no match cat /i(?(DEFINE)(?a))/I @@ -2905,7 +2913,7 @@ a random value. /Ix XabX XAbX CcC - ** Failers +\= Expect no match XABX /(?i)a(?s)b|c/B @@ -2919,13 +2927,15 @@ a random value. /Ix /^(?&t)*(?(DEFINE)(?.))$/B -/ -- This one is here because Perl gives the match as "b" rather than "ab". I - believe this to be a Perl bug. --/ +# This one is here because Perl gives the match as "b" rather than "ab". I +# believe this to be a Perl bug. /(?>a\Kb)z|(ab)/ ab\=startchar /(?P(?P0|)|(?P>L2)(?P>L1))/ + abcd + 0abc /abc(*MARK:)pqr/ @@ -2937,6 +2947,7 @@ a random value. /Ix # bug because replacing the B in the pattern by (B|D) does make it fail. /A(*COMMIT)B/aftertext,mark +\= Expect no match ACABX # These should be different, but in Perl they are not, which I think @@ -2946,13 +2957,14 @@ a random value. /Ix AC /A(*PRUNE)B|A(*PRUNE)C/mark +\= Expect no match AC # Mark names can be duplicated. Perl doesn't give a mark for this one, # though PCRE2 does. /^A(*:A)B|^X(*:A)Y/mark - ** Failers +\= Expect no match XAQQ # COMMIT at the start of a pattern should be the same as an anchor. Perl @@ -2963,16 +2975,19 @@ a random value. 
/Ix ABCDEFG /(*COMMIT)ABC/no_start_optimize - ** Failers +\= Expect no match DEFGABC /^(ab (c+(*THEN)cd) | xyz)/x +\= Expect no match abcccd /^(ab (c+(*PRUNE)cd) | xyz)/x +\= Expect no match abcccd /^(ab (c+(*FAIL)cd) | xyz)/x +\= Expect no match abcccd # Perl gets some of these wrong @@ -2991,20 +3006,20 @@ a random value. /Ix /A\NB./B ACBD - *** Failers +\= Expect no match A\nB ACB\n /A\NB./Bs ACBD ACB\n - *** Failers +\= Expect no match A\nB /A\NB/newline=crlf A\nB A\rB - ** Failers +\= Expect no match A\r\nB /\R+b/B @@ -3033,6 +3048,7 @@ a random value. /Ix X\x0d\x0a /X\H++\R/B +\= Expect no match X\x0d\x0a /(?<=abc)def/ @@ -3067,17 +3083,21 @@ a random value. /Ix abc\=ph /abc\B/ - abc abc\=ps abc\=ph +\= Expect no match + abc /.+/ +\= Bad offsets + abc\=offset=4 + abc\=offset=-4 +\= Valid data abc\=offset=0 abc\=offset=1 abc\=offset=2 +\= Expect no match abc\=offset=3 - abc\=offset=4 - abc\=offset=-4 /^\cÄ£/ @@ -3116,12 +3136,12 @@ a random value. /Ix /^(a)*+(\w)/ aaaaX - ** Failers +\= Expect no match aaaa /^(?:a)*+(\w)/ aaaaX - ** Failers +\= Expect no match aaaa /(a)++1234/IB @@ -3187,7 +3207,7 @@ a random value. /Ix /(?1)(?:(b(*ACCEPT))){0}c/ bc - ** Failers +\= Expect no match b /(?1)(?:((*ACCEPT))){0}c/ @@ -3195,18 +3215,22 @@ a random value. /Ix c\=notempty /^.*?(?(?=a)a|b(*THEN)c)/ +\= Expect no match ba /^.*?(?(?=a)a|bc)/ ba /^.*?(?(?=a)a(*THEN)b|c)/ +\= Expect no match ac /^.*?(?(?=a)a(*THEN)b)c/ +\= Expect no match ac /^.*?(a(*THEN)b)c/ +\= Expect no match aabc /^.*? (?1) c (?(DEFINE)(a(*THEN)b))/x @@ -3223,9 +3247,11 @@ a random value. /Ix /(*MARK:A)(*SKIP:B)(C|X)/mark C +\= Expect no match D /(*:A)A+(*SKIP:A)(B|Z)/mark +\= Expect no match AAAC # ---------------------------- @@ -3240,6 +3266,7 @@ a random value. /Ix /(?>(*ACCEPT)b)c/ c +\= Expect no match c\=notempty /(?:(?>(a)))+a%/allaftertext @@ -3302,14 +3329,13 @@ a random value. 
/Ix /[:a[:abc]b:]/B -/(a+|(?R)b)/ - /^(a(*:A)(d|e(*:B))z|aeq)/auto_callout adz aez aeqwerty /.(*F)/ +\= Expect no match abc\=ph /\btype\b\W*?\btext\b\W*?\bjavascript\b/I @@ -3323,9 +3349,10 @@ a random value. /Ix ax 'a*(*ACCEPT)b'aftertext - \=notempty_atstart abc\=notempty_atstart bbb\=notempty_atstart +\= Expect no match + \=notempty_atstart /(*ACCEPT)a/I,aftertext bax @@ -3335,7 +3362,7 @@ a random value. /Ix /^(?>a+)(?>(z+))\w/B aaaazzzzb - ** Failers +\= Expect no match aazz /(.)(\1|a(?2))/ @@ -3345,6 +3372,7 @@ a random value. /Ix cbbbc /(.)((?(1)c|a)|a(?2))/ +\= Expect no match baa /(?P(?P=abn)xxx)/B @@ -3353,7 +3381,7 @@ a random value. /Ix /^a\x41z/alt_bsux,allow_empty_class,match_unset_backref,dupnames aAz - *** Failers +\= Expect no match ax41z /^a[m\x41]z/alt_bsux,allow_empty_class,match_unset_backref,dupnames @@ -3364,7 +3392,7 @@ a random value. /Ix /^a\u0041z/alt_bsux,allow_empty_class,match_unset_backref,dupnames aAz - *** Failers +\= Expect no match au0041z /^a[m\u0041]z/alt_bsux,allow_empty_class,match_unset_backref,dupnames @@ -3372,12 +3400,12 @@ a random value. /Ix /^a\u041z/alt_bsux,allow_empty_class,match_unset_backref,dupnames au041z - *** Failers +\= Expect no match aAz /^a\U0041z/alt_bsux,allow_empty_class,match_unset_backref,dupnames aU0041z - *** Failers +\= Expect no match aAz /(?(?=c)c|d)++Y/B @@ -3402,7 +3430,7 @@ a random value. /Ix /(A|P)(*:A)(B|P) | (X|P)(X|P)(*:B)(Y|P)/x,mark AABC XXYZ - ** Failers +\= Expect no match XAQQ XAQQXZZ AXQQQ @@ -3413,12 +3441,12 @@ a random value. /Ix /(b|q)(*:m)f|a(*:n)w/mark aw - ** Failers +\= Expect no match abc /(q|b)(*:m)f|a(*:n)w/mark aw - ** Failers +\= Expect no match abc # After a partial match, the behaviour is as for a failure. @@ -3666,8 +3694,9 @@ a random value. /Ix ad /^(?!a(*THEN)b|ac)../ - ac ad +\= Expect no match + ac /^(?=a(*THEN)b|ac)/ ac @@ -3694,6 +3723,7 @@ a random value. 
/Ix bnn /(?(?=b(*SKIP)a)bn|bnn)/ +\= Expect no match bnn /(?=b(*THEN)a|)bn|bnn/ @@ -3712,41 +3742,40 @@ a random value. /Ix /[bcd]*a/B -# A complete set of tests for auto-possessification of character types. +# A complete set of tests for auto-possessification of character types, but +# omitting \C because it might be disabled (it has its own tests). -/\D+\D \D+\d \D+\S \D+\s \D+\W \D+\w \D+. \D+\C \D+\R \D+\H \D+\h \D+\V \D+\v \D+\Z \D+\z \D+$/Bx +/\D+\D \D+\d \D+\S \D+\s \D+\W \D+\w \D+. \D+\R \D+\H \D+\h \D+\V \D+\v \D+\Z \D+\z \D+$/Bx -/\d+\D \d+\d \d+\S \d+\s \d+\W \d+\w \d+. \d+\C \d+\R \d+\H \d+\h \d+\V \d+\v \d+\Z \d+\z \d+$/Bx +/\d+\D \d+\d \d+\S \d+\s \d+\W \d+\w \d+. \d+\R \d+\H \d+\h \d+\V \d+\v \d+\Z \d+\z \d+$/Bx -/\S+\D \S+\d \S+\S \S+\s \S+\W \S+\w \S+. \S+\C \S+\R \S+\H \S+\h \S+\V \S+\v \S+\Z \S+\z \S+$/Bx +/\S+\D \S+\d \S+\S \S+\s \S+\W \S+\w \S+. \S+\R \S+\H \S+\h \S+\V \S+\v \S+\Z \S+\z \S+$/Bx -/\s+\D \s+\d \s+\S \s+\s \s+\W \s+\w \s+. \s+\C \s+\R \s+\H \s+\h \s+\V \s+\v \s+\Z \s+\z \s+$/Bx +/\s+\D \s+\d \s+\S \s+\s \s+\W \s+\w \s+. \s+\R \s+\H \s+\h \s+\V \s+\v \s+\Z \s+\z \s+$/Bx -/\W+\D \W+\d \W+\S \W+\s \W+\W \W+\w \W+. \W+\C \W+\R \W+\H \W+\h \W+\V \W+\v \W+\Z \W+\z \W+$/Bx +/\W+\D \W+\d \W+\S \W+\s \W+\W \W+\w \W+. \W+\R \W+\H \W+\h \W+\V \W+\v \W+\Z \W+\z \W+$/Bx -/\w+\D \w+\d \w+\S \w+\s \w+\W \w+\w \w+. \w+\C \w+\R \w+\H \w+\h \w+\V \w+\v \w+\Z \w+\z \w+$/Bx +/\w+\D \w+\d \w+\S \w+\s \w+\W \w+\w \w+. \w+\R \w+\H \w+\h \w+\V \w+\v \w+\Z \w+\z \w+$/Bx -/\C+\D \C+\d \C+\S \C+\s \C+\W \C+\w \C+. \C+\C \C+\R \C+\H \C+\h \C+\V \C+\v \C+\Z \C+\z \C+$/Bx +/\R+\D \R+\d \R+\S \R+\s \R+\W \R+\w \R+. \R+\R \R+\H \R+\h \R+\V \R+\v \R+\Z \R+\z \R+$/Bx -/\R+\D \R+\d \R+\S \R+\s \R+\W \R+\w \R+. \R+\C \R+\R \R+\H \R+\h \R+\V \R+\v \R+\Z \R+\z \R+$/Bx +/\H+\D \H+\d \H+\S \H+\s \H+\W \H+\w \H+. \H+\R \H+\H \H+\h \H+\V \H+\v \H+\Z \H+\z \H+$/Bx -/\H+\D \H+\d \H+\S \H+\s \H+\W \H+\w \H+. 
\H+\C \H+\R \H+\H \H+\h \H+\V \H+\v \H+\Z \H+\z \H+$/Bx +/\h+\D \h+\d \h+\S \h+\s \h+\W \h+\w \h+. \h+\R \h+\H \h+\h \h+\V \h+\v \h+\Z \h+\z \h+$/Bx -/\h+\D \h+\d \h+\S \h+\s \h+\W \h+\w \h+. \h+\C \h+\R \h+\H \h+\h \h+\V \h+\v \h+\Z \h+\z \h+$/Bx +/\V+\D \V+\d \V+\S \V+\s \V+\W \V+\w \V+. \V+\R \V+\H \V+\h \V+\V \V+\v \V+\Z \V+\z \V+$/Bx -/\V+\D \V+\d \V+\S \V+\s \V+\W \V+\w \V+. \V+\C \V+\R \V+\H \V+\h \V+\V \V+\v \V+\Z \V+\z \V+$/Bx +/\v+\D \v+\d \v+\S \v+\s \v+\W \v+\w \v+. \v+\R \v+\H \v+\h \v+\V \v+\v \v+\Z \v+\z \v+$/Bx -/\v+\D \v+\d \v+\S \v+\s \v+\W \v+\w \v+. \v+\C \v+\R \v+\H \v+\h \v+\V \v+\v \v+\Z \v+\z \v+$/Bx +/ a+\D a+\d a+\S a+\s a+\W a+\w a+. a+\R a+\H a+\h a+\V a+\v a+\Z a+\z a+$/Bx -/ a+\D a+\d a+\S a+\s a+\W a+\w a+. a+\C a+\R a+\H a+\h a+\V a+\v a+\Z a+\z a+$/Bx +/\n+\D \n+\d \n+\S \n+\s \n+\W \n+\w \n+. \n+\R \n+\H \n+\h \n+\V \n+\v \n+\Z \n+\z \n+$/Bx -/\n+\D \n+\d \n+\S \n+\s \n+\W \n+\w \n+. \n+\C \n+\R \n+\H \n+\h \n+\V \n+\v \n+\Z \n+\z \n+$/Bx +/ .+\D .+\d .+\S .+\s .+\W .+\w .+. .+\R .+\H .+\h .+\V .+\v .+\Z .+\z .+$/Bx -/ .+\D .+\d .+\S .+\s .+\W .+\w .+. .+\C .+\R .+\H .+\h .+\V .+\v .+\Z .+\z .+$/Bx +/ .+\D .+\d .+\S .+\s .+\W .+\w .+. .+\R .+\H .+\h .+\V .+\v .+\Z .+\z .+$/Bsx -/ .+\D .+\d .+\S .+\s .+\W .+\w .+. .+\C .+\R .+\H .+\h .+\V .+\v .+\Z .+\z .+$/Bsx - -/\D+$ \d+$ \S+$ \s+$ \W+$ \w+$ \C+$ \R+$ \H+$ \h+$ \V+$ \v+$ a+$ \n+$ .+$ .+$/Bmx +/ \D+$ \d+$ \S+$ \s+$ \W+$ \w+$ \R+$ \H+$ \h+$ \V+$ \v+$ a+$ \n+$ .+$ .+$/Bmx /(?=a+)a(a+)++a/B @@ -3889,7 +3918,7 @@ a random value. /Ix a /red/ thing red is a colour put it all on red - ** Failers +\= Expect no match no reduction Alfred Winifred @@ -3899,6 +3928,7 @@ a random value. /Ix abcd\=startchar /abcd/newline=lf,firstline +\= Expect no match xx\nxabcd # Test stack guard external calls. @@ -3937,7 +3967,7 @@ a random value. /Ix /^(?(?=abc)\w{3}:|\d\d)$/ abc: 12 - *** Failers +\= Expect no match 123 xyz @@ -3975,13 +4005,13 @@ a random value. 
/Ix /(?(VERSION=8)yes|no){3}/I yesnononoyes - ** Failers +\= Expect no match yesno /(?:(?abc)|xyz)(?(VERSION)yes|no)/I abcyes xyzno - ** Failers +\= Expect no match abcno xyzyes @@ -3991,6 +4021,8 @@ a random value. /Ix /(?(VERSION>=10.0.0)yes|no)/ +/(?(VERSION=10.101)yes|no)/ + /abcd/I /abcd/I,no_start_optimize @@ -4008,8 +4040,6 @@ a random value. /Ix /(((((a)))))/parens_nest_limit=2 -# Tests for pcre2_substitute() - /abc/replace=XYZ 123123 123abc123 @@ -4087,13 +4117,58 @@ a random value. /Ix /(.)(.)/g,replace=$2$1 abcdefgh + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=${*MARK} + apple lemon blackberry + apple strudel + fruitless -# End of substitute tests +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/replace=${*MARK} sauce, + apple lemon blackberry + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARK> + apple lemon blackberry + apple strudel + fruitless + +/(*:pear)apple/g,replace=${*MARKING} + apple lemon blackberry + +/(*:pear)apple/g,replace=${*MARK-time + apple lemon blackberry + +/(*:pear)apple/g,replace=${*mark} + apple lemon blackberry + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARKET> + apple lemon blackberry + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[22]${*MARK} + apple lemon blackberry + apple lemon blackberry\=substitute_overflow_length + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[23]${*MARK} + apple lemon blackberry + +/abc/ + 123abc123\=replace=[9]XYZ + 123abc123\=substitute_overflow_length,replace=[9]XYZ + 123abc123\=substitute_overflow_length,replace=[6]XYZ + 123abc123\=substitute_overflow_length,replace=[1]XYZ + 123abc123\=substitute_overflow_length,replace=[0]XYZ + +/a(b)c/ + 123abc123\=replace=[9]x$1z + 123abc123\=substitute_overflow_length,replace=[9]x$1z + 123abc123\=substitute_overflow_length,replace=[6]x$1z + 123abc123\=substitute_overflow_length,replace=[1]x$1z + 
123abc123\=substitute_overflow_length,replace=[0]x$1z "((?=(?(?=(?(?=(?(?=()))))))))" a "(?(?=)==)(((((((((?=)))))))))" +\= Expect no match a /(a)(b)|(c)/ @@ -4117,9 +4192,11 @@ a random value. /Ix /^abc/info,no_dotstar_anchor /.*\d/info,auto_callout +\= Expect no match aaa /.*\d/info,no_dotstar_anchor,auto_callout +\= Expect no match aaa /.*\d/dotall,info @@ -4161,7 +4238,7 @@ a random value. /Ix /(?(?=0)(?=00)?00765|(?!3).56)/ 00765 456 - ** Failers +\= Expect no match 356 '^(a)*+(\w)' @@ -4233,7 +4310,7 @@ a random value. /Ix /(?(?!)a|b)/ bbb - ** Failers +\= Expect no match aaa # JIT gives a different error message for the infinite recursion @@ -4265,9 +4342,8 @@ a random value. /Ix /((?2){73}(?2))((?1))/info -/ab\Cde/never_backslash_c - /abc/ +\= Expect no match \[9x!xxx(]{9999} /(abc)*/ @@ -4327,4 +4403,570 @@ a random value. /Ix /^(?:(?(1)x|)+)+$()/B +/[[:>:]](?<)/ + +/((?x)(*:0))#(?'/ + +/(?C$[$)(?<]/ + +/(?C$)$)(?<]/ + +/(?(R))*+/B + abcd + +/((?x)(?#))#(?'/ + +/((?x)(?#))#(?'abc')/I + +/[[:\\](?<[::]/ + +/[[:\\](?'abc')[a:]/I + +"[[[.\xe8Nq\xffq\xff\xe0\x2|||::Nq\xffq\xff\xe0\x6\x2|||::[[[:[::::::[[[[[::::::::[:[[[:[:::[[[[[[[[[[[[:::::::::::::::::[[.\xe8Nq\xffq\xff\xe0\x2|||::Nq\xffq\xff\xe0\x6\x2|||::[[[:[::::::[[[[[::::::::[:[[[:[:::[[[[[[[[[[[[[[:::E[[[:[:[[:[:::[[:::E[[[:[:[[:'[:::::E[[[:[::::::[[[:[[[[[[[::E[[[:[::::::[[[:[[[[[[[[:[[::[::::[[:::::::[[:[[[[[[[:[[::[:[[:[~" + +/()(?(R)0)*+/B + +/(?R-:(?${1:+\Q$1:{}$$\E+\U$1}< + a + +/X(b)Y/substitute_extended + XbY\=replace=x${1:+$1\U$1}y + XbY\=replace=\Ux${1:+$1$1}y + +/a/substitute_extended,replace=${*MARK:+a:b} + a + +/(abcd)/replace=${1:+xy\kz},substitute_extended + abcd + +/(abcd)/ + abcd\=replace=${1:+xy\kz},substitute_extended + +/abcd/substitute_extended,replace=>$1< + abcd + +/abcd/substitute_extended,replace=>xxx${xyz}<<< + abcd + +/(?J)(?:(?a)|(?b))/replace=<$A> + [a] + [b] +\= Expect error + (a)\=ovector=1 + +/(a)|(b)/replace=<$1> +\= Expect error + b + 
+/(aa)(BB)/substitute_extended,replace=\U$1\L$2\E$1..\U$1\l$2$1 + aaBB + +/^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I + +/((p(?'K/ + +/((p(?'K/no_auto_capture + +/abc/replace=A$3123456789Z + abc + +/(?$1<,substitute_unset_empty + cat + xbcom + +/a|(b)c/ + cat\=replace=>$1< + cat\=replace=>$1<,substitute_unset_empty + xbcom\=replace=>$1<,substitute_unset_empty + +/a|(b)c/substitute_extended + cat\=replace=>${2:-xx}< + cat\=replace=>${2:-xx}<,substitute_unknown_unset + cat\=replace=>${X:-xx}<,substitute_unknown_unset + +/a|(?'X'b)c/replace=>$X<,substitute_unset_empty + cat + xbcom + +/a|(?'X'b)c/replace=>$Y<,substitute_unset_empty + cat + cat\=substitute_unknown_unset + cat\=substitute_unknown_unset,-substitute_unset_empty + +/a|(b)c/replace=>$2<,substitute_unset_empty + cat + cat\=substitute_unknown_unset + cat\=substitute_unknown_unset,-substitute_unset_empty + +/()()()/use_offset_limit + \=ovector=11000000000 + \=callout_fail=11000000000 + \=callout_fail=1:11000000000 + \=callout_data=11000000000 + \=callout_data=-11000000000 + \=offset_limit=1100000000000000000000 + \=copy=11000000000 + +/(*MARK:A\x00b)/mark + abc + +/(*MARK:A\x00b)/mark,alt_verbnames + abc + +/"(*MARK:A" 00 "b)"/mark,hex + abc + +/"(*MARK:A" 00 "b)"/mark,hex,alt_verbnames + abc + +/efg/hex + +/eff/hex + +/effg/hex + +/(?J)(?'a'))(?'a')/ + +/(?<=((?C)0))/ + 9010 +\= Expect no match + abc + +/aaa/ +\[abc]{10000000000000000000000000000} +\[a]{3} + +/\[AB]{6000000000000000000000}/expand + +# Hex uses pattern length, not zero-terminated. This tests for overrunning +# the given length of a pattern. + +/'(*U'/hex + +/'(*'/hex + +/'('/hex + +//hex + +# These tests are here because Perl never allows a back reference in a +# lookbehind. PCRE2 supports some limited cases. 
+ +/([ab])...(?<=\1)z/ + a11az + b11bz +\= Expect no match + b11az + +/(?|([ab]))...(?<=\1)z/ + +/([ab])(\1)...(?<=\2)z/ + aa11az + +/(a\2)(b\1)(?<=\2)/ + +/(?[ab])...(?<=\k'A')z/ + a11az + b11bz +\= Expect no match + b11az + +/(?[ab])...(?<=\k'A')(?)z/dupnames + +# Perl does not support \g+n + +/((\g+1X)?([ab]))+/ + aaXbbXa + +/ab(?C1)c/auto_callout + abc + +/'ab(?C1)c'/hex,auto_callout + abc + +# Perl accepts these, but gives a warning. We can't warn, so give an error. + +/[a-[:digit:]]+/ + a-a9-a + +/[A-[:digit:]]+/ + A-A9-A + +/[a-\d]+/ + a-a9-a + +/(?abc)(?(R)xyz)/B + +/(?abc)(?(R)xyz)/B + +/(?=.*[A-Z])/I + +/()(?<=(?0))/ + +/(?*?\g'0/use_length + +/.>*?\g'0/ + +/{„Í„ÍÍ„Í{'{22{2{{2{'{22{{22{2{'{22{2{{2{{222{{2{'{22{2{22{2{'{22{2{{2{'{22{2{22{2{'{'{22{2{22{2{'{22{2{{2{'{22{2{22{2{'{222{2Ą̈́ÍÍ„Í{'{22{2{{2{'{22{{11{2{'{22{2{{2{{'{22{2{{2{'{22{{22{1{'{22{2{{2{{222{{2{'{22{2{22{2{'{/auto_callout + +// +\=get=i00000000000000000000000000000000 +\=get=i2345678901234567890123456789012,get=i1245678901234567890123456789012 + +"(?(?C))" + +/(?(?(?(?(?(?))))))/ + +/(?<=(?1))((?s))/anchored + +/(*:ab)*/ + +%(*:(:(svvvvvvvvvv:]*[ Z!*;[]*[^[]*!^[+.+{{2,7}' _\\\\\\\\\\\\\)?.:.. *w////\\\Q\\\\\\\\\\\\\\\T\\\\\+/?/////'+\\\EEE?/////'+/*+/[^K]?]//(w)%never_backslash_c,alt_verbnames,auto_callout + +/./newline=crlf + \=ph + +/(\x0e00\000000\xc)/replace=\P,substitute_extended + \x0e00\000000\xc + +//replace=0 + \=offset=7 + +".+\QX\E+"B,no_auto_possess + +".+\QX\E+"B,auto_callout,no_auto_possess + +# This one is here because Perl gives an 'unmatched )' error which goes away +# if one of the \) sequences is removed - which is weird. PCRE finds it too +# complicated to find a minimum matching length. 
+ +"()X|((((((((()))))))((((())))))\2())((((((\2\2)))\2)(\22((((\2\2)2))\2)))(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z+:)Z|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z((Z*(\2(Z\':))\0)i|||||||||||||||loZ\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0nte!rnal errpr\2\\21r(2\ZZZ)+:)Z!|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZernZal ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \))\ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)))\2))))((((((\2\2))))))"I + +# This checks that new code for handling groups that may match an empty string +# works on a very large number of alternatives. This pattern used to provoke a +# complaint that it was too complicated. + +/(?:\[A|B|C|D|E|F|G|H|I|J|]{200}Z)/expand + +# This one used to compile rubbish instead of a compile error, and then +# behave unpredictably at match time. 
+ +/.+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X/ + .+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X + +/[:[:alnum:]-[[a:lnum:]+/ + +/((?(?C'')\QX\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/ + +/((?(?C'')\Q\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/ + +/abcd/auto_callout + abcd\=callout_error=255:2 + +/()(\g+65534)/ + +/()(\g+65533)/ + +/Á\x00\x00\x00š(\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\x00k\d+\x00‎\x00\x00\x00\x00\x00\2*\x00\x00\1*.){36}int^\x00\x00ÿÿ\x00š(\1{50779}?)J\w2/I + +/(a)(b)\2\1\1\1\1/I + +/(?a)(?b)\g{b}\g{a}\g{a}\g{a}\g{a}(?xx)(?zz)/I,dupnames + # End of testinput2 diff --git a/pcre2/testdata/testinput20 b/pcre2/testdata/testinput20 new file mode 100644 index 000000000..c87a07e04 --- /dev/null +++ b/pcre2/testdata/testinput20 @@ -0,0 +1,100 @@ +# This set of tests exercises the serialization/deserialization and code copy +# functions in the library. It does not use UTF or JIT. + +#forbid_utf + +# Compile several patterns, push them onto the stack, and then write them +# all to a file. + +#pattern push + +/(?(?&NAME_PAT))\s+(?(?&ADDRESS_PAT)) + (?(DEFINE) + (?[a-z]+) + (?\d+) + )/x +/^(?:((.)(?1)\2|)|((.)(?3)\4|.))$/i + +#save testsaved1 + +# Do it again for some more patterns. + +/(*MARK:A)(*SKIP:B)(C|X)/mark +/(?:(?foo)|(?bar))\k/dupnames + +#save testsaved2 +#pattern -push + +# Reload the patterns, then pop them one by one and check them. 
+ +#load testsaved1 +#load testsaved2 + +#pop info + foofoo + barbar + +#pop mark + C +\= Expect no match + D + +#pop + AmanaplanacanalPanama + +#pop info + metcalfe 33 + +# Check for an error when different tables are used. + +/abc/push,tables=1 +/xyz/push,tables=2 +#save testsaved1 + +#pop + xyz + +#pop + abc + +#pop should give an error + pqr + +/abcd/pushcopy + abcd + +#pop + abcd + +#pop should give an error + +/abcd/push +#popcopy + abcd + +#pop + abcd + +/abcd/push +#save testsaved1 +#pop should give an error + +#load testsaved1 +#popcopy + abcd + +#pop + abcd + +#pop should give an error + +/abcd/pushtablescopy + abcd + +#popcopy + abcd + +#pop + abcd + +# End of testinput20 diff --git a/pcre2/testdata/testinput21 b/pcre2/testdata/testinput21 new file mode 100644 index 000000000..1d1fbedf0 --- /dev/null +++ b/pcre2/testdata/testinput21 @@ -0,0 +1,16 @@ +# These are tests of \C that do not involve UTF. They are not run when \C is +# disabled by compiling with --enable-never-backslash-C. + +/\C+\D \C+\d \C+\S \C+\s \C+\W \C+\w \C+. \C+\R \C+\H \C+\h \C+\V \C+\v \C+\Z \C+\z \C+$/Bx + +/\D+\C \d+\C \S+\C \s+\C \W+\C \w+\C .+\C \R+\C \H+\C \h+\C \V+\C \v+\C a+\C \n+\C \C+\C/Bx + +/ab\Cde/never_backslash_c + +/ab\Cde/info + abXde + +/(?<=ab\Cde)X/ + abZdeX + +# End of testinput21 diff --git a/pcre2/testdata/testinput22 b/pcre2/testdata/testinput22 new file mode 100644 index 000000000..7ada9aaf9 --- /dev/null +++ b/pcre2/testdata/testinput22 @@ -0,0 +1,97 @@ +# Tests of \C when Unicode support is available. Note that \C is not supported +# for DFA matching in UTF mode, so this test is not run with -dfa. The output +# of this test is different in 8-, 16-, and 32-bit modes. Some tests may match +# in some widths and not in others. + +/ab\Cde/utf,info + abXde + +# This should produce an error diagnostic (\C in UTF lookbehind) in 8-bit and +# 16-bit modes, but not in 32-bit mode. 
+ +/(?<=ab\Cde)X/utf + ab!deXYZ + +# Autopossessification tests + +/\C+\X \X+\C/Bx + +/\C+\X \X+\C/Bx,utf + +/\C\X*TÓ…; +{0,6}\v+ F +/utf +\= Expect no match + Ó…\x0a + +/\C(\W?Å¿)'?{{/utf +\= Expect no match + \\C(\\W?Å¿)'?{{ + +/X(\C{3})/utf + X\x{1234} + X\x{11234}Y + X\x{11234}YZ + +/X(\C{4})/utf + X\x{1234}YZ + X\x{11234}YZ + X\x{11234}YZW + +/X\C*/utf + XYZabcdce + +/X\C*?/utf + XYZabcde + +/X\C{3,5}/utf + Xabcdefg + X\x{1234} + X\x{1234}YZ + X\x{1234}\x{512} + X\x{1234}\x{512}YZ + X\x{11234}Y + X\x{11234}YZ + X\x{11234}\x{512} + X\x{11234}\x{512}YZ + X\x{11234}\x{512}\x{11234}Z + +/X\C{3,5}?/utf + Xabcdefg + X\x{1234} + X\x{1234}YZ + X\x{1234}\x{512} + X\x{11234}Y + X\x{11234}YZ + X\x{11234}\x{512}YZ + X\x{11234} + +/a\Cb/utf + aXb + a\nb + a\x{100}b + +/a\C\Cb/utf + a\x{100}b + a\x{12257}b + a\x{12257}\x{11234}b + +/ab\Cde/utf + abXde + +# This one is here not because it's different to Perl, but because the way +# the captured single code unit is displayed. (In Perl it becomes a character, +# and you can't tell the difference.) + +/X(\C)(.*)/utf + X\x{1234} + X\nabc + +# This one is here because Perl gives out a grumbly error message (quite +# correctly, but that messes up comparisons). + +/a\Cb/utf +\= Expect no match in 8-bit mode + a\x{100}b + +# End of testinput22 diff --git a/pcre2/testdata/testinput23 b/pcre2/testdata/testinput23 new file mode 100644 index 000000000..d0a9bc4f5 --- /dev/null +++ b/pcre2/testdata/testinput23 @@ -0,0 +1,7 @@ +# This test is run when PCRE2 has been built with --enable-never-backslash-C, +# which disables the use of \C. All we can do is check that it gives the +# correct error message. 
+ +/a\Cb/ + +# End of testinput23 diff --git a/pcre2/testdata/testinput3 b/pcre2/testdata/testinput3 index 2e1fbf515..71e95fec4 100644 --- a/pcre2/testdata/testinput3 +++ b/pcre2/testdata/testinput3 @@ -8,35 +8,35 @@ #forbid_utf /^[\w]+/ - *** Failers +\= Expect no match École /^[\w]+/locale=fr_FR École /^[\w]+/ - *** Failers +\= Expect no match École /^[\W]+/ École /^[\W]+/locale=fr_FR - *** Failers +\= Expect no match École /[\b]/ \b - *** Failers +\= Expect no match a /[\b]/locale=fr_FR \b - *** Failers +\= Expect no match a /^\w+/ - *** Failers +\= Expect no match École /^\w+/locale=fr_FR @@ -46,12 +46,12 @@ École /(.+)\b(.+)/locale=fr_FR - *** Failers +\= Expect no match École /École/i École - *** Failers +\= Expect no match école /École/i,locale=fr_FR @@ -72,7 +72,7 @@ /^[\xc8-\xc9]/ École - *** Failers +\= Expect no match école /\W+/ diff --git a/pcre2/testdata/testinput4 b/pcre2/testdata/testinput4 index 421b3fbd3..73582b787 100644 --- a/pcre2/testdata/testinput4 +++ b/pcre2/testdata/testinput4 @@ -3,20 +3,21 @@ # some of the property tests may differ because of different versions of # Unicode in use by PCRE2 and Perl. 
+#newline_default lf anycrlf any #perltest /a.b/utf acb a\x7fb a\x{100}b - *** Failers +\= Expect no match a\nb /a(.{3})b/utf a\x{4000}xyb a\x{4000}\x7fyb a\x{4000}\x{100}yb - *** Failers +\= Expect no match a\x{4000}b ac\ncb @@ -54,7 +55,7 @@ a\x{1234}xyb a\x{1234}\x{4321}yb a\x{1234}\x{4321}\x{3412}b - *** Failers +\= Expect no match a\x{1234}b ac\ncb @@ -64,7 +65,7 @@ a\x{1234}\x{4321}\x{3412}b axxxxbcdefghijb a\x{1234}\x{4321}\x{3412}\x{3421}b - *** Failers +\= Expect no match a\x{1234}b /a(.{3,}?)b/utf @@ -73,7 +74,7 @@ a\x{1234}\x{4321}\x{3412}b axxxxbcdefghijb a\x{1234}\x{4321}\x{3412}\x{3421}b - *** Failers +\= Expect no match a\x{1234}b /a(.{3,5})b/utf @@ -84,7 +85,7 @@ a\x{1234}\x{4321}\x{3412}\x{3421}b axbxxbcdefghijb axxxxxbcdefghijb - *** Failers +\= Expect no match a\x{1234}b axxxxxxbcdefghijb @@ -96,12 +97,12 @@ a\x{1234}\x{4321}\x{3412}\x{3421}b axbxxbcdefghijb axxxxxbcdefghijb - *** Failers +\= Expect no match a\x{1234}b axxxxxxbcdefghijb /^[a\x{c0}]/utf - *** Failers +\= Expect no match \x{100} /(?<=aXb)cd/utf @@ -115,19 +116,19 @@ /(?:\x{100}){3}b/utf \x{100}\x{100}\x{100}b - *** Failers +\= Expect no match \x{100}\x{100}b /\x{ab}/utf \x{ab} \xc2\xab - *** Failers +\= Expect no match \x00{ab} /(?<=(.))X/utf WXYZ \x{256}XYZ - *** Failers +\= Expect no match XYZ /[^a]+/g,utf @@ -214,199 +215,185 @@ /\D+/utf 12abcd34 - *** Failers +\= Expect no match 1234 /\D{2,3}/utf 12abcd34 12ab34 - *** Failers +\= Expect no match 1234 12a34 /\D{2,3}?/utf 12abcd34 12ab34 - *** Failers +\= Expect no match 1234 12a34 /\d+/utf 12abcd34 - *** Failers /\d{2,3}/utf 12abcd34 1234abcd - *** Failers +\= Expect no match 1.4 /\d{2,3}?/utf 12abcd34 1234abcd - *** Failers +\= Expect no match 1.4 /\S+/utf 12abcd34 - *** Failers +\= Expect no match \ \ /\S{2,3}/utf 12abcd34 1234abcd - *** Failers +\= Expect no match \ \ /\S{2,3}?/utf 12abcd34 1234abcd - *** Failers +\= Expect no match \ \ />\s+ <34 - *** Failers />\s{2,3} \s{2,3}? 
^`|~ !\"#%&'()*,-./:;?@[\\]_{} \x{a1}\x{a7} @@ -2213,6 +2221,7 @@ \x{23a}\x{23a}\x{2c65}\x{23a} /^(\x{23a})\1*(....)/i,utf +\= Expect no match \x{23a}\x{2c65}\x{2c65}\x{2c65} \x{23a}\x{23a}\x{2c65}\x{23a} @@ -2221,12 +2230,62 @@ "[\S\V\H]"utf -/\C\X*TÓ…; -{0,6}\v+ F -/utf - Ó…\x0a +/[^\p{Any}]*+x/utf + x -/\C(\W?Å¿)'?{{/utf - \\C(\\W?Å¿)'?{{ +/[[:punct:]]/utf,ucp + \x{b4} + +/[[:^ascii:]]/utf,ucp + \x{100} + \x{200} + \x{300} + \x{37e} +\= Expect no match + aa + 99 + +/[[:^ascii:]\w]/utf,ucp + aa + 99 + gg + \x{100} + \x{200} + \x{300} + \x{37e} + +/[\w[:^ascii:]]/utf,ucp + aa + 99 + gg + \x{100} + \x{200} + \x{300} + \x{37e} + +/[^[:ascii:]\W]/utf,ucp + \x{100} + \x{200} +\= Expect no match + aa + 99 + gg + \x{37e} + +/[^[:^ascii:]\d]/utf,ucp + a + ~ + \a + \x{7f} +\= Expect no match + 0 + \x{389} + \x{20ac} + +/(?=.*b)\pL/ + 11bb + +/(?(?=.*b)(?=.*b)\pL|.*c)/ + 11bb # End of testinput4 diff --git a/pcre2/testdata/testinput5 b/pcre2/testdata/testinput5 index 7e2ba45d6..e5a43e5ee 100644 --- a/pcre2/testdata/testinput5 +++ b/pcre2/testdata/testinput5 @@ -3,6 +3,8 @@ # results in 8-bit, 16-bit, and 32-bit modes are excluded (see tests 10 and # 12). +#newline_default lf any anycrlf + # PCRE2 and Perl disagree about the characteristics of certain Unicode # characters. For example, 061C is considered by Perl to be Arabic, though # is it not listed as such in the Unicode Scripts.txt file, and 2066-2069 are @@ -11,11 +13,11 @@ # test 4. /^[\p{Arabic}]/utf - ** Failers +\= Expect no match \x{061c} /^[[:graph:]]+$/utf,ucp - ** Failers +\= Expect no match \x{61c} \x{2066} \x{2067} @@ -23,7 +25,7 @@ \x{2069} /^[[:print:]]+$/utf,ucp - ** Failers +\= Expect no match \x{61c} \x{2066} \x{2067} @@ -54,6 +56,7 @@ A\x{85}\x{2005}Z /^[[:graph:]]+$/utf,ucp +\= Expect no match \x{180e} /^[[:print:]]+$/utf,ucp @@ -63,6 +66,7 @@ \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}\x{180e} /^[[:^print:]]+$/utf,ucp +\= Expect no match \x{180e} # End of U+180E tests. 
@@ -109,12 +113,9 @@ /.{3,5}?/IB,utf \x{212ab}\x{212ab}\x{212ab}\x{861} -/(?<=\C)X/utf - Should produce an error diagnostic - /^[ab]/IB,utf bar - *** Failers +\= Expect no match c \x{ff} \x{100} @@ -123,7 +124,7 @@ c \x{ff} \x{100} - *** Failers +\= Expect no match aaa /\x{100}*(\d+|"(?1)")/utf @@ -133,7 +134,7 @@ "\x{100}1234" \x{100}\x{100}12ab \x{100}\x{100}"12" - *** Failers +\= Expect no match \x{100}\x{100}abcd /\x{100}*/IB,utf @@ -147,7 +148,7 @@ /[Ä€-Ä„]/utf \x{100} \x{104} - *** Failers +\= Expect no match \x{105} \x{ff} @@ -217,7 +218,7 @@ a\x{85}b a\x{2028}b a\x{2029}b - ** Failers +\= Expect no match a\n\rb /^a\R*b/bsr=unicode,utf @@ -240,7 +241,7 @@ a\x{85}b a\n\rb a\n\r\x{85}\x0cb - ** Failers +\= Expect no match ab /^a\R{1,3}b/bsr=unicode,utf @@ -251,34 +252,34 @@ a\r\n\r\n\r\nb a\n\r\n\rb a\n\n\r\nb - ** Failers +\= Expect no match a\n\n\n\rb a\r /\H\h\V\v/utf X X\x0a X\x09X\x0b - ** Failers +\= Expect no match \x{a0} X\x0a /\H*\h+\V?\v{3,4}/utf \x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a \x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a \x09\x20\x{a0}\x0a\x0b\x0c - ** Failers +\= Expect no match \x09\x20\x{a0}\x0a\x0b /\H\h\V\v/utf \x{3001}\x{3000}\x{2030}\x{2028} X\x{180e}X\x{85} - ** Failers +\= Expect no match \x{2009} X\x0a /\H*\h+\V?\v{3,4}/utf \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a \x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a \x09\x20\x{202f}\x0a\x0b\x0c - ** Failers +\= Expect no match \x09\x{200a}\x{a0}\x{2028}\x0b /[\h]/B,utf @@ -300,7 +301,7 @@ a\rb a\nb a\r\nb - ** Failers +\= Expect no match a\x{85}b a\x0bb @@ -315,7 +316,7 @@ a\rb a\nb a\r\nb - ** Failers +\= Expect no match a\x{85}b a\x0bb @@ -325,11 +326,10 @@ a\r\nb a\x{85}b a\x0bb - ** Failers /.*a.*=.b.*/utf,newline=any QQQ\x{2029}ABCaXYZ=!bPQR - ** Failers +\= Expect no match a\x{2029}b \x61\xe2\x80\xa9\x62 @@ -338,13 +338,13 @@ /a[^]b/utf,alt_bsux,allow_empty_class,match_unset_backref a\x{1234}b a\nb - ** Failers +\= Expect no match ab 
/a[^]+b/utf,alt_bsux,allow_empty_class,match_unset_backref aXb a\nX\nX\x{1234}b - ** Failers +\= Expect no match ab /(\x{de})\1/ @@ -396,6 +396,7 @@ X\x{123}\x{123}\x{123}\x{123}\=ps /X\x{123}{2,4}b/utf +\= Expect no match Xx\=ps X\x{123}x\=ps X\x{123}\x{123}x\=ps @@ -403,6 +404,7 @@ X\x{123}\x{123}\x{123}\x{123}x\=ps /X\x{123}{2,4}?b/utf +\= Expect no match Xx\=ps X\x{123}x\=ps X\x{123}\x{123}x\=ps @@ -410,6 +412,7 @@ X\x{123}\x{123}\x{123}\x{123}x\=ps /X\x{123}{2,4}+b/utf +\= Expect no match Xx\=ps X\x{123}x\=ps X\x{123}\x{123}x\=ps @@ -804,6 +807,7 @@ /(?<=\x{1234}\x{1234})\bxy/I,utf /(?\p{Xsp}/utf >\x{1680}\x{2028}\x{0b} >\x{a0} - ** Failers +\= Expect no match \x{0b} /^>\p{Xsp}+/utf @@ -1082,7 +1086,7 @@ /^>\p{Xps}/utf >\x{1680}\x{2028}\x{0b} >\x{a0} - ** Failers +\= Expect no match \x{0b} /^>\p{Xps}+/utf @@ -1113,7 +1117,7 @@ \x{a6c} \x{10a7} _ABC - ** Failers +\= Expect no match [] /^\p{Xwd}+/utf @@ -1138,7 +1142,7 @@ \x{a6c} \x{10a7} _ABC - ** Failers +\= Expect no match [] /^[\p{Xwd}]+/utf @@ -1232,7 +1236,7 @@ /\b...\B/utf abc_ - ** Failers +\= Expect no match \x{37e}abc\x{376} \x{37e}\x{376}\x{371}\x{393}\x{394} !\x{c0}++\x{c1}\x{c2} @@ -1288,9 +1292,11 @@ # These behaved oddly in Perl, so they are kept in this test /(\x{23a}\x{23a}\x{23a})?\1/i,utf +\= Expect no match \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} /(ȺȺȺ)?\1/i,utf +\= Expect no match ȺȺȺⱥⱥ /(\x{23a}\x{23a}\x{23a})?\1/i,utf @@ -1300,9 +1306,11 @@ ȺȺȺⱥⱥⱥ /(\x{23a}\x{23a}\x{23a})\1/i,utf +\= Expect no match \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} /(ȺȺȺ)\1/i,utf +\= Expect no match ȺȺȺⱥⱥ /(\x{23a}\x{23a}\x{23a})\1/i,utf @@ -1328,19 +1336,19 @@ /^[\p{Batak}]/utf \x{1bc0} \x{1bff} - ** Failers +\= Expect no match \x{1bf4} /^[\p{Brahmi}]/utf \x{11000} \x{1106f} - ** Failers +\= Expect no match \x{1104e} /^[\p{Mandaic}]/utf \x{840} \x{85e} - ** Failers +\= Expect no match \x{85c} \x{85d} @@ -1355,11 +1363,9 @@ /^a\X41z/alt_bsux,allow_empty_class,match_unset_backref,dupnames aX41z - *** Failers +\= Expect 
no match aAz -/(?<=ab\Cde)X/utf - /\X/ a\=ps a\=ph @@ -1453,7 +1459,7 @@ \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} /\x{3a3}++./i,utf,aftertext - ** Failers +\= Expect no match \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} /\x{3a3}*\x{3c2}/Bi,utf @@ -1463,19 +1469,24 @@ /[^a]*\x{3c2}/Bi,utf /ist/Bi,utf +\= Expect no match ikt /is+t/i,utf iSs\x{17f}t +\= Expect no match ikt /is+?t/i,utf +\= Expect no match ikt /is?t/i,utf +\= Expect no match ikt /is{2}t/i,utf +\= Expect no match iskt # This property is a PCRE special @@ -1485,52 +1496,52 @@ @abc `abc \x{1234}abc - ** Failers +\= Expect no match abc /^\p{Xuc}+/utf $@`\x{a0}\x{1234}\x{e000}** - ** Failers +\= Expect no match \x{9f} /^\p{Xuc}+?/utf $@`\x{a0}\x{1234}\x{e000}** - ** Failers +\= Expect no match \x{9f} /^\p{Xuc}+?\*/utf $@`\x{a0}\x{1234}\x{e000}** - ** Failers +\= Expect no match \x{9f} /^\p{Xuc}++/utf $@`\x{a0}\x{1234}\x{e000}** - ** Failers +\= Expect no match \x{9f} /^\p{Xuc}{3,5}/utf $@`\x{a0}\x{1234}\x{e000}** - ** Failers +\= Expect no match \x{9f} /^\p{Xuc}{3,5}?/utf $@`\x{a0}\x{1234}\x{e000}** - ** Failers +\= Expect no match \x{9f} /^[\p{Xuc}]/utf $@`\x{a0}\x{1234}\x{e000}** - ** Failers +\= Expect no match \x{9f} /^[\p{Xuc}]+/utf $@`\x{a0}\x{1234}\x{e000}** - ** Failers +\= Expect no match \x{9f} /^\P{Xuc}/utf abc - ** Failers +\= Expect no match $abc @abc `abc @@ -1538,7 +1549,7 @@ /^[\P{Xuc}]/utf abc - ** Failers +\= Expect no match $abc @abc `abc @@ -1603,13 +1614,13 @@ /[\p{L}ab]{2,3}+/B,no_auto_possess -/\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \C+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx +/\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx /.+\X/Bsx /\X+$/Bmx -/\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\C \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx +/\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. 
\X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx /\d+\s{0,5}=\s*\S?=\w{0,4}\W*/B,utf,ucp @@ -1634,9 +1645,7 @@ 123ábc123 /(?<=abc)(|def)/g,utf,replace=<$0> - 123abcáyzabcdef789abcሴqr - -/[^\xff]((?1))/utf,debug + 123abcáyzabcdef789abcሴqr /[A-`]/iB,utf abcdefghijklmno @@ -1651,4 +1660,107 @@ /$(&.+[\p{Me}].\s\xdcC*?(?())(?)\xd1+!~:(?)''(d'E:yD!\s(?'R'\x1e;\x10:U))|')g!\xb0*){29+))#(?'P'})*?/ +"(*UTF)(*UCP)(.UTF).+X(\V+;\^(\D|)!999}(?(?C{7(?C')\H*\S*/^\x5\xa\\xd3\x85n?(;\D*(?m).[^mH+((*UCP)(*U:F)})(?!^)(?'" + +/[\pS#moq]/ + = + +/(*:a\x{12345}b\t(d\)c)xxx/utf,alt_verbnames,mark + cxxxz + +/abcd/utf,replace=x\x{824}y\o{3333}z(\Q12\$34$$\x34\E5$$),substitute_extended + abcd + +/a(\x{e0}\x{101})(\x{c0}\x{102})/utf,replace=a\u$1\U$1\E$1\l$2\L$2\Eab\U\x{e0}\x{101}\L\x{d0}\x{160}\EDone,substitute_extended + a\x{e0}\x{101}\x{c0}\x{102} + +/((?\d)|(?\p{L}))/g,substitute_extended,replace=<${digit:+digit; :not digit; }${letter:+letter:not a letter}> + ab12cde + +/(*UCP)(*UTF)[[:>:]]X/B + +/abc/utf,replace=xyz + abc\=zero_terminate + +/a[[:punct:]b]/ucp,bincode + +/a[[:punct:]b]/utf,ucp,bincode + +/a[b[:punct:]]/utf,ucp,bincode + +/[[:^ascii:]]/utf,ucp,bincode + +/[[:^ascii:]\w]/utf,ucp,bincode + +/[\w[:^ascii:]]/utf,ucp,bincode + +/[^[:ascii:]\W]/utf,ucp,bincode + \x{de} + \x{200} +\= Expect no match + \x{300} + \x{37e} + +/[[:^ascii:]a]/utf,ucp,bincode + +/L(?#(|++abc>([^()]|\((?1)*\))*a*)\d/ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa9876 - *** Failers +\= Expect no match aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa /< (?: (?(R) \d++ | [^<>]*+) | (?R)) * >/x @@ -374,33 +371,33 @@ hij> def> - *** Failers +\= Expect no match >>aaabxyzpqrrrabbxyyyypqAzz >aaaabxyzpqrrrabbxyyyypqAzz >>>>abcxyzpqrrrabbxyyyypqAzz - *** Failers +\= Expect no match abxyzpqrrabbxyyyypqAzz abxyzpqrrrrabbxyyyypqAzz abxyzpqrrrabxyyyypqAzz @@ -562,7 +559,7 @@ /^(abc){1,2}zz/ abczz abcabczz - *** Failers +\= Expect no match zz abcabcabczz >>abczz @@ -576,7 +573,7 @@ aac 
abbbbbbbbbbbc bbbbbbbbbbbac - *** Failers +\= Expect no match aaac abbbbbbbbbbbac @@ -589,7 +586,7 @@ aac abbbbbbbbbbbc bbbbbbbbbbbac - *** Failers +\= Expect no match aaac abbbbbbbbbbbac @@ -600,7 +597,7 @@ babc bbabc bababc - *** Failers +\= Expect no match bababbc babababc @@ -608,7 +605,7 @@ babc bbabc bababc - *** Failers +\= Expect no match bababbc babababc @@ -622,7 +619,7 @@ cthing dthing ething - *** Failers +\= Expect no match fthing [thing \\thing @@ -632,7 +629,7 @@ cthing dthing ething - *** Failers +\= Expect no match athing fthing @@ -640,7 +637,7 @@ fthing [thing \\thing - *** Failers +\= Expect no match athing bthing ]thing @@ -651,7 +648,7 @@ /^[^]cde]/ athing fthing - *** Failers +\= Expect no match ]thing cthing dthing @@ -676,7 +673,7 @@ 9 10 100 - *** Failers +\= Expect no match abc /^.*nter/ @@ -687,28 +684,28 @@ /^xxx[0-9]+$/ xxx0 xxx1234 - *** Failers +\= Expect no match xxx /^.+[0-9][0-9][0-9]$/ x123 xx123 123456 - *** Failers - 123 x1234 +\= Expect no match + 123 /^.+?[0-9][0-9][0-9]$/ x123 xx123 123456 - *** Failers - 123 x1234 +\= Expect no match + 123 /^([^!]+)!(.+)=apquxz\.ixr\.zzz\.ac\.uk$/ abc!pqr=apquxz.ixr.zzz.ac.uk - *** Failers +\= Expect no match !pqr=apquxz.ixr.zzz.ac.uk abc!=apquxz.ixr.zzz.ac.uk abc!pqr=apquxz:ixr.zzz.ac.uk @@ -716,7 +713,8 @@ /:/ Well, we need a colon: somewhere - *** Fail if we don't +\= Expect no match + No match without a colon /([\da-f:]+)$/i 0abc @@ -727,7 +725,7 @@ 5f03:12C0::932e fed def Any old stuff - *** Failers +\= Expect no match 0zzz gzzz fed\x20 @@ -736,7 +734,7 @@ /^.*\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/ .1.2.3 A.12.123.0 - *** Failers +\= Expect no match .1.2.3333 1.2.3 1234.2.3 @@ -744,7 +742,7 @@ /^(\d+)\s+IN\s+SOA\s+(\S+)\s+(\S+)\s*\(\s*$/ 1 IN SOA non-sp1 non-sp2( 1 IN SOA non-sp1 non-sp2 ( - *** Failers +\= Expect no match 1IN SOA non-sp1 non-sp2( /^[a-zA-Z\d][a-zA-Z\d\-]*(\.[a-zA-Z\d][a-zA-z\d\-]*)*\.$/ @@ -754,7 +752,7 @@ ab-c.pq-r. sxk.zzz.ac.uk. x-.y-. 
- *** Failers +\= Expect no match -abc.peq. /^\*\.[a-z]([a-z\-\d]*[a-z\d]+)?(\.[a-z]([a-z\-\d]*[a-z\d]+)?)*$/ @@ -762,7 +760,7 @@ *.b0-a *.c3-b.c *.c-a.b-c - *** Failers +\= Expect no match *.0 *.a- *.a-b.c- @@ -786,29 +784,28 @@ \"1234\" \"abcd\" ; \"\" ; rhubarb - *** Failers +\= Expect no match \"1234\" : things /^$/ \ - *** Failers / ^ a (?# begins with a) b\sc (?# then b c) $ (?# then end)/x ab c - *** Failers +\= Expect no match abc ab cde /(?x) ^ a (?# begins with a) b\sc (?# then b c) $ (?# then end)/ ab c - *** Failers +\= Expect no match abc ab cde /^ a\ b[c ]d $/x a bcd a b d - *** Failers +\= Expect no match abcd ab d @@ -862,7 +859,7 @@ 1234567890 12345678ab 12345678__ - *** Failers +\= Expect no match 1234567 /^[aeiou\d]{4,5}$/ @@ -870,7 +867,7 @@ 1234 12345 aaaaa - *** Failers +\= Expect no match 123456 /^[aeiou\d]{4,5}?/ @@ -886,7 +883,7 @@ /^From\s+\S+\s+([a-zA-Z]{3}\s+){2}\d{1,2}\s+\d\d:\d\d/ From abcd Mon Sep 01 12:33:02 1997 From abcd Mon Sep 1 12:33:02 1997 - *** Failers +\= Expect no match From abcd Sep 01 12:33:02 1997 /^12.34/s @@ -907,7 +904,7 @@ /^(\D*)(?=\d)(?!123)/ abc456 - *** Failers +\= Expect no match abc123 /^1234(?# test newlines @@ -927,12 +924,12 @@ /(?!^)abc/ the abc - *** Failers +\= Expect no match abc /(?=^)abc/ abc - *** Failers +\= Expect no match the abc /^[ab]{1,3}(ab*|b)/no_auto_possess @@ -1148,7 +1145,7 @@ A. Other (a comment) \"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"\@x400-re.lay A missing angle (a comment) \"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"\@x400-re.lay A missing angle .*/)foo" +\= Expect no match /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/it/you/see/ "(?>.*/)foo" @@ -2396,12 +2401,12 @@ /(?>(\.\d\d[1-9]?))\d+/ 1.230003938 1.875000282 - *** Failers +\= Expect no match 1.235 /^((?>\w+)|(?>\s+))*$/ now is the time for all good men to come to the aid of the party - *** Failers +\= Expect no match this is not a line with only words and spaces! 
/(\d+)(\w)/ @@ -2410,7 +2415,7 @@ /((?>\d+))(\w)/ 12345a - *** Failers +\= Expect no match 12345+ /(?>a+)b/ @@ -2437,32 +2442,32 @@ /\(((?>[^()]+)|\([^()]+\))+\)/ (abc) (abc(def)xyz) - *** Failers +\= Expect no match ((()aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa /a(?-i)b/i ab Ab - *** Failers +\= Expect no match aB AB /(a (?x)b c)d e/ a bcd e - *** Failers +\= Expect no match a b cd e abcd e a bcde /(a b(?x)c d (?-x)e f)/ a bcde f - *** Failers +\= Expect no match abcdef /(a(?i)b)c/ abc aBc - *** Failers +\= Expect no match abC aBC Abc @@ -2473,7 +2478,7 @@ /a(?i:b)c/ abc aBc - *** Failers +\= Expect no match ABC abC aBC @@ -2481,14 +2486,14 @@ /a(?i:b)*c/ aBc aBBc - *** Failers +\= Expect no match aBC aBBC /a(?=b(?i)c)\w\wd/ abcd abCd - *** Failers +\= Expect no match aBCd abcD @@ -2496,7 +2501,7 @@ more than million more than MILLION more \n than Million - *** Failers +\= Expect no match MORE THAN MILLION more \n than \n million @@ -2504,7 +2509,7 @@ more than million more than MILLION more \n than Million - *** Failers +\= Expect no match MORE THAN MILLION more \n than \n million @@ -2512,7 +2517,7 @@ abc aBbc aBBc - *** Failers +\= Expect no match Abc abAb abbC @@ -2520,7 +2525,7 @@ /(?=a(?i)b)\w\wc/ abc aBc - *** Failers +\= Expect no match Ab abC aBC @@ -2528,7 +2533,7 @@ /(?<=a(?i)b)(\w\w)c/ abxxc aBxxc - *** Failers +\= Expect no match Abxxc ABxxc abxxC @@ -2536,14 +2541,14 @@ /^(?(?=abc)\w{3}:|\d\d)$/ abc: 12 - *** Failers +\= Expect no match 123 xyz /^(?(?!abc)\d\d|\w{3}:)$/ abc: 12 - *** Failers +\= Expect no match 123 xyz @@ -2552,7 +2557,7 @@ cat fcat focat - *** Failers +\= Expect no match foocat /(?(?a*)*/ @@ -2629,7 +2634,7 @@ /(?(?=[^a-z]+[a-z]) \d{2}-[a-z]{3}-\d{2} | \d{2}-\d{2}-\d{2} ) /x 12-sep-98 12-09-98 - *** Failers +\= Expect no match sep-12-98 /(?i:saturday|sunday)/ @@ -2646,7 +2651,7 @@ aBCx bbx BBx - *** Failers +\= Expect no match abcX aBCX bbX @@ -2660,7 +2665,7 @@ Europe frog France - *** Failers +\= Expect no match Africa 
/^(ab|a(?i)[b-c](?m-i)d|x(?i)y|z)/ @@ -2670,13 +2675,13 @@ xY zebra Zambesi - *** Failers +\= Expect no match aCD XY /(?<=foo\n)^bar/m foo\nbar - *** Failers +\= Expect no match bar baz\nbar @@ -2684,18 +2689,18 @@ barbaz barbarbaz koobarbaz - *** Failers +\= Expect no match baz foobarbaz -/The following tests are taken from the Perl 5.005 test suite; some of them/ -/are compatible with 5.004, but I'd rather not have to sort them out./ +# The following tests are taken from the Perl 5.005 test suite; some of them +# are compatible with 5.004, but I'd rather not have to sort them out. /abc/ abc xabcy ababc - *** Failers +\= Expect no match xbc axc abx @@ -2719,7 +2724,7 @@ /ab+bc/ abbc - *** Failers +\= Expect no match abc abq @@ -2736,7 +2741,7 @@ abbbbc /ab{4,5}bc/ - *** Failers +\= Expect no match abq abbbbc @@ -2757,7 +2762,7 @@ /^abc$/ abc - *** Failers +\= Expect no match abbbbc abcc @@ -2768,8 +2773,8 @@ /abc$/ aabc - *** Failers aabc +\= Expect no match aabcd /^/ @@ -2787,7 +2792,7 @@ /a[bc]d/ abd - *** Failers +\= Expect no match axyzd abc @@ -2811,7 +2816,7 @@ /a[^bc]d/ aed - *** Failers +\= Expect no match abd abd @@ -2820,8 +2825,8 @@ /a[^]b]c/ adc - *** Failers a-c +\= Expect no match a]c /\ba\b/ @@ -2830,13 +2835,13 @@ -a- /\by\b/ - *** Failers +\= Expect no match xy yz xyz /\Ba\B/ - *** Failers +\= Expect no match a- -a -a- @@ -2855,8 +2860,7 @@ /\W/ - - *** Failers - - +\= Expect no match a /a\sb/ @@ -2864,8 +2868,7 @@ /a\Sb/ a-b - *** Failers - a-b +\= Expect no match a b /\d/ @@ -2873,8 +2876,7 @@ /\D/ - - *** Failers - - +\= Expect no match 1 /[\w]/ @@ -2882,8 +2884,7 @@ /[\W]/ - - *** Failers - - +\= Expect no match a /a[\s]b/ @@ -2891,8 +2892,7 @@ /a[\S]b/ a-b - *** Failers - a-b +\= Expect no match a b /[\d]/ @@ -2900,8 +2900,7 @@ /[\D]/ - - *** Failers - - +\= Expect no match 1 /ab|cd/ @@ -2921,6 +2920,8 @@ a((b /a\\b/ + a\\b +\= Expect no match a\b /((a))/ @@ -2960,12 +2961,10 @@ cde /abc/ - *** Failers +\= Expect no match b - /a*/ - 
/([abc])*d/ abbbcd @@ -3019,7 +3018,7 @@ adcdcde /a[bcd]+dcdcde/ - *** Failers +\= Expect no match abcde adcdcde @@ -3039,7 +3038,7 @@ effgz ij reffgz - *** Failers +\= Expect no match effg bcdd @@ -3050,7 +3049,7 @@ a /multiple words of text/ - *** Failers +\= Expect no match aa uh-uh @@ -3078,7 +3077,7 @@ ABC XABCY ABABC - *** Failers +\= Expect no match aaxabxbaxbbx XBC AXC @@ -3101,7 +3100,7 @@ ABBC /ab+bc/i - *** Failers +\= Expect no match ABC ABQ @@ -3120,7 +3119,7 @@ ABBBBC /ab{4,5}?bc/i - *** Failers +\= Expect no match ABQ ABBBBC @@ -3141,7 +3140,7 @@ /^abc$/i ABC - *** Failers +\= Expect no match ABBBBC ABCC @@ -3167,8 +3166,8 @@ AXYZC /a.*c/i - *** Failers AABC +\= Expect no match AXYZD /a[bc]d/i @@ -3176,7 +3175,7 @@ /a[b-d]e/i ACE - *** Failers +\= Expect no match ABC ABD @@ -3200,7 +3199,7 @@ /a[^-b]c/i ADC - *** Failers +\= Expect no match ABD A-C @@ -3215,7 +3214,7 @@ DEF /$b/i - *** Failers +\= Expect no match A]C B @@ -3227,6 +3226,7 @@ A((B /a\\b/i +\= Expect no match A\=notbol /((a))/i @@ -3277,7 +3277,6 @@ /abc/i /a*/i - /([abc])*d/i ABBBCD @@ -3305,6 +3304,7 @@ HIJ /^(ab|cd)e/i +\= Expect no match ABCDE /(abc|)ef/i @@ -3349,7 +3349,7 @@ EFFGZ IJ REFFGZ - *** Failers +\= Expect no match ADCDCDE EFFG BCDD @@ -3367,7 +3367,7 @@ C /multiple words of text/i - *** Failers +\= Expect no match AA UH-UH @@ -3460,7 +3460,7 @@ /(?<=a)b/ ab - *** Failers +\= Expect no match cb b @@ -3506,7 +3506,7 @@ Ab /(?:(?i)a)b/ - *** Failers +\= Expect no match cb aB @@ -3525,7 +3525,7 @@ Ab /(?i:a)b/ - *** Failers +\= Expect no match aB aB @@ -3544,8 +3544,8 @@ aB /(?:(?-i)a)b/i - *** Failers aB +\= Expect no match Ab /((?-i)a)b/i @@ -3557,7 +3557,7 @@ aB /(?:(?-i)a)b/i - *** Failers +\= Expect no match Ab AB @@ -3576,7 +3576,7 @@ aB /(?-i:a)b/i - *** Failers +\= Expect no match AB Ab @@ -3589,14 +3589,14 @@ aB /(?-i:a)b/i - *** Failers +\= Expect no match Ab AB /((?-i:a))b/i /((?-i:a.))b/i - *** Failers +\= Expect no match AB a\nB @@ -3622,7 +3622,7 @@ aaac 
/(?.*)(?<=(abcd|wxyz))/ alphabetabcd endingwxyz - *** Failers +\= Expect no match a rather long string that doesn't end with one of them /word (?>(?:(?!otherword)[a-zA-Z0-9]+ ){0,30})otherword/ word cat dog elephant mussel cow horse canary baboon snake shark otherword +\= Expect no match word cat dog elephant mussel cow horse canary baboon snake shark /word (?>[a-zA-Z0-9]+ ){0,30}otherword/ +\= Expect no match word cat dog elephant mussel cow horse canary baboon snake shark the quick brown fox and the lazy dog and several other words getting close to thirty by now I hope /(?<=\d{3}(?!999))foo/ 999foo 123999foo - *** Failers +\= Expect no match 123abcfoo /(?<=(?!...999)\d{3})foo/ 999foo 123999foo - *** Failers +\= Expect no match 123abcfoo /(?<=\d{3}(?!999)...)foo/ 123abcfoo 123456foo - *** Failers +\= Expect no match 123999foo /(?<=\d{3}...)(? - ** Failers +\= Expect no match xyz\nabclf xyz\rabclf /^abc/Im,newline=cr xyz\rabc - ** Failers +\= Expect no match xyz\nabc xyz\r\nabc @@ -4066,34 +4070,37 @@ aaaa /(a|)*\d/ - aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 +\= Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa /(?>a|)*\d/ - aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 +\= Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa /(?:a|)*\d/ - aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 +\= Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa /^a.b/newline=lf a\rb - ** Failers +\= Expect no match a\nb /^a.b/newline=cr a\nb - ** Failers +\= Expect no match a\rb /^a.b/newline=anycrlf a\x85b - ** Failers +\= Expect no match a\rb /^a.b/newline=any - ** Failers +\= Expect no match a\nb a\rb a\x85b @@ -4111,7 +4118,7 @@ a\x0bb a\x0cb a\x85b - ** Failers +\= 
Expect no match a\n\rb /^a\R*b/bsr=unicode @@ -4134,7 +4141,7 @@ a\x85b a\n\rb a\n\r\x85\x0cb - ** Failers +\= Expect no match ab /^a\R{1,3}b/bsr=unicode @@ -4145,25 +4152,25 @@ a\r\n\r\n\r\nb a\n\r\n\rb a\n\n\r\nb - ** Failers +\= Expect no match a\n\n\n\rb a\r /.+foo/ afoo - ** Failers +\= Expect no match \r\nfoo \nfoo /.+foo/newline=crlf afoo \nfoo - ** Failers +\= Expect no match \r\nfoo /.+foo/newline=any afoo - ** Failers +\= Expect no match \nfoo \r\nfoo @@ -4175,12 +4182,12 @@ /^$/gm,newline=any abc\r\rxyz abc\n\rxyz - ** Failers +\= Expect no match abc\r\nxyz /^X/m XABC - ** Failers +\= Expect no match XABC\=notbol /(?m)^$/g,newline=any,aftertext @@ -4207,20 +4214,20 @@ /(?|(abc)|(xyz))(?1)/ abcabc xyzabc - ** Failers +\= Expect no match xyzxyz /\H\h\V\v/ X X\x0a X\x09X\x0b - ** Failers +\= Expect no match \xa0 X\x0a /\H*\h+\V?\v{3,4}/ \x09\x20\xa0X\x0a\x0b\x0c\x0d\x0a \x09\x20\xa0\x0a\x0b\x0c\x0d\x0a \x09\x20\xa0\x0a\x0b\x0c - ** Failers +\= Expect no match \x09\x20\xa0\x0a\x0b /\H{3,4}/ @@ -4233,7 +4240,7 @@ /\h*X\h?\H+Y\H?Z/ >XNNNYZ > X NYQZ - ** Failers +\= Expect no match >XYZ > X NY Z @@ -4242,6 +4249,7 @@ >\x0a\x0dX\x0aY\x0a\x0bZZZ\x0aAAA\x0bNNN\x0c /.+A/newline=crlf +\= Expect no match \r\nA /\nA/newline=crlf @@ -4257,7 +4265,7 @@ a\rb a\nb a\r\nb - ** Failers +\= Expect no match a\x85b a\x0bb @@ -4272,7 +4280,7 @@ a\rb a\nb a\r\nb - ** Failers +\= Expect no match a\x85b a\x0bb @@ -4287,49 +4295,49 @@ a\r\n\nb a\n\r\rb a\r\n\r\n\r\n\r\nb - ** Failers +\= Expect no match + a\x0b\x0bb a\x85\x85b - a\x0b\0bb /a\R{2,4}b/I,bsr=unicode a\r\rb a\n\n\nb a\r\n\n\r\rb a\x85\x85b - a\x0b\0bb - ** Failers + a\x0b\x0bb +\= Expect no match a\r\r\r\r\rb /a(?!)|\wbc/ abc /a[]b/alt_bsux,allow_empty_class,match_unset_backref,dupnames - ** Failers +\= Expect no match ab /a[]+b/alt_bsux,allow_empty_class,match_unset_backref,dupnames - ** Failers +\= Expect no match ab /a[]*+b/alt_bsux,allow_empty_class,match_unset_backref,dupnames - ** Failers +\= Expect no match ab 
/a[^]b/alt_bsux,allow_empty_class,match_unset_backref,dupnames aXb a\nb - ** Failers +\= Expect no match ab /a[^]+b/alt_bsux,allow_empty_class,match_unset_backref,dupnames aXb a\nX\nXb - ** Failers +\= Expect no match ab /X$/dollar_endonly X - ** Failers +\= Expect no match X\n /X$/ @@ -4339,13 +4347,13 @@ /xyz/auto_callout xyz abcxyz - ** Failers +\= Expect no match abc abcxypqr /xyz/auto_callout,no_start_optimize abcxyz - ** Failers +\= Expect no match abc abcxypqr @@ -4369,10 +4377,12 @@ ++++123999Y\=ps /Z(*F)/ +\= Expect no match Z\=ps ZA\=ps /Z(?!)/ +\= Expect no match Z\=ps ZA\=ps @@ -4394,6 +4404,7 @@ /Z(*F)Q|ZXY/ Z\=ps +\= Expect no match ZA\=ps X\=ps @@ -4432,13 +4443,13 @@ /(abc|def|xyz)/I terhjk;abcdaadsfe the quick xyz brown fox - ** Failers +\= Expect no match thejk;adlfj aenjl;fda asdfasd ehj;kjxyasiupd /(abc|def|xyz)/I,no_start_optimize terhjk;abcdaadsfe the quick xyz brown fox - ** Failers +\= Expect no match thejk;adlfj aenjl;fda asdfasd ehj;kjxyasiupd /abcd*/aftertext @@ -4465,13 +4476,13 @@ /(?:(?1)|B)(A(*F)|C)/ ABCD CCD - ** Failers +\= Expect no match CAD /^(?:(?1)|B)(A(*F)|C)/ CCD BCD - ** Failers +\= Expect no match ABCD CAD BAD @@ -4480,7 +4491,6 @@ ac /^(?=a(*SKIP)b|ac)/ - ** Failers ac /^(?=a(*THEN)b|ac)/ @@ -4488,8 +4498,6 @@ /^(?=a(*PRUNE)b)/ ab - ** Failers - ac /^(?(?!a(*SKIP)b))/ ac @@ -4526,68 +4534,71 @@ abc\=ph /abc\B/ - abc abc\=ps abc\=ph +\= Expect no match + abc /.+/ abc\=offset=0 abc\=offset=1 abc\=offset=2 - abc\=offset=3 +\= Bad offsets abc\=offset=4 abc\=offset=-4 +\= Expect no match + abc\=offset=3 /^(?:a)++\w/ aaaab - ** Failers +\= Expect no match aaaa bbb /^(?:aa|(?:a)++\w)/ aaaab aaaa - ** Failers +\= Expect no match bbb /^(?:a)*+\w/ aaaab bbb - ** Failers +\= Expect no match aaaa /^(a)++\w/ aaaab - ** Failers +\= Expect no match aaaa bbb /^(a|)++\w/ aaaab - ** Failers +\= Expect no match aaaa bbb /(?=abc){3}abc/aftertext abcabcabc - ** Failers +\= Expect no match xyz /(?=abc)+abc/aftertext abcabcabc - ** Failers +\= 
Expect no match xyz /(?=abc)++abc/aftertext abcabcabc - ** Failers +\= Expect no match xyz /(?=abc){0}xyz/ xyz /(?=abc){1}xyz/ - ** Failers +\= Expect no match xyz /(?=(a))?./ @@ -4624,7 +4635,7 @@ /((?(R)a+|(?1)b))/ aaaabcde -/((?(R2)a+|(?1)b))/ +/((?(R2)a+|(?1)b))()/ aaaabcde /(?(R)a*(?1)|((?R))b)/ @@ -4634,12 +4645,6 @@ aaaa\=ovector=3 aaaa\=ovector=4 -/ab\Cde/ - abXde - -/(?<=ab\Cde)X/ - abZdeX - /^\R/ \r\=ps \r\=ph @@ -4842,12 +4847,12 @@ aaaXY # Binary zero in callout string -# a ( ? C ' x z ' ) b -/ 61 28 3f 43 27 78 00 7a 27 29 62/hex +/"a(?C'x" 00 "z')b"/hex abcdefgh /(?(?!)a|b)/ bbb +\= Expect no match aaa /^/gm @@ -4856,4 +4861,29 @@ /^/gm,alt_circumflex \n\n\n +/abc/use_offset_limit + 1234abcde\=offset_limit=100 + 1234abcde\=offset_limit=9 + 1234abcde\=offset_limit=4 + 1234abcde\=offset_limit=4,offset=4 +\= Expect no match + 1234abcde\=offset_limit=4,offset=5 + 1234abcde\=offset_limit=3 + +/(?<=abc)/use_offset_limit + 1234abc\=offset_limit=7 +\= Expect no match + 1234abc\=offset_limit=6 + +/abcd/null_context + abcd\=null_context + +/()()a+/no_auto_possess + aaa\=allcaptures + a\=allcaptures + +/(*LIMIT_RECURSION=100)^((.)(?1)|.)$/ +\= Expect recursion limit exceeded + a[00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00] + # End of testinput6 diff --git a/pcre2/testdata/testinput7 b/pcre2/testdata/testinput7 index 879a414cf..ef3022358 100644 --- a/pcre2/testdata/testinput7 +++ b/pcre2/testdata/testinput7 @@ -3,6 +3,7 @@ # used to force DFA matching for all tests. 
#subject dfa +#newline_default LF any anyCRLF /\x{100}ab/utf \x{100}ab @@ -15,19 +16,19 @@ /a\x{100}+b/utf a\x{100}b a\x{100}\x{100}b - *** Failers +\= Expect no match ab /\bX/utf Xoanon +Xoanon \x{300}Xoanon - *** Failers +\= Expect no match YXoanon /\BX/utf YXoanon - *** Failers +\= Expect no match Xoanon +Xoanon \x{300}Xoanon @@ -36,12 +37,12 @@ X+oanon ZX\x{300}oanon FAX - *** Failers +\= Expect no match Xoanon /X\B/utf Xoanon - *** Failers +\= Expect no match X+oanon ZX\x{300}oanon FAX @@ -54,7 +55,7 @@ ab99 \x{123}\x{123}45 \x{400}\x{401}\x{402}6 - *** Failers +\= Expect no match d99 \x{123}\x{122}4 \x{400}\x{403}6 @@ -64,14 +65,14 @@ acb a\x7fb a\x{100}b - *** Failers +\= Expect no match a\nb /a(.{3})b/utf a\x{4000}xyb a\x{4000}\x7fyb a\x{4000}\x{100}yb - *** Failers +\= Expect no match a\x{4000}b ac\ncb @@ -109,7 +110,7 @@ a\x{1234}xyb a\x{1234}\x{4321}yb a\x{1234}\x{4321}\x{3412}b - *** Failers +\= Expect no match a\x{1234}b ac\ncb @@ -119,7 +120,7 @@ a\x{1234}\x{4321}\x{3412}b axxxxbcdefghijb a\x{1234}\x{4321}\x{3412}\x{3421}b - *** Failers +\= Expect no match a\x{1234}b /a(.{3,}?)b/utf @@ -128,7 +129,7 @@ a\x{1234}\x{4321}\x{3412}b axxxxbcdefghijb a\x{1234}\x{4321}\x{3412}\x{3421}b - *** Failers +\= Expect no match a\x{1234}b /a(.{3,5})b/utf @@ -139,7 +140,7 @@ a\x{1234}\x{4321}\x{3412}\x{3421}b axbxxbcdefghijb axxxxxbcdefghijb - *** Failers +\= Expect no match a\x{1234}b axxxxxxbcdefghijb @@ -151,12 +152,12 @@ a\x{1234}\x{4321}\x{3412}\x{3421}b axbxxbcdefghijb axxxxxbcdefghijb - *** Failers +\= Expect no match a\x{1234}b axxxxxxbcdefghijb /^[a\x{c0}]/utf - *** Failers +\= Expect no match \x{100} /(?<=aXb)cd/utf @@ -170,19 +171,19 @@ /(?:\x{100}){3}b/utf \x{100}\x{100}\x{100}b - *** Failers +\= Expect no match \x{100}\x{100}b /\x{ab}/utf \x{ab} \xc2\xab - *** Failers +\= Expect no match \x00{ab} /(?<=(.))X/utf WXYZ \x{256}XYZ - *** Failers +\= Expect no match XYZ /[^a]+/g,utf @@ -269,128 +270,122 @@ /\D+/utf 12abcd34 - *** Failers +\= Expect no match 
1234 /\D{2,3}/utf 12abcd34 12ab34 - *** Failers +\= Expect no match 1234 12a34 /\D{2,3}?/utf 12abcd34 12ab34 - *** Failers +\= Expect no match 1234 12a34 /\d+/utf 12abcd34 - *** Failers /\d{2,3}/utf 12abcd34 1234abcd - *** Failers +\= Expect no match 1.4 /\d{2,3}?/utf 12abcd34 1234abcd - *** Failers +\= Expect no match 1.4 /\S+/utf 12abcd34 - *** Failers +\= Expect no match \ \ /\S{2,3}/utf 12abcd34 1234abcd - *** Failers +\= Expect no match \ \ /\S{2,3}?/utf 12abcd34 1234abcd - *** Failers +\= Expect no match \ \ />\s+ <34 - *** Failers />\s{2,3} \s{2,3}? \p{Xsp}/utf >\x{1680}\x{2028}\x{0b} - ** Failers +\= Expect no match \x{0b} /^>\p{Xsp}+/utf,no_auto_possess @@ -1617,7 +1601,7 @@ /^>\p{Xps}/utf >\x{1680}\x{2028}\x{0b} >\x{a0} - ** Failers +\= Expect no match \x{0b} /^>\p{Xps}+/utf @@ -1648,7 +1632,7 @@ \x{a6c} \x{10a7} _ABC - ** Failers +\= Expect no match [] /^\p{Xwd}+/utf @@ -1667,7 +1651,7 @@ \x{a6c} \x{10a7} _ABC - ** Failers +\= Expect no match [] /^[\p{Xwd}]+/utf @@ -1686,7 +1670,7 @@ /\b...\B/utf abc_ - ** Failers +\= Expect no match \x{37e}abc\x{376} \x{37e}\x{376}\x{371}\x{393}\x{394} !\x{c0}++\x{c1}\x{c2} @@ -1748,7 +1732,7 @@ \x{1100}X \x{1100}\x{34e}X \x{1b04}\x{1b04}X - *These match up to the roman letters +\= These match up to the roman letters \x{1111}\x{1111}L,L \x{1111}\x{1111}\x{1169}L,L,V \x{1111}\x{ae4c}L, LV @@ -1759,7 +1743,7 @@ \x{1111}\x{ad89}\x{11fe}L, LVT, T \x{1111}\x{ad89}\x{11fe}\x{11fe}L, LVT, T, T \x{ad89}\x{11fe}\x{11fe}LVT, T, T - *These match just the first codepoint (invalid sequence) +\= These match just the first codepoint (invalid sequence) \x{1111}\x{11fe}L, T \x{ae4c}\x{1111}LV, L \x{ae4c}\x{ae4c}LV, LV @@ -1775,18 +1759,18 @@ \x{11fe}\x{1169}T, V \x{11fe}\x{ae4c}T, LV \x{11fe}\x{ad89}T, LVT - *Test extend and spacing mark +\= Test extend and spacing mark \x{1111}\x{ae4c}\x{0711}L, LV, extend \x{1111}\x{ae4c}\x{1b04}L, LV, spacing mark \x{1111}\x{ae4c}\x{1b04}\x{0711}\x{1b04}L, LV, spacing mark, extend, spacing mark - 
*Test CR, LF, and control +\= Test CR, LF, and control \x0d\x{0711}CR, extend \x0d\x{1b04}CR, spacingmark \x0a\x{0711}LF, extend \x0a\x{1b04}LF, spacingmark \x0b\x{0711}Control, extend \x09\x{1b04}Control, spacingmark - *There are no Prepend characters, so we can't test Prepend, CR +\= There are no Prepend characters, so we can't test Prepend, CR /^(?>\X{2})X/utf,aftertext \x{1111}\x{ae4c}\x{1111}\x{ae4c}X @@ -1840,7 +1824,6 @@ /\x{212b}+/i,utf \x{00c5}\x{00e5}\x{212b} - /\x{01c4}+/i,utf \x{01c4}\x{01c5}\x{01c6} @@ -1850,7 +1833,6 @@ /\x{01c6}+/i,utf \x{01c4}\x{01c5}\x{01c6} - /\x{01c7}+/i,utf \x{01c7}\x{01c8}\x{01c9} @@ -1870,7 +1852,6 @@ /\x{01cc}+/i,utf \x{01ca}\x{01cb}\x{01cc} - /\x{01f1}+/i,utf \x{01f1}\x{01f2}\x{01f3} @@ -1880,7 +1861,6 @@ /\x{01f3}+/i,utf \x{01f1}\x{01f2}\x{01f3} - /\x{0345}+/i,utf \x{0345}\x{0399}\x{03b9}\x{1fbe} @@ -1893,7 +1873,6 @@ /\x{1fbe}+/i,utf \x{0345}\x{0399}\x{03b9}\x{1fbe} - /\x{0392}+/i,utf \x{0392}\x{03b2}\x{03d0} @@ -1913,7 +1892,6 @@ /\x{03f5}+/i,utf \x{0395}\x{03b5}\x{03f5} - /\x{0398}+/i,utf \x{0398}\x{03b8}\x{03d1}\x{03f4} @@ -1925,7 +1903,6 @@ /\x{03f4}+/i,utf \x{0398}\x{03b8}\x{03d1}\x{03f4} - /\x{039a}+/i,utf \x{039a}\x{03ba}\x{03f0} @@ -1936,7 +1913,6 @@ /\x{03f0}+/i,utf \x{039a}\x{03ba}\x{03f0} - /\x{03a0}+/i,utf \x{03a0}\x{03c0}\x{03d6} @@ -1946,7 +1922,6 @@ /\x{03d6}+/i,utf \x{03a0}\x{03c0}\x{03d6} - /\x{03a1}+/i,utf \x{03a1}\x{03c1}\x{03f1} @@ -1956,7 +1931,6 @@ /\x{03f1}+/i,utf \x{03a1}\x{03c1}\x{03f1} - /\x{03a3}+/i,utf \x{03A3}\x{03C2}\x{03C3} @@ -1965,7 +1939,6 @@ /\x{03c3}+/i,utf \x{03A3}\x{03C2}\x{03C3} - /\x{03a6}+/i,utf \x{03a6}\x{03c6}\x{03d5} @@ -1976,7 +1949,6 @@ /\x{03d5}+/i,utf \x{03a6}\x{03c6}\x{03d5} - /\x{03c9}+/i,utf \x{03c9}\x{03a9}\x{2126} @@ -1986,7 +1958,6 @@ /\x{2126}+/i,utf \x{03c9}\x{03a9}\x{2126} - /\x{1e60}+/i,utf \x{1e60}\x{1e61}\x{1e9b} @@ -1996,14 +1967,12 @@ /\x{1e9b}+/i,utf \x{1e60}\x{1e61}\x{1e9b} - /\x{1e9e}+/i,utf \x{1e9e}\x{00df} /\x{00df}+/i,utf \x{1e9e}\x{00df} - 
/\x{1f88}+/i,utf \x{1f88}\x{1f80} @@ -2019,7 +1988,6 @@ /\x{212a}+/i,utf \x{004b}\x{006b}\x{212a} - /\x{0053}+/i,utf \x{0053}\x{0073}\x{017f} @@ -2030,19 +1998,24 @@ \x{0053}\x{0073}\x{017f} /ist/i,utf +\= Expect no match ikt /is+t/i,utf iSs\x{17f}t +\= Expect no match ikt /is+?t/i,utf +\= Expect no match ikt /is?t/i,utf +\= Expect no match ikt /is{2}t/i,utf +\= Expect no match iskt /^\p{Xuc}/utf @@ -2050,52 +2023,52 @@ @abc `abc \x{1234}abc - ** Failers +\= Expect no match abc /^\p{Xuc}+/utf $@`\x{a0}\x{1234}\x{e000}** - ** Failers +\= Expect no match \x{9f} /^\p{Xuc}+?/utf $@`\x{a0}\x{1234}\x{e000}** - ** Failers +\= Expect no match \x{9f} /^\p{Xuc}+?\*/utf $@`\x{a0}\x{1234}\x{e000}** - ** Failers +\= Expect no match \x{9f} /^\p{Xuc}++/utf $@`\x{a0}\x{1234}\x{e000}** - ** Failers +\= Expect no match \x{9f} /^\p{Xuc}{3,5}/utf $@`\x{a0}\x{1234}\x{e000}** - ** Failers +\= Expect no match \x{9f} /^\p{Xuc}{3,5}?/utf $@`\x{a0}\x{1234}\x{e000}** - ** Failers +\= Expect no match \x{9f} /^[\p{Xuc}]/utf $@`\x{a0}\x{1234}\x{e000}** - ** Failers +\= Expect no match \x{9f} /^[\p{Xuc}]+/utf $@`\x{a0}\x{1234}\x{e000}** - ** Failers +\= Expect no match \x{9f} /^\P{Xuc}/utf abc - ** Failers +\= Expect no match $abc @abc `abc @@ -2103,7 +2076,7 @@ /^[\P{Xuc}]/utf abc - ** Failers +\= Expect no match $abc @abc `abc diff --git a/pcre2/testdata/testinput8 b/pcre2/testdata/testinput8 index 545e751c9..2627454f9 100644 --- a/pcre2/testdata/testinput8 +++ b/pcre2/testdata/testinput8 @@ -1,8 +1,11 @@ -# These are a few representative patterns whose lengths and offsets are to be -# shown when the link size is 2. This is just a doublecheck test to ensure the -# sizes don't go horribly wrong when something is changed. The pattern contents -# are all themselves checked in other tests. Unicode, including property -# support, is required for these tests. +# There are two sorts of patterns in this test. A number of them are +# representative patterns whose lengths and offsets are checked. 
This is just a +# doublecheck test to ensure the sizes don't go horribly wrong when something +# is changed. The operation of these patterns is checked in other tests. +# +# This file also contains tests whose output varies with code unit size and/or +# link size. Unicode support is required for these tests. There are separate +# output files for each code unit size and link size. #pattern fullbincode,memory @@ -67,7 +70,7 @@ /\xff/utf /\x{0041}\x{2262}\x{0391}\x{002e}/I,utf - + /\x{D55c}\x{ad6d}\x{C5B4}/I,utf /\x{65e5}\x{672c}\x{8a9e}/I,utf @@ -150,10 +153,33 @@ # Check the absolute limit on nesting (?| etc. This varies with code unit # width because the workspace is a different number of bytes. It will fail -# in 8-bit and 16-bit but not in 32-bit. - +# with link size 2 in 8-bit and 16-bit but not in 32-bit. + /(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|
(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?| ))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) /parens_nest_limit=1000,-fullbincode +# Use "expand" to create some very long patterns with nested parentheses, in +# order to test workspace overflow. Again, this varies with code unit width, +# and even when it fails in two modes, the error offset differs. It also varies +# with link size - hence multiple tests with different values. 
+ +/(?'ABC'\[[bar](]{792}*THEN:\[A]{255}\[)]{793}/expand,-fullbincode,parens_nest_limit=1000 + +/(?'ABC'\[[bar](]{793}*THEN:\[A]{255}\[)]{794}/expand,-fullbincode,parens_nest_limit=1000 + +/(?'ABC'\[[bar](]{1793}*THEN:\[A]{255}\[)]{1794}/expand,-fullbincode,parens_nest_limit=2000 + +/(?(1)(?1)){8,}+()/debug + abcd + +/(?(1)|a(?1)b){2,}+()/debug + abcde + +/((?1)(?2)(?3)(?4)(?5)(?6)(?7)(?8)(?9)(?9)(?8)(?7)(?6)(?5)(?4)(?3)(?2)(?1)(?0)){2,}()()()()()()()()()/debug + +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ + +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))/-fullbincode + # End of testinput8 diff --git a/pcre2/testdata/testinput9 b/pcre2/testdata/testinput9 index eeb5edbbe..7be4b1580 100644 --- a/pcre2/testdata/testinput9 +++ b/pcre2/testdata/testinput9 @@ -2,11 +2,10 @@ # UTF-8 or Unicode property support. 
*/ #forbid_utf +#newline_default lf any anycrlf -/a\Cb/ - aXb - a\nb - ** Failers (too big char) +/ab/ +\= Expect error message (too big char) and no match A\x{123}B A\o{443}B @@ -240,9 +239,15 @@ /(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark XX +/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark,alt_verbnames + XX + /(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark XX +/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark,alt_verbnames + XX + /\u0100/alt_bsux,allow_empty_class,match_unset_backref,dupnames /[\u0100-\u0200]/alt_bsux,allow_empty_class,match_unset_backref,dupnames @@ -251,4 +256,8 @@ /[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B +/(*MARK:a\x{100}b)z/alt_verbnames + +/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':Æ¿)/ + # End of testinput9 diff --git a/pcre2/testdata/testoutput1 b/pcre2/testdata/testoutput1 index 942dc7d85..e55a63b7e 100644 --- a/pcre2/testdata/testoutput1 +++ 
b/pcre2/testdata/testoutput1 @@ -3,15 +3,17 @@ # 32-bit PCRE libraries, and also using the perltest.pl script. #forbid_utf +#newline_default lf any anycrlf #perltest /the quick brown fox/ the quick brown fox 0: the quick brown fox - The quick brown FOX -No match What do you know about the quick brown fox? 0: the quick brown fox +\= Expect no match + The quick brown FOX +No match What do you know about THE QUICK BROWN FOX? No match @@ -90,8 +92,7 @@ No match 0: aaaabxyzpqrrrabbxyyyypqAzz >>>>abcxyzpqrrrabbxyyyypqAzz 0: abcxyzpqrrrabbxyyyypqAzz - *** Failers -No match +\= Expect no match abxyzpqrrabbxyyyypqAzz No match abxyzpqrrrrabbxyyyypqAzz @@ -112,8 +113,7 @@ No match abcabczz 0: abcabczz 1: abc - *** Failers -No match +\= Expect no match zz No match abcabcabczz @@ -146,8 +146,7 @@ No match bbbbbbbbbbbac 0: bbbbbbbbbbbac 1: a - *** Failers -No match +\= Expect no match aaac No match abbbbbbbbbbbac @@ -178,8 +177,7 @@ No match bbbbbbbbbbbac 0: bbbbbbbbbbbac 1: a - *** Failers -No match +\= Expect no match aaac No match abbbbbbbbbbbac @@ -200,8 +198,7 @@ No match bababc 0: bababc 1: ba - *** Failers -No match +\= Expect no match bababbc No match babababc @@ -217,8 +214,7 @@ No match bababc 0: bababc 1: ba - *** Failers -No match +\= Expect no match bababbc No match babababc @@ -241,8 +237,7 @@ No match 0: d ething 0: e - *** Failers -No match +\= Expect no match fthing No match [thing @@ -259,8 +254,7 @@ No match 0: d ething 0: e - *** Failers -No match +\= Expect no match athing No match fthing @@ -273,8 +267,7 @@ No match 0: [ \\thing 0: \ - *** Failers - 0: * +\= Expect no match athing No match bthing @@ -293,8 +286,7 @@ No match 0: a fthing 0: f - *** Failers - 0: * +\= Expect no match ]thing No match cthing @@ -337,8 +329,7 @@ No match 0: 10 100 0: 100 - *** Failers -No match +\= Expect no match abc No match @@ -355,46 +346,42 @@ No match 0: xxx0 xxx1234 0: xxx1234 - *** Failers -No match +\= Expect no match xxx No match /^.+[0-9][0-9][0-9]$/ x123 0: x123 + 
x1234 + 0: x1234 xx123 0: xx123 123456 0: 123456 - *** Failers -No match +\= Expect no match 123 No match - x1234 - 0: x1234 /^.+?[0-9][0-9][0-9]$/ x123 0: x123 + x1234 + 0: x1234 xx123 0: xx123 123456 0: 123456 - *** Failers -No match +\= Expect no match 123 No match - x1234 - 0: x1234 /^([^!]+)!(.+)=apquxz\.ixr\.zzz\.ac\.uk$/ abc!pqr=apquxz.ixr.zzz.ac.uk 0: abc!pqr=apquxz.ixr.zzz.ac.uk 1: abc 2: pqr - *** Failers -No match +\= Expect no match !pqr=apquxz.ixr.zzz.ac.uk No match abc!=apquxz.ixr.zzz.ac.uk @@ -407,7 +394,8 @@ No match /:/ Well, we need a colon: somewhere 0: : - *** Fail if we don't +\= Expect no match + Fail without a colon No match /([\da-f:]+)$/i @@ -435,8 +423,7 @@ No match Any old stuff 0: ff 1: ff - *** Failers -No match +\= Expect no match 0zzz No match gzzz @@ -457,8 +444,7 @@ No match 1: 12 2: 123 3: 0 - *** Failers -No match +\= Expect no match .1.2.3333 No match 1.2.3 @@ -477,8 +463,7 @@ No match 1: 1 2: non-sp1 3: non-sp2 - *** Failers -No match +\= Expect no match 1IN SOA non-sp1 non-sp2( No match @@ -498,8 +483,7 @@ No match x-.y-. 0: x-.y-. 1: .y- - *** Failers -No match +\= Expect no match -abc.peq. 
No match @@ -518,8 +502,7 @@ No match 1: -a 2: .b-c 3: -c - *** Failers -No match +\= Expect no match *.0 No match *.a- @@ -570,22 +553,21 @@ No match \"\" ; rhubarb 0: "" ; rhubarb 1: ; rhubarb - *** Failers -No match +\= Expect no match \"1234\" : things No match /^$/ \ 0: - *** Failers +\= Expect no match + A non-empty line No match / ^ a (?# begins with a) b\sc (?# then b c) $ (?# then end)/x ab c 0: ab c - *** Failers -No match +\= Expect no match abc No match ab cde @@ -594,8 +576,7 @@ No match /(?x) ^ a (?# begins with a) b\sc (?# then b c) $ (?# then end)/ ab c 0: ab c - *** Failers -No match +\= Expect no match abc No match ab cde @@ -606,8 +587,7 @@ No match 0: a bcd a b d 0: a b d - *** Failers -No match +\= Expect no match abcd No match ab d @@ -716,8 +696,7 @@ No match 0: 12345678ab 12345678__ 0: 12345678__ - *** Failers -No match +\= Expect no match 1234567 No match @@ -730,8 +709,7 @@ No match 0: 12345 aaaaa 0: aaaaa - *** Failers -No match +\= Expect no match 123456 No match @@ -756,8 +734,7 @@ No match 0: def=defdefdef 1: def 2: def - *** Failers -No match +\= Expect no match abc=defdef No match @@ -827,8 +804,7 @@ No match From abcd Mon Sep 1 12:33:02 1997 0: From abcd Mon Sep 1 12:33 1: Sep - *** Failers -No match +\= Expect no match From abcd Sep 01 12:33:02 1997 No match @@ -865,8 +841,7 @@ No match abc456 0: abc 1: abc - *** Failers -No match +\= Expect no match abc123 No match @@ -910,16 +885,14 @@ No match /(?!^)abc/ the abc 0: abc - *** Failers -No match +\= Expect no match abc No match /(?=^)abc/ abc 0: abc - *** Failers -No match +\= Expect no match the abc No match @@ -1151,8 +1124,7 @@ No match 0: "/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/"@x400-re.lay A missing angle .*/)foo" - /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/it/you/see/ -No match - "(?>.*/)foo" /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo 0: /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo +\= Expect no match 
+ /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/it/you/see/ +No match /(?>(\.\d\d[1-9]?))\d+/ 1.230003938 @@ -3084,8 +2991,7 @@ No match 1.875000282 0: .875000282 1: .875 - *** Failers -No match +\= Expect no match 1.235 No match @@ -3093,8 +2999,7 @@ No match now is the time for all good men to come to the aid of the party 0: now is the time for all good men to come to the aid of the party 1: party - *** Failers -No match +\= Expect no match this is not a line with only words and spaces! No match @@ -3113,8 +3018,7 @@ No match 0: 12345a 1: 12345 2: a - *** Failers -No match +\= Expect no match 12345+ No match @@ -3152,8 +3056,7 @@ No match (abc(def)xyz) 0: (abc(def)xyz) 1: xyz - *** Failers -No match +\= Expect no match ((()aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa No match @@ -3162,8 +3065,7 @@ No match 0: ab Ab 0: Ab - *** Failers -No match +\= Expect no match aB No match AB @@ -3173,8 +3075,7 @@ No match a bcd e 0: a bcd e 1: a bc - *** Failers -No match +\= Expect no match a b cd e No match abcd e @@ -3186,8 +3087,7 @@ No match a bcde f 0: a bcde f 1: a bcde f - *** Failers -No match +\= Expect no match abcdef No match @@ -3198,8 +3098,7 @@ No match aBc 0: aBc 1: aB - *** Failers -No match +\= Expect no match abC No match aBC @@ -3218,8 +3117,7 @@ No match 0: abc aBc 0: aBc - *** Failers -No match +\= Expect no match ABC No match abC @@ -3232,8 +3130,7 @@ No match 0: aBc aBBc 0: aBBc - *** Failers -No match +\= Expect no match aBC No match aBBC @@ -3244,8 +3141,7 @@ No match 0: abcd abCd 0: abCd - *** Failers -No match +\= Expect no match aBCd No match abcD @@ -3258,8 +3154,7 @@ No match 0: more than MILLION more \n than Million 0: more \x0a than Million - *** Failers -No match +\= Expect no match MORE THAN MILLION No match more \n than \n million @@ -3272,8 +3167,7 @@ No match 0: more than MILLION more \n than Million 0: more \x0a than Million - *** Failers -No match +\= Expect no match MORE THAN MILLION No match more \n than \n million @@ 
-3286,8 +3180,7 @@ No match 0: aBbc aBBc 0: aBBc - *** Failers -No match +\= Expect no match Abc No match abAb @@ -3300,8 +3193,7 @@ No match 0: abc aBc 0: aBc - *** Failers -No match +\= Expect no match Ab No match abC @@ -3316,8 +3208,7 @@ No match aBxxc 0: xxc 1: xx - *** Failers -No match +\= Expect no match Abxxc No match ABxxc @@ -3331,8 +3222,7 @@ No match 1: a bB 0: bB - *** Failers -No match +\= Expect no match aB No match bA @@ -3346,8 +3236,7 @@ No match 0: b bb 0: bb - *** Failers -No match +\= Expect no match ab No match @@ -3361,8 +3250,7 @@ No match 0: 12 123 0: 12 - *** Failers -No match +\= Expect no match xyz No match @@ -3371,8 +3259,7 @@ No match 0: abc: 12 0: 12 - *** Failers -No match +\= Expect no match 123 No match xyz @@ -3387,8 +3274,7 @@ No match 0: cat focat 0: cat - *** Failers -No match +\= Expect no match foocat No match @@ -3401,8 +3287,7 @@ No match 0: cat focat 0: cat - *** Failers -No match +\= Expect no match foocat No match @@ -3441,8 +3326,7 @@ No match 0: 12aa 1: 1 2: 2 - *** Failers -No match +\= Expect no match 1234 No match @@ -3459,8 +3343,7 @@ No match blaH blaH 0: blaH blaH 1: blaH - *** Failers -No match +\= Expect no match blah BLAH No match Blah blah @@ -3628,8 +3511,7 @@ No match 0: 12-sep-98 12-09-98 0: 12-09-98 - *** Failers -No match +\= Expect no match sep-12-98 No match @@ -3640,8 +3522,7 @@ No match foobarfootling 0: barfoo 1: foo - *** Failers -No match +\= Expect no match foobar No match barfoo @@ -3676,8 +3557,7 @@ No match BBx 0: BBx 1: BB - *** Failers -No match +\= Expect no match abcX No match aBCX @@ -3709,8 +3589,7 @@ No match France 0: F 1: F - *** Failers -No match +\= Expect no match Africa No match @@ -3733,8 +3612,7 @@ No match Zambesi 0: Z 1: Z - *** Failers -No match +\= Expect no match aCD No match XY @@ -3743,8 +3621,7 @@ No match /(?<=foo\n)^bar/m foo\nbar 0: bar - *** Failers -No match +\= Expect no match bar No match baz\nbar @@ -3757,39 +3634,37 @@ No match 0: baz koobarbaz 0: baz - *** 
Failers -No match +\= Expect no match baz No match foobarbaz No match -/The cases of aaaa and aaaaaa are missed out below because Perl does things/ -/differently. We know that odd, and maybe incorrect, things happen with/ -No match -/recursive references in Perl, as far as 5.11.3 - see some stuff in test #2./ -No match +# The cases of aaaa and aaaaaa are missed out below because Perl does things +# differently. We know that odd, and maybe incorrect, things happen with +# recursive references in Perl, as far as 5.11.3 - see some stuff in test #2. /^(a\1?){4}$/ - a -No match - aa -No match - aaa -No match aaaaa 0: aaaaa 1: a aaaaaaa 0: aaaaaaa 1: a + aaaaaaaaaa + 0: aaaaaaaaaa + 1: aaaa +\= Expect no match + a +No match + aa +No match + aaa +No match aaaaaaaa No match aaaaaaaaa No match - aaaaaaaaaa - 0: aaaaaaaaaa - 1: aaaa aaaaaaaaaaa No match aaaaaaaaaaaa @@ -3800,16 +3675,10 @@ No match No match aaaaaaaaaaaaaaa No match - aaaaaaaaaaaaaaaa + aaaaaaaaaaaaaaaa No match /^(a\1?)(a\1?)(a\2?)(a\3?)$/ - a -No match - aa -No match - aaa -No match aaaa 0: aaaa 1: a @@ -3834,16 +3703,23 @@ No match 2: aa 3: aaa 4: a - aaaaaaaa -No match - aaaaaaaaa -No match aaaaaaaaaa 0: aaaaaaaaaa 1: a 2: aa 3: aaa 4: aaaa +\= Expect no match + a +No match + aa +No match + aaa +No match + aaaaaaaa +No match + aaaaaaaaa +No match aaaaaaaaaaa No match aaaaaaaaaaaa @@ -3857,9 +3733,8 @@ No match aaaaaaaaaaaaaaaa No match -/The following tests are taken from the Perl 5.005 test suite; some of them/ -/are compatible with 5.004, but I'd rather not have to sort them out./ -No match +# The following tests are taken from the Perl 5.005 test suite; some of them +# are compatible with 5.004, but I'd rather not have to sort them out. 
/abc/ abc @@ -3868,8 +3743,7 @@ No match 0: abc ababc 0: abc - *** Failers -No match +\= Expect no match xbc No match axc @@ -3904,8 +3778,7 @@ No match /ab+bc/ abbc 0: abbc - *** Failers -No match +\= Expect no match abc No match abq @@ -3930,8 +3803,7 @@ No match 0: abbbbc /ab{4,5}bc/ - *** Failers -No match +\= Expect no match abq No match abbbbc @@ -3960,8 +3832,7 @@ No match /^abc$/ abc 0: abc - *** Failers -No match +\= Expect no match abbbbc No match abcc @@ -3976,10 +3847,7 @@ No match /abc$/ aabc 0: abc - *** Failers -No match - aabc - 0: abc +\= Expect no match aabcd No match @@ -4004,8 +3872,7 @@ No match /a[bc]d/ abd 0: abd - *** Failers -No match +\= Expect no match axyzd No match abc @@ -4038,8 +3905,7 @@ No match /a[^bc]d/ aed 0: aed - *** Failers -No match +\= Expect no match abd No match abd @@ -4052,10 +3918,9 @@ No match /a[^]b]c/ adc 0: adc - *** Failers -No match a-c 0: a-c +\= Expect no match a]c No match @@ -4068,8 +3933,7 @@ No match 0: a /\by\b/ - *** Failers -No match +\= Expect no match xy No match yz @@ -4078,8 +3942,7 @@ No match No match /\Ba\B/ - *** Failers - 0: a +\= Expect no match a- No match -a @@ -4106,10 +3969,7 @@ No match /\W/ - 0: - - *** Failers - 0: * - - - 0: - +\= Expect no match a No match @@ -4120,10 +3980,7 @@ No match /a\Sb/ a-b 0: a-b - *** Failers -No match - a-b - 0: a-b +\= Expect no match a b No match @@ -4134,10 +3991,7 @@ No match /\D/ - 0: - - *** Failers - 0: * - - - 0: - +\= Expect no match 1 No match @@ -4148,10 +4002,7 @@ No match /[\W]/ - 0: - - *** Failers - 0: * - - - 0: - +\= Expect no match a No match @@ -4162,10 +4013,7 @@ No match /a[\S]b/ a-b 0: a-b - *** Failers -No match - a-b - 0: a-b +\= Expect no match a b No match @@ -4176,10 +4024,7 @@ No match /[\D]/ - 0: - - *** Failers - 0: * - - - 0: - +\= Expect no match 1 No match @@ -4269,14 +4114,13 @@ No match 0: cde /abc/ - *** Failers -No match +\= Expect no match b No match - /a*/ - + \ + 0: /([abc])*d/ abbbcd @@ -4361,8 +4205,7 @@ No match 0: 
adcdcde /a[bcd]+dcdcde/ - *** Failers -No match +\= Expect no match abcde No match adcdcde @@ -4400,8 +4243,7 @@ No match reffgz 0: effgz 1: effgz - *** Failers -No match +\= Expect no match effg No match bcdd @@ -4449,8 +4291,7 @@ No match 9: a /multiple words of text/ - *** Failers -No match +\= Expect no match aa No match uh-uh @@ -4500,13 +4341,11 @@ No match /(a)|\1/ a 0: a - 1: a - *** Failers - 0: a 1: a ab 0: a 1: a +\= Expect no match x No match @@ -4543,8 +4382,7 @@ No match 0: ABC ABABC 0: ABC - *** Failers -No match +\= Expect no match aaxabxbaxbbx No match XBC @@ -4577,8 +4415,7 @@ No match 0: ABBC /ab+bc/i - *** Failers -No match +\= Expect no match ABC No match ABQ @@ -4603,8 +4440,7 @@ No match 0: ABBBBC /ab{4,5}?bc/i - *** Failers -No match +\= Expect no match ABQ No match ABBBBC @@ -4633,8 +4469,7 @@ No match /^abc$/i ABC 0: ABC - *** Failers -No match +\= Expect no match ABBBBC No match ABCC @@ -4669,10 +4504,9 @@ No match 0: AXYZC /a.*c/i - *** Failers -No match AABC 0: AABC +\= Expect no match AXYZD No match @@ -4683,8 +4517,7 @@ No match /a[b-d]e/i ACE 0: ACE - *** Failers -No match +\= Expect no match ABC No match ABD @@ -4717,8 +4550,7 @@ No match /a[^-b]c/i ADC 0: ADC - *** Failers -No match +\= Expect no match ABD No match A-C @@ -4740,8 +4572,7 @@ No match 1: /$b/i - *** Failers -No match +\= Expect no match A]C No match B @@ -4833,11 +4664,6 @@ No match CDE 0: CDE -/abc/i - -/a*/i - - /([abc])*d/i ABBBCD 0: ABBBCD @@ -4877,6 +4703,7 @@ No match 0: HIJ /^(ab|cd)e/i +\= Expect no match ABCDE No match @@ -4956,8 +4783,7 @@ No match REFFGZ 0: EFFGZ 1: EFFGZ - *** Failers -No match +\= Expect no match ADCDCDE No match EFFG @@ -5017,8 +4843,7 @@ No match 1: C /multiple words of text/i - *** Failers -No match +\= Expect no match AA No match UH-UH @@ -5176,8 +5001,7 @@ No match aaaaaaaaaa 0: aaaaaaaaaa 1: aaaa - *** Failers -No match +\= Expect no match AB No match aaaaaaaaa @@ -5189,8 +5013,7 @@ No match aaaaaaaaaa 0: aaaaaaaaaa 1: aaaa - *** 
Failers -No match +\= Expect no match aaaaaaaaa No match aaaaaaaaaaa @@ -5209,8 +5032,7 @@ No match /(?<=a)b/ ab 0: b - *** Failers -No match +\= Expect no match cb No match b @@ -5286,8 +5108,7 @@ No match 1: A /(?:(?i)a)b/ - *** Failers -No match +\= Expect no match cb No match aB @@ -5314,8 +5135,7 @@ No match 1: A /(?i:a)b/ - *** Failers -No match +\= Expect no match aB No match aB @@ -5342,34 +5162,14 @@ No match 1: a /(?:(?-i)a)b/i - *** Failers -No match aB 0: aB - Ab -No match - -/((?-i)a)b/i - -/(?:(?-i)a)b/i - aB - 0: aB - -/((?-i)a)b/i - aB - 0: aB - 1: a - -/(?:(?-i)a)b/i - *** Failers -No match +\= Expect no match Ab No match AB No match -/((?-i)a)b/i - /(?-i:a)b/i ab 0: ab @@ -5389,8 +5189,7 @@ No match 1: a /(?-i:a)b/i - *** Failers -No match +\= Expect no match AB No match Ab @@ -5408,8 +5207,7 @@ No match 1: a /(?-i:a)b/i - *** Failers -No match +\= Expect no match Ab No match AB @@ -5418,8 +5216,7 @@ No match /((?-i:a))b/i /((?-i:a.))b/i - *** Failers -No match +\= Expect no match AB No match a\nB @@ -5464,8 +5261,7 @@ No match 0: aaac /(? 
- 2: Failers +\= Expect no match abcd: No match abcd: @@ -5748,8 +5534,7 @@ No match 1: x /a\Z/ - *** Failers -No match +\= Expect no match aaab No match a\nb\n @@ -5768,8 +5553,6 @@ No match /b\z/ a\nb 0: b - *** Failers -No match /^(?>(?(1)\.|())[^\W_](?>[a-z0-9-]*[^\W_])?)+$/ a @@ -5799,8 +5582,7 @@ No match 12-ab.1245 0: 12-ab.1245 1: - *** Failers -No match +\= Expect no match \ No match .a @@ -5835,18 +5617,19 @@ No match endingwxyz 0: endingwxyz 1: wxyz - *** Failers -No match +\= Expect no match a rather long string that doesn't end with one of them No match /word (?>(?:(?!otherword)[a-zA-Z0-9]+ ){0,30})otherword/ word cat dog elephant mussel cow horse canary baboon snake shark otherword 0: word cat dog elephant mussel cow horse canary baboon snake shark otherword +\= Expect no match word cat dog elephant mussel cow horse canary baboon snake shark No match /word (?>[a-zA-Z0-9]+ ){0,30}otherword/ +\= Expect no match word cat dog elephant mussel cow horse canary baboon snake shark the quick brown fox and the lazy dog and several other words getting close to thirty by now I hope No match @@ -5855,8 +5638,7 @@ No match 0: foo 123999foo 0: foo - *** Failers -No match +\= Expect no match 123abcfoo No match @@ -5865,8 +5647,7 @@ No match 0: foo 123999foo 0: foo - *** Failers -No match +\= Expect no match 123abcfoo No match @@ -5875,8 +5656,7 @@ No match 0: foo 123456foo 0: foo - *** Failers -No match +\= Expect no match 123999foo No match @@ -5885,8 +5665,7 @@ No match 0: foo 123456foo 0: foo - *** Failers -No match +\= Expect no match 123999foo No match @@ -5992,8 +5771,7 @@ No match 0: - 0digit 0: 0 - *** Failers -No match +\= Expect no match bcdef No match @@ -6018,11 +5796,12 @@ No match 0: ab /(?!\A)x/m - a\nxb\n + a\nxb\n 0: x /(?!^)x/m - a\nxb\n +\= Expect no match + a\nxb\n No match /abc\Qabc\Eabc/ @@ -6036,8 +5815,7 @@ No match / abc\Q abc\Eabc/x abc abcabc 0: abc abcabc - *** Failers -No match +\= Expect no match abcabcabc No match @@ -6077,8 +5855,7 @@ 
No match /\Gabc/ abc 0: abc - *** Failers -No match +\= Expect no match xyzabc No match @@ -6096,8 +5873,7 @@ No match /a(?x: b c )d/ XabcdY 0: abcd - *** Failers -No match +\= Expect no match Xa b c d Y No match @@ -6112,8 +5888,7 @@ No match /(?i)AB(?-i)C/ XabCY 0: abC - *** Failers -No match +\= Expect no match XabcY No match @@ -6124,8 +5899,7 @@ No match DE 0: DE 1: D - *** Failers -No match +\= Expect no match abcE No match abCe @@ -6192,8 +5966,7 @@ No match a123:ddde:9999:b342::324e:dcba:abcd 0: a123:ddde:9999:b342::324e:dcba:abcd 1: - *** Failers -No match +\= Expect no match 1:2:3:4:5:6:7:8 No match a123:bce:ddde:9999:b342::324e:dcba:abcd @@ -6222,12 +5995,12 @@ No match 0: d ] 0: ] - *** Failers - 0: a +\= Expect no match b No match /(a+)*b/ +\= Expect no match aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa No match @@ -6262,8 +6035,7 @@ No match /ab cd(?x) de fg/ ab cddefg 0: ab cddefg - ** Failers -No match +\= Expect no match abcddefg No match @@ -6271,24 +6043,21 @@ No match foobarX 0: bar 1: bar - ** Failers -No match +\= Expect no match boobarX No match /(?a|)*\d/ - aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -No match aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 +\= Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +No match /(?:a|)*\d/ - aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -No match aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 +\= Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +No match /\Z/g abc\n @@ -6540,18 +6306,21 @@ No match /^(?s)(?>.*)(?a+|ab)+?c/ +\= Expect no match aabc No match /(?>a+|ab)+c/ +\= Expect no match aabc No match @@ -6830,10 +6602,12 @@ No match 2: b /^(?:a|ab)++c/ +\= Expect no match aaaabc No match /^(?>a|ab)++c/ +\= Expect no match 
aaaabc No match @@ -6845,8 +6619,7 @@ No match abcabcabc 0: abc 0+ abcabc - ** Failers -No match +\= Expect no match xyz No match @@ -6854,8 +6627,7 @@ No match abcabcabc 0: abc 0+ abcabc - ** Failers -No match +\= Expect no match xyz No match @@ -6863,8 +6635,7 @@ No match abcabcabc 0: abc 0+ abcabc - ** Failers -No match +\= Expect no match xyz No match @@ -6873,8 +6644,7 @@ No match 0: xyz /(?=abc){1}xyz/ - ** Failers -No match +\= Expect no match xyz No match @@ -6913,8 +6683,7 @@ No match /^[\g]+/ ggg<<>> 0: ggg<<>> - ** Failers -No match +\= Expect no match \\ga No match @@ -6937,16 +6706,14 @@ No match /(?<=a{2})b/i xaabc 0: b - ** Failers -No match +\= Expect no match xabc No match /(? X NYQZ 0: X NYQZ - ** Failers -No match +\= Expect no match >XYZ No match > X NY Z @@ -7120,12 +6871,11 @@ No match abcdef 0: 0+ def - ** Failers -No match +\= Expect no match defabcxyz No match -/^(a(b))\1\g1\g{1}\g-1\g{-1}\g{-02}Z/ +/^(a(b))\1\g1\g{1}\g-1\g{-1}\g{-2}Z/ ababababbbabZXXXX 0: ababababbbabZ 1: ab @@ -7140,6 +6890,7 @@ No match 1: bon /(^(a|b\g{-1}))/ +\= Expect no match bacxxx No match @@ -7150,8 +6901,7 @@ No match xyzxyz 0: xyzxyz 1: xyz - ** Failers -No match +\= Expect no match abcxyz No match xyzabc @@ -7164,8 +6914,7 @@ No match xyzabc 0: xyzabc 1: xyz - ** Failers -No match +\= Expect no match xyzxyz No match @@ -7207,8 +6956,7 @@ No match ab:ababxyz 0: ab:abab 1: ab - ** Failers -No match +\= Expect no match a:axyz No match ab:abxyz @@ -7221,8 +6969,7 @@ No match ab:ababxyz 0: ab:abab 1: ab - ** Failers -No match +\= Expect no match a:axyz No match ab:abxyz @@ -7272,8 +7019,7 @@ No match 0: 10.0.0.0 1: 2: .0 - ** Failers -No match +\= Expect no match 10.6 No match 455.3.4.5 @@ -7289,8 +7035,7 @@ No match 10.0.0.0 0: 10.0.0.0 1: .0 - ** Failers -No match +\= Expect no match 10.6 No match 455.3.4.5 @@ -7300,8 +7045,7 @@ No match now is the time for all good men to come to the aid of the party 0: now is the time for all good men to come to the aid of the 
party 1: party - *** Failers -No match +\= Expect no match this is not a line with only words and spaces! No match @@ -7310,8 +7054,7 @@ No match 0: 12345a 1: 12345 2: a - *** Failers -No match +\= Expect no match 12345+ No match @@ -7341,8 +7084,7 @@ No match (abc(def)xyz) 0: (abc(def)xyz) 1: xyz - *** Failers -No match +\= Expect no match ((()aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa No match @@ -7356,8 +7098,7 @@ No match a(b(c))d 0: a(b(c))d 1: d - *** Failers) -No match +\= Expect no match) a(b(c)d No match @@ -7395,8 +7136,7 @@ No match 2: 3: AblewasIereIsawElba 4: A - *** Failers -No match +\= Expect no match Thequickbrownfox No match @@ -7411,8 +7151,7 @@ No match -12 0: -12 1: -12 - *** Failers -No match +\= Expect no match ((2+2)*-3)-7) No match @@ -7425,8 +7164,7 @@ No match 0: xxyzxyzz 1: xxyzxyzz 2: xyzxyz - *** Failers -No match +\= Expect no match xxyzz No match xxyzxyzxyzz @@ -7457,28 +7195,32 @@ No match 0: <> 1: <> 2: <> - *** Failers -No match +\= Expect no match 3: Able was I ere I saw Elba 4: A - *** Failers -No match +\= Expect no match The quick brown fox No match @@ -7601,6 +7342,7 @@ No match 0: ablewasiereisawelba 1: ablewasiereisawelba 2: a +\= Expect no match rhubarb No match the quick brown fox @@ -7610,8 +7352,7 @@ No match baz 0: a 1: a - ** Failers -No match +\= Expect no match caz No match @@ -7619,8 +7360,7 @@ No match zbaaz 0: a 1: a - ** Failers -No match +\= Expect no match aaa No match @@ -7636,8 +7376,7 @@ No match defdef 0: defdef 1: def - ** Failers -No match +\= Expect no match abcdef No match defabc @@ -7650,8 +7389,7 @@ No match defabc 0: defabc 1: def - ** Failers -No match +\= Expect no match defdef No match abcdef @@ -7671,8 +7409,7 @@ No match 4: " 5: 6: " - ** Failers -No match +\= Expect no match b\"11111 No match @@ -7683,8 +7420,7 @@ No match CCD 0: CC 1: C - ** Failers -No match +\= Expect no match CAD No match @@ -7695,8 +7431,7 @@ No match BCD 0: BC 1: C - ** Failers -No match +\= Expect no match ABCD No 
match CAD @@ -7720,8 +7455,7 @@ No match BAX 0: BA 1: A - ** Failers -No match +\= Expect no match ACX No match ABC @@ -7742,16 +7476,14 @@ No match 2: ef /^(?=a(*SKIP)b|ac)/ - ** Failers -No match +\= Expect no match ac No match /^(?=a(*PRUNE)b)/ ab 0: - ** Failers -No match +\= Expect no match ac No match @@ -7825,14 +7557,14 @@ No match # I think is a Perl bug. /A(*COMMIT)(B|D)/ +\= Expect no match ACABX No match # Check the use of names for failure /^(A(*PRUNE:A)B|C(*PRUNE:B)D)/mark - ** Failers -No match +\= Expect no match AC No match, mark = A CB @@ -7843,20 +7575,22 @@ No match, mark = B 0: C 1: C MK: A +\= Expect no match D No match, mark = A /^(A(*THEN:A)B|C(*THEN:B)D)/mark - ** Failers -No match +\= Expect no match CB No match, mark = B /^(?:A(*THEN:A)B|C(*THEN:B)D)/mark +\= Expect no match CB No match, mark = B /^(?>A(*THEN:A)B|C(*THEN:B)D)/mark +\= Expect no match CB No match, mark = B @@ -7882,6 +7616,7 @@ No match, mark = B 0: AC /(*:A)A+(*SKIP:A)(B|Z)/mark +\= Expect no match AAAC No match, mark = A @@ -7905,8 +7640,7 @@ MK: B 1: A 2: B 3: C - ** Failers -No match +\= Expect no match DEFGABC No match @@ -7918,6 +7652,7 @@ No match 1: a /(\w+)b(*COMMIT)\w{2}/ +\= Expect no match abbb No match @@ -7930,10 +7665,12 @@ No match # COMMIT should override THEN. 
/(?>(*COMMIT)(?>yes|no)(*THEN)(*F))?/ +\= Expect no match yes No match /(?>(*COMMIT)(yes|no)(*THEN)(*F))?/ +\= Expect no match yes No match @@ -7944,10 +7681,12 @@ No match 0: bc /(*SKIP)bc/ +\= Expect no match a No match /(*SKIP)b/ +\= Expect no match a No match @@ -7963,9 +7702,7 @@ No match aA 0: aA 1: a - ** Failers - 0: ** - 1: * +\= Expect no match ab No match aB @@ -7978,8 +7715,7 @@ No match /^(?&t)*+(?(DEFINE)(?a))\w$/ aaaaaaX 0: aaaaaaX - ** Failers -No match +\= Expect no match aaaaaa No match @@ -7998,8 +7734,7 @@ No match 0: Y 1: 2: Y - ** Failers -No match +\= Expect no match aaaa No match @@ -8010,8 +7745,7 @@ No match YZ 0: Y 1: Y - ** Failers -No match +\= Expect no match aaaa No match @@ -8020,8 +7754,7 @@ No match 0: aaaaX 1: a 2: X - ** Failers -No match +\= Expect no match aaaa No match YZ @@ -8031,8 +7764,7 @@ No match aaaaX 0: aaaaX 1: X - ** Failers -No match +\= Expect no match aaaa No match YZ @@ -8061,8 +7793,7 @@ No match 0: aaaaX 1: a 2: X - ** Failers -No match +\= Expect no match aaa No match YZ @@ -8072,8 +7803,7 @@ No match aaaaX 0: aaaaX 1: X - ** Failers -No match +\= Expect no match aaa No match YZ @@ -8091,16 +7821,14 @@ No match 1: /(a)++(?1)b/ - ** Failers -No match +\= Expect no match ab No match aab No match /(a)*+(?1)b/ - ** Failers -No match +\= Expect no match ab No match aab @@ -8157,6 +7885,7 @@ No match 1: a /^(a)(?1)++ab/ +\= Expect no match aaaab No match @@ -8229,6 +7958,7 @@ MK: any \x0aname # Capture /^.*? (a(*THEN)b) c/x +\= Expect no match aabc No match @@ -8244,12 +7974,14 @@ No match 2: ab /^.*? ( (a(*THEN)b) ) c/x +\= Expect no match aabc No match # Non-capture /^.*? (?:a(*THEN)b) c/x +\= Expect no match aabc No match @@ -8262,12 +7994,14 @@ No match 0: aabc /^.*? (?: (?:a(*THEN)b) ) c/x +\= Expect no match aabc No match # Atomic /^.*? (?>a(*THEN)b) c/x +\= Expect no match aabc No match @@ -8280,12 +8014,14 @@ No match 0: aabc /^.*? 
(?> (?>a(*THEN)b) ) c/x +\= Expect no match aabc No match # Possessive capture /^.*? (a(*THEN)b)++ c/x +\= Expect no match aabc No match @@ -8301,12 +8037,14 @@ No match 2: ab /^.*? ( (a(*THEN)b)++ )++ c/x +\= Expect no match aabc No match # Possessive non-capture /^.*? (?:a(*THEN)b)++ c/x +\= Expect no match aabc No match @@ -8319,6 +8057,7 @@ No match 0: aabc /^.*? (?: (?:a(*THEN)b)++ )++ c/x +\= Expect no match aabc No match @@ -8331,6 +8070,7 @@ No match # Condition /^.*?(?(?=a)a|b(*THEN)c)/ +\= Expect no match ba No match @@ -8339,6 +8079,7 @@ No match 0: ba /^.*?(?(?=a)a(*THEN)b|c)/ +\= Expect no match ac No match @@ -8374,8 +8115,7 @@ MK: m xabcd 0: c 1: ab - ** Failers -No match +\= Expect no match xacd No match @@ -8413,6 +8153,7 @@ MK: N 0: C 1: C MK: B +\= Expect no match D No match, mark = B @@ -8421,6 +8162,7 @@ No match, mark = B 0: C 1: C MK: B +\= Expect no match D No match, mark = B @@ -8429,6 +8171,7 @@ No match, mark = B 0: C 1: C MK: B +\= Expect no match D No match, mark = B @@ -8437,6 +8180,7 @@ No match, mark = B 0: C 1: C MK: B +\= Expect no match D No match, mark = B @@ -8445,28 +8189,33 @@ No match, mark = B 0: C 1: C MK: B +\= Expect no match D No match, mark = B # This should fail, as the skip causes a bump to offset 3 (the skip). /A(*MARK:A)A+(*SKIP)(B|Z) | AC/x,mark +\= Expect no match AAAC No match, mark = A # Same /A(*MARK:A)A+(*MARK:B)(*SKIP:B)(B|Z) | AC/x,mark +\= Expect no match AAAC No match, mark = B /A(*:A)A+(*SKIP)(B|Z) | AC/x,mark +\= Expect no match AAAC No match, mark = A # This should fail, as a null name is the same as no name. 
/A(*MARK:A)A+(*SKIP:)(B|Z) | AC/x,mark +\= Expect no match AAAC No match, mark = A @@ -8482,8 +8231,7 @@ MK: A XXYZ 0: XXY MK: B - ** Failers -No match +\= Expect no match XAQQ No match, mark = A XAQQXZZ @@ -8502,8 +8250,7 @@ MK: A 0: CD 1: CD MK: B - ** Failers -No match +\= Expect no match AC No match, mark = A CB @@ -8518,8 +8265,7 @@ MK: A 0: CD 1: CD MK: B - ** Failers -No match +\= Expect no match AC No match, mark = A CB @@ -8558,16 +8304,14 @@ MK: A aw 0: aw MK: n - ** Failers -No match, mark = n +\= Expect no match abc No match, mark = m /b(*:m)f|aw/mark abaw 0: aw - ** Failers -No match +\= Expect no match abc No match, mark = m abax @@ -8578,14 +8322,12 @@ No match, mark = m 0: AAC /a(*PRUNE:X)bc|qq/mark,no_start_optimize - ** Failers -No match, mark = X +\= Expect no match axy No match, mark = X /a(*THEN:X)bc|qq/mark,no_start_optimize - ** Failers -No match, mark = X +\= Expect no match axy No match, mark = X @@ -8593,8 +8335,7 @@ No match, mark = X abxy 0: abx MK: A - ** Failers -No match +\= Expect no match abpq No match @@ -8602,8 +8343,7 @@ No match abxy 0: abx MK: Y - ** Failers -No match +\= Expect no match abpq No match @@ -8611,8 +8351,7 @@ No match abxy 0: abx MK: A - ** Failers -No match +\= Expect no match abpq No match @@ -8620,8 +8359,7 @@ No match abxy 0: abx MK: Y - ** Failers -No match +\= Expect no match abpq No match @@ -8629,8 +8367,7 @@ No match abxy 0: abx MK: A - ** Failers -No match +\= Expect no match abpq No match @@ -8638,8 +8375,7 @@ No match abxy 0: abx MK: Y - ** Failers -No match +\= Expect no match abpq No match @@ -8650,6 +8386,7 @@ No match 2: /(another)?(\1+)test/ +\= Expect no match hello world test No match @@ -8675,22 +8412,6 @@ No match aba 0: aba -/.*?a(*PRUNE)b/ - aab - 0: ab - -/.*?a(*PRUNE)b/s - aab - 0: ab - -/^a(*PRUNE)b/s - aab -No match - -/.*?a(*SKIP)b/ - aab - 0: ab - /(?>.*?a)b/s aab 0: ab @@ -8700,6 +8421,7 @@ No match 0: ab /(?>^a)b/s +\= Expect no match aab No match @@ -8722,6 +8444,7 @@ No match 2: 
wxyz "(?>.*)foo" +\= Expect no match abcdfooxyz No match @@ -8738,6 +8461,7 @@ No match 0: ac /(?<=(*SKIP)ac)a/ +\= Expect no match aa No match @@ -8827,6 +8551,7 @@ MK: n # This gives "no match", as expected. /aaaaa(*COMMIT)b|a+c/ +\= Expect no match aaaaaac No match @@ -8903,6 +8628,7 @@ No match abc 0: abc 1: ab +\= Expect no match abd No match @@ -8919,10 +8645,11 @@ No match 0: abd /a(?=b(*COMMIT)c)[^d]|abd/ + abc + 0: ab +\= Expect no match abd No match - abc - 0: ab /a(?=bc).|abd/ abd @@ -8931,6 +8658,7 @@ No match 0: ab /a(?>b(*COMMIT)c)d|abd/ +\= Expect no match abceabd No match @@ -8943,6 +8671,7 @@ No match 0: abd /(?>a(*COMMIT)c)d|abd/ +\= Expect no match abd No match @@ -8956,8 +8685,7 @@ No match # made them compatible. /^(a)?(?(1)a|b)+$/ - *** Failers -No match +\= Expect no match a No match @@ -8977,8 +8705,8 @@ No match abcd 0: abcd - /A(*PRUNE:A)A+(*SKIP:A)(B|Z) | AC/x,mark +\= Expect no match AAAC No match, mark = A @@ -8986,68 +8714,82 @@ No match, mark = A abcd 0: abcd 1: abcd - *** Failers -No match +\= Expect no match abcxy No match /^((yes|no)(*THEN)(*F))?/ +\= Expect no match yes No match /(A (.*) C? (*THEN) | A D) (*FAIL)/x +\= Expect no match AbcdCBefgBhiBqz No match /(A (.*) C? (*THEN) | A D) z/x +\= Expect no match AbcdCBefgBhiBqz No match /(A (.*) C? (*THEN) | A D) \s* (*FAIL)/x +\= Expect no match AbcdCBefgBhiBqz No match /(A (.*) C? 
(*THEN) | A D) \s* z/x +\= Expect no match AbcdCBefgBhiBqz No match /(A (.*) (?:C|) (*THEN) | A D) (*FAIL)/x +\= Expect no match AbcdCBefgBhiBqz No match /(A (.*) (?:C|) (*THEN) | A D) z/x +\= Expect no match AbcdCBefgBhiBqz No match /(A (.*) C{0,6} (*THEN) | A D) (*FAIL)/x +\= Expect no match AbcdCBefgBhiBqz No match /(A (.*) C{0,6} (*THEN) | A D) z/x +\= Expect no match AbcdCBefgBhiBqz No match /(A (.*) (CE){0,6} (*THEN) | A D) (*FAIL)/x +\= Expect no match AbcdCEBefgBhiBqz No match /(A (.*) (CE){0,6} (*THEN) | A D) z/x +\= Expect no match AbcdCEBefgBhiBqz No match /(A (.*) (CE*){0,6} (*THEN) | A D) (*FAIL)/x +\= Expect no match AbcdCBefgBhiBqz No match /(A (.*) (CE*){0,6} (*THEN) | A D) z/x +\= Expect no match AbcdCBefgBhiBqz No match /(?=a(*COMMIT)b|ac)ac|ac/ +\= Expect no match ac No match /(?=a(*COMMIT)b|(ac)) ac | (a)c/x +\= Expect no match ac No match @@ -9082,6 +8824,7 @@ No match 0: bn /(?=b(*SKIP)a)bn|bnn/ +\= Expect no match bnn No match @@ -9148,30 +8891,35 @@ MK: m(m /^\d*\w{4}/ 1234 0: 1234 +\= Expect no match 123 No match /^[^b]*\w{4}/ aaaa 0: aaaa +\= Expect no match aaa No match /^[^b]*\w{4}/i aaaa 0: aaaa +\= Expect no match aaa No match /^a*\w{4}/ aaaa 0: aaaa +\= Expect no match aaa No match /^a*\w{4}/i aaaa 0: aaaa +\= Expect no match aaa No match @@ -9194,8 +8942,7 @@ No match 1: A 2: 3: bar - ** Failers -No match +\= Expect no match Afoofoo No match Abarbar @@ -9278,6 +9025,7 @@ No match 1: ab /(?:x|(?:(xx|yy)+|x|x|x|x|x)|a|a|a)bc/ +\= Expect no match acb No match @@ -9337,6 +9085,7 @@ No match 2: b /(?:(?P=same)?(?:(?P=same)(?Pa)(?P=same)|(?P=same)?(?Pb)(?P=same)){2}(?P=same)(?Pc)(?P=same)){2}(?Pz)?/g,dupnames +\= Expect no match bbbaaaccccaaabbbcc No match @@ -9463,4 +9212,103 @@ No match 0: 1: +/(?|(aaa)|(b))\g{1}/ + aaaaaa + 0: aaaaaa + 1: aaa + bb + 0: bb + 1: b + +/(?|(aaa)|(b))(?1)/ + aaaaaa + 0: aaaaaa + 1: aaa + baaa + 0: baaa + 1: b +\= Expect no match + bb +No match + +/(?|(aaa)|(b))/ + xaaa + 0: aaa + 1: aaa + xbc + 0: b + 1: b + 
+/(?|(?'a'aaa)|(?'a'b))\k'a'/ + aaaaaa + 0: aaaaaa + 1: aaa + bb + 0: bb + 1: b + +/(?|(?'a'aaa)|(?'a'b))(?'a'cccc)\k'a'/dupnames + aaaccccaaa + 0: aaaccccaaa + 1: aaa + 2: cccc + bccccb + 0: bccccb + 1: b + 2: cccc + +# /x does not apply to MARK labels + +/x (*MARK:ab cd # comment +ef) x/x,mark + axxz + 0: xx +MK: ab cd # comment\x0aef + +/(?<=a(B){0}c)X/ + acX + 0: X + +/(?b)(?(DEFINE)(a+))(?&DEFINE)/ + bbbb + 0: bb + 1: b +\= Expect no match + baaab +No match + +/(?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])(?=.*[,;:])(?=.{8,16})(?!.*[\s])/ + \ Fred:099 + 0: + +/(?=.*X)X$/ + \ X + 0: X + +/(?s)(?=.*?)b/ + aabc + 0: b + +/(Z)(a)\2{1,2}?(?-i)\1X/i + ZaAAZX + 0: ZaAAZX + 1: Z + 2: a + +/(?'c')XX(?'YYYYYYYYYYYYYYYYYYYYYYYCl')/ + +/[s[:digit:]\E-H]+/ + s09-H + 0: s09-H + +/[s[:digit:]\Q\E-H]+/ + s09-H + 0: s09-H + +/a+(?:|b)a/ + aaaa + 0: aaaa + +/X?(R||){3335}/ + # End of testinput1 diff --git a/pcre2/testdata/testoutput10 b/pcre2/testdata/testoutput10 index ef248db8f..31b7d00ed 100644 --- a/pcre2/testdata/testoutput10 +++ b/pcre2/testdata/testoutput10 @@ -1,70 +1,10 @@ # This set of tests is for UTF-8 support and Unicode property support, with # relevance only for the 8-bit library. 
-/X(\C{3})/utf - X\x{1234} - 0: X\x{1234} - 1: \x{1234} - -/X(\C{4})/utf - X\x{1234}YZ - 0: X\x{1234}Y - 1: \x{1234}Y - -/X\C*/utf - XYZabcdce - 0: XYZabcdce - -/X\C*?/utf - XYZabcde - 0: X - -/X\C{3,5}/utf - Xabcdefg - 0: Xabcde - X\x{1234} - 0: X\x{1234} - X\x{1234}YZ - 0: X\x{1234}YZ - X\x{1234}\x{512} - 0: X\x{1234}\x{512} - X\x{1234}\x{512}YZ - 0: X\x{1234}\x{512} - -/X\C{3,5}?/utf - Xabcdefg - 0: Xabc - X\x{1234} - 0: X\x{1234} - X\x{1234}YZ - 0: X\x{1234} - X\x{1234}\x{512} - 0: X\x{1234} - -/a\Cb/utf - aXb - 0: aXb - a\nb - 0: a\x{0a}b - -/a\C\Cb/utf - a\x{100}b - 0: a\x{100}b - -/ab\Cde/utf - abXde - 0: abXde - -/a\C\Cb/utf - a\x{100}b - 0: a\x{100}b - ** Failers -No match - a\x{12257}b -No match +# The next 4 patterns have UTF-8 errors /[Ã]/utf -Failed: error -8 at offset 0: UTF-8 error: byte 2 top bits not 0x80 +Failed: error -8 at offset 1: UTF-8 error: byte 2 top bits not 0x80 /Ã/utf Failed: error -3 at offset 0: UTF-8 error: 1 byte missing at end @@ -72,7 +12,13 @@ Failed: error -3 at offset 0: UTF-8 error: 1 byte missing at end /ÃÃÃxxx/utf Failed: error -8 at offset 0: UTF-8 error: byte 2 top bits not 0x80 +/‚‚‚‚‚‚‚Ã/utf +Failed: error -22 at offset 2: UTF-8 error: isolated byte with 0x80 bit set + +# Now test subjects + /badutf/utf +\= Expect UTF-8 errors X\xdf Failed: error -3: UTF-8 error: 1 byte missing at end at offset 1 XX\xef @@ -146,13 +92,14 @@ Failed: error -20: UTF-8 error: overlong 5-byte sequence at offset 0 \xfc\x80\x80\x80\x80\x8f Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0 \x80 -Failed: error -22: UTF-8 error: isolated 0x80 byte at offset 0 +Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0 \xfe Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0 \xff Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0 /badutf/utf +\= Expect UTF-8 errors XX\xfb\x80\x80\x80\x80 Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 2 
XX\xfd\x80\x80\x80\x80\x80 @@ -161,6 +108,7 @@ Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at of Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 2 /shortutf/utf +\= Expect UTF-8 errors XX\xdf\=ph Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2 XX\xef\=ph @@ -193,6 +141,7 @@ Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0 /anything/utf +\= Expect UTF-8 errors X\xc0\x80 Failed: error -17: UTF-8 error: overlong 2-byte sequence at offset 1 XX\xc1\x8f @@ -209,6 +158,15 @@ Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0 Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0 \xff\x80\x80\x80\x80\x80 Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0 + \xf8\x88\x80\x80\x80 +Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0 + \xf9\x87\x80\x80\x80 +Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0 + \xfc\x84\x80\x80\x80\x80 +Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0 + \xfd\x83\x80\x80\x80\x80 +Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0 +\= Expect no match \xc3\x8f No match \xe0\xaf\x80 @@ -219,14 +177,6 @@ No match No match \xf1\x8f\x80\x80 No match - \xf8\x88\x80\x80\x80 -Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0 - \xf9\x87\x80\x80\x80 -Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0 - \xfc\x84\x80\x80\x80\x80 -Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0 - \xfd\x83\x80\x80\x80\x80 -Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0 \xf8\x88\x80\x80\x80\=no_utf_check No match \xf9\x87\x80\x80\x80\=no_utf_check @@ -235,7 
+185,62 @@ No match No match \xfd\x83\x80\x80\x80\x80\=no_utf_check No match + +# Similar tests with offsets +/badutf/utf +\= Expect UTF-8 errors + X\xdfabcd +Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 + X\xdfabcd\=offset=1 +Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 +\= Expect no match + X\xdfabcd\=offset=2 +No match + +/(?<=x)badutf/utf +\= Expect UTF-8 errors + X\xdfabcd +Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 + X\xdfabcd\=offset=1 +Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 + X\xdfabcd\=offset=2 +Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 + X\xdfabcd\xdf\=offset=3 +Failed: error -3: UTF-8 error: 1 byte missing at end at offset 6 +\= Expect no match + X\xdfabcd\=offset=3 +No match + +/(?<=xx)badutf/utf +\= Expect UTF-8 errors + X\xdfabcd +Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 + X\xdfabcd\=offset=1 +Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 + X\xdfabcd\=offset=2 +Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 + X\xdfabcd\=offset=3 +Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 + +/(?<=xxxx)badutf/utf +\= Expect UTF-8 errors + X\xdfabcd +Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 + X\xdfabcd\=offset=1 +Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 + X\xdfabcd\=offset=2 +Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 + X\xdfabcd\=offset=3 +Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 + X\xdfabc\xdf\=offset=6 +Failed: error -3: UTF-8 error: 1 byte missing at end at offset 5 + X\xdfabc\xdf\=offset=7 +Failed: error -33: bad offset value +\= Expect no match + X\xdfabcd\=offset=6 +No match + /\x{100}/IB,utf ------------------------------------------------------------------ Bra @@ -448,29 +453,6 @@ First code unit = \xf0 Last code unit = \xab Subject length 
lower bound = 1 -# This one is here not because it's different to Perl, but because the way -# the captured single-byte is displayed. (In Perl it becomes a character, and you -# can't tell the difference.) - -/X(\C)(.*)/utf - X\x{1234} - 0: X\x{1234} - 1: \x{e1} - 2: \x{88}\x{b4} - X\nabc - 0: X\x{0a}abc - 1: \x{0a} - 2: abc - -# This one is here because Perl gives out a grumbly error message (quite -# correctly, but that messes up comparisons). - -/a\Cb/utf - *** Failers -No match - a\x{100}b -No match - /[^ab\xC0-\xF0]/IB,utf ------------------------------------------------------------------ Bra @@ -499,8 +481,7 @@ Subject length lower bound = 1 0: \x{100} \x{1000} 0: \x{1000} - *** Failers - 0: * +\= Expect no match \x{c0} No match \x{f0} @@ -659,8 +640,6 @@ Subject length lower bound = 1 0: \x{100} \x{100}Z 0: \x{100} - *** Failers -No match /[\xff]/IB,utf ------------------------------------------------------------------ @@ -750,33 +729,35 @@ Failed: error 106 at offset 15: missing terminating ] for character class # This tests the stricter UTF-8 check according to RFC 3629. 
/X/utf +\= Expect UTF-8 errors \x{d800} Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0 - \x{d800}\=no_utf_check -No match \x{da00} Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0 - \x{da00}\=no_utf_check -No match \x{dfff} Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0 - \x{dfff}\=no_utf_check -No match \x{110000} Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 0 - \x{110000}\=no_utf_check -No match \x{2000000} Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0 - \x{2000000}\=no_utf_check -No match \x{7fffffff} Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0 +\= Expect no match + \x{d800}\=no_utf_check +No match + \x{da00}\=no_utf_check +No match + \x{dfff}\=no_utf_check +No match + \x{110000}\=no_utf_check +No match + \x{2000000}\=no_utf_check +No match \x{7fffffff}\=no_utf_check No match /(*UTF8)\x{1234}/ - abcd\x{1234}pqr + abcd\x{1234}pqr 0: \x{1234} /(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I @@ -887,16 +868,19 @@ Subject length lower bound = 3 /a+/utf a\x{123}aa\=offset=1 0: aa - a\x{123}aa\=offset=2 -Error -36 (bad UTF-8 offset) a\x{123}aa\=offset=3 0: aa a\x{123}aa\=offset=4 0: a - a\x{123}aa\=offset=5 -No match +\= Expect bad offset value a\x{123}aa\=offset=6 Failed: error -33: bad offset value +\= Expect bad UTF-8 offset + a\x{123}aa\=offset=2 +Error -36 (bad UTF-8 offset) +\= Expect no match + a\x{123}aa\=offset=5 +No match /\x{1234}+/Ii,utf Capturing subpattern count = 0 @@ -1281,8 +1265,6 @@ Subject length lower bound = 1 0: \x{100} \x{100}Z 0: \x{100} - *** Failers -No match /[z-\x{100}]/IB,utf ------------------------------------------------------------------ @@ -1467,8 +1449,7 @@ Subject length lower bound = 1 0: \x{105} \x{109} 0: \x{109} - ** Failers -No match +\= Expect no match \x{100} No match \x{10a} @@ -1507,8 +1488,7 @@ 
Subject length lower bound = 1 0: \x{100} \x{101} 0: \x{101} - ** Failers -No match +\= Expect no match \x{102} No match Y @@ -1547,7 +1527,52 @@ Last code unit = 'B' (caseless) Subject length lower bound = 2 /abc/utf,replace=à - abc + abc Failed: error -3: UTF-8 error: 1 byte missing at end +/(?<=(a)(?-1))x/I,utf +Capturing subpattern count = 1 +Max lookbehind = 2 +Options: utf +First code unit = 'x' +Subject length lower bound = 1 + a\x80zx\=offset=3 +Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 1 + +/[\W\p{Any}]/B +------------------------------------------------------------------ + Bra + [\x00-/:-@[-^`{-\xff\p{Any}] + Ket + End +------------------------------------------------------------------ + abc + 0: a + 123 + 0: 1 + +/[\W\pL]/B +------------------------------------------------------------------ + Bra + [\x00-/:-@[-^`{-\xff\p{L}] + Ket + End +------------------------------------------------------------------ + abc + 0: a +\= Expect no match + 123 +No match + +/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':Æ¿)/utf +Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN) + +/[\s[:^ascii:]]/B,ucp +------------------------------------------------------------------ + Bra + [\x80-\xff\p{Xsp}] + Ket + End +------------------------------------------------------------------ + # End of testinput10 diff --git a/pcre2/testdata/testoutput11-16 b/pcre2/testdata/testoutput11-16 index eda8938dc..e22581d69 100644 --- a/pcre2/testdata/testoutput11-16 +++ b/pcre2/testdata/testoutput11-16 @@ -4,13 +4,8 @@ # different, so they have separate output files. 
#forbid_utf +#newline_default LF ANY ANYCRLF -/a\Cb/ - aXb - 0: aXb - a\nb - 0: a\x0ab - /[^\x{c4}]/IB ------------------------------------------------------------------ Bra @@ -581,7 +576,7 @@ Failed: error 134 at offset 11: character code point value in \x{} or \o{} is to # Non-UTF characters -/\C{2,3}/ +/.{2,3}/ \x{400000}\x{400001}\x{400002}\x{400003} ** Character \x{400000} is greater than 0xffff and UTF-16 mode is not enabled. ** Truncation will probably give the wrong result. @@ -646,4 +641,24 @@ Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0e \xfc \xfd \xfe \xff Subject length lower bound = 1 +/(*THEN:\[A]{65501})/expand + +# We can use pcre2test's utf8_input modifier to create wide pattern characters, +# even though this test is run when UTF is not supported. + +/abý¿¿¿¿¿z/utf8_input +** Failed: character value greater than 0xffff cannot be converted to 16-bit in non-UTF mode + abý¿¿¿¿¿z + ab\x{7fffffff}z + +/abÿý¿¿¿¿¿z/utf8_input +** Failed: invalid UTF-8 string cannot be converted to 16-bit string + abÿý¿¿¿¿¿z + ab\x{ffffffff}z + +/abÿAz/utf8_input +** Failed: invalid UTF-8 string cannot be converted to 16-bit string + abÿAz + ab\x{80000041}z + # End of testinput11 diff --git a/pcre2/testdata/testoutput11-32 b/pcre2/testdata/testoutput11-32 index f04878528..d8a909fc4 100644 --- a/pcre2/testdata/testoutput11-32 +++ b/pcre2/testdata/testoutput11-32 @@ -4,13 +4,8 @@ # different, so they have separate output files. 
#forbid_utf +#newline_default LF ANY ANYCRLF -/a\Cb/ - aXb - 0: aXb - a\nb - 0: a\x0ab - /[^\x{c4}]/IB ------------------------------------------------------------------ Bra @@ -582,7 +577,7 @@ Subject length lower bound = 2 # Non-UTF characters -/\C{2,3}/ +/.{2,3}/ \x{400000}\x{400001}\x{400002}\x{400003} 0: \x{400000}\x{400001}\x{400002} @@ -649,4 +644,27 @@ Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0e \xfc \xfd \xfe \xff Subject length lower bound = 1 +/(*THEN:\[A]{65501})/expand + +# We can use pcre2test's utf8_input modifier to create wide pattern characters, +# even though this test is run when UTF is not supported. + +/abý¿¿¿¿¿z/utf8_input + abý¿¿¿¿¿z + 0: ab\x{7fffffff}z + ab\x{7fffffff}z + 0: ab\x{7fffffff}z + +/abÿý¿¿¿¿¿z/utf8_input + abÿý¿¿¿¿¿z + 0: ab\x{ffffffff}z + ab\x{ffffffff}z + 0: ab\x{ffffffff}z + +/abÿAz/utf8_input + abÿAz + 0: ab\x{80000041}z + ab\x{80000041}z + 0: ab\x{80000041}z + # End of testinput11 diff --git a/pcre2/testdata/testoutput12-16 b/pcre2/testdata/testoutput12-16 index 81584dd89..3b5a0cd26 100644 --- a/pcre2/testdata/testoutput12-16 +++ b/pcre2/testdata/testoutput12-16 @@ -9,78 +9,6 @@ Ã] ** Failed: invalid UTF-8 string cannot be used as input in UTF mode -/X(\C{3})/utf - X\x{11234}Y - 0: X\x{11234}Y - 1: \x{11234}Y - X\x{11234}YZ - 0: X\x{11234}Y - 1: \x{11234}Y - -/X(\C{4})/utf - X\x{11234}YZ - 0: X\x{11234}YZ - 1: \x{11234}YZ - X\x{11234}YZW - 0: X\x{11234}YZ - 1: \x{11234}YZ - -/X\C*/utf - XYZabcdce - 0: XYZabcdce - -/X\C*?/utf - XYZabcde - 0: X - -/X\C{3,5}/utf - Xabcdefg - 0: Xabcde - X\x{11234}Y - 0: X\x{11234}Y - X\x{11234}YZ - 0: X\x{11234}YZ - X\x{11234}\x{512} - 0: X\x{11234}\x{512} - X\x{11234}\x{512}YZ - 0: X\x{11234}\x{512}YZ - X\x{11234}\x{512}\x{11234}Z - 0: X\x{11234}\x{512}\x{11234} - -/X\C{3,5}?/utf - Xabcdefg - 0: Xabc - X\x{11234}Y - 0: X\x{11234}Y - X\x{11234}YZ - 0: X\x{11234}Y - X\x{11234}\x{512}YZ - 0: X\x{11234}\x{512} - *** Failers -No match - X\x{11234} -No match - 
-/a\Cb/utf - aXb - 0: aXb - a\nb - 0: a\x{0a}b - -/a\C\Cb/utf - a\x{12257}b - 0: a\x{12257}b - a\x{12257}\x{11234}b -No match - ** Failers -No match - a\x{100}b -No match - -/ab\Cde/utf - abXde - 0: abXde - # Check maximum character size /\x{ffff}/IB,utf @@ -310,29 +238,6 @@ First code unit = \x{d844} Last code unit = \x{deab} Subject length lower bound = 1 -# This one is here not because it's different to Perl, but because the way -# the captured single-byte is displayed. (In Perl it becomes a character, and you -# can't tell the difference.) - -/X(\C)(.*)/utf - X\x{1234} - 0: X\x{1234} - 1: \x{1234} - 2: - X\nabc - 0: X\x{0a}abc - 1: \x{0a} - 2: abc - -# This one is here because Perl gives out a grumbly error message (quite -# correctly, but that messes up comparisons). - -/a\Cb/utf - *** Failers -No match - a\x{100}b - 0: a\x{100}b - /[^ab\xC0-\xF0]/IB,utf ------------------------------------------------------------------ Bra @@ -362,8 +267,7 @@ Subject length lower bound = 1 0: \x{100} \x{1000} 0: \x{1000} - *** Failers - 0: * +\= Expect no match \x{c0} No match \x{f0} @@ -520,8 +424,6 @@ Subject length lower bound = 1 0: \x{100} \x{100}Z 0: \x{100} - *** Failers -No match /[\xff]/IB,utf ------------------------------------------------------------------ @@ -607,30 +509,38 @@ Subject length lower bound = 2 Failed: error 106 at offset 13: missing terminating ] for character class /X/utf - XX\x{d800} -Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2 XX\x{d800}\=no_utf_check 0: X - XX\x{da00} -Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2 XX\x{da00}\=no_utf_check 0: X - XX\x{dc00} -Failed: error -26: UTF-16 error: isolated low surrogate at offset 2 XX\x{dc00}\=no_utf_check 0: X - XX\x{de00} -Failed: error -26: UTF-16 error: isolated low surrogate at offset 2 XX\x{de00}\=no_utf_check 0: X - XX\x{dfff} -Failed: error -26: UTF-16 error: isolated low surrogate at offset 2 XX\x{dfff}\=no_utf_check 0: X +\= Expect UTF 
error + XX\x{d800} +Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2 + XX\x{da00} +Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2 + XX\x{dc00} +Failed: error -26: UTF-16 error: isolated low surrogate at offset 2 + XX\x{de00} +Failed: error -26: UTF-16 error: isolated low surrogate at offset 2 + XX\x{dfff} +Failed: error -26: UTF-16 error: isolated low surrogate at offset 2 XX\x{110000} ** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16 XX\x{d800}\x{1234} Failed: error -25: UTF-16 error: invalid low surrogate at offset 3 +\= Expect no match + XX\x{d800}\=offset=3 +No match + +/(?<=.)X/utf + XX\x{d800}\=offset=3 +Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2 /(*UTF16)\x{11234}/ abcd\x{11234}pqr @@ -647,7 +557,7 @@ Subject length lower bound = 1 0: \x{11234} /(*UTF-32)\x{11234}/ -Failed: error 134 at offset 17: character code point value in \x{} or \o{} is too large +Failed: error 160 at offset 5: (*VERB) not recognized or malformed abcd\x{11234}pqr /(*UTF-32)\x{112}/ @@ -788,8 +698,10 @@ Subject length lower bound = 3 0: aa a\x{123}aa\=offset=3 0: a +\= Expect no match a\x{123}aa\=offset=4 No match +\= Expect bad offset error a\x{123}aa\=offset=5 Failed: error -33: bad offset value a\x{123}aa\=offset=6 @@ -854,16 +766,21 @@ Subject length lower bound = 1 # Check bad offset /a/utf +\= Expect bad UTF-16 offset, or no match in 32-bit \x{10000}\=offset=1 Error -36 (bad UTF-16 offset) \x{10000}ab\=offset=1 Error -36 (bad UTF-16 offset) +\= Expect 16-bit match, 32-bit no match \x{10000}ab\=offset=2 0: a +\= Expect no match \x{10000}ab\=offset=3 No match +\= Expect no match in 16-bit, bad offset in 32-bit \x{10000}ab\=offset=4 No match +\= Expect bad offset \x{10000}ab\=offset=5 Failed: error -33: bad offset value @@ -1123,10 +1040,6 @@ Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too /\o{4200000}/utf Failed: error 134 at 
offset 10: character code point value in \x{} or \o{} is too large -/\C/utf - \x{110000} -** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16 - /\x{100}*A/IB,utf ------------------------------------------------------------------ Bra @@ -1174,8 +1087,6 @@ Subject length lower bound = 1 0: \x{100} \x{100}Z 0: \x{100} - *** Failers -No match /[z-\x{100}]/IB,utf ------------------------------------------------------------------ @@ -1365,8 +1276,7 @@ Subject length lower bound = 1 0: \x{105} \x{109} 0: \x{109} - ** Failers -No match +\= Expect no match \x{100} No match \x{10a} @@ -1410,8 +1320,7 @@ Subject length lower bound = 1 0: \x{100} \x{101} 0: \x{101} - ** Failers -No match +\= Expect no match \x{102} No match Y @@ -1454,4 +1363,56 @@ Starting code units: \xff Last code unit = 'B' (caseless) Subject length lower bound = 2 +/./utf + \x{110000} +** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16 + +/(*UTF)abý¿¿¿¿¿z/B +------------------------------------------------------------------ + Bra + ab\x{fd}\x{bf}\x{bf}\x{bf}\x{bf}\x{bf}z + Ket + End +------------------------------------------------------------------ + +/abý¿¿¿¿¿z/utf +** Failed: character value greater than 0x10ffff cannot be converted to UTF + +/[\W\p{Any}]/B +------------------------------------------------------------------ + Bra + [\x00-/:-@[-^`{-\xff\p{Any}\x{100}-\x{ffff}] + Ket + End +------------------------------------------------------------------ + abc + 0: a + 123 + 0: 1 + +/[\W\pL]/B +------------------------------------------------------------------ + Bra + [\x00-/:-@[-^`{-\xff\p{L}\x{100}-\x{ffff}] + Ket + End +------------------------------------------------------------------ + abc + 0: a + \x{100} + 0: \x{100} + \x{308} + 0: \x{308} +\= Expect no match + 123 +No match + +/[\s[:^ascii:]]/B,ucp +------------------------------------------------------------------ + Bra + [\x80-\xff\p{Xsp}\x{100}-\x{ffff}] + 
Ket + End +------------------------------------------------------------------ + # End of testinput12 diff --git a/pcre2/testdata/testoutput12-32 b/pcre2/testdata/testoutput12-32 index 89ac70a6f..149615949 100644 --- a/pcre2/testdata/testoutput12-32 +++ b/pcre2/testdata/testoutput12-32 @@ -9,76 +9,6 @@ Ã] ** Failed: invalid UTF-8 string cannot be used as input in UTF mode -/X(\C{3})/utf - X\x{11234}Y -No match - X\x{11234}YZ - 0: X\x{11234}YZ - 1: \x{11234}YZ - -/X(\C{4})/utf - X\x{11234}YZ -No match - X\x{11234}YZW - 0: X\x{11234}YZW - 1: \x{11234}YZW - -/X\C*/utf - XYZabcdce - 0: XYZabcdce - -/X\C*?/utf - XYZabcde - 0: X - -/X\C{3,5}/utf - Xabcdefg - 0: Xabcde - X\x{11234}Y -No match - X\x{11234}YZ - 0: X\x{11234}YZ - X\x{11234}\x{512} -No match - X\x{11234}\x{512}YZ - 0: X\x{11234}\x{512}YZ - X\x{11234}\x{512}\x{11234}Z - 0: X\x{11234}\x{512}\x{11234}Z - -/X\C{3,5}?/utf - Xabcdefg - 0: Xabc - X\x{11234}Y -No match - X\x{11234}YZ - 0: X\x{11234}YZ - X\x{11234}\x{512}YZ - 0: X\x{11234}\x{512}Y - *** Failers -No match - X\x{11234} -No match - -/a\Cb/utf - aXb - 0: aXb - a\nb - 0: a\x{0a}b - -/a\C\Cb/utf - a\x{12257}b -No match - a\x{12257}\x{11234}b - 0: a\x{12257}\x{11234}b - ** Failers -No match - a\x{100}b -No match - -/ab\Cde/utf - abXde - 0: abXde - # Check maximum character size /\x{ffff}/IB,utf @@ -303,29 +233,6 @@ Options: utf First code unit = \x{212ab} Subject length lower bound = 1 -# This one is here not because it's different to Perl, but because the way -# the captured single-byte is displayed. (In Perl it becomes a character, and you -# can't tell the difference.) - -/X(\C)(.*)/utf - X\x{1234} - 0: X\x{1234} - 1: \x{1234} - 2: - X\nabc - 0: X\x{0a}abc - 1: \x{0a} - 2: abc - -# This one is here because Perl gives out a grumbly error message (quite -# correctly, but that messes up comparisons). 
- -/a\Cb/utf - *** Failers -No match - a\x{100}b - 0: a\x{100}b - /[^ab\xC0-\xF0]/IB,utf ------------------------------------------------------------------ Bra @@ -355,8 +262,7 @@ Subject length lower bound = 1 0: \x{100} \x{1000} 0: \x{1000} - *** Failers - 0: * +\= Expect no match \x{c0} No match \x{f0} @@ -513,8 +419,6 @@ Subject length lower bound = 1 0: \x{100} \x{100}Z 0: \x{100} - *** Failers -No match /[\xff]/IB,utf ------------------------------------------------------------------ @@ -600,30 +504,38 @@ Subject length lower bound = 2 Failed: error 106 at offset 13: missing terminating ] for character class /X/utf - XX\x{d800} -Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 XX\x{d800}\=no_utf_check 0: X - XX\x{da00} -Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 XX\x{da00}\=no_utf_check 0: X - XX\x{dc00} -Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 XX\x{dc00}\=no_utf_check 0: X - XX\x{de00} -Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 XX\x{de00}\=no_utf_check 0: X - XX\x{dfff} -Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 XX\x{dfff}\=no_utf_check 0: X +\= Expect UTF error + XX\x{d800} +Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 + XX\x{da00} +Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 + XX\x{dc00} +Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 + XX\x{de00} +Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 + XX\x{dfff} +Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 XX\x{110000} Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defined at offset 2 XX\x{d800}\x{1234} Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are 
not defined at offset 2 +\= Expect no match + XX\x{d800}\=offset=3 +No match + +/(?<=.)X/utf + XX\x{d800}\=offset=3 +Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 /(*UTF16)\x{11234}/ Failed: error 160 at offset 5: (*VERB) not recognized or malformed @@ -780,8 +692,10 @@ Subject length lower bound = 3 0: aa a\x{123}aa\=offset=3 0: a +\= Expect no match a\x{123}aa\=offset=4 No match +\= Expect bad offset error a\x{123}aa\=offset=5 Failed: error -33: bad offset value a\x{123}aa\=offset=6 @@ -846,16 +760,21 @@ Subject length lower bound = 1 # Check bad offset /a/utf +\= Expect bad UTF-16 offset, or no match in 32-bit \x{10000}\=offset=1 No match \x{10000}ab\=offset=1 0: a +\= Expect 16-bit match, 32-bit no match \x{10000}ab\=offset=2 No match +\= Expect no match \x{10000}ab\=offset=3 No match +\= Expect no match in 16-bit, bad offset in 32-bit \x{10000}ab\=offset=4 Failed: error -33: bad offset value +\= Expect bad offset \x{10000}ab\=offset=5 Failed: error -33: bad offset value @@ -1115,10 +1034,6 @@ Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too /\o{4200000}/utf Failed: error 134 at offset 10: character code point value in \x{} or \o{} is too large -/\C/utf - \x{110000} -Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defined at offset 0 - /\x{100}*A/IB,utf ------------------------------------------------------------------ Bra @@ -1166,8 +1081,6 @@ Subject length lower bound = 1 0: \x{100} \x{100}Z 0: \x{100} - *** Failers -No match /[z-\x{100}]/IB,utf ------------------------------------------------------------------ @@ -1357,8 +1270,7 @@ Subject length lower bound = 1 0: \x{105} \x{109} 0: \x{109} - ** Failers -No match +\= Expect no match \x{100} No match \x{10a} @@ -1402,8 +1314,7 @@ Subject length lower bound = 1 0: \x{100} \x{101} 0: \x{101} - ** Failers -No match +\= Expect no match \x{102} No match Y @@ -1446,4 +1357,56 @@ Starting code units: \xff Last code 
unit = 'B' (caseless) Subject length lower bound = 2 +/./utf + \x{110000} +Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defined at offset 0 + +/(*UTF)abý¿¿¿¿¿z/B +------------------------------------------------------------------ + Bra + ab\x{fd}\x{bf}\x{bf}\x{bf}\x{bf}\x{bf}z + Ket + End +------------------------------------------------------------------ + +/abý¿¿¿¿¿z/utf +** Failed: character value greater than 0x10ffff cannot be converted to UTF + +/[\W\p{Any}]/B +------------------------------------------------------------------ + Bra + [\x00-/:-@[-^`{-\xff\p{Any}\x{100}-\x{ffffffff}] + Ket + End +------------------------------------------------------------------ + abc + 0: a + 123 + 0: 1 + +/[\W\pL]/B +------------------------------------------------------------------ + Bra + [\x00-/:-@[-^`{-\xff\p{L}\x{100}-\x{ffffffff}] + Ket + End +------------------------------------------------------------------ + abc + 0: a + \x{100} + 0: \x{100} + \x{308} + 0: \x{308} +\= Expect no match + 123 +No match + +/[\s[:^ascii:]]/B,ucp +------------------------------------------------------------------ + Bra + [\x80-\xff\p{Xsp}\x{100}-\x{ffffffff}] + Ket + End +------------------------------------------------------------------ + # End of testinput12 diff --git a/pcre2/testdata/testoutput14 b/pcre2/testdata/testoutput14 deleted file mode 100644 index cdfd6f76c..000000000 --- a/pcre2/testdata/testoutput14 +++ /dev/null @@ -1,242 +0,0 @@ -# These are: -# -# (1) Tests of the match-limiting features. The results are different for -# interpretive or JIT matching, so this test should not be run with JIT. The -# same tests are run using JIT in test 16. - -# (2) Other tests that must not be run with JIT. 
- -/(a+)*zz/I -Capturing subpattern count = 1 -Starting code units: a z -Last code unit = 'z' -Subject length lower bound = 2 - aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits -Minimum match limit = 8 -Minimum recursion limit = 6 - 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazz - 1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa - aaaaaaaaaaaaaz\=find_limits -Minimum match limit = 32768 -Minimum recursion limit = 29 -No match - -!((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I -Capturing subpattern count = 1 -May match empty string -Subject length lower bound = 0 - /* this is a C style comment */\=find_limits -Minimum match limit = 120 -Minimum recursion limit = 6 - 0: /* this is a C style comment */ - 1: /* this is a C style comment */ - -/^(?>a)++/ - aa\=find_limits -Minimum match limit = 5 -Minimum recursion limit = 2 - 0: aa - aaaaaaaaa\=find_limits -Minimum match limit = 12 -Minimum recursion limit = 2 - 0: aaaaaaaaa - -/(a)(?1)++/ - aa\=find_limits -Minimum match limit = 7 -Minimum recursion limit = 4 - 0: aa - 1: a - aaaaaaaaa\=find_limits -Minimum match limit = 21 -Minimum recursion limit = 4 - 0: aaaaaaaaa - 1: a - -/a(?:.)*?a/ims - abbbbbbbbbbbbbbbbbbbbba\=find_limits -Minimum match limit = 65 -Minimum recursion limit = 2 - 0: abbbbbbbbbbbbbbbbbbbbba - -/a(?:.(*THEN))*?a/ims - abbbbbbbbbbbbbbbbbbbbba\=find_limits -Minimum match limit = 86 -Minimum recursion limit = 45 - 0: abbbbbbbbbbbbbbbbbbbbba - -/a(?:.(*THEN:ABC))*?a/ims - abbbbbbbbbbbbbbbbbbbbba\=find_limits -Minimum match limit = 86 -Minimum recursion limit = 45 - 0: abbbbbbbbbbbbbbbbbbbbba - -/^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/ - aabbccddee\=find_limits -Minimum match limit = 7 -Minimum recursion limit = 2 - 0: aabbccddee - -/^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/ - aabbccddee\=find_limits -Minimum match limit = 17 -Minimum recursion limit = 16 - 0: aabbccddee - 1: aa - 2: bb - 3: cc - 4: dd - 5: ee - 
-/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/ - aabbccddee\=find_limits -Minimum match limit = 13 -Minimum recursion limit = 10 - 0: aabbccddee - 1: aa - 2: cc - 3: ee - -/(*LIMIT_MATCH=12bc)abc/ -Failed: error 160 at offset 0: (*VERB) not recognized or malformed - -/(*LIMIT_MATCH=4294967290)abc/ -Failed: error 160 at offset 0: (*VERB) not recognized or malformed - -/(*LIMIT_RECURSION=4294967280)abc/I -Capturing subpattern count = 0 -Recursion limit = 4294967280 -First code unit = 'a' -Last code unit = 'c' -Subject length lower bound = 3 - -/(a+)*zz/ - aaaaaaaaaaaaaz -No match - aaaaaaaaaaaaaz\=match_limit=3000 -Failed: error -47: match limit exceeded - -/(a+)*zz/ - aaaaaaaaaaaaaz\=recursion_limit=10 -Failed: error -53: recursion limit exceeded - -/(*LIMIT_MATCH=3000)(a+)*zz/I -Capturing subpattern count = 1 -Match limit = 3000 -Starting code units: a z -Last code unit = 'z' -Subject length lower bound = 2 - aaaaaaaaaaaaaz -Failed: error -47: match limit exceeded - aaaaaaaaaaaaaz\=match_limit=60000 -Failed: error -47: match limit exceeded - -/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I -Capturing subpattern count = 1 -Match limit = 3000 -Starting code units: a z -Last code unit = 'z' -Subject length lower bound = 2 - aaaaaaaaaaaaaz -Failed: error -47: match limit exceeded - -/(*LIMIT_MATCH=60000)(a+)*zz/I -Capturing subpattern count = 1 -Match limit = 60000 -Starting code units: a z -Last code unit = 'z' -Subject length lower bound = 2 - aaaaaaaaaaaaaz -No match - aaaaaaaaaaaaaz\=match_limit=3000 -Failed: error -47: match limit exceeded - -/(*LIMIT_RECURSION=10)(a+)*zz/I -Capturing subpattern count = 1 -Recursion limit = 10 -Starting code units: a z -Last code unit = 'z' -Subject length lower bound = 2 - aaaaaaaaaaaaaz -Failed: error -53: recursion limit exceeded - aaaaaaaaaaaaaz\=recursion_limit=1000 -Failed: error -53: recursion limit exceeded - -/(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/I -Capturing subpattern count = 1 -Recursion limit = 1000 
-Starting code units: a z -Last code unit = 'z' -Subject length lower bound = 2 - aaaaaaaaaaaaaz -No match - -/(*LIMIT_RECURSION=1000)(a+)*zz/I -Capturing subpattern count = 1 -Recursion limit = 1000 -Starting code units: a z -Last code unit = 'z' -Subject length lower bound = 2 - aaaaaaaaaaaaaz -No match - aaaaaaaaaaaaaz\=recursion_limit=10 -Failed: error -53: recursion limit exceeded - -# These three have infinitely nested recursions. - -/((?2))((?1))/ - abc -Failed: error -52: nested recursion at the same subject position - -/((?(R2)a+|(?1)b))/ - aaaabcde -Failed: error -52: nested recursion at the same subject position - -/(?(R)a*(?1)|((?R))b)/ - aaaabcde -Failed: error -52: nested recursion at the same subject position - -# The allusedtext modifier does not work with JIT, which does not maintain -# the leftchar/rightchar data. - -/abc(?=xyz)/allusedtext - abcxyzpqr - 0: abcxyz - >>> - abcxyzpqr\=aftertext - 0: abcxyz - >>> - 0+ xyzpqr - -/(?<=pqr)abc(?=xyz)/allusedtext - xyzpqrabcxyzpqr - 0: pqrabcxyz - <<< >>> - xyzpqrabcxyzpqr\=aftertext - 0: pqrabcxyz - <<< >>> - 0+ xyzpqr - -/a\b/ - a.\=allusedtext - 0: a. - > - a\=allusedtext - 0: a - -/abc\Kxyz/ - abcxyz\=allusedtext - 0: abcxyz - <<< - -/abc(?=xyz(*ACCEPT))/ - abcxyz\=allusedtext - 0: abcxyz - >>> - -/abc(?=abcde)(?=ab)/allusedtext - abcabcdefg - 0: abcabcde - >>>>> - -# End of testinput14 diff --git a/pcre2/testdata/testoutput14-16 b/pcre2/testdata/testoutput14-16 new file mode 100644 index 000000000..05b7d48a8 --- /dev/null +++ b/pcre2/testdata/testoutput14-16 @@ -0,0 +1,61 @@ +# These test special (mostly error) UTF features of DFA matching. They are a +# selection of the more comprehensive tests that are run for non-DFA matching. +# The output is different for the different widths. 
+ +#subject dfa + +/X/utf + XX\x{d800} +Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2 + XX\x{d800}\=offset=3 +No match + XX\x{d800}\=no_utf_check + 0: X + XX\x{da00} +Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2 + XX\x{da00}\=no_utf_check + 0: X + XX\x{dc00} +Failed: error -26: UTF-16 error: isolated low surrogate at offset 2 + XX\x{dc00}\=no_utf_check + 0: X + XX\x{de00} +Failed: error -26: UTF-16 error: isolated low surrogate at offset 2 + XX\x{de00}\=no_utf_check + 0: X + XX\x{dfff} +Failed: error -26: UTF-16 error: isolated low surrogate at offset 2 + XX\x{dfff}\=no_utf_check + 0: X + XX\x{110000} +** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16 + XX\x{d800}\x{1234} +Failed: error -25: UTF-16 error: invalid low surrogate at offset 3 + +/badutf/utf + X\xdf +No match + XX\xef +No match + XXX\xef\x80 +No match + X\xf7 +No match + XX\xf7\x80 +No match + XXX\xf7\x80\x80 +No match + +/shortutf/utf + XX\xdf\=ph +No match + XX\xef\=ph +No match + XX\xef\x80\=ph +No match + \xf7\=ph +No match + \xf7\x80\=ph +No match + +# End of testinput14 diff --git a/pcre2/testdata/testoutput14-32 b/pcre2/testdata/testoutput14-32 new file mode 100644 index 000000000..30d7fa642 --- /dev/null +++ b/pcre2/testdata/testoutput14-32 @@ -0,0 +1,61 @@ +# These test special (mostly error) UTF features of DFA matching. They are a +# selection of the more comprehensive tests that are run for non-DFA matching. +# The output is different for the different widths. 
+ +#subject dfa + +/X/utf + XX\x{d800} +Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 + XX\x{d800}\=offset=3 +No match + XX\x{d800}\=no_utf_check + 0: X + XX\x{da00} +Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 + XX\x{da00}\=no_utf_check + 0: X + XX\x{dc00} +Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 + XX\x{dc00}\=no_utf_check + 0: X + XX\x{de00} +Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 + XX\x{de00}\=no_utf_check + 0: X + XX\x{dfff} +Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 + XX\x{dfff}\=no_utf_check + 0: X + XX\x{110000} +Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defined at offset 2 + XX\x{d800}\x{1234} +Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 + +/badutf/utf + X\xdf +No match + XX\xef +No match + XXX\xef\x80 +No match + X\xf7 +No match + XX\xf7\x80 +No match + XXX\xf7\x80\x80 +No match + +/shortutf/utf + XX\xdf\=ph +No match + XX\xef\=ph +No match + XX\xef\x80\=ph +No match + \xf7\=ph +No match + \xf7\x80\=ph +No match + +# End of testinput14 diff --git a/pcre2/testdata/testoutput14-8 b/pcre2/testdata/testoutput14-8 new file mode 100644 index 000000000..1fb0dc148 --- /dev/null +++ b/pcre2/testdata/testoutput14-8 @@ -0,0 +1,61 @@ +# These test special (mostly error) UTF features of DFA matching. They are a +# selection of the more comprehensive tests that are run for non-DFA matching. +# The output is different for the different widths. 
+ +#subject dfa + +/X/utf + XX\x{d800} +Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2 + XX\x{d800}\=offset=3 +Error -36 (bad UTF-8 offset) + XX\x{d800}\=no_utf_check + 0: X + XX\x{da00} +Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2 + XX\x{da00}\=no_utf_check + 0: X + XX\x{dc00} +Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2 + XX\x{dc00}\=no_utf_check + 0: X + XX\x{de00} +Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2 + XX\x{de00}\=no_utf_check + 0: X + XX\x{dfff} +Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2 + XX\x{dfff}\=no_utf_check + 0: X + XX\x{110000} +Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 2 + XX\x{d800}\x{1234} +Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2 + +/badutf/utf + X\xdf +Failed: error -3: UTF-8 error: 1 byte missing at end at offset 1 + XX\xef +Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2 + XXX\xef\x80 +Failed: error -3: UTF-8 error: 1 byte missing at end at offset 3 + X\xf7 +Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 1 + XX\xf7\x80 +Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2 + XXX\xf7\x80\x80 +Failed: error -3: UTF-8 error: 1 byte missing at end at offset 3 + +/shortutf/utf + XX\xdf\=ph +Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2 + XX\xef\=ph +Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2 + XX\xef\x80\=ph +Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2 + \xf7\=ph +Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0 + \xf7\x80\=ph +Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0 + +# End of testinput14 diff --git a/pcre2/testdata/testoutput15 b/pcre2/testdata/testoutput15 index fb945fd46..b6527c953 100644 
--- a/pcre2/testdata/testoutput15 +++ b/pcre2/testdata/testoutput15 @@ -1,17 +1,390 @@ -# This test is run only when JIT support is not available. It checks that an -# attempt to use it has the expected behaviour. It also tests things that -# are different without JIT. +# These are: +# +# (1) Tests of the match-limiting features. The results are different for +# interpretive or JIT matching, so this test should not be run with JIT. The +# same tests are run using JIT in test 17. -/abc/I,jit,jitverify +# (2) Other tests that must not be run with JIT. + +/(a+)*zz/I +Capturing subpattern count = 1 +Starting code units: a z +Last code unit = 'z' +Subject length lower bound = 2 + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits +Minimum match limit = 8 +Minimum recursion limit = 6 + 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazz + 1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + aaaaaaaaaaaaaz\=find_limits +Minimum match limit = 32768 +Minimum recursion limit = 29 +No match + +!((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I +Capturing subpattern count = 1 +May match empty string +Subject length lower bound = 0 + /* this is a C style comment */\=find_limits +Minimum match limit = 120 +Minimum recursion limit = 6 + 0: /* this is a C style comment */ + 1: /* this is a C style comment */ + +/^(?>a)++/ + aa\=find_limits +Minimum match limit = 5 +Minimum recursion limit = 2 + 0: aa + aaaaaaaaa\=find_limits +Minimum match limit = 12 +Minimum recursion limit = 2 + 0: aaaaaaaaa + +/(a)(?1)++/ + aa\=find_limits +Minimum match limit = 7 +Minimum recursion limit = 4 + 0: aa + 1: a + aaaaaaaaa\=find_limits +Minimum match limit = 21 +Minimum recursion limit = 4 + 0: aaaaaaaaa + 1: a + +/a(?:.)*?a/ims + abbbbbbbbbbbbbbbbbbbbba\=find_limits +Minimum match limit = 65 +Minimum recursion limit = 2 + 0: abbbbbbbbbbbbbbbbbbbbba + +/a(?:.(*THEN))*?a/ims + abbbbbbbbbbbbbbbbbbbbba\=find_limits +Minimum match limit = 86 
+Minimum recursion limit = 45 + 0: abbbbbbbbbbbbbbbbbbbbba + +/a(?:.(*THEN:ABC))*?a/ims + abbbbbbbbbbbbbbbbbbbbba\=find_limits +Minimum match limit = 86 +Minimum recursion limit = 45 + 0: abbbbbbbbbbbbbbbbbbbbba + +/^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/ + aabbccddee\=find_limits +Minimum match limit = 7 +Minimum recursion limit = 2 + 0: aabbccddee + +/^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/ + aabbccddee\=find_limits +Minimum match limit = 17 +Minimum recursion limit = 16 + 0: aabbccddee + 1: aa + 2: bb + 3: cc + 4: dd + 5: ee + +/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/ + aabbccddee\=find_limits +Minimum match limit = 13 +Minimum recursion limit = 10 + 0: aabbccddee + 1: aa + 2: cc + 3: ee + +/(*LIMIT_MATCH=12bc)abc/ +Failed: error 160 at offset 17: (*VERB) not recognized or malformed + +/(*LIMIT_MATCH=4294967290)abc/ +Failed: error 160 at offset 24: (*VERB) not recognized or malformed + +/(*LIMIT_RECURSION=4294967280)abc/I Capturing subpattern count = 0 +Recursion limit = 4294967280 First code unit = 'a' Last code unit = 'c' Subject length lower bound = 3 -JIT support is not available in this version of PCRE2 -/a*/I +/(a+)*zz/ + aaaaaaaaaaaaaz +No match + aaaaaaaaaaaaaz\=match_limit=3000 +Failed: error -47: match limit exceeded + +/(a+)*zz/ + aaaaaaaaaaaaaz\=recursion_limit=10 +Failed: error -53: recursion limit exceeded + +/(*LIMIT_MATCH=3000)(a+)*zz/I +Capturing subpattern count = 1 +Match limit = 3000 +Starting code units: a z +Last code unit = 'z' +Subject length lower bound = 2 + aaaaaaaaaaaaaz +Failed: error -47: match limit exceeded + aaaaaaaaaaaaaz\=match_limit=60000 +Failed: error -47: match limit exceeded + +/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I +Capturing subpattern count = 1 +Match limit = 3000 +Starting code units: a z +Last code unit = 'z' +Subject length lower bound = 2 + aaaaaaaaaaaaaz +Failed: error -47: match limit exceeded + +/(*LIMIT_MATCH=60000)(a+)*zz/I +Capturing subpattern count = 1 +Match limit = 60000 +Starting code units: a z 
+Last code unit = 'z' +Subject length lower bound = 2 + aaaaaaaaaaaaaz +No match + aaaaaaaaaaaaaz\=match_limit=3000 +Failed: error -47: match limit exceeded + +/(*LIMIT_RECURSION=10)(a+)*zz/I +Capturing subpattern count = 1 +Recursion limit = 10 +Starting code units: a z +Last code unit = 'z' +Subject length lower bound = 2 + aaaaaaaaaaaaaz +Failed: error -53: recursion limit exceeded + aaaaaaaaaaaaaz\=recursion_limit=1000 +Failed: error -53: recursion limit exceeded + +/(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/I +Capturing subpattern count = 1 +Recursion limit = 1000 +Starting code units: a z +Last code unit = 'z' +Subject length lower bound = 2 + aaaaaaaaaaaaaz +No match + +/(*LIMIT_RECURSION=1000)(a+)*zz/I +Capturing subpattern count = 1 +Recursion limit = 1000 +Starting code units: a z +Last code unit = 'z' +Subject length lower bound = 2 + aaaaaaaaaaaaaz +No match + aaaaaaaaaaaaaz\=recursion_limit=10 +Failed: error -53: recursion limit exceeded + +# These three have infinitely nested recursions. + +/((?2))((?1))/ + abc +Failed: error -52: nested recursion at the same subject position + +/((?(R2)a+|(?1)b))()/ + aaaabcde +Failed: error -52: nested recursion at the same subject position + +/(?(R)a*(?1)|((?R))b)/ + aaaabcde +Failed: error -52: nested recursion at the same subject position + +# The allusedtext modifier does not work with JIT, which does not maintain +# the leftchar/rightchar data. + +/abc(?=xyz)/allusedtext + abcxyzpqr + 0: abcxyz + >>> + abcxyzpqr\=aftertext + 0: abcxyz + >>> + 0+ xyzpqr + +/(?<=pqr)abc(?=xyz)/allusedtext + xyzpqrabcxyzpqr + 0: pqrabcxyz + <<< >>> + xyzpqrabcxyzpqr\=aftertext + 0: pqrabcxyz + <<< >>> + 0+ xyzpqr + +/a\b/ + a.\=allusedtext + 0: a. 
+ > + a\=allusedtext + 0: a + +/abc\Kxyz/ + abcxyz\=allusedtext + 0: abcxyz + <<< + +/abc(?=xyz(*ACCEPT))/ + abcxyz\=allusedtext + 0: abcxyz + >>> + +/abc(?=abcde)(?=ab)/allusedtext + abcabcdefg + 0: abcabcde + >>>>> + +# These tests provoke recursion loops, which give a different error message +# when JIT is used. + +/(?R)/I Capturing subpattern count = 0 May match empty string Subject length lower bound = 0 + abcd +Failed: error -52: nested recursion at the same subject position + +/(a|(?R))/I +Capturing subpattern count = 1 +May match empty string +Subject length lower bound = 0 + abcd + 0: a + 1: a + defg +Failed: error -52: nested recursion at the same subject position + +/(ab|(bc|(de|(?R))))/I +Capturing subpattern count = 3 +May match empty string +Subject length lower bound = 0 + abcd + 0: ab + 1: ab + fghi +Failed: error -52: nested recursion at the same subject position + +/(ab|(bc|(de|(?1))))/I +Capturing subpattern count = 3 +May match empty string +Subject length lower bound = 0 + abcd + 0: ab + 1: ab + fghi +Failed: error -52: nested recursion at the same subject position + +/x(ab|(bc|(de|(?1)x)x)x)/I +Capturing subpattern count = 3 +First code unit = 'x' +Subject length lower bound = 3 + xab123 + 0: xab + 1: ab + xfghi +Failed: error -52: nested recursion at the same subject position + +/(?!\w)(?R)/ + abcd +Failed: error -52: nested recursion at the same subject position + =abc +Failed: error -52: nested recursion at the same subject position + +/(?=\w)(?R)/ + =abc +Failed: error -52: nested recursion at the same subject position + abcd +Failed: error -52: nested recursion at the same subject position + +/(?abc + 1 ^ ^ + 1 ^ ^ + 1 ^^ + 1 ^ ^ + 1 ^^ + 1 ^^ +No match + +/(*NO_AUTO_POSSESS)\w+(?C1)/BI +------------------------------------------------------------------ + Bra + \w+ + Callout 1 26 0 + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Compile options: +Overall options: 
no_auto_possess +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Subject length lower bound = 1 + abc\=callout_fail=1 +--->abc + 1 ^ ^ + 1 ^ ^ + 1 ^^ + 1 ^ ^ + 1 ^^ + 1 ^^ +No match + +# This test breaks the JIT stack limit + +/(|]+){2,2452}/ + (|]+){2,2452} + 0: + 1: # End of testinput15 diff --git a/pcre2/testdata/testoutput16 b/pcre2/testdata/testoutput16 index ae4fb41ea..616567b5f 100644 --- a/pcre2/testdata/testoutput16 +++ b/pcre2/testdata/testoutput16 @@ -1,384 +1,17 @@ -# This test is run only when JIT support is available. It checks JIT complete -# and partial modes, and things that are different with JIT. +# This test is run only when JIT support is not available. It checks that an +# attempt to use it has the expected behaviour. It also tests things that +# are different without JIT. -#pattern jitverify +/abc/I,jit,jitverify +Capturing subpattern count = 0 +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 +JIT support is not available in this version of PCRE2 -# JIT does not support this pattern (callout at start of condition). - -/(?(?C1)(?=a)a)/I +/a*/I Capturing subpattern count = 0 May match empty string Subject length lower bound = 0 -JIT compilation was not successful - -# The following pattern cannot be compiled by JIT. 
- -/b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*
b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*
b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*
b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*
b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*
b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*
b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*
b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*
b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*
b*b*/I -Capturing subpattern count = 0 -May match empty string -Subject length lower bound = 0 -JIT compilation was not successful - -# Check that an infinite recursion loop is caught. - -/(?(R)a*(?1)|((?R))b)/ - aaaabcde -Failed: error -46: JIT stack limit reached - -/abcd/I -Capturing subpattern count = 0 -First code unit = 'a' -Last code unit = 'd' -Subject length lower bound = 4 -JIT compilation was successful - abcd - 0: abcd (JIT) - xyz -No match (JIT) - -/(*NO_JIT)abcd/I -Capturing subpattern count = 0 -First code unit = 'a' -Last code unit = 'd' -Subject length lower bound = 4 -JIT compilation was not successful - abcd - 0: abcd - xyz -No match - -/abcd/ - abcd - 0: abcd (JIT) - ab\=ps -Partial match: ab (JIT) - ab\=ph -Partial match: ab (JIT) - xyz -No match (JIT) - -/abcd/jitfast - abcd - 0: abcd (JIT) - ab\=ps -Partial match: ab (JIT) - ab\=ph -Partial match: ab (JIT) - xyz -No match (JIT) - -/abcd/jit=1 - abcd - 0: abcd (JIT) - ab\=ps -Partial match: ab - ab\=ph -Partial match: ab - xyz -No match (JIT) - xyz\=ps -No match - -/abcd/jit=1,jitfast - abcd - 0: abcd (JIT) - ab\=ps -Failed: error -45: bad JIT option - ab\=ph -Failed: error -45: bad JIT option - xyz -No match (JIT) - xyz\=ps -Failed: error -45: bad JIT option - -/abcd/jit=2 - abcd - 0: abcd - ab\=ps -Partial match: ab (JIT) - ab\=ph -Partial match: ab - xyz -No match - -/abcd/jit=2,jitfast - abcd -Failed: error -45: bad JIT option - ab\=ps -Partial match: ab (JIT) - ab\=ph -Failed: error -45: bad JIT option - xyz -Failed: error -45: bad JIT option - -/abcd/jit=3 - abcd - 0: abcd (JIT) - ab\=ps -Partial match: ab (JIT) - ab\=ph -Partial match: ab - xyz -No match (JIT) - -/abcd/jit=4 - abcd - 0: abcd - ab\=ps -Partial match: ab - ab\=ph -Partial match: ab (JIT) - xyz -No match - -/abcd/jit=5 - abcd - 0: abcd (JIT) - ab\=ps -Partial match: ab - ab\=ph -Partial match: ab (JIT) - xyz -No match (JIT) - -/abcd/jit=6 - abcd - 0: abcd - ab\=ps -Partial match: ab (JIT) - ab\=ph -Partial match: ab (JIT) 
- xyz -No match - -/abcd/jit=7 - abcd - 0: abcd (JIT) - ab\=ps -Partial match: ab (JIT) - ab\=ph -Partial match: ab (JIT) - xyz -No match (JIT) - -/abcd/I,jit=2 -Capturing subpattern count = 0 -First code unit = 'a' -Last code unit = 'd' -Subject length lower bound = 4 -JIT compilation was successful - -/(*NO_START_OPT)a(*:m)b/mark - a -No match, mark = m (JIT) - -/^12345678abcd/m - 12345678abcd - 0: 12345678abcd (JIT) - -# Limits tests that give different output with JIT. - -/(a+)*zz/I -Capturing subpattern count = 1 -Starting code units: a z -Last code unit = 'z' -Subject length lower bound = 2 -JIT compilation was successful - aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits -Minimum match limit = 3 - 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazz (JIT) - 1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa - aaaaaaaaaaaaaz\=find_limits -Minimum match limit = 16384 -No match (JIT) - -!((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I -Capturing subpattern count = 1 -May match empty string -Subject length lower bound = 0 -JIT compilation was successful - /* this is a C style comment */\=find_limits -Minimum match limit = 2 - 0: /* this is a C style comment */ (JIT) - 1: /* this is a C style comment */ - -/^(?>a)++/ - aa\=find_limits -Minimum match limit = 2 - 0: aa (JIT) - aaaaaaaaa\=find_limits -Minimum match limit = 2 - 0: aaaaaaaaa (JIT) - -/(a)(?1)++/ - aa\=find_limits -Minimum match limit = 2 - 0: aa (JIT) - 1: a - aaaaaaaaa\=find_limits -Minimum match limit = 2 - 0: aaaaaaaaa (JIT) - 1: a - -/a(?:.)*?a/ims - abbbbbbbbbbbbbbbbbbbbba\=find_limits -Minimum match limit = 1 - 0: abbbbbbbbbbbbbbbbbbbbba (JIT) - -/a(?:.(*THEN))*?a/ims - abbbbbbbbbbbbbbbbbbbbba\=find_limits -Minimum match limit = 1 - 0: abbbbbbbbbbbbbbbbbbbbba (JIT) - -/a(?:.(*THEN:ABC))*?a/ims - abbbbbbbbbbbbbbbbbbbbba\=find_limits -Minimum match limit = 1 - 0: abbbbbbbbbbbbbbbbbbbbba (JIT) - -/^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/ - 
aabbccddee\=find_limits -Minimum match limit = 6 - 0: aabbccddee (JIT) - -/^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/ - aabbccddee\=find_limits -Minimum match limit = 6 - 0: aabbccddee (JIT) - 1: aa - 2: bb - 3: cc - 4: dd - 5: ee - -/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/ - aabbccddee\=find_limits -Minimum match limit = 6 - 0: aabbccddee (JIT) - 1: aa - 2: cc - 3: ee - -/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/jitfast - aabbccddee\=find_limits -Minimum match limit = 6 - 0: aabbccddee (JIT) - 1: aa - 2: cc - 3: ee - aabbccddee\=jitstack=1 - 0: aabbccddee (JIT) - 1: aa - 2: cc - 3: ee - -/(a+)*zz/ - aaaaaaaaaaaaaz -No match (JIT) - aaaaaaaaaaaaaz\=match_limit=3000 -Failed: error -47: match limit exceeded - -/(*LIMIT_MATCH=3000)(a+)*zz/I -Capturing subpattern count = 1 -Match limit = 3000 -Starting code units: a z -Last code unit = 'z' -Subject length lower bound = 2 -JIT compilation was successful - aaaaaaaaaaaaaz -Failed: error -47: match limit exceeded - aaaaaaaaaaaaaz\=match_limit=60000 -Failed: error -47: match limit exceeded - -/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I -Capturing subpattern count = 1 -Match limit = 3000 -Starting code units: a z -Last code unit = 'z' -Subject length lower bound = 2 -JIT compilation was successful - aaaaaaaaaaaaaz -Failed: error -47: match limit exceeded - -/(*LIMIT_MATCH=60000)(a+)*zz/I -Capturing subpattern count = 1 -Match limit = 60000 -Starting code units: a z -Last code unit = 'z' -Subject length lower bound = 2 -JIT compilation was successful - aaaaaaaaaaaaaz -No match (JIT) - aaaaaaaaaaaaaz\=match_limit=3000 -Failed: error -47: match limit exceeded - -# These three have infinitely nested recursions. 
- -/((?2))((?1))/ - abc -Failed: error -46: JIT stack limit reached - -/((?(R2)a+|(?1)b))/ - aaaabcde -Failed: error -46: JIT stack limit reached - -/(?(R)a*(?1)|((?R))b)/ - aaaabcde -Failed: error -46: JIT stack limit reached - -# Invalid options disable JIT when called via pcre2_match(), causing the -# match to happen via the interpreter, but for fast JIT invalid options are -# ignored, so an unanchored match happens. - -/abcd/ - abcd\=anchored - 0: abcd - fail abcd\=anchored -No match - -/abcd/jitfast - abcd\=anchored - 0: abcd (JIT) - succeed abcd\=anchored - 0: abcd (JIT) - -# Push/pop does not lose the JIT information, though jitverify applies only to -# compilation, but serializing (save/load) discards JIT data completely. - -/^abc\Kdef/info,push -** Applies only to compile when pattern is stacked with 'push': jitverify -Capturing subpattern count = 0 -Compile options: -Overall options: anchored -Subject length lower bound = 6 -JIT compilation was successful -#pop jitverify - abcdef - 0: def (JIT) - -/^abc\Kdef/info,push -** Applies only to compile when pattern is stacked with 'push': jitverify -Capturing subpattern count = 0 -Compile options: -Overall options: anchored -Subject length lower bound = 6 -JIT compilation was successful -#save testsaved1 -#load testsaved1 -#pop jitverify - abcdef - 0: def - -#load testsaved1 -#pop jit,jitverify - abcdef - 0: def (JIT) - -# Test pattern compilation - -/(?:a|b|c|d|e)(?R)/jit=1 - -/(?:a|b|c|d|e)(?R)(?R)/jit=1 - -/(a(?:a|b|c|d|e)b){8,16}/jit=1 # End of testinput16 diff --git a/pcre2/testdata/testoutput17 b/pcre2/testdata/testoutput17 index f46b7e733..75dce1082 100644 --- a/pcre2/testdata/testoutput17 +++ b/pcre2/testdata/testoutput17 @@ -1,148 +1,544 @@ -# This set of tests is run only with the 8-bit library. It tests the POSIX -# interface, which is supported only with the 8-bit library. This test should -# not be run with JIT (which is not available for the POSIX interface). 
+# This test is run only when JIT support is available. It checks JIT complete +# and partial modes, and things that are different with JIT. + +#pattern jitverify + +# JIT does not support this pattern (callout at start of condition). + +/(?(?C1)(?=a)a)/I +Capturing subpattern count = 0 +May match empty string +Subject length lower bound = 0 +JIT compilation was not successful (no more memory) + +# The following pattern cannot be compiled by JIT. + +/b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b
*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b
*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b
*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b
*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b
*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b
*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b
*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b
*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b
*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*/I +Capturing subpattern count = 0 +May match empty string +Subject length lower bound = 0 +JIT compilation was not successful (no more memory) + +# Check that an infinite recursion loop is caught. + +/(?(R)a*(?1)|((?R))b)/ + aaaabcde +Failed: error -46: JIT stack limit reached + +/abcd/I +Capturing subpattern count = 0 +First code unit = 'a' +Last code unit = 'd' +Subject length lower bound = 4 +JIT compilation was successful + abcd + 0: abcd (JIT) +\= Expect no match + xyz +No match (JIT) + +/(*NO_JIT)abcd/I +Capturing subpattern count = 0 +First code unit = 'a' +Last code unit = 'd' +Subject length lower bound = 4 +JIT compilation was not successful + abcd + 0: abcd +\= Expect no match + xyz +No match + +/abcd/ + abcd + 0: abcd (JIT) + ab\=ps +Partial match: ab (JIT) + ab\=ph +Partial match: ab (JIT) +\= Expect no match + xyz +No match (JIT) + +/abcd/jitfast + abcd + 0: abcd (JIT) + ab\=ps +Partial match: ab (JIT) + ab\=ph +Partial match: ab (JIT) +\= Expect no match + xyz +No match (JIT) + +/abcd/jit=1 + abcd + 0: abcd (JIT) + ab\=ps +Partial match: ab + ab\=ph +Partial match: ab +\= Expect no match + xyz +No match (JIT) + xyz\=ps +No match + +/abcd/jit=1,jitfast + abcd + 0: abcd (JIT) + ab\=ps +Failed: error -45: bad JIT option + ab\=ph +Failed: error -45: bad JIT option + xyz\=ps +Failed: error -45: bad JIT option +\= Expect no match + xyz +No match (JIT) + +/abcd/jit=2 + abcd + 0: abcd + ab\=ps +Partial match: ab (JIT) + ab\=ph +Partial match: ab +\= Expect no match + xyz +No match + +/abcd/jit=2,jitfast + abcd 
+Failed: error -45: bad JIT option + ab\=ps +Partial match: ab (JIT) + ab\=ph +Failed: error -45: bad JIT option + xyz +Failed: error -45: bad JIT option + +/abcd/jit=3 + abcd + 0: abcd (JIT) + ab\=ps +Partial match: ab (JIT) + ab\=ph +Partial match: ab +\= Expect no match + xyz +No match (JIT) + +/abcd/jit=4 + abcd + 0: abcd + ab\=ps +Partial match: ab + ab\=ph +Partial match: ab (JIT) +\= Expect no match + xyz +No match + +/abcd/jit=5 + abcd + 0: abcd (JIT) + ab\=ps +Partial match: ab + ab\=ph +Partial match: ab (JIT) +\= Expect no match + xyz +No match (JIT) + +/abcd/jit=6 + abcd + 0: abcd + ab\=ps +Partial match: ab (JIT) + ab\=ph +Partial match: ab (JIT) +\= Expect no match + xyz +No match + +/abcd/jit=7 + abcd + 0: abcd (JIT) + ab\=ps +Partial match: ab (JIT) + ab\=ph +Partial match: ab (JIT) +\= Expect no match + xyz +No match (JIT) + +/abcd/I,jit=2 +Capturing subpattern count = 0 +First code unit = 'a' +Last code unit = 'd' +Subject length lower bound = 4 +JIT compilation was successful + +/(*NO_START_OPT)a(*:m)b/mark +\= Expect no match + a +No match, mark = m (JIT) + +/^12345678abcd/m + 12345678abcd + 0: 12345678abcd (JIT) -#forbid_utf -#pattern posix +# Limits tests that give different output with JIT. 
-# Test invalid options +/(a+)*zz/I +Capturing subpattern count = 1 +Starting code units: a z +Last code unit = 'z' +Subject length lower bound = 2 +JIT compilation was successful + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits +Minimum match limit = 2 + 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazz (JIT) + 1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +\= Expect no match + aaaaaaaaaaaaaz\=find_limits +Minimum match limit = 16383 +No match (JIT) -/abc/auto_callout -** Ignored with POSIX interface: auto_callout +!((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I +Capturing subpattern count = 1 +May match empty string +Subject length lower bound = 0 +JIT compilation was successful + /* this is a C style comment */\=find_limits +Minimum match limit = 29 + 0: /* this is a C style comment */ (JIT) + 1: /* this is a C style comment */ -/abc/ - abc\=find_limits -** Ignored with POSIX interface: find_limits - 0: abc +/^(?>a)++/ + aa\=find_limits +Minimum match limit = 1 + 0: aa (JIT) + aaaaaaaaa\=find_limits +Minimum match limit = 1 + 0: aaaaaaaaa (JIT) + +/(a)(?1)++/ + aa\=find_limits +Minimum match limit = 1 + 0: aa (JIT) + 1: a + aaaaaaaaa\=find_limits +Minimum match limit = 1 + 0: aaaaaaaaa (JIT) + 1: a -/abc/ - abc\=partial_hard -** Ignored with POSIX interface: partial_hard - 0: abc +/a(?:.)*?a/ims + abbbbbbbbbbbbbbbbbbbbba\=find_limits +Minimum match limit = 22 + 0: abbbbbbbbbbbbbbbbbbbbba (JIT) + +/a(?:.(*THEN))*?a/ims + abbbbbbbbbbbbbbbbbbbbba\=find_limits +Minimum match limit = 22 + 0: abbbbbbbbbbbbbbbbbbbbba (JIT) -# Real tests +/a(?:.(*THEN:ABC))*?a/ims + abbbbbbbbbbbbbbbbbbbbba\=find_limits +Minimum match limit = 22 + 0: abbbbbbbbbbbbbbbbbbbbba (JIT) -/abc/ +/^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/ + aabbccddee\=find_limits +Minimum match limit = 5 + 0: aabbccddee (JIT) + +/^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/ + aabbccddee\=find_limits +Minimum match limit = 5 + 0: aabbccddee (JIT) + 1: aa 
+ 2: bb + 3: cc + 4: dd + 5: ee + +/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/ + aabbccddee\=find_limits +Minimum match limit = 5 + 0: aabbccddee (JIT) + 1: aa + 2: cc + 3: ee + +/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/jitfast + aabbccddee\=find_limits +Minimum match limit = 5 + 0: aabbccddee (JIT) + 1: aa + 2: cc + 3: ee + aabbccddee\=jitstack=1 + 0: aabbccddee (JIT) + 1: aa + 2: cc + 3: ee + +/(a+)*zz/ +\= Expect no match + aaaaaaaaaaaaaz +No match (JIT) +\= Expect limit exceeded + aaaaaaaaaaaaaz\=match_limit=3000 +Failed: error -47: match limit exceeded + +/(*LIMIT_MATCH=3000)(a+)*zz/I +Capturing subpattern count = 1 +Match limit = 3000 +Starting code units: a z +Last code unit = 'z' +Subject length lower bound = 2 +JIT compilation was successful + aaaaaaaaaaaaaz +Failed: error -47: match limit exceeded + aaaaaaaaaaaaaz\=match_limit=60000 +Failed: error -47: match limit exceeded + +/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I +Capturing subpattern count = 1 +Match limit = 3000 +Starting code units: a z +Last code unit = 'z' +Subject length lower bound = 2 +JIT compilation was successful + aaaaaaaaaaaaaz +Failed: error -47: match limit exceeded + +/(*LIMIT_MATCH=60000)(a+)*zz/I +Capturing subpattern count = 1 +Match limit = 60000 +Starting code units: a z +Last code unit = 'z' +Subject length lower bound = 2 +JIT compilation was successful +\= Expect no match + aaaaaaaaaaaaaz +No match (JIT) +\= Expect limit exceeded + aaaaaaaaaaaaaz\=match_limit=3000 +Failed: error -47: match limit exceeded + +# These three have infinitely nested recursions. 
+ +/((?2))((?1))/ abc - 0: abc - *** Failers -No match: POSIX code 17: match failed +Failed: error -46: JIT stack limit reached -/^abc|def/ +/((?(R2)a+|(?1)b))()/ + aaaabcde +Failed: error -46: JIT stack limit reached + +/(?(R)a*(?1)|((?R))b)/ + aaaabcde +Failed: error -46: JIT stack limit reached + +# Invalid options disable JIT when called via pcre2_match(), causing the +# match to happen via the interpreter, but for fast JIT invalid options are +# ignored, so an unanchored match happens. + +/abcd/ + abcd\=anchored + 0: abcd +\= Expect no match + fail abcd\=anchored +No match + +/abcd/jitfast + abcd\=anchored + 0: abcd (JIT) + succeed abcd\=anchored + 0: abcd (JIT) + +# Push/pop does not lose the JIT information, though jitverify applies only to +# compilation, but serializing (save/load) discards JIT data completely. + +/^abc\Kdef/info,push +** Applies only to compile when pattern is stacked with 'push': jitverify +Capturing subpattern count = 0 +Compile options: +Overall options: anchored +Subject length lower bound = 6 +JIT compilation was successful +#pop jitverify + abcdef + 0: def (JIT) + +/^abc\Kdef/info,push +** Applies only to compile when pattern is stacked with 'push': jitverify +Capturing subpattern count = 0 +Compile options: +Overall options: anchored +Subject length lower bound = 6 +JIT compilation was successful +#save testsaved1 +#load testsaved1 +#pop jitverify abcdef - 0: abc - abcdef\=notbol 0: def + +#load testsaved1 +#pop jit,jitverify + abcdef + 0: def (JIT) + +/abcd/pushcopy,jitverify +** Applies only to compile when pattern is stacked with 'push': jitverify + abcd + 0: abcd (JIT) + +#pop jitverify + abcd + 0: abcd + +# Test pattern compilation -/.*((abc)$|(def))/ - defabc - 0: defabc - 1: abc - 2: abc - defabc\=noteol - 0: def - 1: def - 3: def +/(?:a|b|c|d|e)(?R)/jit=1 -/the quick brown fox/ - the quick brown fox - 0: the quick brown fox - *** Failers -No match: POSIX code 17: match failed - The Quick Brown Fox -No match: POSIX code 17: 
match failed +/(?:a|b|c|d|e)(?R)(?R)/jit=1 -/the quick brown fox/i - the quick brown fox - 0: the quick brown fox - The Quick Brown Fox - 0: The Quick Brown Fox +/(a(?:a|b|c|d|e)b){8,16}/jit=1 -/abc.def/ - *** Failers -No match: POSIX code 17: match failed - abc\ndef -No match: POSIX code 17: match failed +/(?:|a|){100}x/jit=1 -/abc$/ +# These tests provoke recursion loops, which give a different error message +# when JIT is used. + +/(?R)/I +Capturing subpattern count = 0 +May match empty string +Subject length lower bound = 0 +JIT compilation was successful + abcd +Failed: error -46: JIT stack limit reached + +/(a|(?R))/I +Capturing subpattern count = 1 +May match empty string +Subject length lower bound = 0 +JIT compilation was successful + abcd + 0: a (JIT) + 1: a + defg +Failed: error -46: JIT stack limit reached + +/(ab|(bc|(de|(?R))))/I +Capturing subpattern count = 3 +May match empty string +Subject length lower bound = 0 +JIT compilation was successful + abcd + 0: ab (JIT) + 1: ab + fghi +Failed: error -46: JIT stack limit reached + +/(ab|(bc|(de|(?1))))/I +Capturing subpattern count = 3 +May match empty string +Subject length lower bound = 0 +JIT compilation was successful + abcd + 0: ab (JIT) + 1: ab + fghi +Failed: error -46: JIT stack limit reached + +/x(ab|(bc|(de|(?1)x)x)x)/I +Capturing subpattern count = 3 +First code unit = 'x' +Subject length lower bound = 3 +JIT compilation was successful + xab123 + 0: xab (JIT) + 1: ab + xfghi +Failed: error -46: JIT stack limit reached + +/(?!\w)(?R)/ + abcd +Failed: error -46: JIT stack limit reached + =abc +Failed: error -46: JIT stack limit reached + +/(?=\w)(?R)/ + =abc +Failed: error -46: JIT stack limit reached + abcd +Failed: error -46: JIT stack limit reached + +/(?b)c/no_auto_capture - abc -Matched with REG_NOSUB - -/a?|b?/ - abc - 0: a - ** Failers - 0: - ddd\=notempty -No match: POSIX code 17: match failed - -/\w+A/ - CDAAAAB - 0: CDAAAA - -/\w+A/ungreedy - CDAAAAB - 0: CDA - -/\Biss\B/I,aftertext 
-** Ignored with POSIX interface: info - Mississippi - 0: iss - 0+ issippi - -/abc/\ -Failed: POSIX code 9: bad escape sequence at offset 4 - -"(?(?C)" -Failed: POSIX code 3: pattern error at offset 2 - -# End of testdata/testinput16 +# End of testinput17 diff --git a/pcre2/testdata/testoutput18 b/pcre2/testdata/testoutput18 index 954b4b58f..fd6fac382 100644 --- a/pcre2/testdata/testoutput18 +++ b/pcre2/testdata/testoutput18 @@ -1,20 +1,171 @@ # This set of tests is run only with the 8-bit library. It tests the POSIX -# interface with UTF/UCP support, which is supported only with the 8-bit -# library. This test should not be run with JIT (which is not available for the -# POSIX interface). +# interface, which is supported only with the 8-bit library. This test should +# not be run with JIT (which is not available for the POSIX interface). +#forbid_utf #pattern posix -/a\x{1234}b/utf - a\x{1234}b - 0: a\x{1234}b +# Test invalid options -/\w/ - +++\x{c2} +/abc/auto_callout +** Ignored with POSIX interface: auto_callout + +/abc/ + abc\=find_limits +** Ignored with POSIX interface: find_limits + 0: abc + +/abc/ + abc\=partial_hard +** Ignored with POSIX interface: partial_hard + 0: abc + +# Real tests + +/abc/ + abc + 0: abc + +/^abc|def/ + abcdef + 0: abc + abcdef\=notbol + 0: def + +/.*((abc)$|(def))/ + defabc + 0: defabc + 1: abc + 2: abc + defabc\=noteol + 0: def + 1: def + 3: def + +/the quick brown fox/ + the quick brown fox + 0: the quick brown fox +\= Expect no match + The Quick Brown Fox No match: POSIX code 17: match failed -/\w/ucp - +++\x{c2} - 0: \xc2 - -# End of testdata/testinput17 +/the quick brown fox/i + the quick brown fox + 0: the quick brown fox + The Quick Brown Fox + 0: The Quick Brown Fox + +/(*LF)abc.def/ +\= Expect no match + abc\ndef +No match: POSIX code 17: match failed + +/(*LF)abc$/ + abc + 0: abc + abc\n + 0: abc + +/(abc)\2/ +Failed: POSIX code 15: bad back reference at offset 6 + +/(abc\1)/ +\= Expect no match + abc +No match: POSIX 
code 17: match failed + +/a*(b+)(z)(z)/ + aaaabbbbzzzz + 0: aaaabbbbzz + 1: bbbb + 2: z + 3: z + aaaabbbbzzzz\=ovector=0 +Matched without capture + aaaabbbbzzzz\=ovector=1 + 0: aaaabbbbzz + aaaabbbbzzzz\=ovector=2 + 0: aaaabbbbzz + 1: bbbb + +/(*ANY)ab.cd/ + ab-cd + 0: ab-cd + ab=cd + 0: ab=cd +\= Expect no match + ab\ncd +No match: POSIX code 17: match failed + +/ab.cd/s + ab-cd + 0: ab-cd + ab=cd + 0: ab=cd + ab\ncd + 0: ab\x0acd + +/a(b)c/posix_nosub + abc +Matched with REG_NOSUB + +/a(?Pb)c/posix_nosub + abc +Matched with REG_NOSUB + +/(a)\1/posix_nosub + zaay +Matched with REG_NOSUB + +/a?|b?/ + abc + 0: a +\= Expect no match + ddd\=notempty +No match: POSIX code 17: match failed + +/\w+A/ + CDAAAAB + 0: CDAAAA + +/\w+A/ungreedy + CDAAAAB + 0: CDA + +/\Biss\B/I,aftertext +** Ignored with POSIX interface: info + Mississippi + 0: iss + 0+ issippi + +/abc/\ +Failed: POSIX code 9: bad escape sequence at offset 4 + +"(?(?C)" +Failed: POSIX code 11: unbalanced () at offset 6 + +"(?(?C))" +Failed: POSIX code 3: pattern error at offset 6 + +/abcd/substitute_extended +** Ignored with POSIX interface: substitute_extended + +/\[A]{1000000}**/expand,regerror_buffsize=31 +Failed: POSIX code 4: ? * + invalid at offset 100000 +** regerror() message truncated + +/\[A]{1000000}**/expand,regerror_buffsize=32 +Failed: POSIX code 4: ? * + invalid at offset 1000001 + +//posix_nosub + \=offset=70000 +** Ignored with POSIX interface: offset +Matched with REG_NOSUB + +/(?=(a\K))/ + a +Start of matched string is beyond its end - displaying from end to start. + 0: a + 1: a + +# End of testdata/testinput18 diff --git a/pcre2/testdata/testoutput19 b/pcre2/testdata/testoutput19 index 7f3aa0c28..c4169ca08 100644 --- a/pcre2/testdata/testoutput19 +++ b/pcre2/testdata/testoutput19 @@ -1,100 +1,21 @@ -# This set of tests exercises the serialization/deserialization functions in -# the library. It does not use UTF or JIT. 
- -#forbid_utf - -# Compile several patterns, push them onto the stack, and then write them -# all to a file. - -#pattern push - -/(?(?&NAME_PAT))\s+(?(?&ADDRESS_PAT)) - (?(DEFINE) - (?[a-z]+) - (?\d+) - )/x -/^(?:((.)(?1)\2|)|((.)(?3)\4|.))$/i - -#save testsaved1 - -# Do it again for some more patterns. - -/(*MARK:A)(*SKIP:B)(C|X)/mark -** Ignored when compiled pattern is stacked with 'push': mark -/(?:(?foo)|(?bar))\k/dupnames - -#save testsaved2 -#pattern -push - -# Reload the patterns, then pop them one by one and check them. - -#load testsaved1 -#load testsaved2 - -#pop info -Capturing subpattern count = 2 -Max back reference = 2 -Named capturing subpatterns: - n 1 - n 2 -Options: dupnames -Starting code units: b f -Subject length lower bound = 6 - foofoo - 0: foofoo - 1: foo - barbar - 0: barbar - 1: - 2: bar +# This set of tests is run only with the 8-bit library. It tests the POSIX +# interface with UTF/UCP support, which is supported only with the 8-bit +# library. This test should not be run with JIT (which is not available for the +# POSIX interface). -#pop mark - C - 0: C - 1: C -MK: A - D -No match, mark = A +#pattern posix + +/a\x{1234}b/utf + a\x{1234}b + 0: a\x{1234}b + +/\w/ +\= Expect no match + +++\x{c2} +No match: POSIX code 17: match failed + +/\w/ucp + +++\x{c2} + 0: \xc2 -#pop - AmanaplanacanalPanama - 0: AmanaplanacanalPanama - 1: - 2: - 3: AmanaplanacanalPanama - 4: A - -#pop info -Capturing subpattern count = 4 -Named capturing subpatterns: - ADDR 2 - ADDRESS_PAT 4 - NAME 1 - NAME_PAT 3 -Options: extended -Subject length lower bound = 3 - metcalfe 33 - 0: metcalfe 33 - 1: metcalfe - 2: 33 - -# Check for an error when different tables are used. 
- -/abc/push,tables=1 -/xyz/push,tables=2 -#save testsaved1 -Serialization failed: error -30: patterns do not all use the same character tables - -#pop - xyz - 0: xyz - -#pop - abc - 0: abc - -#pop should give an error -** Can't pop off an empty stack - pqr - -# End of testinput19 +# End of testdata/testinput19 diff --git a/pcre2/testdata/testoutput2 b/pcre2/testdata/testoutput2 index b62846612..ce8c66752 100644 --- a/pcre2/testdata/testoutput2 +++ b/pcre2/testdata/testoutput2 @@ -9,6 +9,7 @@ # test 5. #forbid_utf +#newline_default lf any anycrlf # Test binary zeroes in the pattern @@ -61,8 +62,7 @@ Subject length lower bound = 3 0: abc abc\=anchored 0: abc - *** Failers -No match +\= Expect no match defabc\=anchored No match ABC @@ -77,8 +77,7 @@ Subject length lower bound = 3 0: abc abc\=anchored 0: abc - *** Failers -No match +\= Expect no match defabc No match defabc\=anchored @@ -114,8 +113,7 @@ Overall options: anchored Subject length lower bound = 3 abc 0: abc - *** Failers -No match +\= Expect no match def\nabc No match @@ -227,7 +225,7 @@ Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 a b Subject length lower bound = 1 /(ab\2)/ -Failed: error 115 at offset 6: reference to non-existent subpattern +Failed: error 115 at offset 4: reference to non-existent subpattern /{4,5}abc/ Failed: error 109 at offset 4: quantifier does not follow a repeatable item @@ -319,15 +317,14 @@ Last code unit = 'c' Subject length lower bound = 3 abc 0: abc - *** Failers -No match +\= Expect no match abc\n No match abc\ndef No match /(a)(b)(c)(d)(e)\6/ -Failed: error 115 at offset 17: reference to non-existent subpattern +Failed: error 115 at offset 16: reference to non-existent subpattern /the quick brown fox/I Capturing subpattern count = 0 @@ -345,8 +342,7 @@ Options: anchored Subject length lower bound = 19 the quick brown fox 0: the quick brown fox - *** Failers -No match +\= Expect no match this is a line with the quick brown fox No match @@ -428,8 +424,6 @@ Subject 
length lower bound = 2 /(?U)<.*>/I Capturing subpattern count = 0 -Compile options: -Overall options: ungreedy First code unit = '<' Last code unit = '>' Subject length lower bound = 2 @@ -456,8 +450,6 @@ Subject length lower bound = 3 /(?U)={3,}?/I Capturing subpattern count = 0 -Compile options: -Overall options: ungreedy First code unit = '=' Last code unit = '=' Subject length lower bound = 3 @@ -474,26 +466,22 @@ Subject length lower bound = 3 0: foo catfoo 0: foo - *** Failers -No match +\= Expect no match the barfoo No match and cattlefoo No match -/(?<=a+)b/ -Failed: error 125 at offset 6: lookbehind assertion is not fixed length +/abc(?<=a+)b/ +Failed: error 125 at offset 3: lookbehind assertion is not fixed length -/(?<=aaa|b{0,3})b/ -Failed: error 125 at offset 14: lookbehind assertion is not fixed length +/12345(?<=aaa|b{0,3})b/ +Failed: error 125 at offset 5: lookbehind assertion is not fixed length /(? -Overall options: caseless First code unit = 'a' (caseless) Last code unit = 'c' (caseless) Subject length lower bound = 3 @@ -506,7 +494,7 @@ Subject length lower bound = 1 /(?i)^1234/I Capturing subpattern count = 0 Compile options: -Overall options: anchored caseless +Overall options: anchored Subject length lower bound = 4 /(^b|(?i)^d)/I @@ -519,7 +507,7 @@ Subject length lower bound = 1 Capturing subpattern count = 0 May match empty string Compile options: -Overall options: anchored dotall +Overall options: anchored Subject length lower bound = 0 /[abcd]/I @@ -529,15 +517,11 @@ Subject length lower bound = 1 /(?i)[abcd]/I Capturing subpattern count = 0 -Compile options: -Overall options: caseless Starting code units: A B C D a b c d Subject length lower bound = 1 /(?m)[xy]|(b|c)/I Capturing subpattern count = 1 -Compile options: -Overall options: multiline Starting code units: b c x y Subject length lower bound = 1 @@ -549,31 +533,30 @@ Subject length lower bound = 1 /(?i)(^a|^b)/Im Capturing subpattern count = 1 -Compile options: multiline 
-Overall options: caseless multiline +Options: multiline First code unit at start or follows newline Subject length lower bound = 1 /(a)(?(1)a|b|c)/ -Failed: error 127 at offset 13: conditional group contains more than two branches +Failed: error 127 at offset 3: conditional group contains more than two branches /(?(?=a)a|b|c)/ -Failed: error 127 at offset 12: conditional group contains more than two branches +Failed: error 127 at offset 0: conditional group contains more than two branches /(?(1a)/ -Failed: error 126 at offset 4: malformed number or name after (?( +Failed: error 124 at offset 4: missing closing parenthesis for condition /(?(1a))/ -Failed: error 126 at offset 4: malformed number or name after (?( +Failed: error 124 at offset 4: missing closing parenthesis for condition /(?(?i))/ -Failed: error 128 at offset 3: assertion expected after (?( or (?(?C) +Failed: error 128 at offset 2: assertion expected after (?( or (?(?C) /(?(abc))/ -Failed: error 115 at offset 7: reference to non-existent subpattern +Failed: error 115 at offset 3: reference to non-existent subpattern /(?(? Overall options: anchored Subject length lower bound = 4 +\= Expect no match aaaa No match aaaaaa No match -/Perl does not fail these two for the final subjects. Neither did PCRE until/ -/release 8.01. The problem is in backtracking into a subpattern that contains/ -No match -/a recursive reference to itself. PCRE has now made these into atomic patterns./ -No match +# Perl does not fail these two for the final subjects. Neither did PCRE until +# release 8.01. The problem is in backtracking into a subpattern that contains +# a recursive reference to itself. PCRE has now made these into atomic patterns. 
/^(xa|=?\1a){2}$/ xa=xaa 0: xa=xaa 1: =xaa - ** Failers -No match +\= Expect no match xa=xaaa No match @@ -862,16 +831,11 @@ No match xa=xaa 0: xa=xaa 1: =xaa - ** Failers -No match +\= Expect no match xa=xaaa No match -/These are syntax tests from Perl 5.005/I -Capturing subpattern count = 0 -First code unit = 'T' -Last code unit = '5' -Subject length lower bound = 38 +# These are syntax tests from Perl 5.005 /a[b-a]/ Failed: error 108 at offset 4: range out of order in character class @@ -901,13 +865,13 @@ Failed: error 109 at offset 2: quantifier does not follow a repeatable item Failed: error 122 at offset 0: unmatched closing parenthesis /\1/ -Failed: error 115 at offset 2: reference to non-existent subpattern +Failed: error 115 at offset 1: reference to non-existent subpattern /\2/ -Failed: error 115 at offset 2: reference to non-existent subpattern +Failed: error 115 at offset 1: reference to non-existent subpattern /(a)|\2/ -Failed: error 115 at offset 6: reference to non-existent subpattern +Failed: error 115 at offset 5: reference to non-existent subpattern /a[b-a]/Ii Failed: error 108 at offset 4: range out of order in character class @@ -940,7 +904,7 @@ Failed: error 122 at offset 0: unmatched closing parenthesis Failed: error 114 at offset 4: missing closing parenthesis /(?<%)b/ -Failed: error 124 at offset 3: unrecognized character after (?< +Failed: error 162 at offset 3: subpattern name expected /a(?{)b/ Failed: error 111 at offset 3: unrecognized character after (? or (?- @@ -958,13 +922,13 @@ Failed: error 111 at offset 3: unrecognized character after (? or (?- Failed: error 111 at offset 3: unrecognized character after (? 
or (?- /(?(1?)a|b)/ -Failed: error 126 at offset 4: malformed number or name after (?( +Failed: error 124 at offset 4: missing closing parenthesis for condition /[a[:xyz:/ Failed: error 106 at offset 8: missing terminating ] for character class /(?<=x+)y/ -Failed: error 125 at offset 6: lookbehind assertion is not fixed length +Failed: error 125 at offset 0: lookbehind assertion is not fixed length /a{37,17}/ Failed: error 104 at offset 7: numbers out of order in {} quantifier @@ -1170,7 +1134,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capturing subpattern count = 1 Compile options: -Overall options: anchored dotall +Overall options: anchored Subject length lower bound = 1 /(?s:.*X|^B)/IB @@ -1233,8 +1197,7 @@ Subject length lower bound = 3 0+ issippi 0: iss 0+ ippi - *** Failers -No match +\= Expect no match Mississippi\=anchored No match @@ -1341,6 +1304,17 @@ Subject length lower bound = 3 0: ab\x0a 0+ cd +/^/gm,newline=any + a\rb\nc\r\nxyz\=aftertext + 0: + 0+ a\x0db\x0ac\x0d\x0axyz + 0: + 0+ b\x0ac\x0d\x0axyz + 0: + 0+ c\x0d\x0axyz + 0: + 0+ xyz + /abc/I Capturing subpattern count = 0 First code unit = 'a' @@ -1584,8 +1558,7 @@ Subject length lower bound = 0 0: ab \ 0: - *** Failers - 0: +\= Expect no match \=notempty No match @@ -1599,8 +1572,7 @@ Subject length lower bound = 0 0: ab-c\=notempty 0: - - *** Failers - 0: +\= Expect no match abc\=notempty No match @@ -1638,8 +1610,7 @@ Subject length lower bound = 2 0: () 12(abcde(fsh)xyz(foo(bar))lmno)89 0: (abcde(fsh)xyz(foo(bar))lmno) - *** Failers -No match +\= Expect no match abcd No match abcd) @@ -1676,8 +1647,7 @@ Subject length lower bound = 3 0: (c) ((ab)) 0: ((ab)) - *** Failers -No match +\= Expect no match () No match @@ -2134,9 +2104,7 @@ Subject length lower bound = 2 aB 0: aB 1: a - *** Failers - 0: ai - 1: a +\= Expect no match Ab No match AB @@ -2146,7 +2114,7 @@ No match Failed: error 108 at offset 9: range out of order in character 
class /^(?(0)f|b)oo/I -Failed: error 135 at offset 6: invalid condition (?(0) +Failed: error 115 at offset 5: reference to non-existent subpattern # This one's here because of the large output vector needed @@ -2154,7 +2122,7 @@ Failed: error 135 at offset 6: invalid condition (?(0) Capturing subpattern count = 271 Max back reference = 270 Starting code units: 0 1 2 3 4 5 6 7 8 9 -Subject length lower bound = 272 +Subject length lower bound = 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 ABC ABC\=ovector=300 0: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 
167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 ABC ABC 1: 1 @@ -2659,8 +2627,7 @@ Subject length lower bound = 2 0: ab aB 0: aB - *** Failers -No match +\= Expect no match AB No match @@ -2684,8 +2651,7 @@ Subject length lower bound = 2 aB 0: aB 1: aB - *** Failers -No match +\= Expect no match AB No match @@ -2697,8 +2663,7 @@ No match End ------------------------------------------------------------------ Capturing subpattern count = 0 -Compile options: extended -Overall options: caseless extended +Options: extended First code unit = 'a' (caseless) Last code unit = 'c' (caseless) Subject length lower bound = 3 @@ -2712,8 +2677,7 @@ Subject length lower bound = 3 End ------------------------------------------------------------------ Capturing subpattern count = 0 -Compile options: extended -Overall options: caseless extended +Options: extended First code unit = 'a' (caseless) Last code unit = 'c' (caseless) Subject length lower bound = 3 @@ -2831,8 +2795,7 @@ Capturing subpattern count = 0 Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z Subject length lower bound = 1 - *** Failers - 0: F +\= Expect no match xxxxx No match @@ -2917,8 +2880,7 @@ Subject length lower bound = 0 now is the time for all good men to come to the aid of the party 0: now is the time for all good men to come to the aid of the party 1: party - *** Failers -No match +\= Expect no match this is not a line with only words and spaces! 
No match @@ -2930,8 +2892,7 @@ Subject length lower bound = 2 0: 12345a 1: 12345 2: a - *** Failers -No match +\= Expect no match 12345+ No match @@ -2993,8 +2954,7 @@ Subject length lower bound = 3 (abc(def)xyz) 0: (abc(def)xyz) 1: xyz - *** Failers -No match +\= Expect no match ((()aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa No match @@ -3062,8 +3022,6 @@ Subject length lower bound = 3 End ------------------------------------------------------------------ Capturing subpattern count = 0 -Compile options: -Overall options: ungreedy First code unit = 'x' Last code unit = 'b' Subject length lower bound = 3 @@ -3140,19 +3098,19 @@ Failed: error 113 at offset 0: POSIX collating elements are not supported Failed: error 112 at offset 0: POSIX named classes are supported only within a class /\l/I -Failed: error 137 at offset 1: PCRE does not support \L, \l, \N{name}, \U, or \u +Failed: error 137 at offset 2: PCRE does not support \L, \l, \N{name}, \U, or \u /\L/I -Failed: error 137 at offset 1: PCRE does not support \L, \l, \N{name}, \U, or \u +Failed: error 137 at offset 2: PCRE does not support \L, \l, \N{name}, \U, or \u /\N{name}/I -Failed: error 137 at offset 1: PCRE does not support \L, \l, \N{name}, \U, or \u +Failed: error 137 at offset 2: PCRE does not support \L, \l, \N{name}, \U, or \u /\u/I -Failed: error 137 at offset 1: PCRE does not support \L, \l, \N{name}, \U, or \u +Failed: error 137 at offset 2: PCRE does not support \L, \l, \N{name}, \U, or \u /\U/I -Failed: error 137 at offset 1: PCRE does not support \L, \l, \N{name}, \U, or \u +Failed: error 137 at offset 2: PCRE does not support \L, \l, \N{name}, \U, or \u /a{1,3}b/ungreedy ab @@ -3218,8 +3176,7 @@ Subject length lower bound = 2 0: def> 0: <> - *** Failers -No match +\= Expect no match -Overall options: caseless Starting code units: A B a b Subject length lower bound = 1 @@ -3472,8 +3426,7 @@ Subject length lower bound = 6 --->1234abcdef 0 ^ ^ d 0: abcdef - *** Failers -No match +\= Expect no 
match abcxyz No match abcxyzf @@ -3505,8 +3458,7 @@ Subject length lower bound = 7 1 ^ \d 2 ^ ^ d 0: 4abcdef - *** Failers -No match +\= Expect no match abcdef No match @@ -3523,8 +3475,7 @@ Subject length lower bound = 7 1 ^ \d 2 ^ ^ d 0: 4abcdef - *** Failers -No match +\= Expect no match abcdef No match @@ -3548,8 +3499,6 @@ Capturing subpattern count = 0 First code unit = 'a' Last code unit = 'f' Subject length lower bound = 6 - *** Failers -No match \x83\x0\x61bcdef --->\x83\x00abcdef 0 ^ ^ d @@ -3582,8 +3531,7 @@ Callout 1: last capture = 1 123abcdefC-\=callout_none 0: abcdef 1: abc - *** Failers -No match +\= Expect no match 123abcdef\=callout_fail=1 --->123abcdef 0 ^ ^ d @@ -3596,29 +3544,25 @@ May match empty string Subject length lower bound = 0 abcabcabc --->abcabcabc - 0 ^ (abc(?C1))* - 1 ^ ^ ) - 1 ^ ^ ) - 1 ^ ^ ) + 0 ^ ( + 1 ^ ^ )* + 1 ^ ^ )* + 1 ^ ^ )* 0: abcabcabc 1: abc - abcabc\=callout_fail=1:3 + abcabc\=callout_fail=1:4 --->abcabc - 0 ^ (abc(?C1))* - 1 ^ ^ ) - 1 ^ ^ ) + 0 ^ ( + 1 ^ ^ )* + 1 ^ ^ )* 0: abcabc 1: abc - *** Failers ---->*** Failers - 0 ^ (abc(?C1))* - 0: - abcabcabc\=callout_fail=1:3 + abcabcabc\=callout_fail=1:4 --->abcabcabc - 0 ^ (abc(?C1))* - 1 ^ ^ ) - 1 ^ ^ ) - 1 ^ ^ ) + 0 ^ ( + 1 ^ ^ )* + 1 ^ ^ )* + 1 ^ ^ )* 0: abcabc 1: abc @@ -3630,36 +3574,36 @@ Subject length lower bound = 0 Callout 0: last capture = 0 0: --->123 - ^ ^ ) + ^ ^ )* 0: 123 1: 123 123456\=callout_capture Callout 0: last capture = 0 0: --->123456 - ^ ^ ) + ^ ^ )* Callout 0: last capture = 1 0: 1: 123 --->123456 - ^ ^ ) + ^ ^ )* 0: 123456 1: 456 123456789\=callout_capture Callout 0: last capture = 0 0: --->123456789 - ^ ^ ) + ^ ^ )* Callout 0: last capture = 1 0: 1: 123 --->123456789 - ^ ^ ) + ^ ^ )* Callout 0: last capture = 1 0: 1: 456 --->123456789 - ^ ^ ) + ^ ^ )* 0: 123456789 1: 789 @@ -3754,6 +3698,7 @@ Capturing subpattern count = 2 First code unit = 'a' Last code unit = 'b' Subject length lower bound = 2 +\= Expect no match abbbbbccc\=callout_data=1 
--->abbbbbccc 1 ^ ^ @@ -3765,6 +3710,7 @@ Capturing subpattern count = 2 First code unit = 'a' Last code unit = 'b' Subject length lower bound = 2 +\= Expect no match abbbbbccc\=callout_data=1 --->abbbbbccc 1 ^ ^ @@ -3802,15 +3748,6 @@ Capturing subpattern count = 0 Starting code units: a b Subject length lower bound = 1 -/(?R)/I -Failed: error 140 at offset 3: recursion could loop indefinitely - -/(a|(?R))/I -Failed: error 140 at offset 6: recursion could loop indefinitely - -/(ab|(bc|(de|(?R))))/I -Failed: error 140 at offset 15: recursion could loop indefinitely - /x(ab|(bc|(de|(?R))))/I Capturing subpattern count = 3 First code unit = 'x' @@ -3837,17 +3774,10 @@ Subject length lower bound = 3 1: xxab 2: xxab 3: xxab - *** Failers -No match +\= Expect no match xyab No match -/(ab|(bc|(de|(?1))))/I -Failed: error 140 at offset 15: recursion could loop indefinitely - -/x(ab|(bc|(de|(?1)x)x)x)/I -Failed: error 140 at offset 16: recursion could loop indefinitely - /^([^()]|\((?1)*\))*$/I Capturing subpattern count = 1 May match empty string @@ -3863,8 +3793,7 @@ Subject length lower bound = 0 a(b(c))d 0: a(b(c))d 1: d - *** Failers) -No match +\= Expect no match) a(b(c)d No match @@ -3933,8 +3862,7 @@ Subject length lower bound = 1 -12 0: -12 1: -12 - *** Failers -No match +\= Expect no match ((2+2)*-3)-7) No match @@ -3942,7 +3870,7 @@ No match Capturing subpattern count = 2 Compile options: Overall options: anchored -Subject length lower bound = 2 +Subject length lower bound = 3 xyz 0: xyz 1: xyz @@ -3951,8 +3879,7 @@ Subject length lower bound = 2 0: xxyzxyzz 1: xxyzxyzz 2: xyzxyz - *** Failers -No match +\= Expect no match xxyzz No match xxyzxyzxyzz @@ -3988,8 +3915,7 @@ Subject length lower bound = 2 0: <> 1: <> 2: <> - *** Failers -No match +\= Expect no match Overall options: anchored -Subject length lower bound = 3 +Subject length lower bound = 2 a=a 0: a=a 1: a @@ -4158,8 +4084,7 @@ Subject length lower bound = 0 2: 3: Able was I ere I saw Elba 4: A - *** 
Failers -No match +\= Expect no match The quick brown fox No match @@ -4337,6 +4262,7 @@ Subject length lower bound = 2 Callout data = 1 0: ab 1: ab +\= Expect no match aaabbb\=callout_data=-1 --->aaabbb 1 ^ ^ b @@ -4428,10 +4354,10 @@ Subject length lower bound = 4 C aa (2) A (group 2) /(?Peks)(?Peccs)/I -Failed: error 143 at offset 15: two named subpatterns have the same name (PCRE2_DUPNAMES not set) +Failed: error 143 at offset 16: two named subpatterns have the same name (PCRE2_DUPNAMES not set) /(?Pabc(?Pdef)(?Pxyz))/I -Failed: error 143 at offset 30: two named subpatterns have the same name (PCRE2_DUPNAMES not set) +Failed: error 143 at offset 31: two named subpatterns have the same name (PCRE2_DUPNAMES not set) "\[((?P\d+)(,(?P>elem))*)\]"I Capturing subpattern count = 3 @@ -4445,8 +4371,7 @@ Subject length lower bound = 3 1: 10,20,30,5,5,4,4,2,43,23,4234 2: 10 3: ,4234 - *** Failers -No match +\= Expect no match [] No match @@ -4533,16 +4458,33 @@ Capturing subpattern count = 2 May match empty string Subject length lower bound = 0 -/[ab]{1}+/IB +/[ab]{1}+/B ------------------------------------------------------------------ Bra - [ab]{1,1}+ + [ab] + Ket + End +------------------------------------------------------------------ + +/()(?1){1}/B +------------------------------------------------------------------ + Bra + CBra 1 + Ket + Recurse + Ket + End +------------------------------------------------------------------ + +/()(?1)/B +------------------------------------------------------------------ + Bra + CBra 1 + Ket + Recurse Ket End ------------------------------------------------------------------ -Capturing subpattern count = 0 -Starting code units: a b -Subject length lower bound = 1 /((w\/|-|with)*(free|immediate)*.*?shipping\s*[!.-]*)/Ii Capturing subpattern count = 3 @@ -4640,6 +4582,7 @@ Subject length lower bound = 5 +4 ^ ^ e +5 ^ ^ 0: abcde +\= Expect no match abcdfe --->abcdfe +0 ^ a @@ -4763,6 +4706,7 @@ Subject length lower bound = 2 +2 ^ ^ b 
+3 ^ ^ 0: aaaab +\= Expect no match aaaacb --->aaaacb +0 ^ a+ @@ -4778,7 +4722,7 @@ No match /(abc|def)x/IB,auto_callout ------------------------------------------------------------------ Bra - Callout 255 0 9 + Callout 255 0 1 CBra 1 Callout 255 1 1 a @@ -4786,7 +4730,7 @@ No match b Callout 255 3 1 c - Callout 255 4 0 + Callout 255 4 1 Alt Callout 255 5 1 d @@ -4794,7 +4738,7 @@ No match e Callout 255 7 1 f - Callout 255 8 0 + Callout 255 8 1 Ket Callout 255 9 1 x @@ -4809,7 +4753,7 @@ Last code unit = 'x' Subject length lower bound = 4 abcx --->abcx - +0 ^ (abc|def) + +0 ^ ( +1 ^ a +2 ^^ b +3 ^ ^ c @@ -4820,7 +4764,7 @@ Subject length lower bound = 4 1: abc defx --->defx - +0 ^ (abc|def) + +0 ^ ( +1 ^ a +5 ^ d +6 ^^ e @@ -4830,18 +4774,17 @@ Subject length lower bound = 4 +10 ^ ^ 0: defx 1: def - ** Failers -No match +\= Expect no match abcdefzx --->abcdefzx - +0 ^ (abc|def) + +0 ^ ( +1 ^ a +2 ^^ b +3 ^ ^ c +4 ^ ^ | +9 ^ ^ x +5 ^ d - +0 ^ (abc|def) + +0 ^ ( +1 ^ a +5 ^ d +6 ^^ e @@ -4853,7 +4796,7 @@ No match /(abc|def)x/IB,auto_callout ------------------------------------------------------------------ Bra - Callout 255 0 9 + Callout 255 0 1 CBra 1 Callout 255 1 1 a @@ -4861,7 +4804,7 @@ No match b Callout 255 3 1 c - Callout 255 4 0 + Callout 255 4 1 Alt Callout 255 5 1 d @@ -4869,7 +4812,7 @@ No match e Callout 255 7 1 f - Callout 255 8 0 + Callout 255 8 1 Ket Callout 255 9 1 x @@ -4884,7 +4827,7 @@ Last code unit = 'x' Subject length lower bound = 4 abcx --->abcx - +0 ^ (abc|def) + +0 ^ ( +1 ^ a +2 ^^ b +3 ^ ^ c @@ -4895,7 +4838,7 @@ Subject length lower bound = 4 1: abc defx --->defx - +0 ^ (abc|def) + +0 ^ ( +1 ^ a +5 ^ d +6 ^^ e @@ -4905,18 +4848,17 @@ Subject length lower bound = 4 +10 ^ ^ 0: defx 1: def - ** Failers -No match +\= Expect no match abcdefzx --->abcdefzx - +0 ^ (abc|def) + +0 ^ ( +1 ^ a +2 ^^ b +3 ^ ^ c +4 ^ ^ | +9 ^ ^ x +5 ^ d - +0 ^ (abc|def) + +0 ^ ( +1 ^ a +5 ^ d +6 ^^ e @@ -4932,7 +4874,7 @@ Starting code units: a c Subject length lower 
bound = 6 ababab --->ababab - +0 ^ (ab|cd){3,4} + +0 ^ ( +1 ^ a +2 ^^ b +3 ^ ^ | @@ -4949,42 +4891,42 @@ Subject length lower bound = 6 1: ab abcdabcd --->abcdabcd - +0 ^ (ab|cd){3,4} + +0 ^ ( +1 ^ a +2 ^^ b +3 ^ ^ | +1 ^ ^ a +4 ^ ^ c +5 ^ ^ d - +6 ^ ^ ) + +6 ^ ^ ){3,4} +1 ^ ^ a +2 ^ ^ b +3 ^ ^ | +1 ^ ^ a +4 ^ ^ c +5 ^ ^ d - +6 ^ ^ ) + +6 ^ ^ ){3,4} +12 ^ ^ 0: abcdabcd 1: cd abcdcdcdcdcd --->abcdcdcdcdcd - +0 ^ (ab|cd){3,4} + +0 ^ ( +1 ^ a +2 ^^ b +3 ^ ^ | +1 ^ ^ a +4 ^ ^ c +5 ^ ^ d - +6 ^ ^ ) + +6 ^ ^ ){3,4} +1 ^ ^ a +4 ^ ^ c +5 ^ ^ d - +6 ^ ^ ) + +6 ^ ^ ){3,4} +1 ^ ^ a +4 ^ ^ c +5 ^ ^ d - +6 ^ ^ ) + +6 ^ ^ ){3,4} +12 ^ ^ 0: abcdcdcd 1: cd @@ -4992,7 +4934,7 @@ Subject length lower bound = 6 /([ab]{,4}c|xy)/IB,auto_callout ------------------------------------------------------------------ Bra - Callout 255 0 14 + Callout 255 0 1 CBra 1 Callout 255 1 4 [ab] @@ -5006,13 +4948,13 @@ Subject length lower bound = 6 } Callout 255 9 1 c - Callout 255 10 0 + Callout 255 10 1 Alt Callout 255 11 1 x Callout 255 12 1 y - Callout 255 13 0 + Callout 255 13 1 Ket Callout 255 14 0 Ket @@ -5022,17 +4964,18 @@ Capturing subpattern count = 1 Options: auto_callout Starting code units: a b x Subject length lower bound = 2 +\= Expect no match Note: that { does NOT introduce a quantifier --->Note: that { does NOT introduce a quantifier - +0 ^ ([ab]{,4}c|xy) + +0 ^ ( +1 ^ [ab] +5 ^^ { +11 ^ x - +0 ^ ([ab]{,4}c|xy) + +0 ^ ( +1 ^ [ab] +5 ^^ { +11 ^ x - +0 ^ ([ab]{,4}c|xy) + +0 ^ ( +1 ^ [ab] +5 ^^ { +11 ^ x @@ -5041,7 +4984,7 @@ No match /([ab]{,4}c|xy)/IB,auto_callout ------------------------------------------------------------------ Bra - Callout 255 0 14 + Callout 255 0 1 CBra 1 Callout 255 1 4 [ab] @@ -5055,13 +4998,13 @@ No match } Callout 255 9 1 c - Callout 255 10 0 + Callout 255 10 1 Alt Callout 255 11 1 x Callout 255 12 1 y - Callout 255 13 0 + Callout 255 13 1 Ket Callout 255 14 0 Ket @@ -5071,17 +5014,18 @@ Capturing subpattern count = 1 Options: auto_callout Starting code 
units: a b x Subject length lower bound = 2 +\= Expect no match Note: that { does NOT introduce a quantifier --->Note: that { does NOT introduce a quantifier - +0 ^ ([ab]{,4}c|xy) + +0 ^ ( +1 ^ [ab] +5 ^^ { +11 ^ x - +0 ^ ([ab]{,4}c|xy) + +0 ^ ( +1 ^ [ab] +5 ^^ { +11 ^ x - +0 ^ ([ab]{,4}c|xy) + +0 ^ ( +1 ^ [ab] +5 ^^ { +11 ^ x @@ -5090,58 +5034,58 @@ No match /([ab]{1,4}c|xy){4,5}?123/IB,auto_callout ------------------------------------------------------------------ Bra - Callout 255 0 21 + Callout 255 0 1 CBra 1 Callout 255 1 9 [ab]{1,4}+ Callout 255 10 1 c - Callout 255 11 0 + Callout 255 11 1 Alt Callout 255 12 1 x Callout 255 13 1 y - Callout 255 14 0 + Callout 255 14 7 Ket CBra 1 Callout 255 1 9 [ab]{1,4}+ Callout 255 10 1 c - Callout 255 11 0 + Callout 255 11 1 Alt Callout 255 12 1 x Callout 255 13 1 y - Callout 255 14 0 + Callout 255 14 7 Ket CBra 1 Callout 255 1 9 [ab]{1,4}+ Callout 255 10 1 c - Callout 255 11 0 + Callout 255 11 1 Alt Callout 255 12 1 x Callout 255 13 1 y - Callout 255 14 0 + Callout 255 14 7 Ket CBra 1 Callout 255 1 9 [ab]{1,4}+ Callout 255 10 1 c - Callout 255 11 0 + Callout 255 11 1 Alt Callout 255 12 1 x Callout 255 13 1 y - Callout 255 14 0 + Callout 255 14 7 Ket Braminzero CBra 1 @@ -5149,13 +5093,13 @@ No match [ab]{1,4}+ Callout 255 10 1 c - Callout 255 11 0 + Callout 255 11 1 Alt Callout 255 12 1 x Callout 255 13 1 y - Callout 255 14 0 + Callout 255 14 7 Ket Callout 255 21 1 1 @@ -5174,7 +5118,7 @@ Last code unit = '3' Subject length lower bound = 11 aacaacaacaacaac123 --->aacaacaacaacaac123 - +0 ^ ([ab]{1,4}c|xy){4,5}? 
+ +0 ^ ( +1 ^ [ab]{1,4} +10 ^ ^ c +11 ^ ^ | @@ -5212,7 +5156,7 @@ Max lookbehind = 1 May match empty string Options: dotall Subject length lower bound = 0 - ab cd\=offset=1 + ab cd\=startoffset=1 0: cd /(?!.bcd).*/I @@ -5237,8 +5181,7 @@ Partial match: abcd 0: abcde the quick brown abc\=ps Partial match: abc - ** Failers\=ps -No match +\= Expect no match\=ps the quick brown abxyz fox\=ps No match @@ -5279,8 +5222,7 @@ Partial match: 02/ Partial match: 02/0 02/1\=ps Partial match: 02/1 - ** Failers\=ps -No match +\= Expect no match\=ps \=ps No match 123\=ps @@ -5360,8 +5302,7 @@ Partial match: 1234 Partial match: 12345 12345X 0: 12345X - *** Failers -No match +\= Expect no match 1X No match 123456\=ps @@ -5400,8 +5341,7 @@ Last code unit = 'k' Subject length lower bound = 10 this is a line\nbreak 0: line\x0abreak - ** Failers -No match +\= Expect no match line one\nthis is a line\nbreak in the second line No match @@ -5414,8 +5354,7 @@ Last code unit = 'k' Subject length lower bound = 10 this is a line\nbreak 0: line\x0abreak - ** Failers -No match +\= Expect no match line one\nthis is a line\nbreak in the second line No match @@ -5426,8 +5365,7 @@ Last code unit = 'd' Subject length lower bound = 4 AbCd 0: AbCd - ** Failers -No match +\= Expect no match abcd No match @@ -5712,7 +5650,7 @@ First code unit = \xff Subject length lower bound = 1 /^((?Pa1)|(?Pa2)b)/I -Failed: error 143 at offset 17: two named subpatterns have the same name (PCRE2_DUPNAMES not set) +Failed: error 143 at offset 18: two named subpatterns have the same name (PCRE2_DUPNAMES not set) /^((?Pa1)|(?Pa2)b)/I,dupnames Capturing subpattern count = 3 @@ -5733,8 +5671,6 @@ Subject length lower bound = 2 2: 3: a2 C a2 (2) A (non-unique) - ** Failers -No match a1b\=copy=Z,copy=A 0: a1 1: a1 @@ -5819,8 +5755,6 @@ Subject length lower bound = 2 2: 3: a2 G a2 (2) A (non-unique) - ** Failers -No match a1b\=get=Z,get=A 0: a1 1: a1 @@ -5882,7 +5816,7 @@ Named capturing subpatterns: A 2 A 3 Compile options: 
-Overall options: anchored dupnames +Overall options: anchored Duplicate name status changes Subject length lower bound = 2 a1b\=copy=A @@ -5898,16 +5832,11 @@ Subject length lower bound = 2 C a2 (2) A (non-unique) /^(?Pa) (?J:(?Pb)(?Pc)) (?Pd)/I -Failed: error 143 at offset 37: two named subpatterns have the same name (PCRE2_DUPNAMES not set) +Failed: error 143 at offset 38: two named subpatterns have the same name (PCRE2_DUPNAMES not set) -/ In this next test, J is not set at the outer level; consequently it isn't -set in the pattern's options; consequently pcre_get_named_substring() produces -a random value. /Ix -Capturing subpattern count = 1 -Options: extended -First code unit = 'I' -Last code unit = 'e' -Subject length lower bound = 141 +# In this next test, J is not set at the outer level; consequently it isn't set +# in the pattern's options; consequently pcre2_substring_get_byname() produces +# a random value. /^(?Pa) (?J:(?Pb)(?Pc)) (?Pd)/I Capturing subpattern count = 4 @@ -5943,8 +5872,7 @@ Subject length lower bound = 1 1: a bc 0: b - ** Failers -No match +\= Expect no match abc No match @@ -5960,10 +5888,10 @@ Subject length lower bound = 2 1: X /(?:(?(2y)a|b)(X))+/I -Failed: error 126 at offset 7: malformed number or name after (?( +Failed: error 124 at offset 7: missing closing parenthesis for condition /(?:(?(ZA)a|b)(?PX))+/I -Failed: error 115 at offset 9: reference to non-existent subpattern +Failed: error 115 at offset 6: reference to non-existent subpattern /(?:(?(ZZ)a|b)(?(ZZ)a|b)(?PX))+/I Capturing subpattern count = 1 @@ -6043,8 +5971,7 @@ Subject length lower bound = 3 0: abc xyz\r\nabc 0: abc - ** Failers -No match +\= Expect no match xyz\rabc No match xyzabc\r @@ -6065,8 +5992,7 @@ Last code unit = 'c' Subject length lower bound = 3 xyz\r\nabclf> 0: abc - ** Failers -No match +\= Expect no match xyz\nabclf No match xyz\rabclf @@ -6081,8 +6007,7 @@ Last code unit = 'c' Subject length lower bound = 3 xyz\rabc 0: abc - ** Failers -No match 
+\= Expect no match xyz\nabc No match xyz\r\nabc @@ -6376,8 +6301,7 @@ Last code unit = 'A' Subject length lower bound = 3 aaaA5 0: aaaA5 - ** Failers -No match +\= Expect no match aaaa5 No match @@ -7091,15 +7015,14 @@ Failed: error 141 at offset 3: unrecognized character after (?P bXbX 0: bX 1: X - ** Failers -No match +\= Expect no match aXaX No match aXbX No match /^(?P>abc)(?xxx)/ -Failed: error 115 at offset 8: reference to non-existent subpattern +Failed: error 115 at offset 5: reference to non-existent subpattern /^(?P>abc)(?x|y)/ xx @@ -7138,8 +7061,7 @@ Failed: error 115 at offset 8: reference to non-existent subpattern 0: bx 1: bx 2: x - ** Failers -No match +\= Expect no match axby No match @@ -7159,8 +7081,7 @@ No match 1: Xy 2: X 3: y - ** Failers -No match +\= Expect no match x No match @@ -7308,8 +7229,7 @@ Subject length lower bound = 0 2: 3: Able was I ere I saw Elba 4: A - *** Failers -No match +\= Expect no match The quick brown fox No match @@ -7342,8 +7262,7 @@ Subject length lower bound = 2 0: adaa 1: a 2: d - ** Failers -No match +\= Expect no match addd No match adbb @@ -7358,19 +7277,18 @@ No match 0: bdab 1: b 2: d - ** Failers -No match +\= Expect no match bddd No match /(?( (?'B' abc (?(R) (?(R&C)1) (?(R&B)2) X | (?1) (?2) (?R) ))) /x -Failed: error 115 at offset 29: reference to non-existent subpattern +Failed: error 115 at offset 27: reference to non-existent subpattern /^(?(DEFINE) abc | xyz ) /x -Failed: error 154 at offset 22: DEFINE group contains more than one branch +Failed: error 154 at offset 4: DEFINE group contains more than one branch /(?(DEFINE) abc) xyz/Ix Capturing subpattern count = 0 @@ -7398,39 +7316,36 @@ Last code unit = 'z' Subject length lower bound = 3 /(a|)*\d/ - aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\=ovector=0 -No match aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4\=ovector=0 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 1: +\= Expect no match + 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\=ovector=0 +No match /^a.b/newline=lf a\rb 0: a\x0db - ** Failers -No match +\= Expect no match a\nb No match /^a.b/newline=cr a\nb 0: a\x0ab - ** Failers -No match +\= Expect no match a\rb No match /^a.b/newline=anycrlf a\x85b 0: a\x85b - ** Failers -No match +\= Expect no match a\rb No match /^a.b/newline=any - ** Failers -No match +\= Expect no match a\nb No match a\rb @@ -7471,8 +7386,7 @@ No match 0: a\x0cb a\x85b 0: a\x85b - ** Failers -No match +\= Expect no match a\n\rb No match @@ -7513,8 +7427,7 @@ No match 0: a\x0a\x0db a\n\r\x85\x0cb 0: a\x0a\x0d\x85\x0cb - ** Failers -No match +\= Expect no match ab No match @@ -7533,8 +7446,7 @@ No match 0: a\x0a\x0d\x0a\x0db a\n\n\r\nb 0: a\x0a\x0a\x0d\x0ab - ** Failers -No match +\= Expect no match a\n\n\n\rb No match a\r @@ -7584,47 +7496,47 @@ Subject length lower bound = 3 1: x /(abc)(?i:(?1))/ - defabcabcxyz + defabcabcxyz 0: abcabc 1: abc - DEFabcABCXYZ +\= Expect no match + DEFabcABCXYZ No match /(abc)(?:(?i)(?1))/ - defabcabcxyz + defabcabcxyz 0: abcabc 1: abc - DEFabcABCXYZ +\= Expect no match + DEFabcABCXYZ No match /^(a)\g-2/ -Failed: error 115 at offset 7: reference to non-existent subpattern +Failed: error 115 at offset 8: reference to non-existent subpattern /^(a)\g/ -Failed: error 158 at offset 5: a numbered reference must not be zero +Failed: error 157 at offset 6: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number /^(a)\g{0}/ -Failed: error 158 at offset 8: a numbered reference must not be zero +Failed: error 115 at offset 9: reference to non-existent subpattern /^(a)\g{3/ -Failed: error 157 at offset 8: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number +Failed: error 157 at offset 6: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number /^(a)\g{aa}/ -Failed: error 115 at offset 9: reference to non-existent subpattern 
+Failed: error 115 at offset 7: reference to non-existent subpattern /^a.b/newline=lf a\rb 0: a\x0db - *** Failers -No match +\= Expect no match a\nb No match /.+foo/ afoo 0: afoo - ** Failers -No match +\= Expect no match \r\nfoo No match \nfoo @@ -7635,16 +7547,14 @@ No match 0: afoo \nfoo 0: \x0afoo - ** Failers -No match +\= Expect no match \r\nfoo No match /.+foo/newline=any afoo 0: afoo - ** Failers -No match +\= Expect no match \nfoo No match \r\nfoo @@ -7663,8 +7573,7 @@ No match 0: abc\n\rxyz 0: - ** Failers -No match +\= Expect no match abc\r\nxyz No match @@ -7699,8 +7608,7 @@ No match /^X/m XABC 0: X - ** Failers -No match +\= Expect no match XABC\=notbol No match @@ -7734,19 +7642,18 @@ No match xyabcabc 0: xyabcabc 1: abc - ** Failers -No match +\= Expect no match xyabc No match /x(?-0)y/ -Failed: error 158 at offset 5: a numbered reference must not be zero +Failed: error 126 at offset 5: a relative value of zero is not allowed /x(?-1)y/ Failed: error 115 at offset 5: reference to non-existent subpattern /x(?+0)y/ -Failed: error 158 at offset 5: a numbered reference must not be zero +Failed: error 126 at offset 5: a relative value of zero is not allowed /x(?+1)y/ Failed: error 115 at offset 5: reference to non-existent subpattern @@ -7773,8 +7680,7 @@ Failed: error 115 at offset 5: reference to non-existent subpattern 1: abc Y 0: Y - ** Failers -No match +\= Expect no match abcY No match @@ -7804,13 +7710,12 @@ No match 0: YabcXabcXabc 1: Xabc 2: abc - ** Failers -No match +\= Expect no match XabcXabc No match /(?(-1)a)/B -Failed: error 115 at offset 6: reference to non-existent subpattern +Failed: error 115 at offset 5: reference to non-existent subpattern /((?(-1)a))/B ------------------------------------------------------------------ @@ -7826,7 +7731,7 @@ Failed: error 115 at offset 6: reference to non-existent subpattern ------------------------------------------------------------------ /((?(-2)a))/B -Failed: error 115 at offset 7: reference to 
non-existent subpattern +Failed: error 115 at offset 6: reference to non-existent subpattern /^(?(+1)X|Y)(.)/B ------------------------------------------------------------------ @@ -7855,8 +7760,7 @@ Failed: error 115 at offset 7: reference to non-existent subpattern bon-bon 0: bon-bon 1: bon - ** Failers -No match +\= Expect no match tom-bon No match @@ -7962,8 +7866,7 @@ Failed: error 142 at offset 4: syntax error in subpattern name (missing terminat Ket End ------------------------------------------------------------------ - ** Failers -No match +\= Expect no match XXXX No match @@ -8191,6 +8094,7 @@ Failed: error 106 at offset 10: missing terminating ] for character class ------------------------------------------------------------------ /^a+(*FAIL)/auto_callout +\= Expect no match aaaaaa --->aaaaaa +0 ^ ^ @@ -8204,6 +8108,7 @@ Failed: error 106 at offset 10: missing terminating ] for character class No match /a+b?c+(*FAIL)/auto_callout +\= Expect no match aaabccc --->aaabccc +0 ^ a+ @@ -8227,6 +8132,7 @@ No match No match /a+b?(*PRUNE)c+(*FAIL)/auto_callout +\= Expect no match aaabccc --->aaabccc +0 ^ a+ @@ -8253,6 +8159,7 @@ No match No match /a+b?(*COMMIT)c+(*FAIL)/auto_callout +\= Expect no match aaabccc --->aaabccc +0 ^ a+ @@ -8265,6 +8172,7 @@ No match No match /a+b?(*SKIP)c+(*FAIL)/auto_callout +\= Expect no match aaabcccaaabccc --->aaabcccaaabccc +0 ^ a+ @@ -8284,6 +8192,7 @@ No match No match /a+b?(*THEN)c+(*FAIL)/auto_callout +\= Expect no match aaabccc --->aaabccc +0 ^ a+ @@ -8313,10 +8222,10 @@ No match Failed: error 166 at offset 7: (*MARK) must have an argument /(?i:A{1,}\6666666666)/ -Failed: error 161 at offset 19: number is too big +Failed: error 161 at offset 19: group number is too big /\g6666666666/ -Failed: error 161 at offset 11: number is too big +Failed: error 161 at offset 7: group number is too big /[\g6666666666]/B ------------------------------------------------------------------ @@ -8330,6 +8239,7 @@ Failed: error 161 at offset 11: 
number is too big Failed: error 115 at offset 3: reference to non-existent subpattern /.+A/newline=crlf +\= Expect no match \r\nA No match @@ -8352,24 +8262,21 @@ Failed: error 160 at offset 5: (*VERB) not recognized or malformed /(*CR)a.b/ a\nb 0: a\x0ab - ** Failers -No match +\= Expect no match a\rb No match /(*CR)a.b/newline=lf a\nb 0: a\x0ab - ** Failers -No match +\= Expect no match a\rb No match /(*LF)a.b/newline=CRLF a\rb 0: a\x0db - ** Failers -No match +\= Expect no match a\nb No match @@ -8378,14 +8285,12 @@ No match 0: a\x0db a\nb 0: a\x0ab - ** Failers -No match +\= Expect no match a\r\nb No match /(*ANYCRLF)a.b/newline=CR - ** Failers -No match +\= Expect no match a\rb No match a\nb @@ -8394,8 +8299,7 @@ No match No match /(*ANY)a.b/newline=cr - ** Failers -No match +\= Expect no match a\rb No match a\nb @@ -8438,8 +8342,7 @@ Subject length lower bound = 3 0: a\x0ab a\r\nb 0: a\x0d\x0ab - ** Failers -No match +\= Expect no match a\x85b No match a\x0bb @@ -8474,8 +8377,7 @@ Subject length lower bound = 2 0: a\x0ab a\r\nb 0: a\x0d\x0ab - ** Failers -No match +\= Expect no match a\x85b No match a\x0bb @@ -8510,8 +8412,7 @@ Subject length lower bound = 4 0: a\x0a\x0d\x0db a\r\n\r\n\r\n\r\nb 0: a\x0d\x0a\x0d\x0a\x0d\x0a\x0d\x0ab - ** Failers -No match +\= Expect no match a\x85\x85b No match a\x0b\x0bb @@ -8533,8 +8434,7 @@ Subject length lower bound = 4 0: a\x85\x85b a\x0b\x0bb 0: a\x0b\x0bb - ** Failers -No match +\= Expect no match a\r\r\r\r\rb No match @@ -8592,25 +8492,25 @@ Subject length lower bound = 2 Failed: error 162 at offset 9: subpattern name expected /(?)(?&a)/ -Failed: error 115 at offset 12: reference to non-existent subpattern +Failed: error 115 at offset 11: reference to non-existent subpattern /(?)(?&aaaaaaaaaaaaaaaaaaaaaaa)/ -Failed: error 115 at offset 32: reference to non-existent subpattern +Failed: error 115 at offset 9: reference to non-existent subpattern /(?+-a)/ -Failed: error 163 at offset 3: digit expected after (?+ +Failed: 
error 129 at offset 2: digit expected after (?+ or (?- /(?-+a)/ Failed: error 111 at offset 3: unrecognized character after (? or (?- /(?(-1))/ -Failed: error 115 at offset 6: reference to non-existent subpattern +Failed: error 115 at offset 5: reference to non-existent subpattern /(?(+10))/ -Failed: error 115 at offset 7: reference to non-existent subpattern +Failed: error 115 at offset 4: reference to non-existent subpattern /(?(10))/ -Failed: error 115 at offset 6: reference to non-existent subpattern +Failed: error 115 at offset 3: reference to non-existent subpattern /(?(+2))()()/ @@ -8629,7 +8529,7 @@ Failed: error 162 at offset 3: subpattern name expected Failed: error 169 at offset 2: \k is not followed by a braced, angle-bracketed, or quoted name /\kabc/ -Failed: error 169 at offset 5: \k is not followed by a braced, angle-bracketed, or quoted name +Failed: error 169 at offset 2: \k is not followed by a braced, angle-bracketed, or quoted name /(?P=)/ Failed: error 162 at offset 4: subpattern name expected @@ -8637,18 +8537,6 @@ Failed: error 162 at offset 4: subpattern name expected /(?P>)/ Failed: error 162 at offset 4: subpattern name expected -/(?!\w)(?R)/ -Failed: error 140 at offset 9: recursion could loop indefinitely - -/(?=\w)(?R)/ -Failed: error 140 at offset 9: recursion could loop indefinitely - -/(?x|y){0}z/ xzxx 0: xz yzyy 0: yz - ** Failers -No match +\= Expect no match xxz No match /(\3)(\1)(a)/ +\= Expect no match cat No match @@ -8720,8 +8608,7 @@ Failed: error 115 at offset 3: reference to non-existent subpattern abcbabc 0: abcbabc 1: abc - ** Failers -No match +\= Expect no match abcXabc No match @@ -8729,8 +8616,7 @@ No match abcXabc 0: abcXabc 1: abc - ** Failers -No match +\= Expect no match abcbabc No match @@ -8741,11 +8627,11 @@ No match 2: xyz /(?&N)[]a(?)](?abc)/ -Failed: error 115 at offset 4: reference to non-existent subpattern +Failed: error 115 at offset 3: reference to non-existent subpattern abc)](abc)/ -Failed: error 115 
at offset 4: reference to non-existent subpattern +Failed: error 115 at offset 3: reference to non-existent subpattern abcadc - +0 ^ (?(?=.*b)b|^) - +2 ^ (?=.*b) + +0 ^ (? + +2 ^ (?= +5 ^ .* +7 ^ ^ b +7 ^ ^ b @@ -8843,16 +8724,16 @@ Subject length lower bound = 0 0: abc --->abc - +0 ^ (?(?=.*b)b|^) - +2 ^ (?=.*b) + +0 ^ (? + +2 ^ (?= +5 ^ .* +7 ^ ^ b +7 ^ ^ b +7 ^^ b +8 ^ ^ ) +9 ^ b - +0 ^ (?(?=.*b)b|^) - +2 ^ (?=.*b) + +0 ^ (? + +2 ^ (?= +5 ^ .* +7 ^ ^ b +7 ^^ b @@ -8869,7 +8750,6 @@ Subject length lower bound = 1 /(?(?=.*b).*b|^d)/I Capturing subpattern count = 0 -First code unit at start or follows newline Subject length lower bound = 1 /xyz/auto_callout @@ -8887,8 +8767,7 @@ Subject length lower bound = 1 +2 ^ ^ z +3 ^ ^ 0: xyz - ** Failers -No match +\= Expect no match abc No match abcxypqr @@ -8905,20 +8784,7 @@ No match +2 ^ ^ z +3 ^ ^ 0: xyz - ** Failers ---->** Failers - +0 ^ x - +0 ^ x - +0 ^ x - +0 ^ x - +0 ^ x - +0 ^ x - +0 ^ x - +0 ^ x - +0 ^ x - +0 ^ x - +0 ^ x -No match +\= Expect no match abc --->abc +0 ^ x @@ -8956,7 +8822,7 @@ No match /(*NO_AUTO_POSSESS)a+b/B ------------------------------------------------------------------ Bra - a++ + a+ b Ket End @@ -8979,23 +8845,23 @@ No match --->"ab" +0 ^ ^ +1 ^ " - +2 ^^ ((?(?=[a])[^"])|b)* - +3 ^^ (?(?=[a])[^"]) - +5 ^^ (?=[a]) + +2 ^^ ( + +3 ^^ (? + +5 ^^ (?= +8 ^^ [a] +11 ^ ^ ) +12 ^^ [^"] +16 ^ ^ ) +17 ^ ^ | - +3 ^ ^ (?(?=[a])[^"]) - +5 ^ ^ (?=[a]) + +3 ^ ^ (? + +5 ^ ^ (?= +8 ^ ^ [a] +17 ^ ^ | +21 ^ ^ " +18 ^ ^ b -+19 ^ ^ ) - +3 ^ ^ (?(?=[a])[^"]) - +5 ^ ^ (?=[a]) ++19 ^ ^ )* + +3 ^ ^ (? 
+ +5 ^ ^ (?= +8 ^ ^ [a] +17 ^ ^ | +21 ^ ^ " @@ -9248,12 +9114,14 @@ Partial match: 123999 ^^^^ /Z(*F)/ +\= Expect no match Z\=ps No match ZA\=ps No match /Z(?!)/ +\= Expect no match Z\=ps No match ZA\=ps @@ -9405,8 +9273,7 @@ Partial match: +ab 0: abc ^^^ 0+ def - ** Failers -No match +\= Expect no match abcdef\=notempty No match xyzabcdef\=notempty @@ -9420,8 +9287,7 @@ No match 0: abc ^^^ 0+ def - ** Failers -No match +\= Expect no match abcdef\=notempty No match @@ -9441,9 +9307,7 @@ No match xyz\=notempty_atstart 0: 0+ yz - ** Failers - 0: - 0+ ** Failers +\= Expect no match xyz\=notempty No match @@ -9454,9 +9318,7 @@ No match xyzabc 0: 0+ xyzabc - ** Failers - 0: - 0+ ** Failers +\= Expect no match xyzabc\=notempty No match xyzabc\=notempty_atstart @@ -9566,8 +9428,7 @@ No match XaaX 0: aa 1: a - ** Failers -No match +\= Expect no match XAAX No match @@ -9609,22 +9470,21 @@ No match 1: a /(a)(?<=b\1)/ -Failed: error 125 at offset 10: lookbehind assertion is not fixed length /(a)(?<=b+(?1))/ -Failed: error 125 at offset 13: lookbehind assertion is not fixed length +Failed: error 125 at offset 3: lookbehind assertion is not fixed length /(a+)(?<=b(?1))/ -Failed: error 125 at offset 14: lookbehind assertion is not fixed length +Failed: error 125 at offset 4: lookbehind assertion is not fixed length /(a(?<=b(?1)))/ -Failed: error 125 at offset 13: lookbehind assertion is not fixed length +Failed: error 125 at offset 2: lookbehind assertion is not fixed length /(?<=b(?1))xyz/ Failed: error 115 at offset 8: reference to non-existent subpattern /(?<=b(?1))xyz(b+)pqrstuvew/ -Failed: error 125 at offset 26: lookbehind assertion is not fixed length +Failed: error 125 at offset 0: lookbehind assertion is not fixed length /(a|bc)\1/I Capturing subpattern count = 1 @@ -9748,7 +9608,7 @@ Subject length lower bound = 1 C B (1) a (group 1) /(?|(?A)|(?B))/ -Failed: error 165 at offset 15: different names for subpatterns of the same number are not allowed +Failed: error 165 at 
offset 16: different names for subpatterns of the same number are not allowed /(?:a(? (?')|(?")) | b(? (?')|(?")) ) @@ -9778,8 +9638,7 @@ Subject length lower bound = 3 4: " 5: 6: " - ** Failers -No match +\= Expect no match b"11111 No match a"11111 @@ -9833,8 +9692,7 @@ Subject length lower bound = 2 eX 0: eX 1: e - ** Failers -No match +\= Expect no match abcdY No match ey @@ -9879,8 +9737,7 @@ Subject length lower bound = 4 2: b 3: c 4: dd - ** Failers -No match +\= Expect no match abcdde No match @@ -9936,8 +9793,7 @@ Partial match: abcde ABXABD 0: ABD 1: B - ** Failers -No match +\= Expect no match ABX No match BAXBAD @@ -9967,6 +9823,7 @@ Capturing subpattern count = 3 Max back reference = 3 Last code unit = 'a' Subject length lower bound = 3 +\= Expect no match cat No match @@ -10004,8 +9861,7 @@ Subject length lower bound = 1 0: Ab CcC 0: c - ** Failers -No match +\= Expect no match XABX No match @@ -10105,8 +9961,8 @@ No match End ------------------------------------------------------------------ -/ -- This one is here because Perl gives the match as "b" rather than "ab". I - believe this to be a Perl bug. --/ +# This one is here because Perl gives the match as "b" rather than "ab". I +# believe this to be a Perl bug. /(?>a\Kb)z|(ab)/ ab\=startchar @@ -10114,7 +9970,14 @@ No match 1: ab /(?P(?P0|)|(?P>L2)(?P>L1))/ -Failed: error 140 at offset 31: recursion could loop indefinitely + abcd + 0: + 1: + 2: + 0abc + 0: 0 + 1: 0 + 2: 0 /abc(*MARK:)pqr/ Failed: error 166 at offset 10: (*MARK) must have an argument @@ -10123,12 +9986,13 @@ Failed: error 166 at offset 10: (*MARK) must have an argument Failed: error 166 at offset 6: (*MARK) must have an argument /abc(*FAIL:123)xyz/ -Failed: error 159 at offset 13: an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT) +Failed: error 159 at offset 10: an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT) # This should, and does, fail. 
In Perl, it does not, which I think is a # bug because replacing the B in the pattern by (B|D) does make it fail. /A(*COMMIT)B/aftertext,mark +\= Expect no match ACABX No match @@ -10140,6 +10004,7 @@ No match 0: AC /A(*PRUNE)B|A(*PRUNE)C/mark +\= Expect no match AC No match @@ -10147,8 +10012,7 @@ No match # though PCRE2 does. /^A(*:A)B|^X(*:A)Y/mark - ** Failers -No match +\= Expect no match XAQQ No match, mark = A @@ -10161,20 +10025,22 @@ No match, mark = A 0: ABC /(*COMMIT)ABC/no_start_optimize - ** Failers -No match +\= Expect no match DEFGABC No match /^(ab (c+(*THEN)cd) | xyz)/x +\= Expect no match abcccd No match /^(ab (c+(*PRUNE)cd) | xyz)/x +\= Expect no match abcccd No match /^(ab (c+(*FAIL)cd) | xyz)/x +\= Expect no match abcccd No match @@ -10211,8 +10077,7 @@ No match ------------------------------------------------------------------ ACBD 0: ACBD - *** Failers -No match +\= Expect no match A\nB No match ACB\n @@ -10232,8 +10097,7 @@ No match 0: ACBD ACB\n 0: ACB\x0a - *** Failers -No match +\= Expect no match A\nB No match @@ -10242,8 +10106,7 @@ No match 0: A\x0aB A\rB 0: A\x0dB - ** Failers -No match +\= Expect no match A\r\nB No match @@ -10354,6 +10217,7 @@ No match Ket End ------------------------------------------------------------------ +\= Expect no match X\x0d\x0a No match @@ -10409,26 +10273,30 @@ Partial match: abc Partial match: abc /abc\B/ - abc -No match abc\=ps Partial match: abc abc\=ph Partial match: abc +\= Expect no match + abc +No match /.+/ +\= Bad offsets + abc\=offset=4 +Failed: error -33: bad offset value + abc\=offset=-4 +** Invalid value in 'offset=-4' +\= Valid data abc\=offset=0 0: abc abc\=offset=1 0: bc abc\=offset=2 0: c +\= Expect no match abc\=offset=3 No match - abc\=offset=4 -Failed: error -33: bad offset value - abc\=offset=-4 -** Invalid value in 'offset=-4' /^\cÄ£/ Failed: error 168 at offset 3: \c must be followed by a printable ASCII character @@ -10464,7 +10332,7 @@ Failed: error 168 at offset 3: \c must be 
followed by a printable ASCII characte Failed: error 142 at offset 29: syntax error in subpattern name (missing terminator) /(?P(?P=axn)xxx)/B -Failed: error 115 at offset 15: reference to non-existent subpattern +Failed: error 115 at offset 12: reference to non-existent subpattern /(?P(?P=axn)xxx)(?yy)/B ------------------------------------------------------------------ @@ -10551,8 +10419,7 @@ Failed: error 115 at offset 15: reference to non-existent subpattern 0: aaaaX 1: a 2: X - ** Failers -No match +\= Expect no match aaaa No match @@ -10560,8 +10427,7 @@ No match aaaaX 0: aaaaX 1: X - ** Failers -No match +\= Expect no match aaaa No match @@ -10587,7 +10453,7 @@ Last code unit = '4' Subject length lower bound = 5 /(?<=(abc)+)X/ -Failed: error 125 at offset 10: lookbehind assertion is not fixed length +Failed: error 125 at offset 0: lookbehind assertion is not fixed length /(^ab)/I Capturing subpattern count = 1 @@ -10745,8 +10611,7 @@ Subject length lower bound = 1 /(?1)(?:(b(*ACCEPT))){0}c/ bc 0: bc - ** Failers -No match +\= Expect no match b No match @@ -10757,6 +10622,7 @@ No match 0: c /^.*?(?(?=a)a|b(*THEN)c)/ +\= Expect no match ba No match @@ -10765,14 +10631,17 @@ No match 0: ba /^.*?(?(?=a)a(*THEN)b|c)/ +\= Expect no match ac No match /^.*?(?(?=a)a(*THEN)b)c/ +\= Expect no match ac No match /^.*?(a(*THEN)b)c/ +\= Expect no match aabc No match @@ -10798,10 +10667,12 @@ No match 0: C 1: C MK: A +\= Expect no match D No match, mark = A /(*:A)A+(*SKIP:A)(B|Z)/mark +\= Expect no match AAAC No match, mark = A @@ -10822,6 +10693,7 @@ No match, mark = A /(?>(*ACCEPT)b)c/ c 0: +\= Expect no match c\=notempty No match @@ -10869,7 +10741,8 @@ Matched, but too many substrings /(?(DEFINE)(a(?2)|b)(b(?1)|a))(?:(?1)|(?2))/I Capturing subpattern count = 2 -Subject length lower bound = 1 +May match empty string +Subject length lower bound = 0 /(a(?2)|b)(b(?1)|a)(?:(?1)|(?2))/I Capturing subpattern count = 2 @@ -11087,17 +10960,14 @@ Matched, but too many substrings 
End ------------------------------------------------------------------ -/(a+|(?R)b)/ -Failed: error 140 at offset 7: recursion could loop indefinitely - /^(a(*:A)(d|e(*:B))z|aeq)/auto_callout adz --->adz +0 ^ ^ - +1 ^ (a(*:A)(d|e(*:B))z|aeq) + +1 ^ ( +2 ^ a +3 ^^ (*:A) - +8 ^^ (d|e(*:B)) + +8 ^^ ( Latest Mark: A +9 ^^ d +10 ^ ^ | @@ -11110,10 +10980,10 @@ Latest Mark: A aez --->aez +0 ^ ^ - +1 ^ (a(*:A)(d|e(*:B))z|aeq) + +1 ^ ( +2 ^ a +3 ^^ (*:A) - +8 ^^ (d|e(*:B)) + +8 ^^ ( Latest Mark: A +9 ^^ d +11 ^^ e @@ -11129,10 +10999,10 @@ Latest Mark: B aeqwerty --->aeqwerty +0 ^ ^ - +1 ^ (a(*:A)(d|e(*:B))z|aeq) + +1 ^ ( +2 ^ a +3 ^^ (*:A) - +8 ^^ (d|e(*:B)) + +8 ^^ ( Latest Mark: A +9 ^^ d +11 ^^ e @@ -11149,6 +11019,7 @@ Latest Mark: B 1: aeq /.(*F)/ +\= Expect no match abc\=ph No match @@ -11181,14 +11052,15 @@ Subject length lower bound = 0 0: x 'a*(*ACCEPT)b'aftertext - \=notempty_atstart -No match abc\=notempty_atstart 0: a 0+ bc bbb\=notempty_atstart 0: 0+ bb +\= Expect no match + \=notempty_atstart +No match /(*ACCEPT)a/I,aftertext Capturing subpattern count = 0 @@ -11224,8 +11096,7 @@ Subject length lower bound = 0 aaaazzzzb 0: aaaazzzzb 1: zzzz - ** Failers -No match +\= Expect no match aazz No match @@ -11241,6 +11112,7 @@ No match 1: c /(.)((?(1)c|a)|a(?2))/ +\= Expect no match baa No match @@ -11274,8 +11146,7 @@ No match /^a\x41z/alt_bsux,allow_empty_class,match_unset_backref,dupnames aAz 0: aAz - *** Failers -No match +\= Expect no match ax41z No match @@ -11290,8 +11161,7 @@ No match /^a\u0041z/alt_bsux,allow_empty_class,match_unset_backref,dupnames aAz 0: aAz - *** Failers -No match +\= Expect no match au0041z No match @@ -11302,16 +11172,14 @@ No match /^a\u041z/alt_bsux,allow_empty_class,match_unset_backref,dupnames au041z 0: au041z - *** Failers -No match +\= Expect no match aAz No match /^a\U0041z/alt_bsux,allow_empty_class,match_unset_backref,dupnames aU0041z 0: aU0041z - *** Failers -No match +\= Expect no match aAz No match @@ -11353,14 +11221,14 
@@ No match ------------------------------------------------------------------ /a[\NB]c/ -Failed: error 171 at offset 3: \N is not supported in a class +Failed: error 171 at offset 4: \N is not supported in a class aNc /a[B-\Nc]/ -Failed: error 150 at offset 5: invalid range in character class +Failed: error 150 at offset 6: invalid range in character class /a[B\Nc]/ -Failed: error 171 at offset 4: \N is not supported in a class +Failed: error 171 at offset 5: \N is not supported in a class /(a)(?2){0,1999}?(b)/ @@ -11384,8 +11252,7 @@ MK: A 4: X 5: Y MK: B - ** Failers -No match +\= Expect no match XAQQ No match, mark = A XAQQXZZ @@ -11402,8 +11269,7 @@ No match, mark = B aw 0: aw MK: n - ** Failers -No match, mark = n +\= Expect no match abc No match, mark = m @@ -11411,8 +11277,7 @@ No match, mark = m aw 0: aw MK: n - ** Failers -No match, mark = n +\= Expect no match abc No match, mark = m @@ -11868,11 +11733,11 @@ Callout 2: last capture = 2 1: 2: a --->aab - ^^ ) + ^^ ){0} Callout 1: last capture = 0 0: --->aab - ^^ ((a)(?C2)){0} + ^^ ( 0: a /(?:(a)+(?C1)bb|aa(?C2)b)++/ @@ -11931,8 +11796,8 @@ Partial match: 123a bb --->bb +0 ^ ^ - +1 ^ (?(?=a)aa|bb) - +3 ^ (?=a) + +1 ^ (? + +3 ^ (?= +6 ^ a +11 ^ b +12 ^^ b @@ -11944,8 +11809,8 @@ Partial match: 123a bb --->bb 1 ^ ^ - 2 ^ (?(?C99)(?=(?C3)a(?C4))(?C5)a(?C6)a(?C7)|(?C8)b(?C9)b(?C10)) - 99 ^ (?=(?C3)a(?C4)) + 2 ^ (? + 99 ^ (?= 3 ^ a 8 ^ b 9 ^^ b @@ -11969,10 +11834,11 @@ Partial match: 123a 0: ad /^(?!a(*THEN)b|ac)../ - ac -No match ad 0: ad +\= Expect no match + ac +No match /^(?=a(*THEN)b|ac)/ ac @@ -12009,6 +11875,7 @@ No match 0: bn /(?(?=b(*SKIP)a)bn|bnn)/ +\= Expect no match bnn No match @@ -12073,9 +11940,10 @@ Subject length lower bound = 5 End ------------------------------------------------------------------ -# A complete set of tests for auto-possessification of character types. 
+# A complete set of tests for auto-possessification of character types, but +# omitting \C because it might be disabled (it has its own tests). -/\D+\D \D+\d \D+\S \D+\s \D+\W \D+\w \D+. \D+\C \D+\R \D+\H \D+\h \D+\V \D+\v \D+\Z \D+\z \D+$/Bx +/\D+\D \D+\d \D+\S \D+\s \D+\W \D+\w \D+. \D+\R \D+\H \D+\h \D+\V \D+\v \D+\Z \D+\z \D+$/Bx ------------------------------------------------------------------ Bra \D+ @@ -12093,8 +11961,6 @@ Subject length lower bound = 5 \D+ Any \D+ - AllAny - \D+ \R \D+ \H @@ -12114,7 +11980,7 @@ Subject length lower bound = 5 End ------------------------------------------------------------------ -/\d+\D \d+\d \d+\S \d+\s \d+\W \d+\w \d+. \d+\C \d+\R \d+\H \d+\h \d+\V \d+\v \d+\Z \d+\z \d+$/Bx +/\d+\D \d+\d \d+\S \d+\s \d+\W \d+\w \d+. \d+\R \d+\H \d+\h \d+\V \d+\v \d+\Z \d+\z \d+$/Bx ------------------------------------------------------------------ Bra \d++ @@ -12131,8 +11997,6 @@ Subject length lower bound = 5 \w \d+ Any - \d+ - AllAny \d++ \R \d+ @@ -12153,7 +12017,7 @@ Subject length lower bound = 5 End ------------------------------------------------------------------ -/\S+\D \S+\d \S+\S \S+\s \S+\W \S+\w \S+. \S+\C \S+\R \S+\H \S+\h \S+\V \S+\v \S+\Z \S+\z \S+$/Bx +/\S+\D \S+\d \S+\S \S+\s \S+\W \S+\w \S+. \S+\R \S+\H \S+\h \S+\V \S+\v \S+\Z \S+\z \S+$/Bx ------------------------------------------------------------------ Bra \S+ @@ -12170,8 +12034,6 @@ Subject length lower bound = 5 \w \S+ Any - \S+ - AllAny \S++ \R \S+ @@ -12192,7 +12054,7 @@ Subject length lower bound = 5 End ------------------------------------------------------------------ -/\s+\D \s+\d \s+\S \s+\s \s+\W \s+\w \s+. \s+\C \s+\R \s+\H \s+\h \s+\V \s+\v \s+\Z \s+\z \s+$/Bx +/\s+\D \s+\d \s+\S \s+\s \s+\W \s+\w \s+. 
\s+\R \s+\H \s+\h \s+\V \s+\v \s+\Z \s+\z \s+$/Bx ------------------------------------------------------------------ Bra \s+ @@ -12210,8 +12072,6 @@ Subject length lower bound = 5 \s+ Any \s+ - AllAny - \s+ \R \s+ \H @@ -12231,7 +12091,7 @@ Subject length lower bound = 5 End ------------------------------------------------------------------ -/\W+\D \W+\d \W+\S \W+\s \W+\W \W+\w \W+. \W+\C \W+\R \W+\H \W+\h \W+\V \W+\v \W+\Z \W+\z \W+$/Bx +/\W+\D \W+\d \W+\S \W+\s \W+\W \W+\w \W+. \W+\R \W+\H \W+\h \W+\V \W+\v \W+\Z \W+\z \W+$/Bx ------------------------------------------------------------------ Bra \W+ @@ -12249,8 +12109,6 @@ Subject length lower bound = 5 \W+ Any \W+ - AllAny - \W+ \R \W+ \H @@ -12270,7 +12128,7 @@ Subject length lower bound = 5 End ------------------------------------------------------------------ -/\w+\D \w+\d \w+\S \w+\s \w+\W \w+\w \w+. \w+\C \w+\R \w+\H \w+\h \w+\V \w+\v \w+\Z \w+\z \w+$/Bx +/\w+\D \w+\d \w+\S \w+\s \w+\W \w+\w \w+. \w+\R \w+\H \w+\h \w+\V \w+\v \w+\Z \w+\z \w+$/Bx ------------------------------------------------------------------ Bra \w+ @@ -12287,8 +12145,6 @@ Subject length lower bound = 5 \w \w+ Any - \w+ - AllAny \w++ \R \w+ @@ -12309,7 +12165,303 @@ Subject length lower bound = 5 End ------------------------------------------------------------------ -/\C+\D \C+\d \C+\S \C+\s \C+\W \C+\w \C+. \C+\C \C+\R \C+\H \C+\h \C+\V \C+\v \C+\Z \C+\z \C+$/Bx +/\R+\D \R+\d \R+\S \R+\s \R+\W \R+\w \R+. \R+\R \R+\H \R+\h \R+\V \R+\v \R+\Z \R+\z \R+$/Bx +------------------------------------------------------------------ + Bra + \R+ + \D + \R++ + \d + \R+ + \S + \R++ + \s + \R+ + \W + \R++ + \w + \R++ + Any + \R+ + \R + \R+ + \H + \R++ + \h + \R+ + \V + \R+ + \v + \R+ + \Z + \R++ + \z + \R+ + $ + Ket + End +------------------------------------------------------------------ + +/\H+\D \H+\d \H+\S \H+\s \H+\W \H+\w \H+. 
\H+\R \H+\H \H+\h \H+\V \H+\v \H+\Z \H+\z \H+$/Bx +------------------------------------------------------------------ + Bra + \H+ + \D + \H+ + \d + \H+ + \S + \H+ + \s + \H+ + \W + \H+ + \w + \H+ + Any + \H+ + \R + \H+ + \H + \H++ + \h + \H+ + \V + \H+ + \v + \H+ + \Z + \H++ + \z + \H+ + $ + Ket + End +------------------------------------------------------------------ + +/\h+\D \h+\d \h+\S \h+\s \h+\W \h+\w \h+. \h+\R \h+\H \h+\h \h+\V \h+\v \h+\Z \h+\z \h+$/Bx +------------------------------------------------------------------ + Bra + \h+ + \D + \h++ + \d + \h++ + \S + \h+ + \s + \h+ + \W + \h++ + \w + \h+ + Any + \h++ + \R + \h++ + \H + \h+ + \h + \h+ + \V + \h++ + \v + \h+ + \Z + \h++ + \z + \h+ + $ + Ket + End +------------------------------------------------------------------ + +/\V+\D \V+\d \V+\S \V+\s \V+\W \V+\w \V+. \V+\R \V+\H \V+\h \V+\V \V+\v \V+\Z \V+\z \V+$/Bx +------------------------------------------------------------------ + Bra + \V+ + \D + \V+ + \d + \V+ + \S + \V+ + \s + \V+ + \W + \V+ + \w + \V+ + Any + \V++ + \R + \V+ + \H + \V+ + \h + \V+ + \V + \V++ + \v + \V+ + \Z + \V++ + \z + \V+ + $ + Ket + End +------------------------------------------------------------------ + +/\v+\D \v+\d \v+\S \v+\s \v+\W \v+\w \v+. \v+\R \v+\H \v+\h \v+\V \v+\v \v+\Z \v+\z \v+$/Bx +------------------------------------------------------------------ + Bra + \v+ + \D + \v++ + \d + \v++ + \S + \v+ + \s + \v+ + \W + \v++ + \w + \v+ + Any + \v+ + \R + \v+ + \H + \v++ + \h + \v++ + \V + \v+ + \v + \v+ + \Z + \v++ + \z + \v+ + $ + Ket + End +------------------------------------------------------------------ + +/ a+\D a+\d a+\S a+\s a+\W a+\w a+. 
a+\R a+\H a+\h a+\V a+\v a+\Z a+\z a+$/Bx +------------------------------------------------------------------ + Bra + a+ + \D + a++ + \d + a+ + \S + a++ + \s + a++ + \W + a+ + \w + a+ + Any + a++ + \R + a+ + \H + a++ + \h + a+ + \V + a++ + \v + a++ + \Z + a++ + \z + a++ + $ + Ket + End +------------------------------------------------------------------ + +/\n+\D \n+\d \n+\S \n+\s \n+\W \n+\w \n+. \n+\R \n+\H \n+\h \n+\V \n+\v \n+\Z \n+\z \n+$/Bx +------------------------------------------------------------------ + Bra + \x0a+ + \D + \x0a++ + \d + \x0a++ + \S + \x0a+ + \s + \x0a+ + \W + \x0a++ + \w + \x0a+ + Any + \x0a+ + \R + \x0a+ + \H + \x0a++ + \h + \x0a++ + \V + \x0a+ + \v + \x0a+ + \Z + \x0a++ + \z + \x0a+ + $ + Ket + End +------------------------------------------------------------------ + +/ .+\D .+\d .+\S .+\s .+\W .+\w .+. .+\R .+\H .+\h .+\V .+\v .+\Z .+\z .+$/Bx +------------------------------------------------------------------ + Bra + Any+ + \D + Any+ + \d + Any+ + \S + Any+ + \s + Any+ + \W + Any+ + \w + Any+ + Any + Any++ + \R + Any+ + \H + Any+ + \h + Any+ + \V + Any+ + \v + Any+ + \Z + Any++ + \z + Any+ + $ + Ket + End +------------------------------------------------------------------ + +/ .+\D .+\d .+\S .+\s .+\W .+\w .+. .+\R .+\H .+\h .+\V .+\v .+\Z .+\z .+$/Bsx ------------------------------------------------------------------ Bra AllAny+ @@ -12325,8 +12477,6 @@ Subject length lower bound = 5 AllAny+ \w AllAny+ - Any - AllAny+ AllAny AllAny+ \R @@ -12348,358 +12498,7 @@ Subject length lower bound = 5 End ------------------------------------------------------------------ -/\R+\D \R+\d \R+\S \R+\s \R+\W \R+\w \R+. 
\R+\C \R+\R \R+\H \R+\h \R+\V \R+\v \R+\Z \R+\z \R+$/Bx ------------------------------------------------------------------- - Bra - \R+ - \D - \R++ - \d - \R+ - \S - \R++ - \s - \R+ - \W - \R++ - \w - \R++ - Any - \R+ - AllAny - \R+ - \R - \R+ - \H - \R++ - \h - \R+ - \V - \R+ - \v - \R+ - \Z - \R++ - \z - \R+ - $ - Ket - End ------------------------------------------------------------------- - -/\H+\D \H+\d \H+\S \H+\s \H+\W \H+\w \H+. \H+\C \H+\R \H+\H \H+\h \H+\V \H+\v \H+\Z \H+\z \H+$/Bx ------------------------------------------------------------------- - Bra - \H+ - \D - \H+ - \d - \H+ - \S - \H+ - \s - \H+ - \W - \H+ - \w - \H+ - Any - \H+ - AllAny - \H+ - \R - \H+ - \H - \H++ - \h - \H+ - \V - \H+ - \v - \H+ - \Z - \H++ - \z - \H+ - $ - Ket - End ------------------------------------------------------------------- - -/\h+\D \h+\d \h+\S \h+\s \h+\W \h+\w \h+. \h+\C \h+\R \h+\H \h+\h \h+\V \h+\v \h+\Z \h+\z \h+$/Bx ------------------------------------------------------------------- - Bra - \h+ - \D - \h++ - \d - \h++ - \S - \h+ - \s - \h+ - \W - \h++ - \w - \h+ - Any - \h+ - AllAny - \h++ - \R - \h++ - \H - \h+ - \h - \h+ - \V - \h++ - \v - \h+ - \Z - \h++ - \z - \h+ - $ - Ket - End ------------------------------------------------------------------- - -/\V+\D \V+\d \V+\S \V+\s \V+\W \V+\w \V+. \V+\C \V+\R \V+\H \V+\h \V+\V \V+\v \V+\Z \V+\z \V+$/Bx ------------------------------------------------------------------- - Bra - \V+ - \D - \V+ - \d - \V+ - \S - \V+ - \s - \V+ - \W - \V+ - \w - \V+ - Any - \V+ - AllAny - \V++ - \R - \V+ - \H - \V+ - \h - \V+ - \V - \V++ - \v - \V+ - \Z - \V++ - \z - \V+ - $ - Ket - End ------------------------------------------------------------------- - -/\v+\D \v+\d \v+\S \v+\s \v+\W \v+\w \v+. 
\v+\C \v+\R \v+\H \v+\h \v+\V \v+\v \v+\Z \v+\z \v+$/Bx ------------------------------------------------------------------- - Bra - \v+ - \D - \v++ - \d - \v++ - \S - \v+ - \s - \v+ - \W - \v++ - \w - \v+ - Any - \v+ - AllAny - \v+ - \R - \v+ - \H - \v++ - \h - \v++ - \V - \v+ - \v - \v+ - \Z - \v++ - \z - \v+ - $ - Ket - End ------------------------------------------------------------------- - -/ a+\D a+\d a+\S a+\s a+\W a+\w a+. a+\C a+\R a+\H a+\h a+\V a+\v a+\Z a+\z a+$/Bx ------------------------------------------------------------------- - Bra - a+ - \D - a++ - \d - a+ - \S - a++ - \s - a++ - \W - a+ - \w - a+ - Any - a+ - AllAny - a++ - \R - a+ - \H - a++ - \h - a+ - \V - a++ - \v - a++ - \Z - a++ - \z - a++ - $ - Ket - End ------------------------------------------------------------------- - -/\n+\D \n+\d \n+\S \n+\s \n+\W \n+\w \n+. \n+\C \n+\R \n+\H \n+\h \n+\V \n+\v \n+\Z \n+\z \n+$/Bx ------------------------------------------------------------------- - Bra - \x0a+ - \D - \x0a++ - \d - \x0a++ - \S - \x0a+ - \s - \x0a+ - \W - \x0a++ - \w - \x0a+ - Any - \x0a+ - AllAny - \x0a+ - \R - \x0a+ - \H - \x0a++ - \h - \x0a++ - \V - \x0a+ - \v - \x0a+ - \Z - \x0a++ - \z - \x0a+ - $ - Ket - End ------------------------------------------------------------------- - -/ .+\D .+\d .+\S .+\s .+\W .+\w .+. .+\C .+\R .+\H .+\h .+\V .+\v .+\Z .+\z .+$/Bx ------------------------------------------------------------------- - Bra - Any+ - \D - Any+ - \d - Any+ - \S - Any+ - \s - Any+ - \W - Any+ - \w - Any+ - Any - Any+ - AllAny - Any++ - \R - Any+ - \H - Any+ - \h - Any+ - \V - Any+ - \v - Any+ - \Z - Any++ - \z - Any+ - $ - Ket - End ------------------------------------------------------------------- - -/ .+\D .+\d .+\S .+\s .+\W .+\w .+. 
.+\C .+\R .+\H .+\h .+\V .+\v .+\Z .+\z .+$/Bsx ------------------------------------------------------------------- - Bra - AllAny+ - \D - AllAny+ - \d - AllAny+ - \S - AllAny+ - \s - AllAny+ - \W - AllAny+ - \w - AllAny+ - AllAny - AllAny+ - AllAny - AllAny+ - \R - AllAny+ - \H - AllAny+ - \h - AllAny+ - \V - AllAny+ - \v - AllAny+ - \Z - AllAny++ - \z - AllAny+ - $ - Ket - End ------------------------------------------------------------------- - -/\D+$ \d+$ \S+$ \s+$ \W+$ \w+$ \C+$ \R+$ \H+$ \h+$ \V+$ \v+$ a+$ \n+$ .+$ .+$/Bmx +/ \D+$ \d+$ \S+$ \s+$ \W+$ \w+$ \R+$ \H+$ \h+$ \V+$ \v+$ a+$ \n+$ .+$ .+$/Bmx ------------------------------------------------------------------ Bra \D+ @@ -12713,8 +12512,6 @@ Subject length lower bound = 5 \W+ /m $ \w++ - /m $ - AllAny+ /m $ \R+ /m $ @@ -13224,7 +13021,7 @@ Failed: error 164 at offset 8: non-octal character in \o{} (closing brace missin A\123B /^A\oB/ -Failed: error 155 at offset 3: missing opening brace after \o +Failed: error 155 at offset 4: missing opening brace after \o /^A\x{zz}B/ Failed: error 167 at offset 5: non-hex character in \x{} (closing brace missing?) @@ -13233,7 +13030,7 @@ Failed: error 167 at offset 5: non-hex character in \x{} (closing brace missing? Failed: error 167 at offset 7: non-hex character in \x{} (closing brace missing?) /^A\x{/ -Failed: error 167 at offset 5: non-hex character in \x{} (closing brace missing?) +Failed: error 178 at offset 5: digits missing in \x{} or \o{} /[ab]++/B,no_auto_possess ------------------------------------------------------------------ @@ -13268,16 +13065,16 @@ Failed: error 167 at offset 5: non-hex character in \x{} (closing brace missing? 
------------------------------------------------------------------ /[a-[:digit:]]+/ -Failed: error 150 at offset 3: invalid range in character class +Failed: error 150 at offset 4: invalid range in character class /[A-[:digit:]]+/ -Failed: error 150 at offset 3: invalid range in character class +Failed: error 150 at offset 4: invalid range in character class /[a-[.xxx.]]+/ -Failed: error 150 at offset 3: invalid range in character class +Failed: error 150 at offset 4: invalid range in character class /[a-[=xxx=]]+/ -Failed: error 150 at offset 3: invalid range in character class +Failed: error 150 at offset 4: invalid range in character class /[a-[!xxx!]]+/ Failed: error 108 at offset 3: range out of order in character class @@ -13287,13 +13084,13 @@ Failed: error 108 at offset 3: range out of order in character class 0: A]]] /[a-\d]+/ -Failed: error 150 at offset 4: invalid range in character class +Failed: error 150 at offset 5: invalid range in character class /(?<0abc>xx)/ Failed: error 144 at offset 3: group name must start with a non-digit /(?&1abc)xx(?<1abc>y)/ -Failed: error 144 at offset 13: group name must start with a non-digit +Failed: error 144 at offset 3: group name must start with a non-digit /(?xx)/ Failed: error 142 at offset 5: syntax error in subpattern name (missing terminator) @@ -13317,19 +13114,19 @@ Failed: error 144 at offset 3: group name must start with a non-digit Failed: error 144 at offset 4: group name must start with a non-digit /\g{4df}/ -Failed: error 144 at offset 3: group name must start with a non-digit +Failed: error 157 at offset 2: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number /(?&1abc)xx(?<1abc>y)/ -Failed: error 144 at offset 13: group name must start with a non-digit +Failed: error 144 at offset 3: group name must start with a non-digit /(?P>1abc)xx(?<1abc>y)/ -Failed: error 144 at offset 14: group name must start with a non-digit +Failed: error 144 at offset 4: group name 
must start with a non-digit /\g'3gh'/ -Failed: error 157 at offset 7: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number +Failed: error 157 at offset 2: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number /\g<5fg>/ -Failed: error 157 at offset 7: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number +Failed: error 157 at offset 2: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number /(?(<4gh>)abc)/ Failed: error 144 at offset 4: group name must start with a non-digit @@ -13338,7 +13135,7 @@ Failed: error 144 at offset 4: group name must start with a non-digit Failed: error 144 at offset 4: group name must start with a non-digit /(?(4gh)abc)/ -Failed: error 126 at offset 4: malformed number or name after (?( +Failed: error 124 at offset 4: missing closing parenthesis for condition /(?(R&6yh)abc)/ Failed: error 144 at offset 5: group name must start with a non-digit @@ -13393,8 +13190,7 @@ Failed: error 144 at offset 5: group name must start with a non-digit 0: red put it all on red 0: red - ** Failers -No match +\= Expect no match no reduction No match Alfred Winifred @@ -13410,16 +13206,17 @@ Start of matched string is beyond its end - displaying from end to start. 0+ abcd /abcd/newline=lf,firstline +\= Expect no match xx\nxabcd No match # Test stack guard external calls. 
/(((a)))/stackguard=1 -Failed: error 133 at offset 2: parentheses are too deeply nested (stack check) +Failed: error 133 at offset 7: parentheses are too deeply nested (stack check) /(((a)))/stackguard=2 -Failed: error 133 at offset 3: parentheses are too deeply nested (stack check) +Failed: error 133 at offset 7: parentheses are too deeply nested (stack check) /(((a)))/stackguard=3 @@ -13444,10 +13241,10 @@ Failed: error 133 at offset 3: parentheses are too deeply nested (stack check) ------------------------------------------------------------------ /\othing/ -Failed: error 155 at offset 1: missing opening brace after \o +Failed: error 155 at offset 2: missing opening brace after \o /\o{}/ -Failed: error 178 at offset 1: digits missing in \x{} or \o{} +Failed: error 178 at offset 3: digits missing in \x{} or \o{} /\o{whatever}/ Failed: error 164 at offset 3: non-octal character in \o{} (closing brace missing?) @@ -13461,10 +13258,10 @@ Failed: error 178 at offset 3: digits missing in \x{} or \o{} Failed: error 167 at offset 3: non-hex character in \x{} (closing brace missing?) /A\8B/ -Failed: error 115 at offset 4: reference to non-existent subpattern +Failed: error 115 at offset 2: reference to non-existent subpattern /A\9B/ -Failed: error 115 at offset 4: reference to non-existent subpattern +Failed: error 115 at offset 2: reference to non-existent subpattern # This one is here because Perl fails to match "12" for this pattern when the $ # is present. 
@@ -13474,8 +13271,7 @@ Failed: error 115 at offset 4: reference to non-existent subpattern 0: abc: 12 0: 12 - *** Failers -No match +\= Expect no match 123 No match xyz @@ -13551,8 +13347,7 @@ Capturing subpattern count = 0 Subject length lower bound = 6 yesnononoyes 0: nonono - ** Failers -No match +\= Expect no match yesno No match @@ -13568,21 +13363,23 @@ Subject length lower bound = 5 1: abc xyzno 0: xyzno - ** Failers -No match +\= Expect no match abcno No match xyzyes No match /(?(VERSION<10)yes|no)/ -Failed: error 179 at offset 10: syntax error in (?(VERSION condition +Failed: error 179 at offset 10: syntax error or number too big in (?(VERSION condition /(?(VERSION>10)yes|no)/ -Failed: error 179 at offset 11: syntax error in (?(VERSION condition +Failed: error 179 at offset 11: syntax error or number too big in (?(VERSION condition /(?(VERSION>=10.0.0)yes|no)/ -Failed: error 179 at offset 16: syntax error in (?(VERSION condition +Failed: error 179 at offset 16: syntax error or number too big in (?(VERSION condition + +/(?(VERSION=10.101)yes|no)/ +Failed: error 179 at offset 17: syntax error or number too big in (?(VERSION condition /abcd/I Capturing subpattern count = 0 @@ -13626,8 +13423,6 @@ Subject length lower bound = 0 /(((((a)))))/parens_nest_limit=2 Failed: error 119 at offset 3: parentheses are too deeply nested -# Tests for pcre2_substitute() - /abc/replace=XYZ 123123 0: 123123 @@ -13674,27 +13469,27 @@ Failed: error 119 at offset 3: parentheses are too deeply nested /abc/replace=a$++ 123abc -Failed: error -35: invalid replacement string +Failed: error -35 at offset 2 in replacement: invalid replacement string /abc/replace=a$bad 123abc -Failed: error -49: unknown substring +Failed: error -49 at offset 5 in replacement: unknown substring /abc/replace=a${A234567890123456789_123456789012}z 123abc -Failed: error -49: unknown substring +Failed: error -49 at offset 36 in replacement: unknown substring /abc/replace=a${A23456789012345678901234567890123}z 
123abc -Failed: error -35: invalid replacement string +Failed: error -35 at offset 35 in replacement: invalid replacement string /abc/replace=a${bcd 123abc -Failed: error -35: invalid replacement string +Failed: error -58 at offset 6 in replacement: expected closing curly bracket in replacement string /abc/replace=a${b+d}z 123abc -Failed: error -35: invalid replacement string +Failed: error -58 at offset 4 in replacement: expected closing curly bracket in replacement string /abc/replace=[10]XYZ 123abc123 @@ -13737,8 +13532,76 @@ Failed: error -34: bad option value /(.)(.)/g,replace=$2$1 abcdefgh 4: badcfehg + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=${*MARK} + apple lemon blackberry + 3: pear orange strawberry + apple strudel + 1: pear strudel + fruitless + 0: fruitless -# End of substitute tests +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/replace=${*MARK} sauce, + apple lemon blackberry + 1: pear sauce lemon blackberry + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARK> + apple lemon blackberry + 3: + apple strudel + 1: strudel + fruitless + 0: fruitless + +/(*:pear)apple/g,replace=${*MARKING} + apple lemon blackberry +Failed: error -35 at offset 11 in replacement: invalid replacement string + +/(*:pear)apple/g,replace=${*MARK-time + apple lemon blackberry +Failed: error -58 at offset 7 in replacement: expected closing curly bracket in replacement string + +/(*:pear)apple/g,replace=${*mark} + apple lemon blackberry +Failed: error -35 at offset 8 in replacement: invalid replacement string + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARKET> + apple lemon blackberry +Failed: error -35 at offset 9 in replacement: invalid replacement string + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[22]${*MARK} + apple lemon blackberry +Failed: error -48: no more memory + apple lemon blackberry\=substitute_overflow_length +Failed: error -48: no more memory: 23 code 
units are needed + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[23]${*MARK} + apple lemon blackberry + 3: pear orange strawberry + +/abc/ + 123abc123\=replace=[9]XYZ +Failed: error -48: no more memory + 123abc123\=substitute_overflow_length,replace=[9]XYZ +Failed: error -48: no more memory: 10 code units are needed + 123abc123\=substitute_overflow_length,replace=[6]XYZ +Failed: error -48: no more memory: 10 code units are needed + 123abc123\=substitute_overflow_length,replace=[1]XYZ +Failed: error -48: no more memory: 10 code units are needed + 123abc123\=substitute_overflow_length,replace=[0]XYZ +Failed: error -48: no more memory: 10 code units are needed + +/a(b)c/ + 123abc123\=replace=[9]x$1z +Failed: error -48: no more memory + 123abc123\=substitute_overflow_length,replace=[9]x$1z +Failed: error -48: no more memory: 10 code units are needed + 123abc123\=substitute_overflow_length,replace=[6]x$1z +Failed: error -48: no more memory: 10 code units are needed + 123abc123\=substitute_overflow_length,replace=[1]x$1z +Failed: error -48: no more memory: 10 code units are needed + 123abc123\=substitute_overflow_length,replace=[0]x$1z +Failed: error -48: no more memory: 10 code units are needed "((?=(?(?=(?(?=(?(?=()))))))))" a @@ -13747,6 +13610,7 @@ Failed: error -34: bad option value 2: "(?(?=)==)(((((((((?=)))))))))" +\= Expect no match a No match @@ -13821,6 +13685,7 @@ Capturing subpattern count = 0 Options: auto_callout First code unit at start or follows newline Subject length lower bound = 1 +\= Expect no match aaa --->aaa +0 ^ .* @@ -13834,6 +13699,7 @@ No match Capturing subpattern count = 0 Options: auto_callout no_dotstar_anchor Subject length lower bound = 1 +\= Expect no match aaa --->aaa +0 ^ .* @@ -13864,7 +13730,7 @@ Subject length lower bound = 1 /(*NO_DOTSTAR_ANCHOR)(?s).*\d/info Capturing subpattern count = 0 Compile options: -Overall options: dotall no_dotstar_anchor +Overall options: no_dotstar_anchor Subject length lower 
bound = 1 '^(?:(a)|b)(?(1)A|B)' @@ -13928,8 +13794,7 @@ Failed: error 109 at offset 7: quantifier does not follow a repeatable item 0: 00765 456 0: 456 - ** Failers -No match +\= Expect no match 356 No match @@ -14088,15 +13953,15 @@ Callout {a}b} Bra Bra a - CalloutStr `code` 8 14 0 + CalloutStr `code` 8 14 4 Ket Bra a - CalloutStr `code` 8 14 0 + CalloutStr `code` 8 14 4 Ket Bra a - CalloutStr `code` 8 14 0 + CalloutStr `code` 8 14 4 Ket Ket End @@ -14107,7 +13972,7 @@ Callout {a}b} Bra ^ Cond - Callout 25 9 7 + Callout 25 9 3 Assert abc Ket @@ -14118,14 +13983,14 @@ Callout {a}b} Ket End ------------------------------------------------------------------ -Callout 25 (?=abc) +Callout 25 (?= abcdefg --->abcdefg - 25 ^ (?=abc) + 25 ^ (?= 0: abcd xyz123 --->xyz123 - 25 ^ (?=abc) + 25 ^ (?= 0: xyz /^(?(?C$abc$)(?=abc)abcd|xyz)/B @@ -14133,7 +13998,7 @@ Callout 25 (?=abc) Bra ^ Cond - CalloutStr $abc$ 7 12 7 + CalloutStr $abc$ 7 12 3 Assert abc Ket @@ -14147,12 +14012,12 @@ Callout 25 (?=abc) abcdefg Callout (7): $abc$ --->abcdefg - ^ (?=abc) + ^ (?= 0: abcd xyz123 Callout (7): $abc$ --->xyz123 - ^ (?=abc) + ^ (?= 0: xyz /^ab(?C'first')cd(?C"second")ef/ @@ -14169,13 +14034,13 @@ Callout (20): "second" aaaXY Callout (8): `code` --->aaaXY - ^^ ) + ^^ ){3} Callout (8): `code` --->aaaXY - ^ ^ ) + ^ ^ ){3} Callout (8): `code` --->aaaXY - ^ ^ ) + ^ ^ ){3} 0: aaaX # Binary zero in callout string @@ -14193,8 +14058,7 @@ Callout (5): 'x\x00z' /(?(?!)a|b)/ bbb 0: b - ** Failers -No match +\= Expect no match aaa No match @@ -14207,7 +14071,7 @@ Failed: error -52: nested recursion at the same subject position # Perl fails to diagnose the absence of an assertion "(?(?.*!.*)?)" -Failed: error 128 at offset 3: assertion expected after (?( or (?(?C) +Failed: error 128 at offset 2: assertion expected after (?( or (?(?C) "X((?2)()*+){2}+"B ------------------------------------------------------------------ @@ -14258,7 +14122,7 @@ Failed: error 115 at offset 15: reference to non-existent 
subpattern Failed: error 115 at offset 15: reference to non-existent subpattern ";(?<=()((?3))((?2)))" -Failed: error 125 at offset 20: lookbehind assertion is not fixed length +Failed: error 125 at offset 1: lookbehind assertion is not fixed length # Perl loops on this (PCRE2 used to!) @@ -14293,10 +14157,8 @@ Capturing subpattern count = 2 May match empty string Subject length lower bound = 0 -/ab\Cde/never_backslash_c -Failed: error 183 at offset 3: using \C is disabled by the application - /abc/ +\= Expect no match \[9x!xxx(]{9999} No match @@ -14319,7 +14181,7 @@ No match 0: /((((((((x))))))))\81/ -Failed: error 115 at offset 20: reference to non-existent subpattern +Failed: error 115 at offset 19: reference to non-existent subpattern xx1 /((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))\80/ @@ -14342,10 +14204,10 @@ Matched, but too many substrings 14: x /\80/ -Failed: error 115 at offset 3: reference to non-existent subpattern +Failed: error 115 at offset 2: reference to non-existent subpattern /A\8B\9C/ -Failed: error 115 at offset 7: reference to non-existent subpattern +Failed: error 115 at offset 2: reference to non-existent subpattern A8B9C /(?x:((?'a')) # comment (with parentheses) and | vertical @@ -14428,10 +14290,10 @@ Subject length lower bound = 1 ------------------------------------------------------------------ /(\9*+(?2);\3++()2|)++{/ -Failed: error 115 at offset 22: reference to non-existent subpattern +Failed: error 115 at offset 2: reference to non-existent subpattern /\V\x85\9*+((?2)\3++()2)*:2/ -Failed: error 115 at offset 26: reference to non-existent subpattern +Failed: error 115 at offset 7: reference to non-existent subpattern /(((?(R)){0,2}) (?'x'((?'R')((?'R')))))/dupnames @@ -14442,7 +14304,7 @@ Failed: error 115 at offset 26: reference to non-existent subpattern "(?J)(?'d'(?'d'\g{d}))" 
"(?=!((?2)(?))({8(?<=(?1){29}8bbbb\x16\xd\xc6^($(\xa9H4){4}h}?1)B))\x15')" -Failed: error 125 at offset 72: lookbehind assertion is not fixed length +Failed: error 125 at offset 16: lookbehind assertion is not fixed length /A(?'')Z/ Failed: error 162 at offset 4: subpattern name expected @@ -14450,7 +14312,7 @@ Failed: error 162 at offset 4: subpattern name expected "(?J:(?|(?'R')(\k'R')|((?'R'))))" /(?<=|(\,\$(?73591620449005828816)\xa8.{7}){6}\x09)/ -Failed: error 161 at offset 32: number is too big +Failed: error 161 at offset 17: group number is too big /^(?:(?(1)x|)+)+$()/B ------------------------------------------------------------------ @@ -14470,4 +14332,1161 @@ Failed: error 161 at offset 32: number is too big End ------------------------------------------------------------------ +/[[:>:]](?<)/ +Failed: error 162 at offset 10: subpattern name expected + +/((?x)(*:0))#(?'/ +Failed: error 162 at offset 15: subpattern name expected + +/(?C$[$)(?<]/ +Failed: error 162 at offset 10: subpattern name expected + +/(?C$)$)(?<]/ +Failed: error 162 at offset 10: subpattern name expected + +/(?(R))*+/B +------------------------------------------------------------------ + Bra + Braposzero + SBraPos + SCond + Cond recurse any + Ket + KetRpos + Ket + End +------------------------------------------------------------------ + abcd + 0: + +/((?x)(?#))#(?'/ +Failed: error 162 at offset 14: subpattern name expected + +/((?x)(?#))#(?'abc')/I +Capturing subpattern count = 2 +Named capturing subpatterns: + abc 2 +First code unit = '#' +Subject length lower bound = 1 + +/[[:\\](?<[::]/ +Failed: error 162 at offset 9: subpattern name expected + +/[[:\\](?'abc')[a:]/I +Capturing subpattern count = 1 +Named capturing subpatterns: + abc 1 +Starting code units: : [ \ +Subject length lower bound = 2 + 
+"[[[.\xe8Nq\xffq\xff\xe0\x2|||::Nq\xffq\xff\xe0\x6\x2|||::[[[:[::::::[[[[[::::::::[:[[[:[:::[[[[[[[[[[[[:::::::::::::::::[[.\xe8Nq\xffq\xff\xe0\x2|||::Nq\xffq\xff\xe0\x6\x2|||::[[[:[::::::[[[[[::::::::[:[[[:[:::[[[[[[[[[[[[[[:::E[[[:[:[[:[:::[[:::E[[[:[:[[:'[:::::E[[[:[::::::[[[:[[[[[[[::E[[[:[::::::[[[:[[[[[[[[:[[::[::::[[:::::::[[:[[[[[[[:[[::[:[[:[~" +Failed: error 106 at offset 353: missing terminating ] for character class + +/()(?(R)0)*+/B +------------------------------------------------------------------ + Bra + CBra 1 + Ket + Braposzero + SBraPos + SCond + Cond recurse any + 0 + Ket + KetRpos + Ket + End +------------------------------------------------------------------ + +/(?R-:(?${1:+\Q$1:{}$$\E+\U$1}< + a + 1: >$1:{}$$+A< + +/X(b)Y/substitute_extended + XbY\=replace=x${1:+$1\U$1}y + 1: xbBY + XbY\=replace=\Ux${1:+$1$1}y + 1: XBBY + +/a/substitute_extended,replace=${*MARK:+a:b} + a +Failed: error -58 at offset 7 in replacement: expected closing curly bracket in replacement string + +/(abcd)/replace=${1:+xy\kz},substitute_extended + abcd +Failed: error -57 at offset 8 in replacement: bad escape sequence in replacement string + +/(abcd)/ + abcd\=replace=${1:+xy\kz},substitute_extended +Failed: error -57 at offset 8 in replacement: bad escape sequence in replacement string + +/abcd/substitute_extended,replace=>$1< + abcd +Failed: error -49 at offset 3 in replacement: unknown substring + +/abcd/substitute_extended,replace=>xxx${xyz}<<< + abcd +Failed: error -49 at offset 10 in replacement: unknown substring + +/(?J)(?:(?a)|(?b))/replace=<$A> + [a] + 1: [] + [b] + 1: [] +\= Expect error + (a)\=ovector=1 +Failed: error -54 at offset 3 in replacement: requested value is not available + +/(a)|(b)/replace=<$1> +\= Expect error + b +Failed: error -55 at offset 3 in replacement: requested value is not set + +/(aa)(BB)/substitute_extended,replace=\U$1\L$2\E$1..\U$1\l$2$1 + aaBB + 1: AAbbaa..AAbBaa + +/^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I +Capturing 
subpattern count = 2 +Max back reference = 1 +Compile options: +Overall options: anchored +Last code unit = '}' +Subject length lower bound = 65535 + +/((p(?'K/ +Failed: error 142 at offset 7: syntax error in subpattern name (missing terminator) + +/((p(?'K/no_auto_capture +Failed: error 142 at offset 7: syntax error in subpattern name (missing terminator) + +/abc/replace=A$3123456789Z + abc +Failed: error -49 at offset 3 in replacement: unknown substring + +/(?a[bc]d + +0 ^ ( + +1 ^ )\Q\E* + +7 ^ ] + +8 ^^ + 0: ] + 1: + +/\x8a+f|;T?(*:;.'?`(\xeap ){![^()!y*''C*(?';]{1;(\x08)/B,alt_verbnames,dupnames,extended +------------------------------------------------------------------ + Bra + \x{8a}++ + f + Alt + ; + T? + *MARK ;.'?`(\x{ea}p + {! + [\x00- "-&+-:<->@-BD-xz-\xff] (neg) + {1; + CBra 1 + \x08 + Ket + Ket + End +------------------------------------------------------------------ + +# Tests for NULL characters in comments and verb "names" and callouts + +# /A#B\x00C\x0aZ/ +/41 23 42 00 43 0a 5a/Bx,hex +------------------------------------------------------------------ + Bra + AZ + Ket + End +------------------------------------------------------------------ + +# /A+#B\x00C\x0a+/ +/41 2b 23 42 00 43 0a 2b/Bx,hex +------------------------------------------------------------------ + Bra + A++ + Ket + End +------------------------------------------------------------------ + +# /A(*:B\x00W#X\00Y\x0aC)Z/ +/41 28 2a 3a 42 00 57 23 58 00 59 0a 43 29 5a/Bx,hex,alt_verbnames +------------------------------------------------------------------ + Bra + A + *MARK B\x{0}WC + Z + Ket + End +------------------------------------------------------------------ + +# /A(*:B\x00W#X\00Y\x0aC)Z/ +/41 28 2a 3a 42 00 57 23 58 00 59 0a 43 29 5a/Bx,hex +------------------------------------------------------------------ + Bra + A + *MARK B\x{0}W#X\x{0}Y\x{a}C + Z + Ket + End +------------------------------------------------------------------ + +# /A(?C{X\x00Y})B/ +/41 28 3f 43 7b 58 00 59 7d 
29 42/B,hex +------------------------------------------------------------------ + Bra + A + CalloutStr {X\x{0}Y} 5 10 1 + B + Ket + End +------------------------------------------------------------------ + +# /A(?#X\x00Y)B/ +/41 28 3f 23 7b 00 7d 29 42/B,hex +------------------------------------------------------------------ + Bra + AB + Ket + End +------------------------------------------------------------------ + +# Tests for leading comment in extended patterns + +/ (?-x):?/extended + +/ (?-x):?/extended + +/0b 28 3f 2d 78 29 3a/hex,extended + +/#comment +(?-x):?/extended + +/(8(*:6^\x09x\xa6l\)6!|\xd0:[^:|)\x09d\Z\d{85*m(?'(?<1!)*\W[*\xff]!!h\w]*\xbe;/alt_bsux,alt_verbnames,allow_empty_class,dollar_endonly,extended,multiline,never_utf,no_dotstar_anchor,no_start_optimize +Failed: error 162 at offset 49: subpattern name expected + +/a|(b)c/replace=>$1<,substitute_unset_empty + cat + 1: c>b$1< +Failed: error -55 at offset 3 in replacement: requested value is not set + cat\=replace=>$1<,substitute_unset_empty + 1: c>$1<,substitute_unset_empty + 1: x>b${2:-xx}< +Failed: error -49 at offset 9 in replacement: unknown substring + cat\=replace=>${2:-xx}<,substitute_unknown_unset + 1: c>xx${X:-xx}<,substitute_unknown_unset + 1: c>xx$X<,substitute_unset_empty + cat + 1: c>b$Y<,substitute_unset_empty + cat +Failed: error -49 at offset 3 in replacement: unknown substring + cat\=substitute_unknown_unset + 1: c>$2<,substitute_unset_empty + cat +Failed: error -49 at offset 3 in replacement: unknown substring + cat\=substitute_unknown_unset + 1: c>9010 + 0 ^ 0 + 0 ^ 0 + 0: + 1: 0 +\= Expect no match + abc +--->abc + 0 ^ 0 + 0 ^ 0 + 0 ^ 0 +No match + +/aaa/ +\[abc]{10000000000000000000000000000} +** Repeat count too large +\[a]{3} + 0: aaa + +/\[AB]{6000000000000000000000}/expand +** Pattern repeat count too large + +# Hex uses pattern length, not zero-terminated. This tests for overrunning +# the given length of a pattern. 
+ +/'(*U'/hex +Failed: error 160 at offset 3: (*VERB) not recognized or malformed + +/'(*'/hex +Failed: error 109 at offset 1: quantifier does not follow a repeatable item + +/'('/hex +Failed: error 114 at offset 1: missing closing parenthesis + +//hex + +# These tests are here because Perl never allows a back reference in a +# lookbehind. PCRE2 supports some limited cases. + +/([ab])...(?<=\1)z/ + a11az + 0: a11az + 1: a + b11bz + 0: b11bz + 1: b +\= Expect no match + b11az +No match + +/(?|([ab]))...(?<=\1)z/ +Failed: error 125 at offset 13: lookbehind assertion is not fixed length + +/([ab])(\1)...(?<=\2)z/ + aa11az + 0: aa11az + 1: a + 2: a + +/(a\2)(b\1)(?<=\2)/ +Failed: error 125 at offset 10: lookbehind assertion is not fixed length + +/(?[ab])...(?<=\k'A')z/ + a11az + 0: a11az + 1: a + b11bz + 0: b11bz + 1: b +\= Expect no match + b11az +No match + +/(?[ab])...(?<=\k'A')(?)z/dupnames +Failed: error 125 at offset 13: lookbehind assertion is not fixed length + +# Perl does not support \g+n + +/((\g+1X)?([ab]))+/ + aaXbbXa + 0: aaXbbXa + 1: bXa + 2: bX + 3: a + +/ab(?C1)c/auto_callout + abc +--->abc + +0 ^ a + +1 ^^ b + 1 ^ ^ c + +8 ^ ^ + 0: abc + +/'ab(?C1)c'/hex,auto_callout + abc +--->abc + +0 ^ a + +1 ^^ b + 1 ^ ^ c + +8 ^ ^ + 0: abc + +# Perl accepts these, but gives a warning. We can't warn, so give an error. 
+ +/[a-[:digit:]]+/ +Failed: error 150 at offset 4: invalid range in character class + a-a9-a + +/[A-[:digit:]]+/ +Failed: error 150 at offset 4: invalid range in character class + A-A9-A + +/[a-\d]+/ +Failed: error 150 at offset 5: invalid range in character class + a-a9-a + +/(?abc)(?(R)xyz)/B +------------------------------------------------------------------ + Bra + CBra 1 + abc + Ket + Cond + Cond recurse any + xyz + Ket + Ket + End +------------------------------------------------------------------ + +/(?abc)(?(R)xyz)/B +------------------------------------------------------------------ + Bra + CBra 1 + abc + Ket + Cond + 1 Cond ref + xyz + Ket + Ket + End +------------------------------------------------------------------ + +/(?=.*[A-Z])/I +Capturing subpattern count = 0 +May match empty string +Subject length lower bound = 0 + +/()(?<=(?0))/ +Failed: error 125 at offset 2: lookbehind assertion is not fixed length + +/(?*?\g'0/use_length +Failed: error 157 at offset 6: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number + +/.>*?\g'0/ +Failed: error 157 at offset 6: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number + +/{„Í„ÍÍ„Í{'{22{2{{2{'{22{{22{2{'{22{2{{2{{222{{2{'{22{2{22{2{'{22{2{{2{'{22{2{22{2{'{'{22{2{22{2{'{22{2{{2{'{22{2{22{2{'{222{2Ą̈́ÍÍ„Í{'{22{2{{2{'{22{{11{2{'{22{2{{2{{'{22{2{{2{'{22{{22{1{'{22{2{{2{{222{{2{'{22{2{22{2{'{/auto_callout + +// +\=get=i00000000000000000000000000000000 +** Group name in 'get' is too long +\=get=i2345678901234567890123456789012,get=i1245678901234567890123456789012 +** Too many characters in named 'get' modifiers + +"(?(?C))" +Failed: error 128 at offset 6: assertion expected after (?( or (?(?C) + +/(?(?(?(?(?(?))))))/ +Failed: error 128 at offset 2: assertion expected after (?( or (?(?C) + +/(?<=(?1))((?s))/anchored + +/(*:ab)*/ +Failed: error 109 at offset 6: quantifier does not follow a repeatable item + +%(*:(:(svvvvvvvvvv:]*[ 
Z!*;[]*[^[]*!^[+.+{{2,7}' _\\\\\\\\\\\\\)?.:.. *w////\\\Q\\\\\\\\\\\\\\\T\\\\\+/?/////'+\\\EEE?/////'+/*+/[^K]?]//(w)%never_backslash_c,alt_verbnames,auto_callout + +/./newline=crlf + \=ph +No match + +/(\x0e00\000000\xc)/replace=\P,substitute_extended + \x0e00\000000\xc +Failed: error -57 at offset 2 in replacement: bad escape sequence in replacement string + +//replace=0 + \=offset=7 +Failed: error -33: bad offset value + +".+\QX\E+"B,no_auto_possess +------------------------------------------------------------------ + Bra + Any+ + X+ + Ket + End +------------------------------------------------------------------ + +".+\QX\E+"B,auto_callout,no_auto_possess +------------------------------------------------------------------ + Bra + Callout 255 0 4 + Any+ + Callout 255 4 4 + X+ + Callout 255 8 0 + Ket + End +------------------------------------------------------------------ + +# This one is here because Perl gives an 'unmatched )' error which goes away +# if one of the \) sequences is removed - which is weird. PCRE finds it too +# complicated to find a minimum matching length. 
+ +"()X|((((((((()))))))((((())))))\2())((((((\2\2)))\2)(\22((((\2\2)2))\2)))(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z+:)Z|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z((Z*(\2(Z\':))\0)i|||||||||||||||loZ\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0nte!rnal errpr\2\\21r(2\ZZZ)+:)Z!|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZernZal ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \))\ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)))\2))))((((((\2\2))))))"I +Capturing subpattern count = 108 +Max back reference = 22 +Contains explicit CR or LF match +Subject length lower bound = 1 + +# This checks that new code for handling groups that may match an empty string +# works on a very large number of alternatives. This pattern used to provoke a +# complaint that it was too complicated. + +/(?:\[A|B|C|D|E|F|G|H|I|J|]{200}Z)/expand + +# This one used to compile rubbish instead of a compile error, and then +# behave unpredictably at match time. 
+ +/.+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X/ +Failed: error 128 at offset 63: assertion expected after (?( or (?(?C) + .+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X + +/[:[:alnum:]-[[a:lnum:]+/ +Failed: error 150 at offset 11: invalid range in character class + +/((?(?C'')\QX\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/ +Failed: error 128 at offset 11: assertion expected after (?( or (?(?C) + +/((?(?C'')\Q\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/ + +/abcd/auto_callout + abcd\=callout_error=255:2 +--->abcd + +0 ^ a + +1 ^^ b +Failed: error -37: callout error code + +/()(\g+65534)/ +Failed: error 161 at offset 11: group number is too big + +/()(\g+65533)/ +Failed: error 115 at offset 10: reference to non-existent subpattern + +/Á\x00\x00\x00š(\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\x00k\d+\x00‎\x00\x00\x00\x00\x00\2*\x00\x00\1*.){36}int^\x00\x00ÿÿ\x00š(\1{50779}?)J\w2/I +Capturing subpattern count = 2 +Max back reference = 2 +First code unit = \xc1 +Last code unit = '2' +Subject length lower bound = 65535 + +/(a)(b)\2\1\1\1\1/I +Capturing subpattern count = 2 +Max back reference = 2 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 7 + +/(?a)(?b)\g{b}\g{a}\g{a}\g{a}\g{a}(?xx)(?zz)/I,dupnames +Capturing subpattern count = 4 +Max back reference = 4 +Named capturing subpatterns: + a 1 + a 3 + b 2 + b 4 +Options: dupnames +First code unit = 'a' +Last code 
unit = 'z' +Subject length lower bound = 11 + # End of testinput2 +Error -63: PCRE2_ERROR_BADDATA (unknown error number) +Error -62: bad serialized data +Error -2: partial match +Error -1: no match +Error 0: PCRE2_ERROR_BADDATA (unknown error number) +Error 100: no error +Error 188: pattern string is longer than the limit set by the application +Error 189: internal error: unknown code in parsed pattern +Error 190: internal error: bad code value in parsed_skip() +Error 191: PCRE2_ERROR_BADDATA (unknown error number) diff --git a/pcre2/testdata/testoutput20 b/pcre2/testdata/testoutput20 new file mode 100644 index 000000000..db9986603 --- /dev/null +++ b/pcre2/testdata/testoutput20 @@ -0,0 +1,150 @@ +# This set of tests exercises the serialization/deserialization and code copy +# functions in the library. It does not use UTF or JIT. + +#forbid_utf + +# Compile several patterns, push them onto the stack, and then write them +# all to a file. + +#pattern push + +/(?(?&NAME_PAT))\s+(?(?&ADDRESS_PAT)) + (?(DEFINE) + (?[a-z]+) + (?\d+) + )/x +/^(?:((.)(?1)\2|)|((.)(?3)\4|.))$/i + +#save testsaved1 + +# Do it again for some more patterns. + +/(*MARK:A)(*SKIP:B)(C|X)/mark +** Ignored when compiled pattern is stacked with 'push': mark +/(?:(?foo)|(?bar))\k/dupnames + +#save testsaved2 +#pattern -push + +# Reload the patterns, then pop them one by one and check them. 
+ +#load testsaved1 +#load testsaved2 + +#pop info +Capturing subpattern count = 2 +Max back reference = 2 +Named capturing subpatterns: + n 1 + n 2 +Options: dupnames +Starting code units: b f +Subject length lower bound = 6 + foofoo + 0: foofoo + 1: foo + barbar + 0: barbar + 1: + 2: bar + +#pop mark + C + 0: C + 1: C +MK: A +\= Expect no match + D +No match, mark = A + +#pop + AmanaplanacanalPanama + 0: AmanaplanacanalPanama + 1: + 2: + 3: AmanaplanacanalPanama + 4: A + +#pop info +Capturing subpattern count = 4 +Named capturing subpatterns: + ADDR 2 + ADDRESS_PAT 4 + NAME 1 + NAME_PAT 3 +Options: extended +Subject length lower bound = 3 + metcalfe 33 + 0: metcalfe 33 + 1: metcalfe + 2: 33 + +# Check for an error when different tables are used. + +/abc/push,tables=1 +/xyz/push,tables=2 +#save testsaved1 +Serialization failed: error -30: patterns do not all use the same character tables + +#pop + xyz + 0: xyz + +#pop + abc + 0: abc + +#pop should give an error +** Can't pop off an empty stack + pqr + +/abcd/pushcopy + abcd + 0: abcd + +#pop + abcd + 0: abcd + +#pop should give an error +** Can't pop off an empty stack + +/abcd/push +#popcopy + abcd + 0: abcd + +#pop + abcd + 0: abcd + +/abcd/push +#save testsaved1 +#pop should give an error +** Can't pop off an empty stack + +#load testsaved1 +#popcopy + abcd + 0: abcd + +#pop + abcd + 0: abcd + +#pop should give an error +** Can't pop off an empty stack + +/abcd/pushtablescopy + abcd + 0: abcd + +#popcopy + abcd + 0: abcd + +#pop + abcd + 0: abcd + +# End of testinput20 diff --git a/pcre2/testdata/testoutput21 b/pcre2/testdata/testoutput21 new file mode 100644 index 000000000..cba132611 --- /dev/null +++ b/pcre2/testdata/testoutput21 @@ -0,0 +1,94 @@ +# These are tests of \C that do not involve UTF. They are not run when \C is +# disabled by compiling with --enable-never-backslash-C. + +/\C+\D \C+\d \C+\S \C+\s \C+\W \C+\w \C+. 
\C+\R \C+\H \C+\h \C+\V \C+\v \C+\Z \C+\z \C+$/Bx +------------------------------------------------------------------ + Bra + AllAny+ + \D + AllAny+ + \d + AllAny+ + \S + AllAny+ + \s + AllAny+ + \W + AllAny+ + \w + AllAny+ + Any + AllAny+ + \R + AllAny+ + \H + AllAny+ + \h + AllAny+ + \V + AllAny+ + \v + AllAny+ + \Z + AllAny++ + \z + AllAny+ + $ + Ket + End +------------------------------------------------------------------ + +/\D+\C \d+\C \S+\C \s+\C \W+\C \w+\C .+\C \R+\C \H+\C \h+\C \V+\C \v+\C a+\C \n+\C \C+\C/Bx +------------------------------------------------------------------ + Bra + \D+ + AllAny + \d+ + AllAny + \S+ + AllAny + \s+ + AllAny + \W+ + AllAny + \w+ + AllAny + Any+ + AllAny + \R+ + AllAny + \H+ + AllAny + \h+ + AllAny + \V+ + AllAny + \v+ + AllAny + a+ + AllAny + \x0a+ + AllAny + AllAny+ + AllAny + Ket + End +------------------------------------------------------------------ + +/ab\Cde/never_backslash_c +Failed: error 183 at offset 4: using \C is disabled by the application + +/ab\Cde/info +Capturing subpattern count = 0 +Contains \C +First code unit = 'a' +Last code unit = 'e' +Subject length lower bound = 5 + abXde + 0: abXde + +/(?<=ab\Cde)X/ + abZdeX + 0: X + +# End of testinput21 diff --git a/pcre2/testdata/testoutput22-16 b/pcre2/testdata/testoutput22-16 new file mode 100644 index 000000000..c161ff5a2 --- /dev/null +++ b/pcre2/testdata/testoutput22-16 @@ -0,0 +1,169 @@ +# Tests of \C when Unicode support is available. Note that \C is not supported +# for DFA matching in UTF mode, so this test is not run with -dfa. The output +# of this test is different in 8-, 16-, and 32-bit modes. Some tests may match +# in some widths and not in others. + +/ab\Cde/utf,info +Capturing subpattern count = 0 +Contains \C +Options: utf +First code unit = 'a' +Last code unit = 'e' +Subject length lower bound = 0 + abXde + 0: abXde + +# This should produce an error diagnostic (\C in UTF lookbehind) in 8-bit and +# 16-bit modes, but not in 32-bit mode. 
+ +/(?<=ab\Cde)X/utf +Failed: error 136 at offset 0: \C is not allowed in a lookbehind assertion in UTF-16 mode + ab!deXYZ + +# Autopossessification tests + +/\C+\X \X+\C/Bx +------------------------------------------------------------------ + Bra + AllAny+ + extuni + extuni+ + AllAny + Ket + End +------------------------------------------------------------------ + +/\C+\X \X+\C/Bx,utf +------------------------------------------------------------------ + Bra + Anybyte+ + extuni + extuni+ + Anybyte + Ket + End +------------------------------------------------------------------ + +/\C\X*TÓ…; +{0,6}\v+ F +/utf +\= Expect no match + Ó…\x0a +No match + +/\C(\W?Å¿)'?{{/utf +\= Expect no match + \\C(\\W?Å¿)'?{{ +No match + +/X(\C{3})/utf + X\x{1234} +No match + X\x{11234}Y + 0: X\x{11234}Y + 1: \x{11234}Y + X\x{11234}YZ + 0: X\x{11234}Y + 1: \x{11234}Y + +/X(\C{4})/utf + X\x{1234}YZ +No match + X\x{11234}YZ + 0: X\x{11234}YZ + 1: \x{11234}YZ + X\x{11234}YZW + 0: X\x{11234}YZ + 1: \x{11234}YZ + +/X\C*/utf + XYZabcdce + 0: XYZabcdce + +/X\C*?/utf + XYZabcde + 0: X + +/X\C{3,5}/utf + Xabcdefg + 0: Xabcde + X\x{1234} +No match + X\x{1234}YZ + 0: X\x{1234}YZ + X\x{1234}\x{512} +No match + X\x{1234}\x{512}YZ + 0: X\x{1234}\x{512}YZ + X\x{11234}Y + 0: X\x{11234}Y + X\x{11234}YZ + 0: X\x{11234}YZ + X\x{11234}\x{512} + 0: X\x{11234}\x{512} + X\x{11234}\x{512}YZ + 0: X\x{11234}\x{512}YZ + X\x{11234}\x{512}\x{11234}Z + 0: X\x{11234}\x{512}\x{11234} + +/X\C{3,5}?/utf + Xabcdefg + 0: Xabc + X\x{1234} +No match + X\x{1234}YZ + 0: X\x{1234}YZ + X\x{1234}\x{512} +No match + X\x{11234}Y + 0: X\x{11234}Y + X\x{11234}YZ + 0: X\x{11234}Y + X\x{11234}\x{512}YZ + 0: X\x{11234}\x{512} + X\x{11234} +No match + +/a\Cb/utf + aXb + 0: aXb + a\nb + 0: a\x{0a}b + a\x{100}b + 0: a\x{100}b + +/a\C\Cb/utf + a\x{100}b +No match + a\x{12257}b + 0: a\x{12257}b + a\x{12257}\x{11234}b +No match + +/ab\Cde/utf + abXde + 0: abXde + +# This one is here not because it's different to Perl, but because the way +# 
the captured single code unit is displayed. (In Perl it becomes a character, +# and you can't tell the difference.) + +/X(\C)(.*)/utf + X\x{1234} + 0: X\x{1234} + 1: \x{1234} + 2: + X\nabc + 0: X\x{0a}abc + 1: \x{0a} + 2: abc + +# This one is here because Perl gives out a grumbly error message (quite +# correctly, but that messes up comparisons). + +/a\Cb/utf +\= Expect no match in 8-bit mode + a\x{100}b + 0: a\x{100}b + +# End of testinput22 diff --git a/pcre2/testdata/testoutput22-32 b/pcre2/testdata/testoutput22-32 new file mode 100644 index 000000000..100333fb2 --- /dev/null +++ b/pcre2/testdata/testoutput22-32 @@ -0,0 +1,167 @@ +# Tests of \C when Unicode support is available. Note that \C is not supported +# for DFA matching in UTF mode, so this test is not run with -dfa. The output +# of this test is different in 8-, 16-, and 32-bit modes. Some tests may match +# in some widths and not in others. + +/ab\Cde/utf,info +Capturing subpattern count = 0 +Contains \C +Options: utf +First code unit = 'a' +Last code unit = 'e' +Subject length lower bound = 5 + abXde + 0: abXde + +# This should produce an error diagnostic (\C in UTF lookbehind) in 8-bit and +# 16-bit modes, but not in 32-bit mode. 
+ +/(?<=ab\Cde)X/utf + ab!deXYZ + 0: X + +# Autopossessification tests + +/\C+\X \X+\C/Bx +------------------------------------------------------------------ + Bra + AllAny+ + extuni + extuni+ + AllAny + Ket + End +------------------------------------------------------------------ + +/\C+\X \X+\C/Bx,utf +------------------------------------------------------------------ + Bra + AllAny+ + extuni + extuni+ + AllAny + Ket + End +------------------------------------------------------------------ + +/\C\X*TÓ…; +{0,6}\v+ F +/utf +\= Expect no match + Ó…\x0a +No match + +/\C(\W?Å¿)'?{{/utf +\= Expect no match + \\C(\\W?Å¿)'?{{ +No match + +/X(\C{3})/utf + X\x{1234} +No match + X\x{11234}Y +No match + X\x{11234}YZ + 0: X\x{11234}YZ + 1: \x{11234}YZ + +/X(\C{4})/utf + X\x{1234}YZ +No match + X\x{11234}YZ +No match + X\x{11234}YZW + 0: X\x{11234}YZW + 1: \x{11234}YZW + +/X\C*/utf + XYZabcdce + 0: XYZabcdce + +/X\C*?/utf + XYZabcde + 0: X + +/X\C{3,5}/utf + Xabcdefg + 0: Xabcde + X\x{1234} +No match + X\x{1234}YZ + 0: X\x{1234}YZ + X\x{1234}\x{512} +No match + X\x{1234}\x{512}YZ + 0: X\x{1234}\x{512}YZ + X\x{11234}Y +No match + X\x{11234}YZ + 0: X\x{11234}YZ + X\x{11234}\x{512} +No match + X\x{11234}\x{512}YZ + 0: X\x{11234}\x{512}YZ + X\x{11234}\x{512}\x{11234}Z + 0: X\x{11234}\x{512}\x{11234}Z + +/X\C{3,5}?/utf + Xabcdefg + 0: Xabc + X\x{1234} +No match + X\x{1234}YZ + 0: X\x{1234}YZ + X\x{1234}\x{512} +No match + X\x{11234}Y +No match + X\x{11234}YZ + 0: X\x{11234}YZ + X\x{11234}\x{512}YZ + 0: X\x{11234}\x{512}Y + X\x{11234} +No match + +/a\Cb/utf + aXb + 0: aXb + a\nb + 0: a\x{0a}b + a\x{100}b + 0: a\x{100}b + +/a\C\Cb/utf + a\x{100}b +No match + a\x{12257}b +No match + a\x{12257}\x{11234}b + 0: a\x{12257}\x{11234}b + +/ab\Cde/utf + abXde + 0: abXde + +# This one is here not because it's different to Perl, but because the way +# the captured single code unit is displayed. (In Perl it becomes a character, +# and you can't tell the difference.) 
+ +/X(\C)(.*)/utf + X\x{1234} + 0: X\x{1234} + 1: \x{1234} + 2: + X\nabc + 0: X\x{0a}abc + 1: \x{0a} + 2: abc + +# This one is here because Perl gives out a grumbly error message (quite +# correctly, but that messes up comparisons). + +/a\Cb/utf +\= Expect no match in 8-bit mode + a\x{100}b + 0: a\x{100}b + +# End of testinput22 diff --git a/pcre2/testdata/testoutput22-8 b/pcre2/testdata/testoutput22-8 new file mode 100644 index 000000000..62b0dcc49 --- /dev/null +++ b/pcre2/testdata/testoutput22-8 @@ -0,0 +1,171 @@ +# Tests of \C when Unicode support is available. Note that \C is not supported +# for DFA matching in UTF mode, so this test is not run with -dfa. The output +# of this test is different in 8-, 16-, and 32-bit modes. Some tests may match +# in some widths and not in others. + +/ab\Cde/utf,info +Capturing subpattern count = 0 +Contains \C +Options: utf +First code unit = 'a' +Last code unit = 'e' +Subject length lower bound = 0 + abXde + 0: abXde + +# This should produce an error diagnostic (\C in UTF lookbehind) in 8-bit and +# 16-bit modes, but not in 32-bit mode. 
+ +/(?<=ab\Cde)X/utf +Failed: error 136 at offset 0: \C is not allowed in a lookbehind assertion in UTF-8 mode + ab!deXYZ + +# Autopossessification tests + +/\C+\X \X+\C/Bx +------------------------------------------------------------------ + Bra + AllAny+ + extuni + extuni+ + AllAny + Ket + End +------------------------------------------------------------------ + +/\C+\X \X+\C/Bx,utf +------------------------------------------------------------------ + Bra + Anybyte+ + extuni + extuni+ + Anybyte + Ket + End +------------------------------------------------------------------ + +/\C\X*TÓ…; +{0,6}\v+ F +/utf +\= Expect no match + Ó…\x0a +No match + +/\C(\W?Å¿)'?{{/utf +\= Expect no match + \\C(\\W?Å¿)'?{{ +No match + +/X(\C{3})/utf + X\x{1234} + 0: X\x{1234} + 1: \x{1234} + X\x{11234}Y + 0: X\x{f0}\x{91}\x{88} + 1: \x{f0}\x{91}\x{88} + X\x{11234}YZ + 0: X\x{f0}\x{91}\x{88} + 1: \x{f0}\x{91}\x{88} + +/X(\C{4})/utf + X\x{1234}YZ + 0: X\x{1234}Y + 1: \x{1234}Y + X\x{11234}YZ + 0: X\x{11234} + 1: \x{11234} + X\x{11234}YZW + 0: X\x{11234} + 1: \x{11234} + +/X\C*/utf + XYZabcdce + 0: XYZabcdce + +/X\C*?/utf + XYZabcde + 0: X + +/X\C{3,5}/utf + Xabcdefg + 0: Xabcde + X\x{1234} + 0: X\x{1234} + X\x{1234}YZ + 0: X\x{1234}YZ + X\x{1234}\x{512} + 0: X\x{1234}\x{512} + X\x{1234}\x{512}YZ + 0: X\x{1234}\x{512} + X\x{11234}Y + 0: X\x{11234}Y + X\x{11234}YZ + 0: X\x{11234}Y + X\x{11234}\x{512} + 0: X\x{11234}\x{d4} + X\x{11234}\x{512}YZ + 0: X\x{11234}\x{d4} + X\x{11234}\x{512}\x{11234}Z + 0: X\x{11234}\x{d4} + +/X\C{3,5}?/utf + Xabcdefg + 0: Xabc + X\x{1234} + 0: X\x{1234} + X\x{1234}YZ + 0: X\x{1234} + X\x{1234}\x{512} + 0: X\x{1234} + X\x{11234}Y + 0: X\x{f0}\x{91}\x{88} + X\x{11234}YZ + 0: X\x{f0}\x{91}\x{88} + X\x{11234}\x{512}YZ + 0: X\x{f0}\x{91}\x{88} + X\x{11234} + 0: X\x{f0}\x{91}\x{88} + +/a\Cb/utf + aXb + 0: aXb + a\nb + 0: a\x{0a}b + a\x{100}b +No match + +/a\C\Cb/utf + a\x{100}b + 0: a\x{100}b + a\x{12257}b +No match + a\x{12257}\x{11234}b +No match + +/ab\Cde/utf + 
abXde + 0: abXde + +# This one is here not because it's different to Perl, but because the way +# the captured single code unit is displayed. (In Perl it becomes a character, +# and you can't tell the difference.) + +/X(\C)(.*)/utf + X\x{1234} + 0: X\x{1234} + 1: \x{e1} + 2: \x{88}\x{b4} + X\nabc + 0: X\x{0a}abc + 1: \x{0a} + 2: abc + +# This one is here because Perl gives out a grumbly error message (quite +# correctly, but that messes up comparisons). + +/a\Cb/utf +\= Expect no match in 8-bit mode + a\x{100}b +No match + +# End of testinput22 diff --git a/pcre2/testdata/testoutput23 b/pcre2/testdata/testoutput23 new file mode 100644 index 000000000..c6f0aa21f --- /dev/null +++ b/pcre2/testdata/testoutput23 @@ -0,0 +1,8 @@ +# This test is run when PCRE2 has been built with --enable-never-backslash-C, +# which disables the use of \C. All we can do is check that it gives the +# correct error message. + +/a\Cb/ +Failed: error 185 at offset 3: using \C is disabled in this PCRE2 library + +# End of testinput23 diff --git a/pcre2/testdata/testoutput3 b/pcre2/testdata/testoutput3 index dba3fc3ea..d9e8c5c1d 100644 --- a/pcre2/testdata/testoutput3 +++ b/pcre2/testdata/testoutput3 @@ -8,8 +8,7 @@ #forbid_utf /^[\w]+/ - *** Failers -No match +\= Expect no match École No match @@ -18,8 +17,7 @@ No match 0: École /^[\w]+/ - *** Failers -No match +\= Expect no match École No match @@ -28,30 +26,26 @@ No match 0: \xc9 /^[\W]+/locale=fr_FR - *** Failers - 0: *** +\= Expect no match École No match /[\b]/ \b 0: \x08 - *** Failers -No match +\= Expect no match a No match /[\b]/locale=fr_FR \b 0: \x08 - *** Failers -No match +\= Expect no match a No match /^\w+/ - *** Failers -No match +\= Expect no match École No match @@ -66,18 +60,14 @@ No match 2: cole /(.+)\b(.+)/locale=fr_FR - *** Failers - 0: *** Failers - 1: *** - 2: Failers +\= Expect no match École No match /École/i École 0: \xc9cole - *** Failers -No match +\= Expect no match école No match @@ -114,8 +104,7 @@ Subject 
length lower bound = 1 /^[\xc8-\xc9]/ École 0: É - *** Failers -No match +\= Expect no match école No match diff --git a/pcre2/testdata/testoutput3A b/pcre2/testdata/testoutput3A index 3044e4577..9b00be8b9 100644 --- a/pcre2/testdata/testoutput3A +++ b/pcre2/testdata/testoutput3A @@ -8,8 +8,7 @@ #forbid_utf /^[\w]+/ - *** Failers -No match +\= Expect no match École No match @@ -18,8 +17,7 @@ No match 0: École /^[\w]+/ - *** Failers -No match +\= Expect no match École No match @@ -28,30 +26,26 @@ No match 0: \xc9 /^[\W]+/locale=fr_FR - *** Failers - 0: *** +\= Expect no match École No match /[\b]/ \b 0: \x08 - *** Failers -No match +\= Expect no match a No match /[\b]/locale=fr_FR \b 0: \x08 - *** Failers -No match +\= Expect no match a No match /^\w+/ - *** Failers -No match +\= Expect no match École No match @@ -66,18 +60,14 @@ No match 2: cole /(.+)\b(.+)/locale=fr_FR - *** Failers - 0: *** Failers - 1: *** - 2: Failers +\= Expect no match École No match /École/i École 0: \xc9cole - *** Failers -No match +\= Expect no match école No match @@ -114,8 +104,7 @@ Subject length lower bound = 1 /^[\xc8-\xc9]/ École 0: É - *** Failers -No match +\= Expect no match école No match diff --git a/pcre2/testdata/testoutput3B b/pcre2/testdata/testoutput3B index b99d6fd7d..b30fc5f1f 100644 --- a/pcre2/testdata/testoutput3B +++ b/pcre2/testdata/testoutput3B @@ -8,8 +8,7 @@ #forbid_utf /^[\w]+/ - *** Failers -No match +\= Expect no match École No match @@ -18,8 +17,7 @@ No match 0: École /^[\w]+/ - *** Failers -No match +\= Expect no match École No match @@ -28,30 +26,26 @@ No match 0: \xc9 /^[\W]+/locale=fr_FR - *** Failers - 0: *** +\= Expect no match École No match /[\b]/ \b 0: \x08 - *** Failers -No match +\= Expect no match a No match /[\b]/locale=fr_FR \b 0: \x08 - *** Failers -No match +\= Expect no match a No match /^\w+/ - *** Failers -No match +\= Expect no match École No match @@ -66,18 +60,14 @@ No match 2: cole /(.+)\b(.+)/locale=fr_FR - *** Failers - 0: *** Failers 
- 1: *** - 2: Failers +\= Expect no match École No match /École/i École 0: \xc9cole - *** Failers -No match +\= Expect no match école No match @@ -114,8 +104,7 @@ Subject length lower bound = 1 /^[\xc8-\xc9]/ École 0: É - *** Failers -No match +\= Expect no match école No match diff --git a/pcre2/testdata/testoutput4 b/pcre2/testdata/testoutput4 index e8090a913..d2d5e5112 100644 --- a/pcre2/testdata/testoutput4 +++ b/pcre2/testdata/testoutput4 @@ -3,6 +3,7 @@ # some of the property tests may differ because of different versions of # Unicode in use by PCRE2 and Perl. +#newline_default lf anycrlf any #perltest /a.b/utf @@ -12,8 +13,7 @@ 0: a\x{7f}b a\x{100}b 0: a\x{100}b - *** Failers -No match +\= Expect no match a\nb No match @@ -27,8 +27,7 @@ No match a\x{4000}\x{100}yb 0: a\x{4000}\x{100}yb 1: \x{4000}\x{100}y - *** Failers -No match +\= Expect no match a\x{4000}b No match ac\ncb @@ -104,8 +103,7 @@ No match a\x{1234}\x{4321}\x{3412}b 0: a\x{1234}\x{4321}\x{3412}b 1: \x{1234}\x{4321}\x{3412} - *** Failers -No match +\= Expect no match a\x{1234}b No match ac\ncb @@ -127,8 +125,7 @@ No match a\x{1234}\x{4321}\x{3412}\x{3421}b 0: a\x{1234}\x{4321}\x{3412}\x{3421}b 1: \x{1234}\x{4321}\x{3412}\x{3421} - *** Failers -No match +\= Expect no match a\x{1234}b No match @@ -148,8 +145,7 @@ No match a\x{1234}\x{4321}\x{3412}\x{3421}b 0: a\x{1234}\x{4321}\x{3412}\x{3421}b 1: \x{1234}\x{4321}\x{3412}\x{3421} - *** Failers -No match +\= Expect no match a\x{1234}b No match @@ -175,8 +171,7 @@ No match axxxxxbcdefghijb 0: axxxxxb 1: xxxxx - *** Failers -No match +\= Expect no match a\x{1234}b No match axxxxxxbcdefghijb @@ -204,16 +199,14 @@ No match axxxxxbcdefghijb 0: axxxxxb 1: xxxxx - *** Failers -No match +\= Expect no match a\x{1234}b No match axxxxxxbcdefghijb No match /^[a\x{c0}]/utf - *** Failers -No match +\= Expect no match \x{100} No match @@ -232,8 +225,7 @@ No match /(?:\x{100}){3}b/utf \x{100}\x{100}\x{100}b 0: \x{100}\x{100}\x{100}b - *** Failers -No match +\= 
Expect no match \x{100}\x{100}b No match @@ -242,8 +234,7 @@ No match 0: \x{ab} \xc2\xab 0: \x{ab} - *** Failers -No match +\= Expect no match \x00{ab} No match @@ -254,8 +245,7 @@ No match \x{256}XYZ 0: X 1: \x{256} - *** Failers -No match +\= Expect no match XYZ No match @@ -379,8 +369,7 @@ No match /\D+/utf 12abcd34 0: abcd - *** Failers - 0: *** Failers +\= Expect no match 1234 No match @@ -389,8 +378,7 @@ No match 0: abc 12ab34 0: ab - *** Failers - 0: *** +\= Expect no match 1234 No match 12a34 @@ -401,8 +389,7 @@ No match 0: ab 12ab34 0: ab - *** Failers - 0: ** +\= Expect no match 1234 No match 12a34 @@ -411,16 +398,13 @@ No match /\d+/utf 12abcd34 0: 12 - *** Failers -No match /\d{2,3}/utf 12abcd34 0: 12 1234abcd 0: 123 - *** Failers -No match +\= Expect no match 1.4 No match @@ -429,16 +413,14 @@ No match 0: 12 1234abcd 0: 12 - *** Failers -No match +\= Expect no match 1.4 No match /\S+/utf 12abcd34 0: 12abcd34 - *** Failers - 0: *** +\= Expect no match \ \ No match @@ -447,8 +429,7 @@ No match 0: 12a 1234abcd 0: 123 - *** Failers - 0: *** +\= Expect no match \ \ No match @@ -457,8 +438,7 @@ No match 0: 12 1234abcd 0: 12 - *** Failers - 0: ** +\= Expect no match \ \ No match @@ -466,8 +446,6 @@ No match 12> <34 0: > < 0+ 34 - *** Failers -No match />\s{2,3} < 0+ ce - *** Failers -No match +\= Expect no match ab> < 0+ ce - *** Failers -No match +\= Expect no match ab> ^`|~ No match !\"#%&'()*,-./:;?@[\\]_{} @@ -3730,6 +3604,7 @@ No match 2: \x{23a}\x{2c65}\x{23a} /^(\x{23a})\1*(....)/i,utf +\= Expect no match \x{23a}\x{2c65}\x{2c65}\x{2c65} No match \x{23a}\x{23a}\x{2c65}\x{23a} @@ -3741,14 +3616,99 @@ No match "[\S\V\H]"utf -/\C\X*TÓ…; -{0,6}\v+ F -/utf - Ó…\x0a +/[^\p{Any}]*+x/utf + x + 0: x + +/[[:punct:]]/utf,ucp + \x{b4} No match -/\C(\W?Å¿)'?{{/utf - \\C(\\W?Å¿)'?{{ +/[[:^ascii:]]/utf,ucp + \x{100} + 0: \x{100} + \x{200} + 0: \x{200} + \x{300} + 0: \x{300} + \x{37e} + 0: \x{37e} +\= Expect no match + aa No match + 99 +No match + 
+/[[:^ascii:]\w]/utf,ucp + aa + 0: a + 99 + 0: 9 + gg + 0: g + \x{100} + 0: \x{100} + \x{200} + 0: \x{200} + \x{300} + 0: \x{300} + \x{37e} + 0: \x{37e} + +/[\w[:^ascii:]]/utf,ucp + aa + 0: a + 99 + 0: 9 + gg + 0: g + \x{100} + 0: \x{100} + \x{200} + 0: \x{200} + \x{300} + 0: \x{300} + \x{37e} + 0: \x{37e} + +/[^[:ascii:]\W]/utf,ucp + \x{100} + 0: \x{100} + \x{200} + 0: \x{200} +\= Expect no match + aa +No match + 99 +No match + gg +No match + \x{37e} +No match + +/[^[:^ascii:]\d]/utf,ucp + a + 0: a + ~ + 0: ~ + \a + 0: \x{07} + \x{7f} + 0: \x{7f} +\= Expect no match + 0 +No match + \x{389} +No match + \x{20ac} +No match + +/(?=.*b)\pL/ + 11bb + 0: b + +/(?(?=.*b)(?=.*b)\pL|.*c)/ + 11bb + 0: b # End of testinput4 diff --git a/pcre2/testdata/testoutput5 b/pcre2/testdata/testoutput5 index a99c12b8b..9651fd1c3 100644 --- a/pcre2/testdata/testoutput5 +++ b/pcre2/testdata/testoutput5 @@ -3,6 +3,8 @@ # results in 8-bit, 16-bit, and 32-bit modes are excluded (see tests 10 and # 12). +#newline_default lf any anycrlf + # PCRE2 and Perl disagree about the characteristics of certain Unicode # characters. For example, 061C is considered by Perl to be Arabic, though # is it not listed as such in the Unicode Scripts.txt file, and 2066-2069 are @@ -11,14 +13,12 @@ # test 4. 
/^[\p{Arabic}]/utf - ** Failers -No match +\= Expect no match \x{061c} No match /^[[:graph:]]+$/utf,ucp - ** Failers -No match +\= Expect no match \x{61c} No match \x{2066} @@ -31,8 +31,7 @@ No match No match /^[[:print:]]+$/utf,ucp - ** Failers - 0: ** Failers +\= Expect no match \x{61c} No match \x{2066} @@ -76,6 +75,7 @@ No match 0: A\x{85}\x{2005}Z /^[[:graph:]]+$/utf,ucp +\= Expect no match \x{180e} No match @@ -88,6 +88,7 @@ No match 0: \x{09}\x{0a}\x{1d} \x{85}\x{a0}\x{61c}\x{1680}\x{180e} /^[[:^print:]]+$/utf,ucp +\= Expect no match \x{180e} No match @@ -182,10 +183,6 @@ Subject length lower bound = 3 \x{212ab}\x{212ab}\x{212ab}\x{861} 0: \x{212ab}\x{212ab}\x{212ab} -/(?<=\C)X/utf -Failed: error 136 at offset 6: \C is not allowed in a lookbehind assertion - Should produce an error diagnostic - /^[ab]/IB,utf ------------------------------------------------------------------ Bra @@ -200,8 +197,7 @@ Overall options: anchored utf Subject length lower bound = 1 bar 0: b - *** Failers -No match +\= Expect no match c No match \x{ff} @@ -227,8 +223,7 @@ Subject length lower bound = 1 0: \x{ff} \x{100} 0: \x{100} - *** Failers - 0: * +\= Expect no match aaa No match @@ -251,8 +246,7 @@ No match \x{100}\x{100}"12" 0: \x{100}\x{100}"12" 1: "12" - *** Failers -No match +\= Expect no match \x{100}\x{100}abcd No match @@ -303,8 +297,7 @@ Failed: error 108 at offset 15: range out of order in character class 0: \x{100} \x{104} 0: \x{104} - *** Failers -No match +\= Expect no match \x{105} No match \x{ff} @@ -581,8 +574,7 @@ Matched, but too many substrings 0: a\x{2028}b a\x{2029}b 0: a\x{2029}b - ** Failers -No match +\= Expect no match a\n\rb No match @@ -623,8 +615,7 @@ No match 0: a\x{0a}\x{0d}b a\n\r\x{85}\x0cb 0: a\x{0a}\x{0d}\x{85}\x{0c}b - ** Failers -No match +\= Expect no match ab No match @@ -643,8 +634,7 @@ No match 0: a\x{0a}\x{0d}\x{0a}\x{0d}b a\n\n\r\nb 0: a\x{0a}\x{0a}\x{0d}\x{0a}b - ** Failers -No match +\= Expect no match a\n\n\n\rb No match a\r @@ -655,8 
+645,7 @@ No match 0: X X\x{0a} X\x09X\x0b 0: X\x{09}X\x{0b} - ** Failers -No match +\= Expect no match \x{a0} X\x0a No match @@ -667,8 +656,7 @@ No match 0: \x{09} \x{a0}\x{0a}\x{0b}\x{0c}\x{0d} \x09\x20\x{a0}\x0a\x0b\x0c 0: \x{09} \x{a0}\x{0a}\x{0b}\x{0c} - ** Failers -No match +\= Expect no match \x09\x20\x{a0}\x0a\x0b No match @@ -677,8 +665,7 @@ No match 0: \x{3001}\x{3000}\x{2030}\x{2028} X\x{180e}X\x{85} 0: X\x{180e}X\x{85} - ** Failers -No match +\= Expect no match \x{2009} X\x0a No match @@ -689,8 +676,7 @@ No match 0: \x{09}\x{205f}\x{a0}\x{0a}\x{2029}\x{0c}\x{2028} \x09\x20\x{202f}\x0a\x0b\x0c 0: \x{09} \x{202f}\x{0a}\x{0b}\x{0c} - ** Failers -No match +\= Expect no match \x09\x{200a}\x{a0}\x{2028}\x0b No match @@ -755,8 +741,7 @@ Subject length lower bound = 3 0: a\x{0a}b a\r\nb 0: a\x{0d}\x{0a}b - ** Failers -No match +\= Expect no match a\x{85}b No match a\x0bb @@ -793,8 +778,7 @@ Subject length lower bound = 2 0: a\x{0a}b a\r\nb 0: a\x{0d}\x{0a}b - ** Failers -No match +\= Expect no match a\x{85}b No match a\x0bb @@ -817,14 +801,11 @@ Subject length lower bound = 2 0: a\x{85}b a\x0bb 0: a\x{0b}b - ** Failers -No match /.*a.*=.b.*/utf,newline=any QQQ\x{2029}ABCaXYZ=!bPQR 0: ABCaXYZ=!bPQR - ** Failers -No match +\= Expect no match a\x{2029}b No match \x61\xe2\x80\xa9\x62 @@ -838,8 +819,7 @@ Failed: error 130 at offset 3: unknown POSIX class name 0: a\x{1234}b a\nb 0: a\x{0a}b - ** Failers -No match +\= Expect no match ab No match @@ -848,8 +828,7 @@ No match 0: aXb a\nX\nX\x{1234}b 0: a\x{0a}X\x{0a}X\x{1234}b - ** Failers -No match +\= Expect no match ab No match @@ -935,6 +914,7 @@ Partial match: X\x{123}\x{123}\x{123} Partial match: X\x{123}\x{123}\x{123}\x{123} /X\x{123}{2,4}b/utf +\= Expect no match Xx\=ps No match X\x{123}x\=ps @@ -947,6 +927,7 @@ No match No match /X\x{123}{2,4}?b/utf +\= Expect no match Xx\=ps No match X\x{123}x\=ps @@ -959,6 +940,7 @@ No match No match /X\x{123}{2,4}+b/utf +\= Expect no match Xx\=ps No match X\x{123}x\=ps @@ 
-1745,6 +1727,7 @@ Last code unit = 'y' Subject length lower bound = 2 /(?= 0xd800 && <= 0xdfff) +Failed: error 173 at offset 6: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) /^a+[a\x{200}]/B,utf ------------------------------------------------------------------ @@ -1874,8 +1857,7 @@ Subject length lower bound = 1 0: 12-34 12+\x{661}-34 0: 12+\x{661}-34 - ** Failers -No match +\= Expect no match abcd No match @@ -1995,8 +1977,7 @@ No match /^\p{Cs}/utf \x{dfff}\=no_utf_check 0: \x{dfff} - ** Failers -No match +\= Expect no match \x{09f} No match @@ -2021,8 +2002,7 @@ No match 0: $\x{a2}\x{a3}\x{a4}\x{a5} \x{9f2} 0: \x{9f2} - ** Failers -No match +\= Expect no match X No match \x{2c2} @@ -2039,8 +2019,7 @@ No match 0: \x{2000} \x{2001} 0: \x{2001} - ** Failers -No match +\= Expect no match \x{2028} No match \x{200d} @@ -2052,16 +2031,14 @@ No match /\p{^Lu}/i,utf 1234 0: 1 - ** Failers - 0: * +\= Expect no match ABC No match /\P{Lu}/i,utf 1234 0: 1 - ** Failers - 0: * +\= Expect no match ABC No match @@ -2070,8 +2047,7 @@ No match 0: a Az 0: z - ** Failers - 0: a +\= Expect no match ABC No match @@ -2080,8 +2056,7 @@ No match 0: A a\x{10a0}B 0: \x{10a0} - ** Failers - 0: F +\= Expect no match a No match \x{1d00} @@ -2092,8 +2067,7 @@ No match 0: A aZ 0: Z - ** Failers - 0: F +\= Expect no match abc No match @@ -2182,16 +2156,14 @@ No match 0: \x{a6c} \x{10a7} 0: \x{10a7} - ** Failers -No match +\= Expect no match _ABC No match /^\p{Xan}+/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 0: ABCD1234\x{6ca}\x{a6c}\x{10a7} - ** Failers -No match +\= Expect no match _ABC No match @@ -2222,16 +2194,14 @@ No match 0: \x{a6c} \x{10a7} 0: \x{10a7} - ** Failers -No match +\= Expect no match _ABC No match /^[\p{Xan}]+/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 0: ABCD1234\x{6ca}\x{a6c}\x{10a7} - ** Failers -No match +\= Expect no match _ABC No match @@ -2240,8 +2210,7 @@ No match 0: >\x{1680} >\x{a0} 0: >\x{a0} - ** Failers -No match +\= Expect no match \x{0b} No match @@ -2278,8 +2247,7 
@@ No match 0: >\x{1680} >\x{a0} 0: >\x{a0} - ** Failers -No match +\= Expect no match \x{0b} No match @@ -2324,8 +2292,7 @@ No match 0: \x{10a7} _ABC 0: _ - ** Failers -No match +\= Expect no match [] No match @@ -2362,8 +2329,7 @@ No match 0: \x{10a7} _ABC 0: _ - ** Failers -No match +\= Expect no match [] No match @@ -2630,8 +2596,7 @@ No match /\b...\B/utf abc_ 0: abc - ** Failers - 0: Fai +\= Expect no match \x{37e}abc\x{376} No match \x{37e}\x{376}\x{371}\x{393}\x{394} @@ -2825,10 +2790,12 @@ No match # These behaved oddly in Perl, so they are kept in this test /(\x{23a}\x{23a}\x{23a})?\1/i,utf +\= Expect no match \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} No match /(ȺȺȺ)?\1/i,utf +\= Expect no match ȺȺȺⱥⱥ No match @@ -2843,10 +2810,12 @@ No match 1: \x{23a}\x{23a}\x{23a} /(\x{23a}\x{23a}\x{23a})\1/i,utf +\= Expect no match \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} No match /(ȺȺȺ)\1/i,utf +\= Expect no match ȺȺȺⱥⱥ No match @@ -2887,8 +2856,7 @@ No match 0: \x{1bc0} \x{1bff} 0: \x{1bff} - ** Failers -No match +\= Expect no match \x{1bf4} No match @@ -2897,8 +2865,7 @@ No match 0: \x{11000} \x{1106f} 0: \x{1106f} - ** Failers -No match +\= Expect no match \x{1104e} No match @@ -2907,8 +2874,7 @@ No match 0: \x{840} \x{85e} 0: \x{85e} - ** Failers -No match +\= Expect no match \x{85c} No match \x{85d} @@ -2933,14 +2899,10 @@ No match /^a\X41z/alt_bsux,allow_empty_class,match_unset_backref,dupnames aX41z 0: aX41z - *** Failers -No match +\= Expect no match aAz No match -/(?<=ab\Cde)X/utf -Failed: error 136 at offset 10: \C is not allowed in a lookbehind assertion - /\X/ a\=ps 0: a @@ -3138,8 +3100,7 @@ Subject length lower bound = 3 0+ /\x{3a3}++./i,utf,aftertext - ** Failers -No match +\= Expect no match \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} No match @@ -3179,24 +3140,29 @@ No match Ket End ------------------------------------------------------------------ +\= Expect no match ikt No match /is+t/i,utf iSs\x{17f}t 0: iSs\x{17f}t +\= Expect no match ikt No match 
/is+?t/i,utf +\= Expect no match ikt No match /is?t/i,utf +\= Expect no match ikt No match /is{2}t/i,utf +\= Expect no match iskt No match @@ -3211,80 +3177,70 @@ No match 0: ` \x{1234}abc 0: \x{1234} - ** Failers -No match +\= Expect no match abc No match /^\p{Xuc}+/utf $@`\x{a0}\x{1234}\x{e000}** 0: $@`\x{a0}\x{1234}\x{e000} - ** Failers -No match +\= Expect no match \x{9f} No match /^\p{Xuc}+?/utf $@`\x{a0}\x{1234}\x{e000}** 0: $ - ** Failers -No match +\= Expect no match \x{9f} No match /^\p{Xuc}+?\*/utf $@`\x{a0}\x{1234}\x{e000}** 0: $@`\x{a0}\x{1234}\x{e000}* - ** Failers -No match +\= Expect no match \x{9f} No match /^\p{Xuc}++/utf $@`\x{a0}\x{1234}\x{e000}** 0: $@`\x{a0}\x{1234}\x{e000} - ** Failers -No match +\= Expect no match \x{9f} No match /^\p{Xuc}{3,5}/utf $@`\x{a0}\x{1234}\x{e000}** 0: $@`\x{a0}\x{1234} - ** Failers -No match +\= Expect no match \x{9f} No match /^\p{Xuc}{3,5}?/utf $@`\x{a0}\x{1234}\x{e000}** 0: $@` - ** Failers -No match +\= Expect no match \x{9f} No match /^[\p{Xuc}]/utf $@`\x{a0}\x{1234}\x{e000}** 0: $ - ** Failers -No match +\= Expect no match \x{9f} No match /^[\p{Xuc}]+/utf $@`\x{a0}\x{1234}\x{e000}** 0: $@`\x{a0}\x{1234}\x{e000} - ** Failers -No match +\= Expect no match \x{9f} No match /^\P{Xuc}/utf abc 0: a - ** Failers - 0: * +\= Expect no match $abc No match @abc @@ -3297,8 +3253,7 @@ No match /^[\P{Xuc}]/utf abc 0: a - ** Failers - 0: * +\= Expect no match $abc No match @abc @@ -3843,7 +3798,7 @@ No match End ------------------------------------------------------------------ -/\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \C+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx +/\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx ------------------------------------------------------------------ Bra \D+ @@ -3858,8 +3813,6 @@ No match extuni \w+ extuni - AllAny+ - extuni \R+ extuni \H+ @@ -3898,7 +3851,7 @@ No match End ------------------------------------------------------------------ -/\X+\D \X+\d 
\X+\S \X+\s \X+\W \X+\w \X+. \X+\C \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx +/\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx ------------------------------------------------------------------ Bra extuni+ @@ -3916,8 +3869,6 @@ No match extuni+ Any extuni+ - AllAny - extuni+ \R extuni+ \H @@ -4003,12 +3954,9 @@ Subject length lower bound = 1 1: 123X\x{1234}Z123 /(?<=abc)(|def)/g,utf,replace=<$0> - 123abcáyzabcdef789abcሴqr + 123abcáyzabcdef789abcሴqr 4: 123abc<>\x{e1}yzabc<>789abc<>\x{1234}qr -/[^\xff]((?1))/utf,debug -Failed: error 140 at offset 11: recursion could loop indefinitely - /[A-`]/iB,utf ------------------------------------------------------------------ Bra @@ -4050,4 +3998,238 @@ Failed: error 122 at offset 1227: unmatched closing parenthesis /$(&.+[\p{Me}].\s\xdcC*?(?())(?)\xd1+!~:(?)''(d'E:yD!\s(?'R'\x1e;\x10:U))|')g!\xb0*){29+))#(?'P'})*?/ +"(*UTF)(*UCP)(.UTF).+X(\V+;\^(\D|)!999}(?(?C{7(?C')\H*\S*/^\x5\xa\\xd3\x85n?(;\D*(?m).[^mH+((*UCP)(*U:F)})(?!^)(?'" +Failed: error 162 at offset 113: subpattern name expected + +/[\pS#moq]/ + = + 0: = + +/(*:a\x{12345}b\t(d\)c)xxx/utf,alt_verbnames,mark + cxxxz + 0: xxx +MK: a\x{12345}b\x{09}(d)c + +/abcd/utf,replace=x\x{824}y\o{3333}z(\Q12\$34$$\x34\E5$$),substitute_extended + abcd + 1: x\x{824}y\x{6db}z(12\$34$$\x345$) + +/a(\x{e0}\x{101})(\x{c0}\x{102})/utf,replace=a\u$1\U$1\E$1\l$2\L$2\Eab\U\x{e0}\x{101}\L\x{d0}\x{160}\EDone,substitute_extended + a\x{e0}\x{101}\x{c0}\x{102} + 1: a\x{c0}\x{101}\x{c0}\x{100}\x{e0}\x{101}\x{e0}\x{102}\x{e0}\x{103}ab\x{c0}\x{100}\x{f0}\x{161}Done + +/((?\d)|(?\p{L}))/g,substitute_extended,replace=<${digit:+digit; :not digit; }${letter:+letter:not a letter}> + ab12cde + 7: + +/(*UCP)(*UTF)[[:>:]]X/B +------------------------------------------------------------------ + Bra + \b + AssertB + Reverse + prop Xwd + Ket + X + Ket + End +------------------------------------------------------------------ + +/abc/utf,replace=xyz + 
abc\=zero_terminate + 1: xyz + +/a[[:punct:]b]/ucp,bincode +------------------------------------------------------------------ + Bra + a + [b[:punct:]] + Ket + End +------------------------------------------------------------------ + +/a[[:punct:]b]/utf,ucp,bincode +------------------------------------------------------------------ + Bra + a + [b[:punct:]] + Ket + End +------------------------------------------------------------------ + +/a[b[:punct:]]/utf,ucp,bincode +------------------------------------------------------------------ + Bra + a + [b[:punct:]] + Ket + End +------------------------------------------------------------------ + +/[[:^ascii:]]/utf,ucp,bincode +------------------------------------------------------------------ + Bra + [\x80-\xff] (neg) + Ket + End +------------------------------------------------------------------ + +/[[:^ascii:]\w]/utf,ucp,bincode +------------------------------------------------------------------ + Bra + [\x80-\xff\p{Xwd}\x{100}-\x{10ffff}] + Ket + End +------------------------------------------------------------------ + +/[\w[:^ascii:]]/utf,ucp,bincode +------------------------------------------------------------------ + Bra + [\x80-\xff\p{Xwd}\x{100}-\x{10ffff}] + Ket + End +------------------------------------------------------------------ + +/[^[:ascii:]\W]/utf,ucp,bincode +------------------------------------------------------------------ + Bra + [^\x00-\x7f\P{Xwd}] + Ket + End +------------------------------------------------------------------ + \x{de} + 0: \x{de} + \x{200} + 0: \x{200} +\= Expect no match + \x{300} +No match + \x{37e} +No match + +/[[:^ascii:]a]/utf,ucp,bincode +------------------------------------------------------------------ + Bra + [a\x80-\xff] (neg) + Ket + End +------------------------------------------------------------------ + +/L(?#(|++\x{0a}\x{123}\x{123}\x{123}\x{123} + +0 ^ . + +0 ^ . + +1 ^ ^ . + +2 ^ ^ + 0: \x{123}\x{123} + +# This tests processing wide characters in extended mode. 
+ +/XÈ€/x,utf + +# These three test a bug fix that was not clearing up after a locale setting +# when the test or a subsequent one matched a wide character. + +//locale=C + +/[\P{Yi}]/utf +\x{2f000} + 0: \x{2f000} + +/[\P{Yi}]/utf,locale=C +\x{2f000} + 0: \x{2f000} + +/^(?a*)\d/ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa9876 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa9 - *** Failers -No match +\= Expect no match aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa No match @@ -684,8 +630,7 @@ No match 0: def> 0: <> - *** Failers -No match +\= Expect no match ababab - +0 ^ (ab|cd){3,4} + +0 ^ ( +1 ^ a +4 ^ c +2 ^^ b @@ -793,7 +732,7 @@ No match 0: ababab abcdabcd --->abcdabcd - +0 ^ (ab|cd){3,4} + +0 ^ ( +1 ^ a +4 ^ c +2 ^^ b @@ -801,7 +740,7 @@ No match +1 ^ ^ a +4 ^ ^ c +5 ^ ^ d - +6 ^ ^ ) + +6 ^ ^ ){3,4} +1 ^ ^ a +4 ^ ^ c +2 ^ ^ b @@ -810,13 +749,13 @@ No match +1 ^ ^ a +4 ^ ^ c +5 ^ ^ d - +6 ^ ^ ) + +6 ^ ^ ){3,4} +12 ^ ^ 0: abcdabcd 1: abcdab abcdcdcdcdcd --->abcdcdcdcdcd - +0 ^ (ab|cd){3,4} + +0 ^ ( +1 ^ a +4 ^ c +2 ^^ b @@ -824,16 +763,16 @@ No match +1 ^ ^ a +4 ^ ^ c +5 ^ ^ d - +6 ^ ^ ) + +6 ^ ^ ){3,4} +1 ^ ^ a +4 ^ ^ c +5 ^ ^ d - +6 ^ ^ ) + +6 ^ ^ ){3,4} +12 ^ ^ +1 ^ ^ a +4 ^ ^ c +5 ^ ^ d - +6 ^ ^ ) + +6 ^ ^ ){3,4} +12 ^ ^ 0: abcdcdcd 1: abcdcd @@ -841,8 +780,7 @@ No match /^abc/ abcdef 0: abc - *** Failers -No match +\= Expect no match abcdef\=notbol No match @@ -856,8 +794,7 @@ No match 1: xyz\=notempty 0: xyz - *** Failers - 0: +\= Expect no match bcd\=notempty No match @@ -866,8 +803,7 @@ No match 0: xyz xyz\n 0: xyz - *** Failers -No match +\= Expect no match xyz\=noteol No match xyz\n\=noteol @@ -884,8 +820,7 @@ No match 0: xyz xyz\n\=noteol 0: xyz - *** Failers -No match +\= Expect no match xyz\=noteol No match @@ -894,8 +829,7 @@ No match 0: abc defabcxyz\=offset=3 0: abc - *** Failers -No match +\= Expect no match defabcxyz No match @@ -906,8 +840,7 @@ Partial match: ab Partial match: abcde abcdef\=ps 0: abcdef - 
*** Failers -No match +\= Expect no match abx\=ps No match @@ -928,8 +861,7 @@ Partial match: aaaa12345 0: aa0z aaaa4444444444444z\=ps 0: aaaa4444444444444z - *** Failers -No match +\= Expect no match az\=ps No match aaaaa\=ps @@ -944,27 +876,20 @@ Partial match: abc 0: def /(?<=foo)bar/ - xyzfo\=ps -No match foob\=ps,offset=2 Partial match: foob <<< foobar...\=ps,dfa_restart,offset=4 0: ar - xyzfo\=ps -No match foobar\=offset=2 0: bar - *** Failers -No match +\= Expect no match xyzfo\=ps No match obar\=dfa_restart No match /(ab*(cd|ef))+X/ - adfadadaklhlkalkajhlkjahdfasdfasdfladsfjkj\=ps,noteol -No match lkjhlkjhlkjhlkjhabbbbbbcdaefabbbbbbbefa\=ps,notbol,noteol Partial match: abbbbbbcdaefabbbbbbbefa cdabbbbbbbb\=ps,notbol,dfa_restart,noteol @@ -973,14 +898,18 @@ Partial match: cdabbbbbbbb Partial match: efabbbbbbbbbbbbbbbb bbbbbbbbbbbbcdXyasdfadf\=ps,notbol,dfa_restart,noteol 0: bbbbbbbbbbbbcdX +\= Expect no match + adfadadaklhlkalkajhlkjahdfasdfasdfladsfjkj\=ps,noteol +No match /the quick brown fox/ the quick brown fox 0: the quick brown fox - The quick brown FOX -No match What do you know about the quick brown fox? 0: the quick brown fox +\= Expect no match + The quick brown FOX +No match What do you know about THE QUICK BROWN FOX? 
No match @@ -1059,8 +988,7 @@ No match 0: aaaabxyzpqrrrabbxyyyypqAzz >>>>abcxyzpqrrrabbxyyyypqAzz 0: abcxyzpqrrrabbxyyyypqAzz - *** Failers -No match +\= Expect no match abxyzpqrrabbxyyyypqAzz No match abxyzpqrrrrabbxyyyypqAzz @@ -1079,8 +1007,7 @@ No match 0: abczz abcabczz 0: abcabczz - *** Failers -No match +\= Expect no match zz No match abcabcabczz @@ -1105,8 +1032,7 @@ No match 0: abbbbbbbbbbbc bbbbbbbbbbbac 0: bbbbbbbbbbbac - *** Failers -No match +\= Expect no match aaac No match abbbbbbbbbbbac @@ -1129,8 +1055,7 @@ No match 0: abbbbbbbbbbbc bbbbbbbbbbbac 0: bbbbbbbbbbbac - *** Failers -No match +\= Expect no match aaac No match abbbbbbbbbbbac @@ -1147,8 +1072,7 @@ No match 0: bbabc bababc 0: bababc - *** Failers -No match +\= Expect no match bababbc No match babababc @@ -1161,8 +1085,7 @@ No match 0: bbabc bababc 0: bababc - *** Failers -No match +\= Expect no match bababbc No match babababc @@ -1185,8 +1108,7 @@ No match 0: d ething 0: e - *** Failers -No match +\= Expect no match fthing No match [thing @@ -1203,8 +1125,7 @@ No match 0: d ething 0: e - *** Failers -No match +\= Expect no match athing No match fthing @@ -1217,8 +1138,7 @@ No match 0: [ \\thing 0: \ - *** Failers - 0: * +\= Expect no match athing No match bthing @@ -1237,8 +1157,7 @@ No match 0: a fthing 0: f - *** Failers - 0: * +\= Expect no match ]thing No match cthing @@ -1281,8 +1200,7 @@ No match 0: 10 100 0: 100 - *** Failers -No match +\= Expect no match abc No match @@ -1299,8 +1217,7 @@ No match 0: xxx0 xxx1234 0: xxx1234 - *** Failers -No match +\= Expect no match xxx No match @@ -1311,12 +1228,11 @@ No match 0: xx123 123456 0: 123456 - *** Failers -No match - 123 -No match x1234 0: x1234 +\= Expect no match + 123 +No match /^.+?[0-9][0-9][0-9]$/ x123 @@ -1325,18 +1241,16 @@ No match 0: xx123 123456 0: 123456 - *** Failers -No match - 123 -No match x1234 0: x1234 +\= Expect no match + 123 +No match /^([^!]+)!(.+)=apquxz\.ixr\.zzz\.ac\.uk$/ abc!pqr=apquxz.ixr.zzz.ac.uk 0: 
abc!pqr=apquxz.ixr.zzz.ac.uk - *** Failers -No match +\= Expect no match !pqr=apquxz.ixr.zzz.ac.uk No match abc!=apquxz.ixr.zzz.ac.uk @@ -1349,7 +1263,8 @@ No match /:/ Well, we need a colon: somewhere 0: : - *** Fail if we don't +\= Expect no match + No match without a colon No match /([\da-f:]+)$/i @@ -1369,8 +1284,7 @@ No match 0: def Any old stuff 0: ff - *** Failers -No match +\= Expect no match 0zzz No match gzzz @@ -1385,8 +1299,7 @@ No match 0: .1.2.3 A.12.123.0 0: A.12.123.0 - *** Failers -No match +\= Expect no match .1.2.3333 No match 1.2.3 @@ -1399,8 +1312,7 @@ No match 0: 1 IN SOA non-sp1 non-sp2( 1 IN SOA non-sp1 non-sp2 ( 0: 1 IN SOA non-sp1 non-sp2 ( - *** Failers -No match +\= Expect no match 1IN SOA non-sp1 non-sp2( No match @@ -1417,8 +1329,7 @@ No match 0: sxk.zzz.ac.uk. x-.y-. 0: x-.y-. - *** Failers -No match +\= Expect no match -abc.peq. No match @@ -1431,8 +1342,7 @@ No match 0: *.c3-b.c *.c-a.b-c 0: *.c-a.b-c - *** Failers -No match +\= Expect no match *.0 No match *.a- @@ -1469,22 +1379,18 @@ No match 0: "abcd" ; \"\" ; rhubarb 0: "" ; rhubarb - *** Failers -No match +\= Expect no match \"1234\" : things No match /^$/ \ 0: - *** Failers -No match / ^ a (?# begins with a) b\sc (?# then b c) $ (?# then end)/x ab c 0: ab c - *** Failers -No match +\= Expect no match abc No match ab cde @@ -1493,8 +1399,7 @@ No match /(?x) ^ a (?# begins with a) b\sc (?# then b c) $ (?# then end)/ ab c 0: ab c - *** Failers -No match +\= Expect no match abc No match ab cde @@ -1505,8 +1410,7 @@ No match 0: a bcd a b d 0: a b d - *** Failers -No match +\= Expect no match abcd No match ab d @@ -1621,8 +1525,7 @@ No match 0: 12345678ab 12345678__ 0: 12345678__ - *** Failers -No match +\= Expect no match 1234567 No match @@ -1635,8 +1538,7 @@ No match 0: 12345 aaaaa 0: aaaaa - *** Failers -No match +\= Expect no match 123456 No match @@ -1664,8 +1566,7 @@ No match 0: From abcd Mon Sep 01 12:33 From abcd Mon Sep 1 12:33:02 1997 0: From abcd Mon Sep 1 12:33 - *** 
Failers -No match +\= Expect no match From abcd Sep 01 12:33:02 1997 No match @@ -1696,8 +1597,7 @@ No match /^(\D*)(?=\d)(?!123)/ abc456 0: abc - *** Failers -No match +\= Expect no match abc123 No match @@ -1723,16 +1623,14 @@ No match /(?!^)abc/ the abc 0: abc - *** Failers -No match +\= Expect no match abc No match /(?=^)abc/ abc 0: abc - *** Failers -No match +\= Expect no match the abc No match @@ -1988,8 +1886,7 @@ No match A missing angle .*/)foo" +\= Expect no match /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/it/you/see/ No match @@ -3708,16 +3568,14 @@ No match 0: .230003938 1.875000282 0: .875000282 - *** Failers -No match +\= Expect no match 1.235 No match /^((?>\w+)|(?>\s+))*$/ now is the time for all good men to come to the aid of the party 0: now is the time for all good men to come to the aid of the party - *** Failers -No match +\= Expect no match this is not a line with only words and spaces! No match @@ -3737,8 +3595,7 @@ No match /((?>\d+))(\w)/ 12345a 0: 12345a - *** Failers -No match +\= Expect no match 12345+ No match @@ -3786,8 +3643,7 @@ No match 0: (abc) (abc(def)xyz) 0: (abc(def)xyz) - *** Failers -No match +\= Expect no match ((()aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa No match @@ -3796,8 +3652,7 @@ No match 0: ab Ab 0: Ab - *** Failers -No match +\= Expect no match aB No match AB @@ -3806,8 +3661,7 @@ No match /(a (?x)b c)d e/ a bcd e 0: a bcd e - *** Failers -No match +\= Expect no match a b cd e No match abcd e @@ -3818,8 +3672,7 @@ No match /(a b(?x)c d (?-x)e f)/ a bcde f 0: a bcde f - *** Failers -No match +\= Expect no match abcdef No match @@ -3828,8 +3681,7 @@ No match 0: abc aBc 0: aBc - *** Failers -No match +\= Expect no match abC No match aBC @@ -3848,8 +3700,7 @@ No match 0: abc aBc 0: aBc - *** Failers -No match +\= Expect no match ABC No match abC @@ -3862,8 +3713,7 @@ No match 0: aBc aBBc 0: aBBc - *** Failers -No match +\= Expect no match aBC No match aBBC @@ -3874,8 +3724,7 @@ No match 0: abcd abCd 
0: abCd - *** Failers -No match +\= Expect no match aBCd No match abcD @@ -3888,8 +3737,7 @@ No match 0: more than MILLION more \n than Million 0: more \x0a than Million - *** Failers -No match +\= Expect no match MORE THAN MILLION No match more \n than \n million @@ -3902,8 +3750,7 @@ No match 0: more than MILLION more \n than Million 0: more \x0a than Million - *** Failers -No match +\= Expect no match MORE THAN MILLION No match more \n than \n million @@ -3916,8 +3763,7 @@ No match 0: aBbc aBBc 0: aBBc - *** Failers -No match +\= Expect no match Abc No match abAb @@ -3930,8 +3776,7 @@ No match 0: abc aBc 0: aBc - *** Failers -No match +\= Expect no match Ab No match abC @@ -3944,8 +3789,7 @@ No match 0: xxc aBxxc 0: xxc - *** Failers -No match +\= Expect no match Abxxc No match ABxxc @@ -3958,8 +3802,7 @@ No match 0: abc: 12 0: 12 - *** Failers -No match +\= Expect no match 123 No match xyz @@ -3970,8 +3813,7 @@ No match 0: abc: 12 0: 12 - *** Failers -No match +\= Expect no match 123 No match xyz @@ -3986,8 +3828,7 @@ No match 0: cat focat 0: cat - *** Failers -No match +\= Expect no match foocat No match @@ -4000,8 +3841,7 @@ No match 0: cat focat 0: cat - *** Failers -No match +\= Expect no match foocat No match @@ -4182,8 +4022,7 @@ No match 0: 12-sep-98 12-09-98 0: 12-09-98 - *** Failers -No match +\= Expect no match sep-12-98 No match @@ -4212,8 +4051,7 @@ No match 0: bbx BBx 0: BBx - *** Failers -No match +\= Expect no match abcX No match aBCX @@ -4238,8 +4076,7 @@ No match 0: f France 0: F - *** Failers -No match +\= Expect no match Africa No match @@ -4256,8 +4093,7 @@ No match 0: z Zambesi 0: Z - *** Failers -No match +\= Expect no match aCD No match XY @@ -4266,8 +4102,7 @@ No match /(?<=foo\n)^bar/m foo\nbar 0: bar - *** Failers -No match +\= Expect no match bar No match baz\nbar @@ -4280,16 +4115,14 @@ No match 0: baz koobarbaz 0: baz - *** Failers -No match +\= Expect no match baz No match foobarbaz No match -/The following tests are taken from the 
Perl 5.005 test suite; some of them/ -/are compatible with 5.004, but I'd rather not have to sort them out./ -No match +# The following tests are taken from the Perl 5.005 test suite; some of them +# are compatible with 5.004, but I'd rather not have to sort them out. /abc/ abc @@ -4298,8 +4131,7 @@ No match 0: abc ababc 0: abc - *** Failers -No match +\= Expect no match xbc No match axc @@ -4334,8 +4166,7 @@ No match /ab+bc/ abbc 0: abbc - *** Failers -No match +\= Expect no match abc No match abq @@ -4358,8 +4189,7 @@ No match 0: abbbbc /ab{4,5}bc/ - *** Failers -No match +\= Expect no match abq No match abbbbc @@ -4388,8 +4218,7 @@ No match /^abc$/ abc 0: abc - *** Failers -No match +\= Expect no match abbbbc No match abcc @@ -4404,10 +4233,9 @@ No match /abc$/ aabc 0: abc - *** Failers -No match aabc 0: abc +\= Expect no match aabcd No match @@ -4432,8 +4260,7 @@ No match /a[bc]d/ abd 0: abd - *** Failers -No match +\= Expect no match axyzd No match abc @@ -4466,8 +4293,7 @@ No match /a[^bc]d/ aed 0: aed - *** Failers -No match +\= Expect no match abd No match abd @@ -4480,10 +4306,9 @@ No match /a[^]b]c/ adc 0: adc - *** Failers -No match a-c 0: a-c +\= Expect no match a]c No match @@ -4496,8 +4321,7 @@ No match 0: a /\by\b/ - *** Failers -No match +\= Expect no match xy No match yz @@ -4506,8 +4330,7 @@ No match No match /\Ba\B/ - *** Failers - 0: a +\= Expect no match a- No match -a @@ -4534,10 +4357,7 @@ No match /\W/ - 0: - - *** Failers - 0: * - - - 0: - +\= Expect no match a No match @@ -4548,10 +4368,7 @@ No match /a\Sb/ a-b 0: a-b - *** Failers -No match - a-b - 0: a-b +\= Expect no match a b No match @@ -4562,10 +4379,7 @@ No match /\D/ - 0: - - *** Failers - 0: * - - - 0: - +\= Expect no match 1 No match @@ -4576,10 +4390,7 @@ No match /[\W]/ - 0: - - *** Failers - 0: * - - - 0: - +\= Expect no match a No match @@ -4590,10 +4401,7 @@ No match /a[\S]b/ a-b 0: a-b - *** Failers -No match - a-b - 0: a-b +\= Expect no match a b No match @@ -4604,10 
+4412,7 @@ No match /[\D]/ - 0: - - *** Failers - 0: * - - - 0: - +\= Expect no match 1 No match @@ -4634,6 +4439,9 @@ No match 0: a((b /a\\b/ + a\\b + 0: a\b +\= Expect no match a\b No match @@ -4695,14 +4503,11 @@ No match 0: cde /abc/ - *** Failers -No match +\= Expect no match b No match - /a*/ - /([abc])*d/ abbbcd @@ -4774,8 +4579,7 @@ No match 0: adcdcde /a[bcd]+dcdcde/ - *** Failers -No match +\= Expect no match abcde No match adcdcde @@ -4804,8 +4608,7 @@ No match 0: ij reffgz 0: effgz - *** Failers -No match +\= Expect no match effg No match bcdd @@ -4820,8 +4623,7 @@ No match 0: a /multiple words of text/ - *** Failers -No match +\= Expect no match aa No match uh-uh @@ -4860,8 +4662,7 @@ No match 0: ABC ABABC 0: ABC - *** Failers -No match +\= Expect no match aaxabxbaxbbx No match XBC @@ -4894,8 +4695,7 @@ No match 0: ABBC /ab+bc/i - *** Failers -No match +\= Expect no match ABC No match ABQ @@ -4920,8 +4720,7 @@ No match 0: ABBBBC /ab{4,5}?bc/i - *** Failers -No match +\= Expect no match ABQ No match ABBBBC @@ -4950,8 +4749,7 @@ No match /^abc$/i ABC 0: ABC - *** Failers -No match +\= Expect no match ABBBBC No match ABCC @@ -4986,10 +4784,9 @@ No match 0: AXYZC /a.*c/i - *** Failers -No match AABC 0: AABC +\= Expect no match AXYZD No match @@ -5000,8 +4797,7 @@ No match /a[b-d]e/i ACE 0: ACE - *** Failers -No match +\= Expect no match ABC No match ABD @@ -5034,8 +4830,7 @@ No match /a[^-b]c/i ADC 0: ADC - *** Failers -No match +\= Expect no match ABD No match A-C @@ -5056,8 +4851,7 @@ No match 0: EF /$b/i - *** Failers -No match +\= Expect no match A]C No match B @@ -5074,6 +4868,7 @@ No match 0: A((B /a\\b/i +\= Expect no match A\=notbol No match @@ -5152,7 +4947,6 @@ No match /abc/i /a*/i - /([abc])*d/i ABBBCD @@ -5189,6 +4983,7 @@ No match 0: HIJ /^(ab|cd)e/i +\= Expect no match ABCDE No match @@ -5250,8 +5045,7 @@ No match 0: IJ REFFGZ 0: EFFGZ - *** Failers -No match +\= Expect no match ADCDCDE No match EFFG @@ -5276,8 +5070,7 @@ No match 0: C 
/multiple words of text/i - *** Failers -No match +\= Expect no match AA No match UH-UH @@ -5423,8 +5216,7 @@ No match /(?<=a)b/ ab 0: b - *** Failers -No match +\= Expect no match cb No match b @@ -5489,8 +5281,7 @@ No match 0: Ab /(?:(?i)a)b/ - *** Failers -No match +\= Expect no match cb No match aB @@ -5515,8 +5306,7 @@ No match 0: Ab /(?i:a)b/ - *** Failers -No match +\= Expect no match aB No match aB @@ -5541,10 +5331,9 @@ No match 0: aB /(?:(?-i)a)b/i - *** Failers -No match aB 0: aB +\= Expect no match Ab No match @@ -5559,8 +5348,7 @@ No match 0: aB /(?:(?-i)a)b/i - *** Failers -No match +\= Expect no match Ab No match AB @@ -5585,8 +5373,7 @@ No match 0: aB /(?-i:a)b/i - *** Failers -No match +\= Expect no match AB No match Ab @@ -5603,8 +5390,7 @@ No match 0: aB /(?-i:a)b/i - *** Failers -No match +\= Expect no match Ab No match AB @@ -5613,8 +5399,7 @@ No match /((?-i:a))b/i /((?-i:a.))b/i - *** Failers -No match +\= Expect no match AB No match a\nB @@ -5650,8 +5435,7 @@ No match 0: aaac /(?.*)(?<=(abcd|wxyz))/ alphabetabcd 0: alphabetabcd endingwxyz 0: endingwxyz - *** Failers -No match +\= Expect no match a rather long string that doesn't end with one of them No match /word (?>(?:(?!otherword)[a-zA-Z0-9]+ ){0,30})otherword/ word cat dog elephant mussel cow horse canary baboon snake shark otherword 0: word cat dog elephant mussel cow horse canary baboon snake shark otherword +\= Expect no match word cat dog elephant mussel cow horse canary baboon snake shark No match /word (?>[a-zA-Z0-9]+ ){0,30}otherword/ +\= Expect no match word cat dog elephant mussel cow horse canary baboon snake shark the quick brown fox and the lazy dog and several other words getting close to thirty by now I hope No match @@ -5869,8 +5647,7 @@ No match 0: foo 123999foo 0: foo - *** Failers -No match +\= Expect no match 123abcfoo No match @@ -5879,8 +5656,7 @@ No match 0: foo 123999foo 0: foo - *** Failers -No match +\= Expect no match 123abcfoo No match @@ -5889,8 +5665,7 @@ No 
match 0: foo 123456foo 0: foo - *** Failers -No match +\= Expect no match 123999foo No match @@ -5899,8 +5674,7 @@ No match 0: foo 123456foo 0: foo - *** Failers -No match +\= Expect no match 123999foo No match @@ -5948,8 +5722,7 @@ No match 0: - 0digit 0: 0 - *** Failers -No match +\= Expect no match bcdef No match @@ -5978,7 +5751,8 @@ No match 0: x /(?!^)x/m - a\nxb\n +\= Expect no match + a\nxb\n No match /abc\Qabc\Eabc/ @@ -5992,8 +5766,7 @@ No match / abc\Q abc\Eabc/x abc abcabc 0: abc abcabc - *** Failers -No match +\= Expect no match abcabcabc No match @@ -6033,8 +5806,7 @@ No match /\Gabc/ abc 0: abc - *** Failers -No match +\= Expect no match xyzabc No match @@ -6052,8 +5824,7 @@ No match /a(?x: b c )d/ XabcdY 0: abcd - *** Failers -No match +\= Expect no match Xa b c d Y No match @@ -6066,8 +5837,7 @@ No match /(?i)AB(?-i)C/ XabCY 0: abC - *** Failers -No match +\= Expect no match XabcY No match @@ -6076,8 +5846,7 @@ No match 0: abCE DE 0: DE - *** Failers -No match +\= Expect no match abcE No match abCe @@ -6098,12 +5867,12 @@ No match 0: d ] 0: ] - *** Failers - 0: a +\= Expect no match b No match /(a+)*b/ +\= Expect no match aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa No match @@ -6150,22 +5919,21 @@ Failed: error -40: backreference condition or recursion test is not supported fo /line\nbreak/firstline this is a line\nbreak 0: line\x0abreak - ** Failers -No match +\= Expect no match line one\nthis is a line\nbreak in the second line No match /line\nbreak/m,firstline this is a line\nbreak 0: line\x0abreak - ** Failers -No match +\= Expect no match line one\nthis is a line\nbreak in the second line No match /1234/ 123\=ps Partial match: 123 +\= Expect no match a4\=ps,dfa_restart No match @@ -6238,8 +6006,7 @@ Subject length lower bound = 3 0: abc xyz\r\nabc 0: abc - ** Failers -No match +\= Expect no match xyz\rabc No match xyzabc\r @@ -6260,8 +6027,7 @@ Last code unit = 'c' Subject length lower bound = 3 xyz\r\nabclf> 0: abc 
- ** Failers -No match +\= Expect no match xyz\nabclf No match xyz\rabclf @@ -6276,8 +6042,7 @@ Last code unit = 'c' Subject length lower bound = 3 xyz\rabc 0: abc - ** Failers -No match +\= Expect no match xyz\nabc No match xyz\r\nabc @@ -6387,50 +6152,49 @@ Subject length lower bound = 5 3: a /(a|)*\d/ - aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -No match aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 +\= Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +No match /(?>a|)*\d/ - aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -No match aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 +\= Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +No match /(?:a|)*\d/ - aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -No match aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 +\= Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +No match /^a.b/newline=lf a\rb 0: a\x0db - ** Failers -No match +\= Expect no match a\nb No match /^a.b/newline=cr a\nb 0: a\x0ab - ** Failers -No match +\= Expect no match a\rb No match /^a.b/newline=anycrlf a\x85b 0: a\x85b - ** Failers -No match +\= Expect no match a\rb No match /^a.b/newline=any - ** Failers -No match +\= Expect no match a\nb No match a\rb @@ -6471,8 +6235,7 @@ No match 0: a\x0cb a\x85b 0: a\x85b - ** Failers -No match +\= Expect no match a\n\rb No match @@ -6513,8 +6276,7 @@ No match 0: a\x0a\x0db a\n\r\x85\x0cb 0: a\x0a\x0d\x85\x0cb - ** Failers -No match +\= Expect no match ab No match @@ -6533,8 +6295,7 @@ No match 0: a\x0a\x0d\x0a\x0db a\n\n\r\nb 0: a\x0a\x0a\x0d\x0ab - ** Failers -No match +\= Expect no match a\n\n\n\rb No match a\r @@ -6543,8 +6304,7 @@ No 
match /.+foo/ afoo 0: afoo - ** Failers -No match +\= Expect no match \r\nfoo No match \nfoo @@ -6555,16 +6315,14 @@ No match 0: afoo \nfoo 0: \x0afoo - ** Failers -No match +\= Expect no match \r\nfoo No match /.+foo/newline=any afoo 0: afoo - ** Failers -No match +\= Expect no match \nfoo No match \r\nfoo @@ -6583,16 +6341,14 @@ No match 0: abc\n\rxyz 0: - ** Failers -No match +\= Expect no match abc\r\nxyz No match /^X/m XABC 0: X - ** Failers -No match +\= Expect no match XABC\=notbol No match @@ -6639,8 +6395,7 @@ No match 0: abcabc xyzabc 0: xyzabc - ** Failers -No match +\= Expect no match xyzxyz No match @@ -6649,8 +6404,7 @@ No match 0: X X\x0a X\x09X\x0b 0: X\x09X\x0b - ** Failers -No match +\= Expect no match \xa0 X\x0a No match @@ -6661,8 +6415,7 @@ No match 0: \x09 \xa0\x0a\x0b\x0c\x0d \x09\x20\xa0\x0a\x0b\x0c 0: \x09 \xa0\x0a\x0b\x0c - ** Failers -No match +\= Expect no match \x09\x20\xa0\x0a\x0b No match @@ -6682,8 +6435,7 @@ No match 0: XNNNYZ > X NYQZ 0: X NYQZ - ** Failers -No match +\= Expect no match >XYZ No match > X NY Z @@ -6696,6 +6448,7 @@ No match 0: \x0a\x0dX\x0aY\x0a\x0bZZZ\x0aAAA\x0bNNN\x0c /.+A/newline=crlf +\= Expect no match \r\nA No match @@ -6723,8 +6476,7 @@ Subject length lower bound = 3 0: a\x0ab a\r\nb 0: a\x0d\x0ab - ** Failers -No match +\= Expect no match a\x85b No match a\x0bb @@ -6759,8 +6511,7 @@ Subject length lower bound = 2 0: a\x0ab a\r\nb 0: a\x0d\x0ab - ** Failers -No match +\= Expect no match a\x85b No match a\x0bb @@ -6795,12 +6546,11 @@ Subject length lower bound = 4 0: a\x0a\x0d\x0db a\r\n\r\n\r\n\r\nb 0: a\x0d\x0a\x0d\x0a\x0d\x0a\x0d\x0ab - ** Failers +\= Expect no match + a\x0b\x0bb No match a\x85\x85b No match - a\x0b\0bb -No match /a\R{2,4}b/I,bsr=unicode Capturing subpattern count = 0 @@ -6816,10 +6566,9 @@ Subject length lower bound = 4 0: a\x0d\x0a\x0a\x0d\x0db a\x85\x85b 0: a\x85\x85b - a\x0b\0bb -No match - ** Failers -No match + a\x0b\x0bb + 0: a\x0b\x0bb +\= Expect no match a\r\r\r\r\rb No match @@ 
-6828,20 +6577,17 @@ No match 0: abc /a[]b/alt_bsux,allow_empty_class,match_unset_backref,dupnames - ** Failers -No match +\= Expect no match ab No match /a[]+b/alt_bsux,allow_empty_class,match_unset_backref,dupnames - ** Failers -No match +\= Expect no match ab No match /a[]*+b/alt_bsux,allow_empty_class,match_unset_backref,dupnames - ** Failers -No match +\= Expect no match ab No match @@ -6850,8 +6596,7 @@ No match 0: aXb a\nb 0: a\x0ab - ** Failers -No match +\= Expect no match ab No match @@ -6860,16 +6605,14 @@ No match 0: aXb a\nX\nXb 0: a\x0aX\x0aXb - ** Failers -No match +\= Expect no match ab No match /X$/dollar_endonly X 0: X - ** Failers -No match +\= Expect no match X\n No match @@ -6894,8 +6637,7 @@ No match +2 ^ ^ z +3 ^ ^ 0: xyz - ** Failers -No match +\= Expect no match abc No match abcxypqr @@ -6912,20 +6654,7 @@ No match +2 ^ ^ z +3 ^ ^ 0: xyz - ** Failers ---->** Failers - +0 ^ x - +0 ^ x - +0 ^ x - +0 ^ x - +0 ^ x - +0 ^ x - +0 ^ x - +0 ^ x - +0 ^ x - +0 ^ x - +0 ^ x -No match +\= Expect no match abc --->abc +0 ^ x @@ -6983,26 +6712,26 @@ No match --->"ab" +0 ^ ^ +1 ^ " - +2 ^^ ((?(?=[a])[^"])|b)* + +2 ^^ ( +21 ^^ " - +3 ^^ (?(?=[a])[^"]) + +3 ^^ (? +18 ^^ b - +5 ^^ (?=[a]) + +5 ^^ (?= +8 ^ [a] +11 ^^ ) +12 ^^ [^"] +16 ^ ^ ) +17 ^ ^ | +21 ^ ^ " - +3 ^ ^ (?(?=[a])[^"]) + +3 ^ ^ (? +18 ^ ^ b - +5 ^ ^ (?=[a]) + +5 ^ ^ (?= +8 ^ [a] -+19 ^ ^ ) ++19 ^ ^ )* +21 ^ ^ " - +3 ^ ^ (?(?=[a])[^"]) + +3 ^ ^ (? 
+18 ^ ^ b - +5 ^ ^ (?=[a]) + +5 ^ ^ (?= +8 ^ [a] +17 ^ ^ | +22 ^ ^ $ @@ -7018,12 +6747,14 @@ Partial match: 123999 0: 999Y /Z(*F)/ +\= Expect no match Z\=ps No match ZA\=ps No match /Z(?!)/ +\= Expect no match Z\=ps No match ZA\=ps @@ -7056,6 +6787,7 @@ Partial match: dogs /Z(*F)Q|ZXY/ Z\=ps Partial match: Z +\= Expect no match ZA\=ps No match X\=ps @@ -7124,8 +6856,7 @@ Subject length lower bound = 3 0: abc the quick xyz brown fox 0: xyz - ** Failers -No match +\= Expect no match thejk;adlfj aenjl;fda asdfasd ehj;kjxyasiupd No match @@ -7137,8 +6868,7 @@ Subject length lower bound = 0 0: abc the quick xyz brown fox 0: xyz - ** Failers -No match +\= Expect no match thejk;adlfj aenjl;fda asdfasd ehj;kjxyasiupd No match @@ -7184,8 +6914,7 @@ Partial match: abcde 0: BC CCD 0: CC - ** Failers -No match +\= Expect no match CAD No match @@ -7194,8 +6923,7 @@ No match 0: CC BCD 0: BC - ** Failers -No match +\= Expect no match ABCD No match CAD @@ -7208,8 +6936,6 @@ No match Failed: error -42: pattern contains an item that is not supported for DFA matching /^(?=a(*SKIP)b|ac)/ - ** Failers -No match ac Failed: error -42: pattern contains an item that is not supported for DFA matching @@ -7220,10 +6946,6 @@ Failed: error -42: pattern contains an item that is not supported for DFA matchi /^(?=a(*PRUNE)b)/ ab Failed: error -42: pattern contains an item that is not supported for DFA matching - ** Failers -No match - ac -Failed: error -42: pattern contains an item that is not supported for DFA matching /^(?(?!a(*SKIP)b))/ ac @@ -7281,12 +7003,13 @@ Partial match: abc Partial match: abc /abc\B/ - abc -No match abc\=ps Partial match: abc abc\=ph Partial match: abc +\= Expect no match + abc +No match /.+/ abc\=offset=0 @@ -7295,18 +7018,19 @@ Partial match: abc 0: bc abc\=offset=2 0: c - abc\=offset=3 -No match +\= Bad offsets abc\=offset=4 Failed: error -33: bad offset value abc\=offset=-4 ** Invalid value in 'offset=-4' +\= Expect no match + abc\=offset=3 +No match /^(?:a)++\w/ 
aaaab 0: aaaab - ** Failers -No match +\= Expect no match aaaa No match bbb @@ -7318,8 +7042,7 @@ No match 1: aa aaaa 0: aa - ** Failers -No match +\= Expect no match bbb No match @@ -7328,16 +7051,14 @@ No match 0: aaaab bbb 0: b - ** Failers -No match +\= Expect no match aaaa No match /^(a)++\w/ aaaab 0: aaaab - ** Failers -No match +\= Expect no match aaaa No match bbb @@ -7346,8 +7067,7 @@ No match /^(a|)++\w/ aaaab 0: aaaab - ** Failers -No match +\= Expect no match aaaa No match bbb @@ -7357,8 +7077,7 @@ No match abcabcabc 0: abc 0+ abcabc - ** Failers -No match +\= Expect no match xyz No match @@ -7366,8 +7085,7 @@ No match abcabcabc 0: abc 0+ abcabc - ** Failers -No match +\= Expect no match xyz No match @@ -7375,8 +7093,7 @@ No match abcabcabc 0: abc 0+ abcabc - ** Failers -No match +\= Expect no match xyz No match @@ -7385,8 +7102,7 @@ No match 0: xyz /(?=abc){1}xyz/ - ** Failers -No match +\= Expect no match xyz No match @@ -7438,7 +7154,7 @@ Failed: error -52: nested recursion at the same subject position aaaabcde 0: aaaab -/((?(R2)a+|(?1)b))/ +/((?(R2)a+|(?1)b))()/ aaaabcde Failed: error -40: backreference condition or recursion test is not supported for DFA matching @@ -7458,14 +7174,6 @@ Matched, but offsets vector is too small to show all matches 2: aa 3: a -/ab\Cde/ - abXde - 0: abXde - -/(?<=ab\Cde)X/ - abZdeX - 0: X - /^\R/ \r\=ps 0: \x0d @@ -7840,7 +7548,7 @@ Callout (10): {AB} last capture = 0 Bra ^ Cond - Callout 25 9 7 + Callout 25 9 3 Assert abc Ket @@ -7853,11 +7561,11 @@ Callout (10): {AB} last capture = 0 ------------------------------------------------------------------ abcdefg --->abcdefg - 25 ^ (?=abc) + 25 ^ (?= 0: abcd xyz123 --->xyz123 - 25 ^ (?=abc) + 25 ^ (?= 0: xyz /^(?(?C$abc$)(?=abc)abcd|xyz)/B @@ -7865,7 +7573,7 @@ Callout (10): {AB} last capture = 0 Bra ^ Cond - CalloutStr $abc$ 7 12 7 + CalloutStr $abc$ 7 12 3 Assert abc Ket @@ -7879,12 +7587,12 @@ Callout (10): {AB} last capture = 0 abcdefg Callout (7): $abc$ --->abcdefg - 
^ (?=abc) + ^ (?= 0: abcd xyz123 Callout (7): $abc$ --->xyz123 - ^ (?=abc) + ^ (?= 0: xyz /^ab(?C'first')cd(?C"second")ef/ @@ -7901,18 +7609,17 @@ Callout (20): "second" aaaXY Callout (8): `code` --->aaaXY - ^^ ) + ^^ ){3} Callout (8): `code` --->aaaXY - ^ ^ ) + ^ ^ ){3} Callout (8): `code` --->aaaXY - ^ ^ ) + ^ ^ ){3} 0: aaaX # Binary zero in callout string -# a ( ? C ' x z ' ) b -/ 61 28 3f 43 27 78 00 7a 27 29 62/hex +/"a(?C'x" 00 "z')b"/hex abcdefgh Callout (5): 'x\x00z' --->abcdefgh @@ -7922,6 +7629,7 @@ Callout (5): 'x\x00z' /(?(?!)a|b)/ bbb 0: b +\= Expect no match aaa No match @@ -7938,4 +7646,45 @@ No match 0: 0: +/abc/use_offset_limit + 1234abcde\=offset_limit=100 + 0: abc + 1234abcde\=offset_limit=9 + 0: abc + 1234abcde\=offset_limit=4 + 0: abc + 1234abcde\=offset_limit=4,offset=4 + 0: abc +\= Expect no match + 1234abcde\=offset_limit=4,offset=5 +No match + 1234abcde\=offset_limit=3 +No match + +/(?<=abc)/use_offset_limit + 1234abc\=offset_limit=7 + 0: +\= Expect no match + 1234abc\=offset_limit=6 +No match + +/abcd/null_context + abcd\=null_context + 0: abcd + +/()()a+/no_auto_possess + aaa\=allcaptures +** Ignored after DFA matching: allcaptures + 0: aaa + 1: aa + 2: a + a\=allcaptures +** Ignored after DFA matching: allcaptures + 0: a + +/(*LIMIT_RECURSION=100)^((.)(?1)|.)$/ +\= Expect recursion limit exceeded + a[00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00] +Failed: error -53: recursion limit exceeded + # End of testinput6 diff --git a/pcre2/testdata/testoutput7 b/pcre2/testdata/testoutput7 index a7f6a62bb..f80418516 100644 --- a/pcre2/testdata/testoutput7 +++ b/pcre2/testdata/testoutput7 @@ -3,6 +3,7 @@ # used to force DFA matching for all tests. 
#subject dfa +#newline_default LF any anyCRLF /\x{100}ab/utf \x{100}ab @@ -21,8 +22,7 @@ 0: a\x{100}b a\x{100}\x{100}b 0: a\x{100}\x{100}b - *** Failers -No match +\= Expect no match ab No match @@ -33,16 +33,14 @@ No match 0: X \x{300}Xoanon 0: X - *** Failers -No match +\= Expect no match YXoanon No match /\BX/utf YXoanon 0: X - *** Failers -No match +\= Expect no match Xoanon No match +Xoanon @@ -57,16 +55,14 @@ No match 0: X FAX 0: X - *** Failers -No match +\= Expect no match Xoanon No match /X\B/utf Xoanon 0: X - *** Failers -No match +\= Expect no match X+oanon No match ZX\x{300}oanon @@ -87,8 +83,7 @@ No match 0: \x{123}\x{123}4 \x{400}\x{401}\x{402}6 0: \x{400}\x{401}\x{402}6 - *** Failers -No match +\= Expect no match d99 No match \x{123}\x{122}4 @@ -105,8 +100,7 @@ No match 0: a\x{7f}b a\x{100}b 0: a\x{100}b - *** Failers -No match +\= Expect no match a\nb No match @@ -117,8 +111,7 @@ No match 0: a\x{4000}\x{7f}yb a\x{4000}\x{100}yb 0: a\x{4000}\x{100}yb - *** Failers -No match +\= Expect no match a\x{4000}b No match ac\ncb @@ -181,8 +174,7 @@ No match 0: a\x{1234}\x{4321}yb a\x{1234}\x{4321}\x{3412}b 0: a\x{1234}\x{4321}\x{3412}b - *** Failers -No match +\= Expect no match a\x{1234}b No match ac\ncb @@ -200,8 +192,7 @@ No match 1: axxxxb a\x{1234}\x{4321}\x{3412}\x{3421}b 0: a\x{1234}\x{4321}\x{3412}\x{3421}b - *** Failers -No match +\= Expect no match a\x{1234}b No match @@ -217,8 +208,7 @@ No match 1: axxxxb a\x{1234}\x{4321}\x{3412}\x{3421}b 0: a\x{1234}\x{4321}\x{3412}\x{3421}b - *** Failers -No match +\= Expect no match a\x{1234}b No match @@ -237,8 +227,7 @@ No match 0: axbxxb axxxxxbcdefghijb 0: axxxxxb - *** Failers -No match +\= Expect no match a\x{1234}b No match axxxxxxbcdefghijb @@ -259,16 +248,14 @@ No match 0: axbxxb axxxxxbcdefghijb 0: axxxxxb - *** Failers -No match +\= Expect no match a\x{1234}b No match axxxxxxbcdefghijb No match /^[a\x{c0}]/utf - *** Failers -No match +\= Expect no match \x{100} No match @@ -287,8 +274,7 @@ No match 
/(?:\x{100}){3}b/utf \x{100}\x{100}\x{100}b 0: \x{100}\x{100}\x{100}b - *** Failers -No match +\= Expect no match \x{100}\x{100}b No match @@ -297,8 +283,7 @@ No match 0: \x{ab} \xc2\xab 0: \x{ab} - *** Failers -No match +\= Expect no match \x00{ab} No match @@ -307,8 +292,7 @@ No match 0: X \x{256}XYZ 0: X - *** Failers -No match +\= Expect no match XYZ No match @@ -467,8 +451,7 @@ Matched, but offsets vector is too small to show all matches /\D+/utf 12abcd34 0: abcd - *** Failers - 0: *** Failers +\= Expect no match 1234 No match @@ -477,8 +460,7 @@ No match 0: abc 12ab34 0: ab - *** Failers - 0: *** +\= Expect no match 1234 No match 12a34 @@ -490,9 +472,7 @@ No match 1: ab 12ab34 0: ab - *** Failers - 0: *** - 1: ** +\= Expect no match 1234 No match 12a34 @@ -501,16 +481,13 @@ No match /\d+/utf 12abcd34 0: 12 - *** Failers -No match /\d{2,3}/utf 12abcd34 0: 12 1234abcd 0: 123 - *** Failers -No match +\= Expect no match 1.4 No match @@ -520,16 +497,14 @@ No match 1234abcd 0: 123 1: 12 - *** Failers -No match +\= Expect no match 1.4 No match /\S+/utf 12abcd34 0: 12abcd34 - *** Failers - 0: *** +\= Expect no match \ \ No match @@ -538,8 +513,7 @@ No match 0: 12a 1234abcd 0: 123 - *** Failers - 0: *** +\= Expect no match \ \ No match @@ -550,25 +524,20 @@ No match 1234abcd 0: 123 1: 12 - *** Failers - 0: *** - 1: ** +\= Expect no match \ \ No match />\s+ <34 0: > < - *** Failers -No match />\s{2,3} < ab> < - *** Failers -No match +\= Expect no match ab> < ab> < - *** Failers -No match +\= Expect no match ab> \p{Xsp}/utf >\x{1680}\x{2028}\x{0b} 0: >\x{1680} - ** Failers -No match +\= Expect no match \x{0b} No match @@ -2951,8 +2762,7 @@ No match 0: >\x{1680} >\x{a0} 0: >\x{a0} - ** Failers -No match +\= Expect no match \x{0b} No match @@ -3006,8 +2816,7 @@ No match 0: \x{10a7} _ABC 0: _ - ** Failers -No match +\= Expect no match [] No match @@ -3036,8 +2845,7 @@ No match 0: \x{10a7} _ABC 0: _ - ** Failers -No match +\= Expect no match [] No match @@ -3064,8 +2872,7 
@@ No match /\b...\B/utf abc_ 0: abc - ** Failers - 0: Fai +\= Expect no match \x{37e}abc\x{376} No match \x{37e}\x{376}\x{371}\x{393}\x{394} @@ -3170,9 +2977,7 @@ Partial match: AA \x{1b04}\x{1b04}X 0: \x{1b04}\x{1b04} 0+ X - *These match up to the roman letters - 0: * - 0+ These match up to the roman letters +\= These match up to the roman letters \x{1111}\x{1111}L,L 0: \x{1111}\x{1111} 0+ L,L @@ -3203,9 +3008,7 @@ Partial match: AA \x{ad89}\x{11fe}\x{11fe}LVT, T, T 0: \x{ad89}\x{11fe}\x{11fe} 0+ LVT, T, T - *These match just the first codepoint (invalid sequence) - 0: * - 0+ These match just the first codepoint (invalid sequence) +\= These match just the first codepoint (invalid sequence) \x{1111}\x{11fe}L, T 0: \x{1111} 0+ \x{11fe}L, T @@ -3251,9 +3054,7 @@ Partial match: AA \x{11fe}\x{ad89}T, LVT 0: \x{11fe} 0+ \x{ad89}T, LVT - *Test extend and spacing mark - 0: * - 0+ Test extend and spacing mark +\= Test extend and spacing mark \x{1111}\x{ae4c}\x{0711}L, LV, extend 0: \x{1111}\x{ae4c}\x{711} 0+ L, LV, extend @@ -3263,9 +3064,7 @@ Partial match: AA \x{1111}\x{ae4c}\x{1b04}\x{0711}\x{1b04}L, LV, spacing mark, extend, spacing mark 0: \x{1111}\x{ae4c}\x{1b04}\x{711}\x{1b04} 0+ L, LV, spacing mark, extend, spacing mark - *Test CR, LF, and control - 0: * - 0+ Test CR, LF, and control +\= Test CR, LF, and control \x0d\x{0711}CR, extend 0: \x{0d} 0+ \x{711}CR, extend @@ -3284,9 +3083,7 @@ Partial match: AA \x09\x{1b04}Control, spacingmark 0: \x{09} 0+ \x{1b04}Control, spacingmark - *There are no Prepend characters, so we can't test Prepend, CR - 0: * - 0+ There are no Prepend characters, so we can't test Prepend, CR +\= There are no Prepend characters, so we can't test Prepend, CR /^(?>\X{2})X/utf,aftertext \x{1111}\x{ae4c}\x{1111}\x{ae4c}X @@ -3366,7 +3163,6 @@ Partial match: AA \x{00c5}\x{00e5}\x{212b} 0: \x{c5}\x{e5}\x{212b} - /\x{01c4}+/i,utf \x{01c4}\x{01c5}\x{01c6} 0: \x{1c4}\x{1c5}\x{1c6} @@ -3379,7 +3175,6 @@ Partial match: AA \x{01c4}\x{01c5}\x{01c6} 0: 
\x{1c4}\x{1c5}\x{1c6} - /\x{01c7}+/i,utf \x{01c7}\x{01c8}\x{01c9} 0: \x{1c7}\x{1c8}\x{1c9} @@ -3405,7 +3200,6 @@ Partial match: AA \x{01ca}\x{01cb}\x{01cc} 0: \x{1ca}\x{1cb}\x{1cc} - /\x{01f1}+/i,utf \x{01f1}\x{01f2}\x{01f3} 0: \x{1f1}\x{1f2}\x{1f3} @@ -3418,7 +3212,6 @@ Partial match: AA \x{01f1}\x{01f2}\x{01f3} 0: \x{1f1}\x{1f2}\x{1f3} - /\x{0345}+/i,utf \x{0345}\x{0399}\x{03b9}\x{1fbe} 0: \x{345}\x{399}\x{3b9}\x{1fbe} @@ -3435,7 +3228,6 @@ Partial match: AA \x{0345}\x{0399}\x{03b9}\x{1fbe} 0: \x{345}\x{399}\x{3b9}\x{1fbe} - /\x{0392}+/i,utf \x{0392}\x{03b2}\x{03d0} 0: \x{392}\x{3b2}\x{3d0} @@ -3461,7 +3253,6 @@ Partial match: AA \x{0395}\x{03b5}\x{03f5} 0: \x{395}\x{3b5}\x{3f5} - /\x{0398}+/i,utf \x{0398}\x{03b8}\x{03d1}\x{03f4} 0: \x{398}\x{3b8}\x{3d1}\x{3f4} @@ -3477,7 +3268,6 @@ Partial match: AA /\x{03f4}+/i,utf \x{0398}\x{03b8}\x{03d1}\x{03f4} 0: \x{398}\x{3b8}\x{3d1}\x{3f4} - /\x{039a}+/i,utf \x{039a}\x{03ba}\x{03f0} @@ -3491,7 +3281,6 @@ Partial match: AA \x{039a}\x{03ba}\x{03f0} 0: \x{39a}\x{3ba}\x{3f0} - /\x{03a0}+/i,utf \x{03a0}\x{03c0}\x{03d6} 0: \x{3a0}\x{3c0}\x{3d6} @@ -3504,7 +3293,6 @@ Partial match: AA \x{03a0}\x{03c0}\x{03d6} 0: \x{3a0}\x{3c0}\x{3d6} - /\x{03a1}+/i,utf \x{03a1}\x{03c1}\x{03f1} 0: \x{3a1}\x{3c1}\x{3f1} @@ -3517,7 +3305,6 @@ Partial match: AA \x{03a1}\x{03c1}\x{03f1} 0: \x{3a1}\x{3c1}\x{3f1} - /\x{03a3}+/i,utf \x{03A3}\x{03C2}\x{03C3} 0: \x{3a3}\x{3c2}\x{3c3} @@ -3529,7 +3316,6 @@ Partial match: AA /\x{03c3}+/i,utf \x{03A3}\x{03C2}\x{03C3} 0: \x{3a3}\x{3c2}\x{3c3} - /\x{03a6}+/i,utf \x{03a6}\x{03c6}\x{03d5} @@ -3543,7 +3329,6 @@ Partial match: AA \x{03a6}\x{03c6}\x{03d5} 0: \x{3a6}\x{3c6}\x{3d5} - /\x{03c9}+/i,utf \x{03c9}\x{03a9}\x{2126} 0: \x{3c9}\x{3a9}\x{2126} @@ -3556,7 +3341,6 @@ Partial match: AA \x{03c9}\x{03a9}\x{2126} 0: \x{3c9}\x{3a9}\x{2126} - /\x{1e60}+/i,utf \x{1e60}\x{1e61}\x{1e9b} 0: \x{1e60}\x{1e61}\x{1e9b} @@ -3569,7 +3353,6 @@ Partial match: AA \x{1e60}\x{1e61}\x{1e9b} 0: \x{1e60}\x{1e61}\x{1e9b} - 
/\x{1e9e}+/i,utf \x{1e9e}\x{00df} 0: \x{1e9e}\x{df} @@ -3578,7 +3361,6 @@ Partial match: AA \x{1e9e}\x{00df} 0: \x{1e9e}\x{df} - /\x{1f88}+/i,utf \x{1f88}\x{1f80} 0: \x{1f88}\x{1f80} @@ -3599,7 +3381,6 @@ Partial match: AA \x{004b}\x{006b}\x{212a} 0: Kk\x{212a} - /\x{0053}+/i,utf \x{0053}\x{0073}\x{017f} 0: Ss\x{17f} @@ -3613,24 +3394,29 @@ Partial match: AA 0: Ss\x{17f} /ist/i,utf +\= Expect no match ikt No match /is+t/i,utf iSs\x{17f}t 0: iSs\x{17f}t +\= Expect no match ikt No match /is+?t/i,utf +\= Expect no match ikt No match /is?t/i,utf +\= Expect no match ikt No match /is{2}t/i,utf +\= Expect no match iskt No match @@ -3643,16 +3429,14 @@ No match 0: ` \x{1234}abc 0: \x{1234} - ** Failers -No match +\= Expect no match abc No match /^\p{Xuc}+/utf $@`\x{a0}\x{1234}\x{e000}** 0: $@`\x{a0}\x{1234}\x{e000} - ** Failers -No match +\= Expect no match \x{9f} No match @@ -3664,32 +3448,28 @@ No match 3: $@` 4: $@ 5: $ - ** Failers -No match +\= Expect no match \x{9f} No match /^\p{Xuc}+?\*/utf $@`\x{a0}\x{1234}\x{e000}** 0: $@`\x{a0}\x{1234}\x{e000}* - ** Failers -No match +\= Expect no match \x{9f} No match /^\p{Xuc}++/utf $@`\x{a0}\x{1234}\x{e000}** 0: $@`\x{a0}\x{1234}\x{e000} - ** Failers -No match +\= Expect no match \x{9f} No match /^\p{Xuc}{3,5}/utf $@`\x{a0}\x{1234}\x{e000}** 0: $@`\x{a0}\x{1234} - ** Failers -No match +\= Expect no match \x{9f} No match @@ -3698,32 +3478,28 @@ No match 0: $@`\x{a0}\x{1234} 1: $@`\x{a0} 2: $@` - ** Failers -No match +\= Expect no match \x{9f} No match /^[\p{Xuc}]/utf $@`\x{a0}\x{1234}\x{e000}** 0: $ - ** Failers -No match +\= Expect no match \x{9f} No match /^[\p{Xuc}]+/utf $@`\x{a0}\x{1234}\x{e000}** 0: $@`\x{a0}\x{1234}\x{e000} - ** Failers -No match +\= Expect no match \x{9f} No match /^\P{Xuc}/utf abc 0: a - ** Failers - 0: * +\= Expect no match $abc No match @abc @@ -3736,8 +3512,7 @@ No match /^[\P{Xuc}]/utf abc 0: a - ** Failers - 0: * +\= Expect no match $abc No match @abc diff --git a/pcre2/testdata/testoutput8-16 
b/pcre2/testdata/testoutput8-16-2 similarity index 81% rename from pcre2/testdata/testoutput8-16 rename to pcre2/testdata/testoutput8-16-2 index 0f9fa6a46..7c2a4b9cb 100644 --- a/pcre2/testdata/testoutput8-16 +++ b/pcre2/testdata/testoutput8-16-2 @@ -1,8 +1,11 @@ -# These are a few representative patterns whose lengths and offsets are to be -# shown when the link size is 2. This is just a doublecheck test to ensure the -# sizes don't go horribly wrong when something is changed. The pattern contents -# are all themselves checked in other tests. Unicode, including property -# support, is required for these tests. +# There are two sorts of patterns in this test. A number of them are +# representative patterns whose lengths and offsets are checked. This is just a +# doublecheck test to ensure the sizes don't go horribly wrong when something +# is changed. The operation of these patterns is checked in other tests. +# +# This file also contains tests whose output varies with code unit size and/or +# link size. Unicode support is required for these tests. There are separate +# output files for each code unit size and link size. #pattern fullbincode,memory @@ -378,7 +381,7 @@ Options: utf First code unit = 'A' Last code unit = '.' Subject length lower bound = 4 - + /\x{D55c}\x{ad6d}\x{C5B4}/I,utf Memory allocation (code space): 22 ------------------------------------------------------------------ @@ -842,11 +845,185 @@ Memory allocation (code space): 14 # Check the absolute limit on nesting (?| etc. This varies with code unit # width because the workspace is a different number of bytes. It will fail -# in 8-bit and 16-bit but not in 32-bit. - +# with link size 2 in 8-bit and 16-bit but not in 32-bit. 
+ /(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?| 
))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) /parens_nest_limit=1000,-fullbincode Failed: error 184 at offset 1540: (?| and/or (?J: or (?x: parentheses are too deeply nested +# Use "expand" to create some very long patterns with nested parentheses, in +# order to test workspace overflow. Again, this varies with code unit width, +# and even when it fails in two modes, the error offset differs. It also varies +# with link size - hence multiple tests with different values. + +/(?'ABC'\[[bar](]{792}*THEN:\[A]{255}\[)]{793}/expand,-fullbincode,parens_nest_limit=1000 +Failed: error 186 at offset 5813: regular expression is too complicated + +/(?'ABC'\[[bar](]{793}*THEN:\[A]{255}\[)]{794}/expand,-fullbincode,parens_nest_limit=1000 +Failed: error 186 at offset 5820: regular expression is too complicated + +/(?'ABC'\[[bar](]{1793}*THEN:\[A]{255}\[)]{1794}/expand,-fullbincode,parens_nest_limit=2000 +Failed: error 186 at offset 12820: regular expression is too complicated + +/(?(1)(?1)){8,}+()/debug +------------------------------------------------------------------ + 0 79 Bra + 2 70 Once + 4 6 Cond + 6 1 Cond ref + 8 74 Recurse + 10 6 Ket + 12 6 Cond + 14 1 Cond ref + 16 74 Recurse + 18 6 Ket + 20 6 Cond + 22 1 Cond ref + 24 74 Recurse + 26 6 Ket + 28 6 Cond + 30 1 Cond ref + 32 74 Recurse + 34 6 Ket + 36 6 Cond + 38 1 Cond ref + 40 74 Recurse + 42 6 Ket + 44 6 Cond + 46 1 Cond ref + 48 74 Recurse + 50 6 Ket + 52 6 Cond + 54 1 Cond ref + 56 74 Recurse + 58 6 Ket + 60 10 SBraPos + 62 6 SCond + 64 1 Cond ref + 66 74 
Recurse + 68 6 Ket + 70 10 KetRpos + 72 70 Ket + 74 3 CBra 1 + 77 3 Ket + 79 79 Ket + 81 End +------------------------------------------------------------------ +Capturing subpattern count = 1 +Max back reference = 1 +May match empty string +Subject length lower bound = 0 + abcd + 0: + 1: + +/(?(1)|a(?1)b){2,}+()/debug +------------------------------------------------------------------ + 0 43 Bra + 2 34 Once + 4 4 Cond + 6 1 Cond ref + 8 8 Alt + 10 a + 12 38 Recurse + 14 b + 16 12 Ket + 18 16 SBraPos + 20 4 SCond + 22 1 Cond ref + 24 8 Alt + 26 a + 28 38 Recurse + 30 b + 32 12 Ket + 34 16 KetRpos + 36 34 Ket + 38 3 CBra 1 + 41 3 Ket + 43 43 Ket + 45 End +------------------------------------------------------------------ +Capturing subpattern count = 1 +Max back reference = 1 +May match empty string +Subject length lower bound = 0 + abcde +No match + +/((?1)(?2)(?3)(?4)(?5)(?6)(?7)(?8)(?9)(?9)(?8)(?7)(?6)(?5)(?4)(?3)(?2)(?1)(?0)){2,}()()()()()()()()()/debug +------------------------------------------------------------------ + 0 133 Bra + 2 41 CBra 1 + 5 2 Recurse + 7 88 Recurse + 9 93 Recurse + 11 98 Recurse + 13 103 Recurse + 15 108 Recurse + 17 113 Recurse + 19 118 Recurse + 21 123 Recurse + 23 123 Recurse + 25 118 Recurse + 27 113 Recurse + 29 108 Recurse + 31 103 Recurse + 33 98 Recurse + 35 93 Recurse + 37 88 Recurse + 39 2 Recurse + 41 0 Recurse + 43 41 Ket + 45 41 SCBra 1 + 48 2 Recurse + 50 88 Recurse + 52 93 Recurse + 54 98 Recurse + 56 103 Recurse + 58 108 Recurse + 60 113 Recurse + 62 118 Recurse + 64 123 Recurse + 66 123 Recurse + 68 118 Recurse + 70 113 Recurse + 72 108 Recurse + 74 103 Recurse + 76 98 Recurse + 78 93 Recurse + 80 88 Recurse + 82 2 Recurse + 84 0 Recurse + 86 41 KetRmax + 88 3 CBra 2 + 91 3 Ket + 93 3 CBra 3 + 96 3 Ket + 98 3 CBra 4 +101 3 Ket +103 3 CBra 5 +106 3 Ket +108 3 CBra 6 +111 3 Ket +113 3 CBra 7 +116 3 Ket +118 3 CBra 8 +121 3 Ket +123 3 CBra 9 +126 3 Ket +128 3 CBra 10 +131 3 Ket +133 133 Ket +135 End 
+------------------------------------------------------------------ +Capturing subpattern count = 10 +May match empty string +Subject length lower bound = 0 + +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ +Failed: error 114 at offset 509: missing closing parenthesis + +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))/-fullbincode + # End of testinput8 diff --git a/pcre2/testdata/testoutput8-16-3 b/pcre2/testdata/testoutput8-16-3 new file mode 100644 index 000000000..edad37ea8 --- /dev/null +++ b/pcre2/testdata/testoutput8-16-3 @@ -0,0 +1,1026 @@ +# There are two sorts of patterns in this test. A number of them are +# representative patterns whose lengths and offsets are checked. This is just a +# doublecheck test to ensure the sizes don't go horribly wrong when something +# is changed. The operation of these patterns is checked in other tests. +# +# This file also contains tests whose output varies with code unit size and/or +# link size. 
Unicode support is required for these tests. There are separate +# output files for each code unit size and link size. + +#pattern fullbincode,memory + +/((?i)b)/ +Memory allocation (code space): 32 +------------------------------------------------------------------ + 0 12 Bra + 3 6 CBra 1 + 7 /i b + 9 6 Ket + 12 12 Ket + 15 End +------------------------------------------------------------------ + +/(?s)(.*X|^B)/ +Memory allocation (code space): 48 +------------------------------------------------------------------ + 0 20 Bra + 3 8 CBra 1 + 7 AllAny* + 9 X + 11 6 Alt + 14 ^ + 15 B + 17 14 Ket + 20 20 Ket + 23 End +------------------------------------------------------------------ + +/(?s:.*X|^B)/ +Memory allocation (code space): 46 +------------------------------------------------------------------ + 0 19 Bra + 3 7 Bra + 6 AllAny* + 8 X + 10 6 Alt + 13 ^ + 14 B + 16 13 Ket + 19 19 Ket + 22 End +------------------------------------------------------------------ + +/^[[:alnum:]]/ +Memory allocation (code space): 50 +------------------------------------------------------------------ + 0 21 Bra + 3 ^ + 4 [0-9A-Za-z] + 21 21 Ket + 24 End +------------------------------------------------------------------ + +/#/Ix +Memory allocation (code space): 14 +------------------------------------------------------------------ + 0 3 Bra + 3 3 Ket + 6 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +May match empty string +Options: extended +Subject length lower bound = 0 + +/a#/Ix +Memory allocation (code space): 18 +------------------------------------------------------------------ + 0 5 Bra + 3 a + 5 5 Ket + 8 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: extended +First code unit = 'a' +Subject length lower bound = 1 + +/x?+/ +Memory allocation (code space): 18 +------------------------------------------------------------------ + 0 5 Bra + 3 x?+ + 5 5 Ket + 
8 End +------------------------------------------------------------------ + +/x++/ +Memory allocation (code space): 18 +------------------------------------------------------------------ + 0 5 Bra + 3 x++ + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/x{1,3}+/ +Memory allocation (code space): 24 +------------------------------------------------------------------ + 0 8 Bra + 3 x + 5 x{0,2}+ + 8 8 Ket + 11 End +------------------------------------------------------------------ + +/(x)*+/ +Memory allocation (code space): 34 +------------------------------------------------------------------ + 0 13 Bra + 3 Braposzero + 4 6 CBraPos 1 + 8 x + 10 6 KetRpos + 13 13 Ket + 16 End +------------------------------------------------------------------ + +/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/ +Memory allocation (code space): 166 +------------------------------------------------------------------ + 0 79 Bra + 3 ^ + 4 72 CBra 1 + 8 6 CBra 2 + 12 a+ + 14 6 Ket + 17 22 CBra 3 + 21 [ab]+? 
+ 39 22 Ket + 42 22 CBra 4 + 46 [bc]+ + 64 22 Ket + 67 6 CBra 5 + 71 \w*+ + 73 6 Ket + 76 72 Ket + 79 79 Ket + 82 End +------------------------------------------------------------------ + +"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" +Memory allocation (code space): 1652 +------------------------------------------------------------------ + 0 822 Bra + 3 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X +821 \b +822 822 Ket +825 End +------------------------------------------------------------------ + +"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" +Memory allocation (code space): 1632 +------------------------------------------------------------------ + 0 812 Bra + 3 
$<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X +811 \b +812 812 Ket +815 End +------------------------------------------------------------------ + +/(a(?1)b)/ +Memory allocation (code space): 42 +------------------------------------------------------------------ + 0 17 Bra + 3 11 CBra 1 + 7 a + 9 3 Recurse + 12 b + 14 11 Ket + 17 17 Ket + 20 End +------------------------------------------------------------------ + +/(a(?1)+b)/ +Memory allocation (code space): 54 +------------------------------------------------------------------ + 0 23 Bra + 3 17 CBra 1 + 7 a + 9 6 Once + 12 3 Recurse + 15 6 KetRmax + 18 b + 20 17 Ket + 23 23 Ket + 26 End +------------------------------------------------------------------ + +/a(?Pb|c)d(?Pe)/ +Memory allocation (code space): 68 +------------------------------------------------------------------ + 0 30 Bra + 3 a + 5 6 CBra 1 + 9 b + 11 5 Alt + 14 c + 16 11 Ket + 19 d + 21 6 CBra 2 + 25 e + 27 6 Ket + 30 30 Ket + 33 End +------------------------------------------------------------------ + +/(?:a(?Pc(?Pd)))(?Pa)/ +Memory allocation (code space): 84 +------------------------------------------------------------------ + 0 38 Bra + 3 23 Bra + 6 a + 8 15 CBra 1 + 12 c + 14 6 CBra 2 + 18 d + 20 6 Ket + 23 15 Ket + 26 23 Ket + 29 6 CBra 3 + 33 a + 35 6 Ket + 38 38 Ket + 41 End +------------------------------------------------------------------ + +/(?Pa)...(?P=a)bbb(?P>a)d/ +Memory allocation (code space): 64 +------------------------------------------------------------------ + 0 28 Bra + 3 6 CBra 1 + 7 a + 9 6 Ket + 12 Any + 13 Any + 14 Any + 15 \1 + 17 bbb + 23 3 Recurse + 26 d + 28 28 Ket + 31 End 
+------------------------------------------------------------------ + +/abc(?C255)de(?C)f/ +Memory allocation (code space): 62 +------------------------------------------------------------------ + 0 27 Bra + 3 abc + 9 Callout 255 10 1 + 15 de + 19 Callout 0 16 1 + 25 f + 27 27 Ket + 30 End +------------------------------------------------------------------ + +/abcde/auto_callout +Memory allocation (code space): 106 +------------------------------------------------------------------ + 0 49 Bra + 3 Callout 255 0 1 + 9 a + 11 Callout 255 1 1 + 17 b + 19 Callout 255 2 1 + 25 c + 27 Callout 255 3 1 + 33 d + 35 Callout 255 4 1 + 41 e + 43 Callout 255 5 0 + 49 49 Ket + 52 End +------------------------------------------------------------------ + +/\x{100}/utf +Memory allocation (code space): 18 +------------------------------------------------------------------ + 0 5 Bra + 3 \x{100} + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/\x{1000}/utf +Memory allocation (code space): 18 +------------------------------------------------------------------ + 0 5 Bra + 3 \x{1000} + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/\x{10000}/utf +Memory allocation (code space): 20 +------------------------------------------------------------------ + 0 6 Bra + 3 \x{10000} + 6 6 Ket + 9 End +------------------------------------------------------------------ + +/\x{100000}/utf +Memory allocation (code space): 20 +------------------------------------------------------------------ + 0 6 Bra + 3 \x{100000} + 6 6 Ket + 9 End +------------------------------------------------------------------ + +/\x{10ffff}/utf +Memory allocation (code space): 20 +------------------------------------------------------------------ + 0 6 Bra + 3 \x{10ffff} + 6 6 Ket + 9 End +------------------------------------------------------------------ + +/\x{110000}/utf +Failed: error 134 at offset 9: character code point value in \x{} or \o{} 
is too large + +/[\x{ff}]/utf +Memory allocation (code space): 18 +------------------------------------------------------------------ + 0 5 Bra + 3 \x{ff} + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[\x{100}]/utf +Memory allocation (code space): 18 +------------------------------------------------------------------ + 0 5 Bra + 3 \x{100} + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/\x80/utf +Memory allocation (code space): 18 +------------------------------------------------------------------ + 0 5 Bra + 3 \x{80} + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/\xff/utf +Memory allocation (code space): 18 +------------------------------------------------------------------ + 0 5 Bra + 3 \x{ff} + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/\x{0041}\x{2262}\x{0391}\x{002e}/I,utf +Memory allocation (code space): 30 +------------------------------------------------------------------ + 0 11 Bra + 3 A\x{2262}\x{391}. + 11 11 Ket + 14 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First code unit = 'A' +Last code unit = '.' 
+Subject length lower bound = 4 + +/\x{D55c}\x{ad6d}\x{C5B4}/I,utf +Memory allocation (code space): 26 +------------------------------------------------------------------ + 0 9 Bra + 3 \x{d55c}\x{ad6d}\x{c5b4} + 9 9 Ket + 12 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First code unit = \x{d55c} +Last code unit = \x{c5b4} +Subject length lower bound = 3 + +/\x{65e5}\x{672c}\x{8a9e}/I,utf +Memory allocation (code space): 26 +------------------------------------------------------------------ + 0 9 Bra + 3 \x{65e5}\x{672c}\x{8a9e} + 9 9 Ket + 12 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First code unit = \x{65e5} +Last code unit = \x{8a9e} +Subject length lower bound = 3 + +/[\x{100}]/utf +Memory allocation (code space): 18 +------------------------------------------------------------------ + 0 5 Bra + 3 \x{100} + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[Z\x{100}]/utf +Memory allocation (code space): 60 +------------------------------------------------------------------ + 0 26 Bra + 3 [Z\x{100}] + 26 26 Ket + 29 End +------------------------------------------------------------------ + +/^[\x{100}\E-\Q\E\x{150}]/utf +Memory allocation (code space): 32 +------------------------------------------------------------------ + 0 12 Bra + 3 ^ + 4 [\x{100}-\x{150}] + 12 12 Ket + 15 End +------------------------------------------------------------------ + +/^[\QÄ€\E-\QÅ\E]/utf +Memory allocation (code space): 32 +------------------------------------------------------------------ + 0 12 Bra + 3 ^ + 4 [\x{100}-\x{150}] + 12 12 Ket + 15 End +------------------------------------------------------------------ + +/^[\QÄ€\E-\QÅ\E/utf +Failed: error 106 at offset 13: missing terminating ] for character class + +/[\p{L}]/ +Memory allocation (code space): 30 
+------------------------------------------------------------------ + 0 11 Bra + 3 [\p{L}] + 11 11 Ket + 14 End +------------------------------------------------------------------ + +/[\p{^L}]/ +Memory allocation (code space): 30 +------------------------------------------------------------------ + 0 11 Bra + 3 [\P{L}] + 11 11 Ket + 14 End +------------------------------------------------------------------ + +/[\P{L}]/ +Memory allocation (code space): 30 +------------------------------------------------------------------ + 0 11 Bra + 3 [\P{L}] + 11 11 Ket + 14 End +------------------------------------------------------------------ + +/[\P{^L}]/ +Memory allocation (code space): 30 +------------------------------------------------------------------ + 0 11 Bra + 3 [\p{L}] + 11 11 Ket + 14 End +------------------------------------------------------------------ + +/[abc\p{L}\x{0660}]/utf +Memory allocation (code space): 66 +------------------------------------------------------------------ + 0 29 Bra + 3 [a-c\p{L}\x{660}] + 29 29 Ket + 32 End +------------------------------------------------------------------ + +/[\p{Nd}]/utf +Memory allocation (code space): 30 +------------------------------------------------------------------ + 0 11 Bra + 3 [\p{Nd}] + 11 11 Ket + 14 End +------------------------------------------------------------------ + +/[\p{Nd}+-]+/utf +Memory allocation (code space): 64 +------------------------------------------------------------------ + 0 28 Bra + 3 [+\-\p{Nd}]++ + 28 28 Ket + 31 End +------------------------------------------------------------------ + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf +Memory allocation (code space): 36 +------------------------------------------------------------------ + 0 14 Bra + 3 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} + 14 14 Ket + 17 End +------------------------------------------------------------------ + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf +Memory allocation (code space): 36 
+------------------------------------------------------------------ + 0 14 Bra + 3 A\x{391}\x{10427}\x{ff3a}\x{1fb0} + 14 14 Ket + 17 End +------------------------------------------------------------------ + +/[\x{105}-\x{109}]/i,utf +Memory allocation (code space): 30 +------------------------------------------------------------------ + 0 11 Bra + 3 [\x{104}-\x{109}] + 11 11 Ket + 14 End +------------------------------------------------------------------ + +/( ( (?(1)0|) )* )/x +Memory allocation (code space): 70 +------------------------------------------------------------------ + 0 31 Bra + 3 25 CBra 1 + 7 Brazero + 8 17 SCBra 2 + 12 7 Cond + 15 1 Cond ref + 17 0 + 19 3 Alt + 22 10 Ket + 25 17 KetRmax + 28 25 Ket + 31 31 Ket + 34 End +------------------------------------------------------------------ + +/( (?(1)0|)* )/x +Memory allocation (code space): 56 +------------------------------------------------------------------ + 0 24 Bra + 3 18 CBra 1 + 7 Brazero + 8 7 SCond + 11 1 Cond ref + 13 0 + 15 3 Alt + 18 10 KetRmax + 21 18 Ket + 24 24 Ket + 27 End +------------------------------------------------------------------ + +/[a]/ +Memory allocation (code space): 18 +------------------------------------------------------------------ + 0 5 Bra + 3 a + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[a]/utf +Memory allocation (code space): 18 +------------------------------------------------------------------ + 0 5 Bra + 3 a + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[\xaa]/ +Memory allocation (code space): 18 +------------------------------------------------------------------ + 0 5 Bra + 3 \x{aa} + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[\xaa]/utf +Memory allocation (code space): 18 +------------------------------------------------------------------ + 0 5 Bra + 3 \x{aa} + 5 5 Ket + 8 End 
+------------------------------------------------------------------ + +/[^a]/ +Memory allocation (code space): 18 +------------------------------------------------------------------ + 0 5 Bra + 3 [^a] + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[^a]/utf +Memory allocation (code space): 18 +------------------------------------------------------------------ + 0 5 Bra + 3 [^a] + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[^\xaa]/ +Memory allocation (code space): 18 +------------------------------------------------------------------ + 0 5 Bra + 3 [^\x{aa}] + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[^\xaa]/utf +Memory allocation (code space): 18 +------------------------------------------------------------------ + 0 5 Bra + 3 [^\x{aa}] + 5 5 Ket + 8 End +------------------------------------------------------------------ + +#pattern -memory + +/[^\d]/utf,ucp +------------------------------------------------------------------ + 0 11 Bra + 3 [^\p{Nd}] + 11 11 Ket + 14 End +------------------------------------------------------------------ + +/[[:^alpha:][:^cntrl:]]+/utf,ucp +------------------------------------------------------------------ + 0 15 Bra + 3 [\P{L}\P{Cc}]++ + 15 15 Ket + 18 End +------------------------------------------------------------------ + +/[[:^cntrl:][:^alpha:]]+/utf,ucp +------------------------------------------------------------------ + 0 15 Bra + 3 [\P{Cc}\P{L}]++ + 15 15 Ket + 18 End +------------------------------------------------------------------ + +/[[:alpha:]]+/utf,ucp +------------------------------------------------------------------ + 0 12 Bra + 3 [\p{L}]++ + 12 12 Ket + 15 End +------------------------------------------------------------------ + +/[[:^alpha:]\S]+/utf,ucp +------------------------------------------------------------------ + 0 15 Bra + 3 [\P{L}\P{Xsp}]++ + 15 15 Ket + 18 End 
+------------------------------------------------------------------ + +/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/ +------------------------------------------------------------------ + 0 70 Bra + 3 abc + 9 6 CBra 1 + 13 d + 15 5 Alt + 18 e + 20 11 Ket + 23 *THEN + 24 x + 26 13 CBra 2 + 30 123 + 36 *THEN + 37 4 + 39 28 Alt + 42 567 + 48 6 CBra 3 + 52 b + 54 5 Alt + 57 q + 59 11 Ket + 62 *THEN + 63 xx + 67 41 Ket + 70 70 Ket + 73 End +------------------------------------------------------------------ + +/(((a\2)|(a*)\g<-1>))*a?/ +------------------------------------------------------------------ + 0 52 Bra + 3 Brazero + 4 43 SCBra 1 + 8 36 Once + 11 15 CBra 2 + 15 8 CBra 3 + 19 a + 21 \2 + 23 8 Ket + 26 15 Alt + 29 6 CBra 4 + 33 a* + 35 6 Ket + 38 29 Recurse + 41 30 Ket + 44 36 Ket + 47 43 KetRmax + 50 a?+ + 52 52 Ket + 55 End +------------------------------------------------------------------ + +/((?+1)(\1))/ +------------------------------------------------------------------ + 0 28 Bra + 3 22 Once + 6 16 CBra 1 + 10 13 Recurse + 13 6 CBra 2 + 17 \1 + 19 6 Ket + 22 16 Ket + 25 22 Ket + 28 28 Ket + 31 End +------------------------------------------------------------------ + +"(?1)(?#?'){2}(a)" +------------------------------------------------------------------ + 0 30 Bra + 3 6 Once + 6 21 Recurse + 9 6 Ket + 12 6 Once + 15 21 Recurse + 18 6 Ket + 21 6 CBra 1 + 25 a + 27 6 Ket + 30 30 Ket + 33 End +------------------------------------------------------------------ + +/.((?2)(?R)|\1|$)()/ +------------------------------------------------------------------ + 0 39 Bra + 3 Any + 4 25 Once + 7 10 CBra 1 + 11 32 Recurse + 14 0 Recurse + 17 5 Alt + 20 \1 + 22 4 Alt + 25 $ + 26 19 Ket + 29 25 Ket + 32 4 CBra 2 + 36 4 Ket + 39 39 Ket + 42 End +------------------------------------------------------------------ + +/.((?3)(?R)()(?2)|\1|$)()/ +------------------------------------------------------------------ + 0 49 Bra + 3 Any + 4 35 Once + 7 20 CBra 1 + 11 42 Recurse + 14 
0 Recurse + 17 4 CBra 2 + 21 4 Ket + 24 17 Recurse + 27 5 Alt + 30 \1 + 32 4 Alt + 35 $ + 36 29 Ket + 39 35 Ket + 42 4 CBra 3 + 46 4 Ket + 49 49 Ket + 52 End +------------------------------------------------------------------ + +/(?1)()((((((\1++))\x85)+)|))/ +------------------------------------------------------------------ + 0 69 Bra + 3 6 Recurse + 6 4 CBra 1 + 10 4 Ket + 13 53 CBra 2 + 17 43 CBra 3 + 21 36 CBra 4 + 25 29 CBra 5 + 29 20 CBra 6 + 33 13 CBra 7 + 37 6 Once + 40 \1+ + 43 6 Ket + 46 13 Ket + 49 20 Ket + 52 \x{85} + 54 29 KetRmax + 57 36 Ket + 60 3 Alt + 63 46 Ket + 66 53 Ket + 69 69 Ket + 72 End +------------------------------------------------------------------ + +# Check the absolute limit on nesting (?| etc. This varies with code unit +# width because the workspace is a different number of bytes. It will fail +# with link size 2 in 8-bit and 16-bit but not in 32-bit. + +/(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|
(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?| +))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) +/parens_nest_limit=1000,-fullbincode + +# Use "expand" to create some very long patterns with nested parentheses, in +# order to test workspace overflow. Again, this varies with code unit width, +# and even when it fails in two modes, the error offset differs. It also varies +# with link size - hence multiple tests with different values. 
+ +/(?'ABC'\[[bar](]{792}*THEN:\[A]{255}\[)]{793}/expand,-fullbincode,parens_nest_limit=1000 + +/(?'ABC'\[[bar](]{793}*THEN:\[A]{255}\[)]{794}/expand,-fullbincode,parens_nest_limit=1000 + +/(?'ABC'\[[bar](]{1793}*THEN:\[A]{255}\[)]{1794}/expand,-fullbincode,parens_nest_limit=2000 +Failed: error 186 at offset 12820: regular expression is too complicated + +/(?(1)(?1)){8,}+()/debug +------------------------------------------------------------------ + 0 110 Bra + 3 97 Once + 6 8 Cond + 9 1 Cond ref + 11 103 Recurse + 14 8 Ket + 17 8 Cond + 20 1 Cond ref + 22 103 Recurse + 25 8 Ket + 28 8 Cond + 31 1 Cond ref + 33 103 Recurse + 36 8 Ket + 39 8 Cond + 42 1 Cond ref + 44 103 Recurse + 47 8 Ket + 50 8 Cond + 53 1 Cond ref + 55 103 Recurse + 58 8 Ket + 61 8 Cond + 64 1 Cond ref + 66 103 Recurse + 69 8 Ket + 72 8 Cond + 75 1 Cond ref + 77 103 Recurse + 80 8 Ket + 83 14 SBraPos + 86 8 SCond + 89 1 Cond ref + 91 103 Recurse + 94 8 Ket + 97 14 KetRpos +100 97 Ket +103 4 CBra 1 +107 4 Ket +110 110 Ket +113 End +------------------------------------------------------------------ +Capturing subpattern count = 1 +Max back reference = 1 +May match empty string +Subject length lower bound = 0 + abcd + 0: + 1: + +/(?(1)|a(?1)b){2,}+()/debug +------------------------------------------------------------------ + 0 58 Bra + 3 45 Once + 6 5 Cond + 9 1 Cond ref + 11 10 Alt + 14 a + 16 51 Recurse + 19 b + 21 15 Ket + 24 21 SBraPos + 27 5 SCond + 30 1 Cond ref + 32 10 Alt + 35 a + 37 51 Recurse + 40 b + 42 15 Ket + 45 21 KetRpos + 48 45 Ket + 51 4 CBra 1 + 55 4 Ket + 58 58 Ket + 61 End +------------------------------------------------------------------ +Capturing subpattern count = 1 +Max back reference = 1 +May match empty string +Subject length lower bound = 0 + abcde +No match + +/((?1)(?2)(?3)(?4)(?5)(?6)(?7)(?8)(?9)(?9)(?8)(?7)(?6)(?5)(?4)(?3)(?2)(?1)(?0)){2,}()()()()()()()()()/debug +------------------------------------------------------------------ + 0 194 Bra + 3 61 CBra 1 + 7 3 
Recurse + 10 131 Recurse + 13 138 Recurse + 16 145 Recurse + 19 152 Recurse + 22 159 Recurse + 25 166 Recurse + 28 173 Recurse + 31 180 Recurse + 34 180 Recurse + 37 173 Recurse + 40 166 Recurse + 43 159 Recurse + 46 152 Recurse + 49 145 Recurse + 52 138 Recurse + 55 131 Recurse + 58 3 Recurse + 61 0 Recurse + 64 61 Ket + 67 61 SCBra 1 + 71 3 Recurse + 74 131 Recurse + 77 138 Recurse + 80 145 Recurse + 83 152 Recurse + 86 159 Recurse + 89 166 Recurse + 92 173 Recurse + 95 180 Recurse + 98 180 Recurse +101 173 Recurse +104 166 Recurse +107 159 Recurse +110 152 Recurse +113 145 Recurse +116 138 Recurse +119 131 Recurse +122 3 Recurse +125 0 Recurse +128 61 KetRmax +131 4 CBra 2 +135 4 Ket +138 4 CBra 3 +142 4 Ket +145 4 CBra 4 +149 4 Ket +152 4 CBra 5 +156 4 Ket +159 4 CBra 6 +163 4 Ket +166 4 CBra 7 +170 4 Ket +173 4 CBra 8 +177 4 Ket +180 4 CBra 9 +184 4 Ket +187 4 CBra 10 +191 4 Ket +194 194 Ket +197 End +------------------------------------------------------------------ +Capturing subpattern count = 10 +May match empty string +Subject length lower bound = 0 + +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ +Failed: error 114 at offset 509: missing closing parenthesis + 
+/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))/-fullbincode + +# End of testinput8 diff --git a/pcre2/testdata/testoutput8-32 b/pcre2/testdata/testoutput8-32-2 similarity index 81% rename from pcre2/testdata/testoutput8-32 rename to pcre2/testdata/testoutput8-32-2 index 1b843a599..e40d9d44b 100644 --- a/pcre2/testdata/testoutput8-32 +++ b/pcre2/testdata/testoutput8-32-2 @@ -1,8 +1,11 @@ -# These are a few representative patterns whose lengths and offsets are to be -# shown when the link size is 2. This is just a doublecheck test to ensure the -# sizes don't go horribly wrong when something is changed. The pattern contents -# are all themselves checked in other tests. Unicode, including property -# support, is required for these tests. +# There are two sorts of patterns in this test. A number of them are +# representative patterns whose lengths and offsets are checked. This is just a +# doublecheck test to ensure the sizes don't go horribly wrong when something +# is changed. The operation of these patterns is checked in other tests. +# +# This file also contains tests whose output varies with code unit size and/or +# link size. Unicode support is required for these tests. There are separate +# output files for each code unit size and link size. #pattern fullbincode,memory @@ -378,7 +381,7 @@ Options: utf First code unit = 'A' Last code unit = '.' 
Subject length lower bound = 4 - + /\x{D55c}\x{ad6d}\x{C5B4}/I,utf Memory allocation (code space): 44 ------------------------------------------------------------------ @@ -842,10 +845,184 @@ Memory allocation (code space): 28 # Check the absolute limit on nesting (?| etc. This varies with code unit # width because the workspace is a different number of bytes. It will fail -# in 8-bit and 16-bit but not in 32-bit. - +# with link size 2 in 8-bit and 16-bit but not in 32-bit. + /(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(
?|(?|(?|(?|(?|(?|(?|(?| ))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) /parens_nest_limit=1000,-fullbincode +# Use "expand" to create some very long patterns with nested parentheses, in +# order to test workspace overflow. Again, this varies with code unit width, +# and even when it fails in two modes, the error offset differs. It also varies +# with link size - hence multiple tests with different values. + +/(?'ABC'\[[bar](]{792}*THEN:\[A]{255}\[)]{793}/expand,-fullbincode,parens_nest_limit=1000 +Failed: error 186 at offset 5813: regular expression is too complicated + +/(?'ABC'\[[bar](]{793}*THEN:\[A]{255}\[)]{794}/expand,-fullbincode,parens_nest_limit=1000 +Failed: error 186 at offset 5820: regular expression is too complicated + +/(?'ABC'\[[bar](]{1793}*THEN:\[A]{255}\[)]{1794}/expand,-fullbincode,parens_nest_limit=2000 +Failed: error 186 at offset 12820: regular expression is too complicated + +/(?(1)(?1)){8,}+()/debug +------------------------------------------------------------------ + 0 79 Bra + 2 70 Once + 4 6 Cond + 6 1 Cond ref + 8 74 Recurse + 10 6 Ket + 12 6 Cond + 14 1 Cond ref + 16 74 Recurse + 18 6 Ket + 20 6 Cond + 22 1 Cond ref + 24 74 Recurse + 26 6 Ket + 28 6 Cond + 30 1 Cond ref + 32 74 Recurse + 34 6 Ket + 36 6 Cond + 38 1 Cond ref + 40 74 Recurse + 42 6 Ket + 44 6 Cond + 46 1 Cond ref + 48 74 Recurse + 50 6 Ket + 52 6 Cond + 54 1 Cond ref + 56 74 Recurse + 58 6 Ket + 60 10 SBraPos + 62 6 SCond + 64 1 Cond ref + 66 74 Recurse + 68 6 Ket + 70 10 KetRpos + 72 70 Ket + 74 3 CBra 1 + 77 3 
Ket + 79 79 Ket + 81 End +------------------------------------------------------------------ +Capturing subpattern count = 1 +Max back reference = 1 +May match empty string +Subject length lower bound = 0 + abcd + 0: + 1: + +/(?(1)|a(?1)b){2,}+()/debug +------------------------------------------------------------------ + 0 43 Bra + 2 34 Once + 4 4 Cond + 6 1 Cond ref + 8 8 Alt + 10 a + 12 38 Recurse + 14 b + 16 12 Ket + 18 16 SBraPos + 20 4 SCond + 22 1 Cond ref + 24 8 Alt + 26 a + 28 38 Recurse + 30 b + 32 12 Ket + 34 16 KetRpos + 36 34 Ket + 38 3 CBra 1 + 41 3 Ket + 43 43 Ket + 45 End +------------------------------------------------------------------ +Capturing subpattern count = 1 +Max back reference = 1 +May match empty string +Subject length lower bound = 0 + abcde +No match + +/((?1)(?2)(?3)(?4)(?5)(?6)(?7)(?8)(?9)(?9)(?8)(?7)(?6)(?5)(?4)(?3)(?2)(?1)(?0)){2,}()()()()()()()()()/debug +------------------------------------------------------------------ + 0 133 Bra + 2 41 CBra 1 + 5 2 Recurse + 7 88 Recurse + 9 93 Recurse + 11 98 Recurse + 13 103 Recurse + 15 108 Recurse + 17 113 Recurse + 19 118 Recurse + 21 123 Recurse + 23 123 Recurse + 25 118 Recurse + 27 113 Recurse + 29 108 Recurse + 31 103 Recurse + 33 98 Recurse + 35 93 Recurse + 37 88 Recurse + 39 2 Recurse + 41 0 Recurse + 43 41 Ket + 45 41 SCBra 1 + 48 2 Recurse + 50 88 Recurse + 52 93 Recurse + 54 98 Recurse + 56 103 Recurse + 58 108 Recurse + 60 113 Recurse + 62 118 Recurse + 64 123 Recurse + 66 123 Recurse + 68 118 Recurse + 70 113 Recurse + 72 108 Recurse + 74 103 Recurse + 76 98 Recurse + 78 93 Recurse + 80 88 Recurse + 82 2 Recurse + 84 0 Recurse + 86 41 KetRmax + 88 3 CBra 2 + 91 3 Ket + 93 3 CBra 3 + 96 3 Ket + 98 3 CBra 4 +101 3 Ket +103 3 CBra 5 +106 3 Ket +108 3 CBra 6 +111 3 Ket +113 3 CBra 7 +116 3 Ket +118 3 CBra 8 +121 3 Ket +123 3 CBra 9 +126 3 Ket +128 3 CBra 10 +131 3 Ket +133 133 Ket +135 End +------------------------------------------------------------------ +Capturing subpattern 
count = 10 +May match empty string +Subject length lower bound = 0 + +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ +Failed: error 114 at offset 509: missing closing parenthesis + +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))/-fullbincode + # End of testinput8 diff --git a/pcre2/testdata/testoutput8-32-3 b/pcre2/testdata/testoutput8-32-3 new file mode 100644 index 000000000..e40d9d44b --- /dev/null +++ b/pcre2/testdata/testoutput8-32-3 @@ -0,0 +1,1028 @@ +# There are two sorts of patterns in this test. A number of them are +# representative patterns whose lengths and offsets are checked. This is just a +# doublecheck test to ensure the sizes don't go horribly wrong when something +# is changed. The operation of these patterns is checked in other tests. +# +# This file also contains tests whose output varies with code unit size and/or +# link size. Unicode support is required for these tests. 
There are separate +# output files for each code unit size and link size. + +#pattern fullbincode,memory + +/((?i)b)/ +Memory allocation (code space): 48 +------------------------------------------------------------------ + 0 9 Bra + 2 5 CBra 1 + 5 /i b + 7 5 Ket + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/(?s)(.*X|^B)/ +Memory allocation (code space): 76 +------------------------------------------------------------------ + 0 16 Bra + 2 7 CBra 1 + 5 AllAny* + 7 X + 9 5 Alt + 11 ^ + 12 B + 14 12 Ket + 16 16 Ket + 18 End +------------------------------------------------------------------ + +/(?s:.*X|^B)/ +Memory allocation (code space): 72 +------------------------------------------------------------------ + 0 15 Bra + 2 6 Bra + 4 AllAny* + 6 X + 8 5 Alt + 10 ^ + 11 B + 13 11 Ket + 15 15 Ket + 17 End +------------------------------------------------------------------ + +/^[[:alnum:]]/ +Memory allocation (code space): 60 +------------------------------------------------------------------ + 0 12 Bra + 2 ^ + 3 [0-9A-Za-z] + 12 12 Ket + 14 End +------------------------------------------------------------------ + +/#/Ix +Memory allocation (code space): 20 +------------------------------------------------------------------ + 0 2 Bra + 2 2 Ket + 4 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +May match empty string +Options: extended +Subject length lower bound = 0 + +/a#/Ix +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 a + 4 4 Ket + 6 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: extended +First code unit = 'a' +Subject length lower bound = 1 + +/x?+/ +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 x?+ + 4 4 Ket + 6 End 
+------------------------------------------------------------------ + +/x++/ +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 x++ + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/x{1,3}+/ +Memory allocation (code space): 40 +------------------------------------------------------------------ + 0 7 Bra + 2 x + 4 x{0,2}+ + 7 7 Ket + 9 End +------------------------------------------------------------------ + +/(x)*+/ +Memory allocation (code space): 52 +------------------------------------------------------------------ + 0 10 Bra + 2 Braposzero + 3 5 CBraPos 1 + 6 x + 8 5 KetRpos + 10 10 Ket + 12 End +------------------------------------------------------------------ + +/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/ +Memory allocation (code space): 220 +------------------------------------------------------------------ + 0 52 Bra + 2 ^ + 3 47 CBra 1 + 6 5 CBra 2 + 9 a+ + 11 5 Ket + 13 13 CBra 3 + 16 [ab]+? 
+ 26 13 Ket + 28 13 CBra 4 + 31 [bc]+ + 41 13 Ket + 43 5 CBra 5 + 46 \w*+ + 48 5 Ket + 50 47 Ket + 52 52 Ket + 54 End +------------------------------------------------------------------ + +"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" +Memory allocation (code space): 3296 +------------------------------------------------------------------ + 0 821 Bra + 2 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X +820 \b +821 821 Ket +823 End +------------------------------------------------------------------ + +"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" +Memory allocation (code space): 3256 +------------------------------------------------------------------ + 0 811 Bra + 2 
$<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X +810 \b +811 811 Ket +813 End +------------------------------------------------------------------ + +/(a(?1)b)/ +Memory allocation (code space): 64 +------------------------------------------------------------------ + 0 13 Bra + 2 9 CBra 1 + 5 a + 7 2 Recurse + 9 b + 11 9 Ket + 13 13 Ket + 15 End +------------------------------------------------------------------ + +/(a(?1)+b)/ +Memory allocation (code space): 80 +------------------------------------------------------------------ + 0 17 Bra + 2 13 CBra 1 + 5 a + 7 4 Once + 9 2 Recurse + 11 4 KetRmax + 13 b + 15 13 Ket + 17 17 Ket + 19 End +------------------------------------------------------------------ + +/a(?Pb|c)d(?Pe)/ +Memory allocation (code space): 108 +------------------------------------------------------------------ + 0 24 Bra + 2 a + 4 5 CBra 1 + 7 b + 9 4 Alt + 11 c + 13 9 Ket + 15 d + 17 5 CBra 2 + 20 e + 22 5 Ket + 24 24 Ket + 26 End +------------------------------------------------------------------ + +/(?:a(?Pc(?Pd)))(?Pa)/ +Memory allocation (code space): 128 +------------------------------------------------------------------ + 0 29 Bra + 2 18 Bra + 4 a + 6 12 CBra 1 + 9 c + 11 5 CBra 2 + 14 d + 16 5 Ket + 18 12 Ket + 20 18 Ket + 22 5 CBra 3 + 25 a + 27 5 Ket + 29 29 Ket + 31 End +------------------------------------------------------------------ + +/(?Pa)...(?P=a)bbb(?P>a)d/ +Memory allocation (code space): 108 +------------------------------------------------------------------ + 0 24 Bra + 2 5 CBra 1 + 5 a + 7 5 Ket + 9 Any + 10 Any + 11 Any + 12 \1 + 14 bbb + 20 2 Recurse + 22 d + 24 24 Ket + 26 End 
+------------------------------------------------------------------ + +/abc(?C255)de(?C)f/ +Memory allocation (code space): 100 +------------------------------------------------------------------ + 0 22 Bra + 2 abc + 8 Callout 255 10 1 + 12 de + 16 Callout 0 16 1 + 20 f + 22 22 Ket + 24 End +------------------------------------------------------------------ + +/abcde/auto_callout +Memory allocation (code space): 156 +------------------------------------------------------------------ + 0 36 Bra + 2 Callout 255 0 1 + 6 a + 8 Callout 255 1 1 + 12 b + 14 Callout 255 2 1 + 18 c + 20 Callout 255 3 1 + 24 d + 26 Callout 255 4 1 + 30 e + 32 Callout 255 5 0 + 36 36 Ket + 38 End +------------------------------------------------------------------ + +/\x{100}/utf +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{100} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{1000}/utf +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{1000} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{10000}/utf +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{10000} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{100000}/utf +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{100000} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{10ffff}/utf +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{10ffff} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{110000}/utf +Failed: error 134 at offset 9: character code point value in \x{} or \o{} 
is too large + +/[\x{ff}]/utf +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{ff} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[\x{100}]/utf +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{100} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x80/utf +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{80} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\xff/utf +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{ff} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{0041}\x{2262}\x{0391}\x{002e}/I,utf +Memory allocation (code space): 52 +------------------------------------------------------------------ + 0 10 Bra + 2 A\x{2262}\x{391}. + 10 10 Ket + 12 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First code unit = 'A' +Last code unit = '.' 
+Subject length lower bound = 4 + +/\x{D55c}\x{ad6d}\x{C5B4}/I,utf +Memory allocation (code space): 44 +------------------------------------------------------------------ + 0 8 Bra + 2 \x{d55c}\x{ad6d}\x{c5b4} + 8 8 Ket + 10 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First code unit = \x{d55c} +Last code unit = \x{c5b4} +Subject length lower bound = 3 + +/\x{65e5}\x{672c}\x{8a9e}/I,utf +Memory allocation (code space): 44 +------------------------------------------------------------------ + 0 8 Bra + 2 \x{65e5}\x{672c}\x{8a9e} + 8 8 Ket + 10 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First code unit = \x{65e5} +Last code unit = \x{8a9e} +Subject length lower bound = 3 + +/[\x{100}]/utf +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{100} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[Z\x{100}]/utf +Memory allocation (code space): 76 +------------------------------------------------------------------ + 0 16 Bra + 2 [Z\x{100}] + 16 16 Ket + 18 End +------------------------------------------------------------------ + +/^[\x{100}\E-\Q\E\x{150}]/utf +Memory allocation (code space): 52 +------------------------------------------------------------------ + 0 10 Bra + 2 ^ + 3 [\x{100}-\x{150}] + 10 10 Ket + 12 End +------------------------------------------------------------------ + +/^[\QÄ€\E-\QÅ\E]/utf +Memory allocation (code space): 52 +------------------------------------------------------------------ + 0 10 Bra + 2 ^ + 3 [\x{100}-\x{150}] + 10 10 Ket + 12 End +------------------------------------------------------------------ + +/^[\QÄ€\E-\QÅ\E/utf +Failed: error 106 at offset 13: missing terminating ] for character class + +/[\p{L}]/ +Memory allocation (code space): 48 
+------------------------------------------------------------------ + 0 9 Bra + 2 [\p{L}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[\p{^L}]/ +Memory allocation (code space): 48 +------------------------------------------------------------------ + 0 9 Bra + 2 [\P{L}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[\P{L}]/ +Memory allocation (code space): 48 +------------------------------------------------------------------ + 0 9 Bra + 2 [\P{L}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[\P{^L}]/ +Memory allocation (code space): 48 +------------------------------------------------------------------ + 0 9 Bra + 2 [\p{L}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[abc\p{L}\x{0660}]/utf +Memory allocation (code space): 88 +------------------------------------------------------------------ + 0 19 Bra + 2 [a-c\p{L}\x{660}] + 19 19 Ket + 21 End +------------------------------------------------------------------ + +/[\p{Nd}]/utf +Memory allocation (code space): 48 +------------------------------------------------------------------ + 0 9 Bra + 2 [\p{Nd}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[\p{Nd}+-]+/utf +Memory allocation (code space): 84 +------------------------------------------------------------------ + 0 18 Bra + 2 [+\-\p{Nd}]++ + 18 18 Ket + 20 End +------------------------------------------------------------------ + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf +Memory allocation (code space): 60 +------------------------------------------------------------------ + 0 12 Bra + 2 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} + 12 12 Ket + 14 End +------------------------------------------------------------------ + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf +Memory allocation (code space): 60 
+------------------------------------------------------------------ + 0 12 Bra + 2 A\x{391}\x{10427}\x{ff3a}\x{1fb0} + 12 12 Ket + 14 End +------------------------------------------------------------------ + +/[\x{105}-\x{109}]/i,utf +Memory allocation (code space): 48 +------------------------------------------------------------------ + 0 9 Bra + 2 [\x{104}-\x{109}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/( ( (?(1)0|) )* )/x +Memory allocation (code space): 104 +------------------------------------------------------------------ + 0 23 Bra + 2 19 CBra 1 + 5 Brazero + 6 13 SCBra 2 + 9 6 Cond + 11 1 Cond ref + 13 0 + 15 2 Alt + 17 8 Ket + 19 13 KetRmax + 21 19 Ket + 23 23 Ket + 25 End +------------------------------------------------------------------ + +/( (?(1)0|)* )/x +Memory allocation (code space): 84 +------------------------------------------------------------------ + 0 18 Bra + 2 14 CBra 1 + 5 Brazero + 6 6 SCond + 8 1 Cond ref + 10 0 + 12 2 Alt + 14 8 KetRmax + 16 14 Ket + 18 18 Ket + 20 End +------------------------------------------------------------------ + +/[a]/ +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 a + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[a]/utf +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 a + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[\xaa]/ +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{aa} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[\xaa]/utf +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{aa} + 4 4 Ket + 6 End 
+------------------------------------------------------------------ + +/[^a]/ +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 [^a] + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[^a]/utf +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 [^a] + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[^\xaa]/ +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 [^\x{aa}] + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[^\xaa]/utf +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 [^\x{aa}] + 4 4 Ket + 6 End +------------------------------------------------------------------ + +#pattern -memory + +/[^\d]/utf,ucp +------------------------------------------------------------------ + 0 9 Bra + 2 [^\p{Nd}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[[:^alpha:][:^cntrl:]]+/utf,ucp +------------------------------------------------------------------ + 0 13 Bra + 2 [\P{L}\P{Cc}]++ + 13 13 Ket + 15 End +------------------------------------------------------------------ + +/[[:^cntrl:][:^alpha:]]+/utf,ucp +------------------------------------------------------------------ + 0 13 Bra + 2 [\P{Cc}\P{L}]++ + 13 13 Ket + 15 End +------------------------------------------------------------------ + +/[[:alpha:]]+/utf,ucp +------------------------------------------------------------------ + 0 10 Bra + 2 [\p{L}]++ + 10 10 Ket + 12 End +------------------------------------------------------------------ + +/[[:^alpha:]\S]+/utf,ucp +------------------------------------------------------------------ + 0 13 Bra + 2 [\P{L}\P{Xsp}]++ + 13 13 Ket + 15 End 
+------------------------------------------------------------------ + +/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/ +------------------------------------------------------------------ + 0 60 Bra + 2 abc + 8 5 CBra 1 + 11 d + 13 4 Alt + 15 e + 17 9 Ket + 19 *THEN + 20 x + 22 12 CBra 2 + 25 123 + 31 *THEN + 32 4 + 34 24 Alt + 36 567 + 42 5 CBra 3 + 45 b + 47 4 Alt + 49 q + 51 9 Ket + 53 *THEN + 54 xx + 58 36 Ket + 60 60 Ket + 62 End +------------------------------------------------------------------ + +/(((a\2)|(a*)\g<-1>))*a?/ +------------------------------------------------------------------ + 0 39 Bra + 2 Brazero + 3 32 SCBra 1 + 6 27 Once + 8 12 CBra 2 + 11 7 CBra 3 + 14 a + 16 \2 + 18 7 Ket + 20 11 Alt + 22 5 CBra 4 + 25 a* + 27 5 Ket + 29 22 Recurse + 31 23 Ket + 33 27 Ket + 35 32 KetRmax + 37 a?+ + 39 39 Ket + 41 End +------------------------------------------------------------------ + +/((?+1)(\1))/ +------------------------------------------------------------------ + 0 20 Bra + 2 16 Once + 4 12 CBra 1 + 7 9 Recurse + 9 5 CBra 2 + 12 \1 + 14 5 Ket + 16 12 Ket + 18 16 Ket + 20 20 Ket + 22 End +------------------------------------------------------------------ + +"(?1)(?#?'){2}(a)" +------------------------------------------------------------------ + 0 21 Bra + 2 4 Once + 4 14 Recurse + 6 4 Ket + 8 4 Once + 10 14 Recurse + 12 4 Ket + 14 5 CBra 1 + 17 a + 19 5 Ket + 21 21 Ket + 23 End +------------------------------------------------------------------ + +/.((?2)(?R)|\1|$)()/ +------------------------------------------------------------------ + 0 28 Bra + 2 Any + 3 18 Once + 5 7 CBra 1 + 8 23 Recurse + 10 0 Recurse + 12 4 Alt + 14 \1 + 16 3 Alt + 18 $ + 19 14 Ket + 21 18 Ket + 23 3 CBra 2 + 26 3 Ket + 28 28 Ket + 30 End +------------------------------------------------------------------ + +/.((?3)(?R)()(?2)|\1|$)()/ +------------------------------------------------------------------ + 0 35 Bra + 2 Any + 3 25 Once + 5 14 CBra 1 + 8 30 Recurse + 10 0 Recurse 
+ 12 3 CBra 2 + 15 3 Ket + 17 12 Recurse + 19 4 Alt + 21 \1 + 23 3 Alt + 25 $ + 26 21 Ket + 28 25 Ket + 30 3 CBra 3 + 33 3 Ket + 35 35 Ket + 37 End +------------------------------------------------------------------ + +/(?1)()((((((\1++))\x85)+)|))/ +------------------------------------------------------------------ + 0 50 Bra + 2 4 Recurse + 4 3 CBra 1 + 7 3 Ket + 9 39 CBra 2 + 12 32 CBra 3 + 15 27 CBra 4 + 18 22 CBra 5 + 21 15 CBra 6 + 24 10 CBra 7 + 27 5 Once + 29 \1+ + 32 5 Ket + 34 10 Ket + 36 15 Ket + 38 \x{85} + 40 22 KetRmax + 42 27 Ket + 44 2 Alt + 46 34 Ket + 48 39 Ket + 50 50 Ket + 52 End +------------------------------------------------------------------ + +# Check the absolute limit on nesting (?| etc. This varies with code unit +# width because the workspace is a different number of bytes. It will fail +# with link size 2 in 8-bit and 16-bit but not in 32-bit. + +/(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|
(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?| +))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) +/parens_nest_limit=1000,-fullbincode + +# Use "expand" to create some very long patterns with nested parentheses, in +# order to test workspace overflow. Again, this varies with code unit width, +# and even when it fails in two modes, the error offset differs. It also varies +# with link size - hence multiple tests with different values. 
+ +/(?'ABC'\[[bar](]{792}*THEN:\[A]{255}\[)]{793}/expand,-fullbincode,parens_nest_limit=1000 +Failed: error 186 at offset 5813: regular expression is too complicated + +/(?'ABC'\[[bar](]{793}*THEN:\[A]{255}\[)]{794}/expand,-fullbincode,parens_nest_limit=1000 +Failed: error 186 at offset 5820: regular expression is too complicated + +/(?'ABC'\[[bar](]{1793}*THEN:\[A]{255}\[)]{1794}/expand,-fullbincode,parens_nest_limit=2000 +Failed: error 186 at offset 12820: regular expression is too complicated + +/(?(1)(?1)){8,}+()/debug +------------------------------------------------------------------ + 0 79 Bra + 2 70 Once + 4 6 Cond + 6 1 Cond ref + 8 74 Recurse + 10 6 Ket + 12 6 Cond + 14 1 Cond ref + 16 74 Recurse + 18 6 Ket + 20 6 Cond + 22 1 Cond ref + 24 74 Recurse + 26 6 Ket + 28 6 Cond + 30 1 Cond ref + 32 74 Recurse + 34 6 Ket + 36 6 Cond + 38 1 Cond ref + 40 74 Recurse + 42 6 Ket + 44 6 Cond + 46 1 Cond ref + 48 74 Recurse + 50 6 Ket + 52 6 Cond + 54 1 Cond ref + 56 74 Recurse + 58 6 Ket + 60 10 SBraPos + 62 6 SCond + 64 1 Cond ref + 66 74 Recurse + 68 6 Ket + 70 10 KetRpos + 72 70 Ket + 74 3 CBra 1 + 77 3 Ket + 79 79 Ket + 81 End +------------------------------------------------------------------ +Capturing subpattern count = 1 +Max back reference = 1 +May match empty string +Subject length lower bound = 0 + abcd + 0: + 1: + +/(?(1)|a(?1)b){2,}+()/debug +------------------------------------------------------------------ + 0 43 Bra + 2 34 Once + 4 4 Cond + 6 1 Cond ref + 8 8 Alt + 10 a + 12 38 Recurse + 14 b + 16 12 Ket + 18 16 SBraPos + 20 4 SCond + 22 1 Cond ref + 24 8 Alt + 26 a + 28 38 Recurse + 30 b + 32 12 Ket + 34 16 KetRpos + 36 34 Ket + 38 3 CBra 1 + 41 3 Ket + 43 43 Ket + 45 End +------------------------------------------------------------------ +Capturing subpattern count = 1 +Max back reference = 1 +May match empty string +Subject length lower bound = 0 + abcde +No match + 
+/((?1)(?2)(?3)(?4)(?5)(?6)(?7)(?8)(?9)(?9)(?8)(?7)(?6)(?5)(?4)(?3)(?2)(?1)(?0)){2,}()()()()()()()()()/debug +------------------------------------------------------------------ + 0 133 Bra + 2 41 CBra 1 + 5 2 Recurse + 7 88 Recurse + 9 93 Recurse + 11 98 Recurse + 13 103 Recurse + 15 108 Recurse + 17 113 Recurse + 19 118 Recurse + 21 123 Recurse + 23 123 Recurse + 25 118 Recurse + 27 113 Recurse + 29 108 Recurse + 31 103 Recurse + 33 98 Recurse + 35 93 Recurse + 37 88 Recurse + 39 2 Recurse + 41 0 Recurse + 43 41 Ket + 45 41 SCBra 1 + 48 2 Recurse + 50 88 Recurse + 52 93 Recurse + 54 98 Recurse + 56 103 Recurse + 58 108 Recurse + 60 113 Recurse + 62 118 Recurse + 64 123 Recurse + 66 123 Recurse + 68 118 Recurse + 70 113 Recurse + 72 108 Recurse + 74 103 Recurse + 76 98 Recurse + 78 93 Recurse + 80 88 Recurse + 82 2 Recurse + 84 0 Recurse + 86 41 KetRmax + 88 3 CBra 2 + 91 3 Ket + 93 3 CBra 3 + 96 3 Ket + 98 3 CBra 4 +101 3 Ket +103 3 CBra 5 +106 3 Ket +108 3 CBra 6 +111 3 Ket +113 3 CBra 7 +116 3 Ket +118 3 CBra 8 +121 3 Ket +123 3 CBra 9 +126 3 Ket +128 3 CBra 10 +131 3 Ket +133 133 Ket +135 End +------------------------------------------------------------------ +Capturing subpattern count = 10 +May match empty string +Subject length lower bound = 0 + +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ +Failed: error 114 at offset 509: missing closing parenthesis + 
+/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))/-fullbincode + +# End of testinput8 diff --git a/pcre2/testdata/testoutput8-32-4 b/pcre2/testdata/testoutput8-32-4 new file mode 100644 index 000000000..e40d9d44b --- /dev/null +++ b/pcre2/testdata/testoutput8-32-4 @@ -0,0 +1,1028 @@ +# There are two sorts of patterns in this test. A number of them are +# representative patterns whose lengths and offsets are checked. This is just a +# doublecheck test to ensure the sizes don't go horribly wrong when something +# is changed. The operation of these patterns is checked in other tests. +# +# This file also contains tests whose output varies with code unit size and/or +# link size. Unicode support is required for these tests. There are separate +# output files for each code unit size and link size. 
+ +#pattern fullbincode,memory + +/((?i)b)/ +Memory allocation (code space): 48 +------------------------------------------------------------------ + 0 9 Bra + 2 5 CBra 1 + 5 /i b + 7 5 Ket + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/(?s)(.*X|^B)/ +Memory allocation (code space): 76 +------------------------------------------------------------------ + 0 16 Bra + 2 7 CBra 1 + 5 AllAny* + 7 X + 9 5 Alt + 11 ^ + 12 B + 14 12 Ket + 16 16 Ket + 18 End +------------------------------------------------------------------ + +/(?s:.*X|^B)/ +Memory allocation (code space): 72 +------------------------------------------------------------------ + 0 15 Bra + 2 6 Bra + 4 AllAny* + 6 X + 8 5 Alt + 10 ^ + 11 B + 13 11 Ket + 15 15 Ket + 17 End +------------------------------------------------------------------ + +/^[[:alnum:]]/ +Memory allocation (code space): 60 +------------------------------------------------------------------ + 0 12 Bra + 2 ^ + 3 [0-9A-Za-z] + 12 12 Ket + 14 End +------------------------------------------------------------------ + +/#/Ix +Memory allocation (code space): 20 +------------------------------------------------------------------ + 0 2 Bra + 2 2 Ket + 4 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +May match empty string +Options: extended +Subject length lower bound = 0 + +/a#/Ix +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 a + 4 4 Ket + 6 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: extended +First code unit = 'a' +Subject length lower bound = 1 + +/x?+/ +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 x?+ + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/x++/ +Memory allocation (code space): 28 
+------------------------------------------------------------------ + 0 4 Bra + 2 x++ + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/x{1,3}+/ +Memory allocation (code space): 40 +------------------------------------------------------------------ + 0 7 Bra + 2 x + 4 x{0,2}+ + 7 7 Ket + 9 End +------------------------------------------------------------------ + +/(x)*+/ +Memory allocation (code space): 52 +------------------------------------------------------------------ + 0 10 Bra + 2 Braposzero + 3 5 CBraPos 1 + 6 x + 8 5 KetRpos + 10 10 Ket + 12 End +------------------------------------------------------------------ + +/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/ +Memory allocation (code space): 220 +------------------------------------------------------------------ + 0 52 Bra + 2 ^ + 3 47 CBra 1 + 6 5 CBra 2 + 9 a+ + 11 5 Ket + 13 13 CBra 3 + 16 [ab]+? + 26 13 Ket + 28 13 CBra 4 + 31 [bc]+ + 41 13 Ket + 43 5 CBra 5 + 46 \w*+ + 48 5 Ket + 50 47 Ket + 52 52 Ket + 54 End +------------------------------------------------------------------ + +"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" +Memory allocation (code space): 3296 +------------------------------------------------------------------ + 0 821 Bra + 2 
8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X +820 \b +821 821 Ket +823 End +------------------------------------------------------------------ + +"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" +Memory allocation (code space): 3256 +------------------------------------------------------------------ + 0 811 Bra + 2 $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X +810 \b +811 811 Ket +813 End +------------------------------------------------------------------ + +/(a(?1)b)/ +Memory allocation (code space): 64 +------------------------------------------------------------------ + 0 13 Bra + 2 9 CBra 1 + 5 a + 7 2 Recurse + 9 b + 11 9 Ket + 13 13 Ket + 15 End +------------------------------------------------------------------ + +/(a(?1)+b)/ +Memory allocation (code space): 80 
+------------------------------------------------------------------ + 0 17 Bra + 2 13 CBra 1 + 5 a + 7 4 Once + 9 2 Recurse + 11 4 KetRmax + 13 b + 15 13 Ket + 17 17 Ket + 19 End +------------------------------------------------------------------ + +/a(?Pb|c)d(?Pe)/ +Memory allocation (code space): 108 +------------------------------------------------------------------ + 0 24 Bra + 2 a + 4 5 CBra 1 + 7 b + 9 4 Alt + 11 c + 13 9 Ket + 15 d + 17 5 CBra 2 + 20 e + 22 5 Ket + 24 24 Ket + 26 End +------------------------------------------------------------------ + +/(?:a(?Pc(?Pd)))(?Pa)/ +Memory allocation (code space): 128 +------------------------------------------------------------------ + 0 29 Bra + 2 18 Bra + 4 a + 6 12 CBra 1 + 9 c + 11 5 CBra 2 + 14 d + 16 5 Ket + 18 12 Ket + 20 18 Ket + 22 5 CBra 3 + 25 a + 27 5 Ket + 29 29 Ket + 31 End +------------------------------------------------------------------ + +/(?Pa)...(?P=a)bbb(?P>a)d/ +Memory allocation (code space): 108 +------------------------------------------------------------------ + 0 24 Bra + 2 5 CBra 1 + 5 a + 7 5 Ket + 9 Any + 10 Any + 11 Any + 12 \1 + 14 bbb + 20 2 Recurse + 22 d + 24 24 Ket + 26 End +------------------------------------------------------------------ + +/abc(?C255)de(?C)f/ +Memory allocation (code space): 100 +------------------------------------------------------------------ + 0 22 Bra + 2 abc + 8 Callout 255 10 1 + 12 de + 16 Callout 0 16 1 + 20 f + 22 22 Ket + 24 End +------------------------------------------------------------------ + +/abcde/auto_callout +Memory allocation (code space): 156 +------------------------------------------------------------------ + 0 36 Bra + 2 Callout 255 0 1 + 6 a + 8 Callout 255 1 1 + 12 b + 14 Callout 255 2 1 + 18 c + 20 Callout 255 3 1 + 24 d + 26 Callout 255 4 1 + 30 e + 32 Callout 255 5 0 + 36 36 Ket + 38 End +------------------------------------------------------------------ + +/\x{100}/utf +Memory allocation (code space): 28 
+------------------------------------------------------------------ + 0 4 Bra + 2 \x{100} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{1000}/utf +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{1000} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{10000}/utf +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{10000} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{100000}/utf +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{100000} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{10ffff}/utf +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{10ffff} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{110000}/utf +Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large + +/[\x{ff}]/utf +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{ff} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[\x{100}]/utf +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{100} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x80/utf +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{80} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\xff/utf +Memory allocation (code space): 28 
+------------------------------------------------------------------ + 0 4 Bra + 2 \x{ff} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{0041}\x{2262}\x{0391}\x{002e}/I,utf +Memory allocation (code space): 52 +------------------------------------------------------------------ + 0 10 Bra + 2 A\x{2262}\x{391}. + 10 10 Ket + 12 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First code unit = 'A' +Last code unit = '.' +Subject length lower bound = 4 + +/\x{D55c}\x{ad6d}\x{C5B4}/I,utf +Memory allocation (code space): 44 +------------------------------------------------------------------ + 0 8 Bra + 2 \x{d55c}\x{ad6d}\x{c5b4} + 8 8 Ket + 10 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First code unit = \x{d55c} +Last code unit = \x{c5b4} +Subject length lower bound = 3 + +/\x{65e5}\x{672c}\x{8a9e}/I,utf +Memory allocation (code space): 44 +------------------------------------------------------------------ + 0 8 Bra + 2 \x{65e5}\x{672c}\x{8a9e} + 8 8 Ket + 10 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First code unit = \x{65e5} +Last code unit = \x{8a9e} +Subject length lower bound = 3 + +/[\x{100}]/utf +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{100} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[Z\x{100}]/utf +Memory allocation (code space): 76 +------------------------------------------------------------------ + 0 16 Bra + 2 [Z\x{100}] + 16 16 Ket + 18 End +------------------------------------------------------------------ + +/^[\x{100}\E-\Q\E\x{150}]/utf +Memory allocation (code space): 52 +------------------------------------------------------------------ + 0 10 Bra + 2 ^ + 3 
[\x{100}-\x{150}] + 10 10 Ket + 12 End +------------------------------------------------------------------ + +/^[\QÄ€\E-\QÅ\E]/utf +Memory allocation (code space): 52 +------------------------------------------------------------------ + 0 10 Bra + 2 ^ + 3 [\x{100}-\x{150}] + 10 10 Ket + 12 End +------------------------------------------------------------------ + +/^[\QÄ€\E-\QÅ\E/utf +Failed: error 106 at offset 13: missing terminating ] for character class + +/[\p{L}]/ +Memory allocation (code space): 48 +------------------------------------------------------------------ + 0 9 Bra + 2 [\p{L}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[\p{^L}]/ +Memory allocation (code space): 48 +------------------------------------------------------------------ + 0 9 Bra + 2 [\P{L}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[\P{L}]/ +Memory allocation (code space): 48 +------------------------------------------------------------------ + 0 9 Bra + 2 [\P{L}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[\P{^L}]/ +Memory allocation (code space): 48 +------------------------------------------------------------------ + 0 9 Bra + 2 [\p{L}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[abc\p{L}\x{0660}]/utf +Memory allocation (code space): 88 +------------------------------------------------------------------ + 0 19 Bra + 2 [a-c\p{L}\x{660}] + 19 19 Ket + 21 End +------------------------------------------------------------------ + +/[\p{Nd}]/utf +Memory allocation (code space): 48 +------------------------------------------------------------------ + 0 9 Bra + 2 [\p{Nd}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[\p{Nd}+-]+/utf +Memory allocation (code space): 84 +------------------------------------------------------------------ + 0 18 Bra + 2 
[+\-\p{Nd}]++ + 18 18 Ket + 20 End +------------------------------------------------------------------ + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf +Memory allocation (code space): 60 +------------------------------------------------------------------ + 0 12 Bra + 2 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} + 12 12 Ket + 14 End +------------------------------------------------------------------ + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf +Memory allocation (code space): 60 +------------------------------------------------------------------ + 0 12 Bra + 2 A\x{391}\x{10427}\x{ff3a}\x{1fb0} + 12 12 Ket + 14 End +------------------------------------------------------------------ + +/[\x{105}-\x{109}]/i,utf +Memory allocation (code space): 48 +------------------------------------------------------------------ + 0 9 Bra + 2 [\x{104}-\x{109}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/( ( (?(1)0|) )* )/x +Memory allocation (code space): 104 +------------------------------------------------------------------ + 0 23 Bra + 2 19 CBra 1 + 5 Brazero + 6 13 SCBra 2 + 9 6 Cond + 11 1 Cond ref + 13 0 + 15 2 Alt + 17 8 Ket + 19 13 KetRmax + 21 19 Ket + 23 23 Ket + 25 End +------------------------------------------------------------------ + +/( (?(1)0|)* )/x +Memory allocation (code space): 84 +------------------------------------------------------------------ + 0 18 Bra + 2 14 CBra 1 + 5 Brazero + 6 6 SCond + 8 1 Cond ref + 10 0 + 12 2 Alt + 14 8 KetRmax + 16 14 Ket + 18 18 Ket + 20 End +------------------------------------------------------------------ + +/[a]/ +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 a + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[a]/utf +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 a + 4 4 Ket + 6 End 
+------------------------------------------------------------------ + +/[\xaa]/ +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{aa} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[\xaa]/utf +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{aa} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[^a]/ +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 [^a] + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[^a]/utf +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 [^a] + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[^\xaa]/ +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 [^\x{aa}] + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[^\xaa]/utf +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 4 Bra + 2 [^\x{aa}] + 4 4 Ket + 6 End +------------------------------------------------------------------ + +#pattern -memory + +/[^\d]/utf,ucp +------------------------------------------------------------------ + 0 9 Bra + 2 [^\p{Nd}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[[:^alpha:][:^cntrl:]]+/utf,ucp +------------------------------------------------------------------ + 0 13 Bra + 2 [\P{L}\P{Cc}]++ + 13 13 Ket + 15 End +------------------------------------------------------------------ + +/[[:^cntrl:][:^alpha:]]+/utf,ucp +------------------------------------------------------------------ + 0 13 Bra + 2 [\P{Cc}\P{L}]++ + 13 13 
Ket + 15 End +------------------------------------------------------------------ + +/[[:alpha:]]+/utf,ucp +------------------------------------------------------------------ + 0 10 Bra + 2 [\p{L}]++ + 10 10 Ket + 12 End +------------------------------------------------------------------ + +/[[:^alpha:]\S]+/utf,ucp +------------------------------------------------------------------ + 0 13 Bra + 2 [\P{L}\P{Xsp}]++ + 13 13 Ket + 15 End +------------------------------------------------------------------ + +/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/ +------------------------------------------------------------------ + 0 60 Bra + 2 abc + 8 5 CBra 1 + 11 d + 13 4 Alt + 15 e + 17 9 Ket + 19 *THEN + 20 x + 22 12 CBra 2 + 25 123 + 31 *THEN + 32 4 + 34 24 Alt + 36 567 + 42 5 CBra 3 + 45 b + 47 4 Alt + 49 q + 51 9 Ket + 53 *THEN + 54 xx + 58 36 Ket + 60 60 Ket + 62 End +------------------------------------------------------------------ + +/(((a\2)|(a*)\g<-1>))*a?/ +------------------------------------------------------------------ + 0 39 Bra + 2 Brazero + 3 32 SCBra 1 + 6 27 Once + 8 12 CBra 2 + 11 7 CBra 3 + 14 a + 16 \2 + 18 7 Ket + 20 11 Alt + 22 5 CBra 4 + 25 a* + 27 5 Ket + 29 22 Recurse + 31 23 Ket + 33 27 Ket + 35 32 KetRmax + 37 a?+ + 39 39 Ket + 41 End +------------------------------------------------------------------ + +/((?+1)(\1))/ +------------------------------------------------------------------ + 0 20 Bra + 2 16 Once + 4 12 CBra 1 + 7 9 Recurse + 9 5 CBra 2 + 12 \1 + 14 5 Ket + 16 12 Ket + 18 16 Ket + 20 20 Ket + 22 End +------------------------------------------------------------------ + +"(?1)(?#?'){2}(a)" +------------------------------------------------------------------ + 0 21 Bra + 2 4 Once + 4 14 Recurse + 6 4 Ket + 8 4 Once + 10 14 Recurse + 12 4 Ket + 14 5 CBra 1 + 17 a + 19 5 Ket + 21 21 Ket + 23 End +------------------------------------------------------------------ + +/.((?2)(?R)|\1|$)()/ 
+------------------------------------------------------------------ + 0 28 Bra + 2 Any + 3 18 Once + 5 7 CBra 1 + 8 23 Recurse + 10 0 Recurse + 12 4 Alt + 14 \1 + 16 3 Alt + 18 $ + 19 14 Ket + 21 18 Ket + 23 3 CBra 2 + 26 3 Ket + 28 28 Ket + 30 End +------------------------------------------------------------------ + +/.((?3)(?R)()(?2)|\1|$)()/ +------------------------------------------------------------------ + 0 35 Bra + 2 Any + 3 25 Once + 5 14 CBra 1 + 8 30 Recurse + 10 0 Recurse + 12 3 CBra 2 + 15 3 Ket + 17 12 Recurse + 19 4 Alt + 21 \1 + 23 3 Alt + 25 $ + 26 21 Ket + 28 25 Ket + 30 3 CBra 3 + 33 3 Ket + 35 35 Ket + 37 End +------------------------------------------------------------------ + +/(?1)()((((((\1++))\x85)+)|))/ +------------------------------------------------------------------ + 0 50 Bra + 2 4 Recurse + 4 3 CBra 1 + 7 3 Ket + 9 39 CBra 2 + 12 32 CBra 3 + 15 27 CBra 4 + 18 22 CBra 5 + 21 15 CBra 6 + 24 10 CBra 7 + 27 5 Once + 29 \1+ + 32 5 Ket + 34 10 Ket + 36 15 Ket + 38 \x{85} + 40 22 KetRmax + 42 27 Ket + 44 2 Alt + 46 34 Ket + 48 39 Ket + 50 50 Ket + 52 End +------------------------------------------------------------------ + +# Check the absolute limit on nesting (?| etc. This varies with code unit +# width because the workspace is a different number of bytes. It will fail +# with link size 2 in 8-bit and 16-bit but not in 32-bit. 
+ +/(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?| 
+))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) +/parens_nest_limit=1000,-fullbincode + +# Use "expand" to create some very long patterns with nested parentheses, in +# order to test workspace overflow. Again, this varies with code unit width, +# and even when it fails in two modes, the error offset differs. It also varies +# with link size - hence multiple tests with different values. + +/(?'ABC'\[[bar](]{792}*THEN:\[A]{255}\[)]{793}/expand,-fullbincode,parens_nest_limit=1000 +Failed: error 186 at offset 5813: regular expression is too complicated + +/(?'ABC'\[[bar](]{793}*THEN:\[A]{255}\[)]{794}/expand,-fullbincode,parens_nest_limit=1000 +Failed: error 186 at offset 5820: regular expression is too complicated + +/(?'ABC'\[[bar](]{1793}*THEN:\[A]{255}\[)]{1794}/expand,-fullbincode,parens_nest_limit=2000 +Failed: error 186 at offset 12820: regular expression is too complicated + +/(?(1)(?1)){8,}+()/debug +------------------------------------------------------------------ + 0 79 Bra + 2 70 Once + 4 6 Cond + 6 1 Cond ref + 8 74 Recurse + 10 6 Ket + 12 6 Cond + 14 1 Cond ref + 16 74 Recurse + 18 6 Ket + 20 6 Cond + 22 1 Cond ref + 24 74 Recurse + 26 6 Ket + 28 6 Cond + 30 1 Cond ref + 32 74 Recurse + 34 6 Ket + 36 6 Cond + 38 1 Cond ref + 40 74 Recurse + 42 6 Ket + 44 6 Cond + 46 1 Cond ref + 48 74 Recurse + 50 6 Ket + 52 6 Cond + 54 1 Cond ref + 56 74 Recurse + 58 6 Ket + 60 10 SBraPos + 62 6 SCond + 64 1 Cond ref + 66 74 Recurse + 68 6 Ket + 70 10 KetRpos + 72 70 Ket + 74 3 CBra 1 + 77 3 Ket + 79 79 Ket + 81 
End +------------------------------------------------------------------ +Capturing subpattern count = 1 +Max back reference = 1 +May match empty string +Subject length lower bound = 0 + abcd + 0: + 1: + +/(?(1)|a(?1)b){2,}+()/debug +------------------------------------------------------------------ + 0 43 Bra + 2 34 Once + 4 4 Cond + 6 1 Cond ref + 8 8 Alt + 10 a + 12 38 Recurse + 14 b + 16 12 Ket + 18 16 SBraPos + 20 4 SCond + 22 1 Cond ref + 24 8 Alt + 26 a + 28 38 Recurse + 30 b + 32 12 Ket + 34 16 KetRpos + 36 34 Ket + 38 3 CBra 1 + 41 3 Ket + 43 43 Ket + 45 End +------------------------------------------------------------------ +Capturing subpattern count = 1 +Max back reference = 1 +May match empty string +Subject length lower bound = 0 + abcde +No match + +/((?1)(?2)(?3)(?4)(?5)(?6)(?7)(?8)(?9)(?9)(?8)(?7)(?6)(?5)(?4)(?3)(?2)(?1)(?0)){2,}()()()()()()()()()/debug +------------------------------------------------------------------ + 0 133 Bra + 2 41 CBra 1 + 5 2 Recurse + 7 88 Recurse + 9 93 Recurse + 11 98 Recurse + 13 103 Recurse + 15 108 Recurse + 17 113 Recurse + 19 118 Recurse + 21 123 Recurse + 23 123 Recurse + 25 118 Recurse + 27 113 Recurse + 29 108 Recurse + 31 103 Recurse + 33 98 Recurse + 35 93 Recurse + 37 88 Recurse + 39 2 Recurse + 41 0 Recurse + 43 41 Ket + 45 41 SCBra 1 + 48 2 Recurse + 50 88 Recurse + 52 93 Recurse + 54 98 Recurse + 56 103 Recurse + 58 108 Recurse + 60 113 Recurse + 62 118 Recurse + 64 123 Recurse + 66 123 Recurse + 68 118 Recurse + 70 113 Recurse + 72 108 Recurse + 74 103 Recurse + 76 98 Recurse + 78 93 Recurse + 80 88 Recurse + 82 2 Recurse + 84 0 Recurse + 86 41 KetRmax + 88 3 CBra 2 + 91 3 Ket + 93 3 CBra 3 + 96 3 Ket + 98 3 CBra 4 +101 3 Ket +103 3 CBra 5 +106 3 Ket +108 3 CBra 6 +111 3 Ket +113 3 CBra 7 +116 3 Ket +118 3 CBra 8 +121 3 Ket +123 3 CBra 9 +126 3 Ket +128 3 CBra 10 +131 3 Ket +133 133 Ket +135 End +------------------------------------------------------------------ +Capturing subpattern count = 10 +May match 
empty string +Subject length lower bound = 0 + +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ +Failed: error 114 at offset 509: missing closing parenthesis + +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))/-fullbincode + +# End of testinput8 diff --git a/pcre2/testdata/testoutput8-8 b/pcre2/testdata/testoutput8-8-2 similarity index 81% rename from pcre2/testdata/testoutput8-8 rename to pcre2/testdata/testoutput8-8-2 index a12cc7122..97ee41759 100644 --- a/pcre2/testdata/testoutput8-8 +++ b/pcre2/testdata/testoutput8-8-2 @@ -1,8 +1,11 @@ -# These are a few representative patterns whose lengths and offsets are to be -# shown when the link size is 2. This is just a doublecheck test to ensure the -# sizes don't go horribly wrong when something is changed. The pattern contents -# are all themselves checked in other tests. Unicode, including property -# support, is required for these tests. +# There are two sorts of patterns in this test. 
A number of them are +# representative patterns whose lengths and offsets are checked. This is just a +# doublecheck test to ensure the sizes don't go horribly wrong when something +# is changed. The operation of these patterns is checked in other tests. +# +# This file also contains tests whose output varies with code unit size and/or +# link size. Unicode support is required for these tests. There are separate +# output files for each code unit size and link size. #pattern fullbincode,memory @@ -378,7 +381,7 @@ Options: utf First code unit = 'A' Last code unit = '.' Subject length lower bound = 4 - + /\x{D55c}\x{ad6d}\x{C5B4}/I,utf Memory allocation (code space): 19 ------------------------------------------------------------------ @@ -842,11 +845,184 @@ Memory allocation (code space): 10 # Check the absolute limit on nesting (?| etc. This varies with code unit # width because the workspace is a different number of bytes. It will fail -# in 8-bit and 16-bit but not in 32-bit. - +# with link size 2 in 8-bit and 16-bit but not in 32-bit. 
+ /(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?| 
))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) /parens_nest_limit=1000,-fullbincode Failed: error 184 at offset 1540: (?| and/or (?J: or (?x: parentheses are too deeply nested +# Use "expand" to create some very long patterns with nested parentheses, in +# order to test workspace overflow. Again, this varies with code unit width, +# and even when it fails in two modes, the error offset differs. It also varies +# with link size - hence multiple tests with different values. + +/(?'ABC'\[[bar](]{792}*THEN:\[A]{255}\[)]{793}/expand,-fullbincode,parens_nest_limit=1000 + +/(?'ABC'\[[bar](]{793}*THEN:\[A]{255}\[)]{794}/expand,-fullbincode,parens_nest_limit=1000 +Failed: error 186 at offset 5820: regular expression is too complicated + +/(?'ABC'\[[bar](]{1793}*THEN:\[A]{255}\[)]{1794}/expand,-fullbincode,parens_nest_limit=2000 +Failed: error 186 at offset 12820: regular expression is too complicated + +/(?(1)(?1)){8,}+()/debug +------------------------------------------------------------------ + 0 119 Bra + 3 105 Once + 6 9 Cond + 9 1 Cond ref + 12 111 Recurse + 15 9 Ket + 18 9 Cond + 21 1 Cond ref + 24 111 Recurse + 27 9 Ket + 30 9 Cond + 33 1 Cond ref + 36 111 Recurse + 39 9 Ket + 42 9 Cond + 45 1 Cond ref + 48 111 Recurse + 51 9 Ket + 54 9 Cond + 57 1 Cond ref + 60 111 Recurse + 63 9 Ket + 66 9 Cond + 69 1 Cond ref + 72 111 Recurse + 75 9 Ket + 78 9 Cond + 81 1 Cond ref + 84 111 Recurse + 87 9 Ket + 90 15 SBraPos + 93 9 SCond + 96 1 Cond ref + 99 111 Recurse +102 9 Ket +105 15 KetRpos +108 105 Ket +111 5 CBra 1 
+116 5 Ket +119 119 Ket +122 End +------------------------------------------------------------------ +Capturing subpattern count = 1 +Max back reference = 1 +May match empty string +Subject length lower bound = 0 + abcd + 0: + 1: + +/(?(1)|a(?1)b){2,}+()/debug +------------------------------------------------------------------ + 0 61 Bra + 3 47 Once + 6 6 Cond + 9 1 Cond ref + 12 10 Alt + 15 a + 17 53 Recurse + 20 b + 22 16 Ket + 25 22 SBraPos + 28 6 SCond + 31 1 Cond ref + 34 10 Alt + 37 a + 39 53 Recurse + 42 b + 44 16 Ket + 47 22 KetRpos + 50 47 Ket + 53 5 CBra 1 + 58 5 Ket + 61 61 Ket + 64 End +------------------------------------------------------------------ +Capturing subpattern count = 1 +Max back reference = 1 +May match empty string +Subject length lower bound = 0 + abcde +No match + +/((?1)(?2)(?3)(?4)(?5)(?6)(?7)(?8)(?9)(?9)(?8)(?7)(?6)(?5)(?4)(?3)(?2)(?1)(?0)){2,}()()()()()()()()()/debug +------------------------------------------------------------------ + 0 205 Bra + 3 62 CBra 1 + 8 3 Recurse + 11 133 Recurse + 14 141 Recurse + 17 149 Recurse + 20 157 Recurse + 23 165 Recurse + 26 173 Recurse + 29 181 Recurse + 32 189 Recurse + 35 189 Recurse + 38 181 Recurse + 41 173 Recurse + 44 165 Recurse + 47 157 Recurse + 50 149 Recurse + 53 141 Recurse + 56 133 Recurse + 59 3 Recurse + 62 0 Recurse + 65 62 Ket + 68 62 SCBra 1 + 73 3 Recurse + 76 133 Recurse + 79 141 Recurse + 82 149 Recurse + 85 157 Recurse + 88 165 Recurse + 91 173 Recurse + 94 181 Recurse + 97 189 Recurse +100 189 Recurse +103 181 Recurse +106 173 Recurse +109 165 Recurse +112 157 Recurse +115 149 Recurse +118 141 Recurse +121 133 Recurse +124 3 Recurse +127 0 Recurse +130 62 KetRmax +133 5 CBra 2 +138 5 Ket +141 5 CBra 3 +146 5 Ket +149 5 CBra 4 +154 5 Ket +157 5 CBra 5 +162 5 Ket +165 5 CBra 6 +170 5 Ket +173 5 CBra 7 +178 5 Ket +181 5 CBra 8 +186 5 Ket +189 5 CBra 9 +194 5 Ket +197 5 CBra 10 +202 5 Ket +205 205 Ket +208 End 
+------------------------------------------------------------------ +Capturing subpattern count = 10 +May match empty string +Subject length lower bound = 0 + +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ +Failed: error 114 at offset 509: missing closing parenthesis + +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))/-fullbincode + # End of testinput8 diff --git a/pcre2/testdata/testoutput8-8-3 b/pcre2/testdata/testoutput8-8-3 new file mode 100644 index 000000000..f2de83f0e --- /dev/null +++ b/pcre2/testdata/testoutput8-8-3 @@ -0,0 +1,1026 @@ +# There are two sorts of patterns in this test. A number of them are +# representative patterns whose lengths and offsets are checked. This is just a +# doublecheck test to ensure the sizes don't go horribly wrong when something +# is changed. The operation of these patterns is checked in other tests. +# +# This file also contains tests whose output varies with code unit size and/or +# link size. 
Unicode support is required for these tests. There are separate +# output files for each code unit size and link size. + +#pattern fullbincode,memory + +/((?i)b)/ +Memory allocation (code space): 21 +------------------------------------------------------------------ + 0 16 Bra + 4 8 CBra 1 + 10 /i b + 12 8 Ket + 16 16 Ket + 20 End +------------------------------------------------------------------ + +/(?s)(.*X|^B)/ +Memory allocation (code space): 30 +------------------------------------------------------------------ + 0 25 Bra + 4 10 CBra 1 + 10 AllAny* + 12 X + 14 7 Alt + 18 ^ + 19 B + 21 17 Ket + 25 25 Ket + 29 End +------------------------------------------------------------------ + +/(?s:.*X|^B)/ +Memory allocation (code space): 28 +------------------------------------------------------------------ + 0 23 Bra + 4 8 Bra + 8 AllAny* + 10 X + 12 7 Alt + 16 ^ + 17 B + 19 15 Ket + 23 23 Ket + 27 End +------------------------------------------------------------------ + +/^[[:alnum:]]/ +Memory allocation (code space): 43 +------------------------------------------------------------------ + 0 38 Bra + 4 ^ + 5 [0-9A-Za-z] + 38 38 Ket + 42 End +------------------------------------------------------------------ + +/#/Ix +Memory allocation (code space): 9 +------------------------------------------------------------------ + 0 4 Bra + 4 4 Ket + 8 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +May match empty string +Options: extended +Subject length lower bound = 0 + +/a#/Ix +Memory allocation (code space): 11 +------------------------------------------------------------------ + 0 6 Bra + 4 a + 6 6 Ket + 10 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: extended +First code unit = 'a' +Subject length lower bound = 1 + +/x?+/ +Memory allocation (code space): 11 +------------------------------------------------------------------ + 0 6 Bra + 4 x?+ + 6 6 
Ket + 10 End +------------------------------------------------------------------ + +/x++/ +Memory allocation (code space): 11 +------------------------------------------------------------------ + 0 6 Bra + 4 x++ + 6 6 Ket + 10 End +------------------------------------------------------------------ + +/x{1,3}+/ +Memory allocation (code space): 15 +------------------------------------------------------------------ + 0 10 Bra + 4 x + 6 x{0,2}+ + 10 10 Ket + 14 End +------------------------------------------------------------------ + +/(x)*+/ +Memory allocation (code space): 22 +------------------------------------------------------------------ + 0 17 Bra + 4 Braposzero + 5 8 CBraPos 1 + 11 x + 13 8 KetRpos + 17 17 Ket + 21 End +------------------------------------------------------------------ + +/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/ +Memory allocation (code space): 132 +------------------------------------------------------------------ + 0 127 Bra + 4 ^ + 5 118 CBra 1 + 11 8 CBra 2 + 17 a+ + 19 8 Ket + 23 40 CBra 3 + 29 [ab]+? 
+ 63 40 Ket + 67 40 CBra 4 + 73 [bc]+ +107 40 Ket +111 8 CBra 5 +117 \w*+ +119 8 Ket +123 118 Ket +127 127 Ket +131 End +------------------------------------------------------------------ + +"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" +Memory allocation (code space): 828 +------------------------------------------------------------------ + 0 823 Bra + 4 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X +822 \b +823 823 Ket +827 End +------------------------------------------------------------------ + +"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" +Memory allocation (code space): 818 +------------------------------------------------------------------ + 0 813 Bra + 4 
$<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X +812 \b +813 813 Ket +817 End +------------------------------------------------------------------ + +/(a(?1)b)/ +Memory allocation (code space): 27 +------------------------------------------------------------------ + 0 22 Bra + 4 14 CBra 1 + 10 a + 12 4 Recurse + 16 b + 18 14 Ket + 22 22 Ket + 26 End +------------------------------------------------------------------ + +/(a(?1)+b)/ +Memory allocation (code space): 35 +------------------------------------------------------------------ + 0 30 Bra + 4 22 CBra 1 + 10 a + 12 8 Once + 16 4 Recurse + 20 8 KetRmax + 24 b + 26 22 Ket + 30 30 Ket + 34 End +------------------------------------------------------------------ + +/a(?Pb|c)d(?Pe)/ +Memory allocation (code space): 43 +------------------------------------------------------------------ + 0 38 Bra + 4 a + 6 8 CBra 1 + 12 b + 14 6 Alt + 18 c + 20 14 Ket + 24 d + 26 8 CBra 2 + 32 e + 34 8 Ket + 38 38 Ket + 42 End +------------------------------------------------------------------ + +/(?:a(?Pc(?Pd)))(?Pa)/ +Memory allocation (code space): 55 +------------------------------------------------------------------ + 0 50 Bra + 4 30 Bra + 8 a + 10 20 CBra 1 + 16 c + 18 8 CBra 2 + 24 d + 26 8 Ket + 30 20 Ket + 34 30 Ket + 38 8 CBra 3 + 44 a + 46 8 Ket + 50 50 Ket + 54 End +------------------------------------------------------------------ + +/(?Pa)...(?P=a)bbb(?P>a)d/ +Memory allocation (code space): 39 +------------------------------------------------------------------ + 0 34 Bra + 4 8 CBra 1 + 10 a + 12 8 Ket + 16 Any + 17 Any + 18 Any + 19 \1 + 22 bbb + 28 4 Recurse + 32 d + 34 34 Ket + 38 End 
+------------------------------------------------------------------ + +/abc(?C255)de(?C)f/ +Memory allocation (code space): 37 +------------------------------------------------------------------ + 0 32 Bra + 4 abc + 10 Callout 255 10 1 + 18 de + 22 Callout 0 16 1 + 30 f + 32 32 Ket + 36 End +------------------------------------------------------------------ + +/abcde/auto_callout +Memory allocation (code space): 67 +------------------------------------------------------------------ + 0 62 Bra + 4 Callout 255 0 1 + 12 a + 14 Callout 255 1 1 + 22 b + 24 Callout 255 2 1 + 32 c + 34 Callout 255 3 1 + 42 d + 44 Callout 255 4 1 + 52 e + 54 Callout 255 5 0 + 62 62 Ket + 66 End +------------------------------------------------------------------ + +/\x{100}/utf +Memory allocation (code space): 12 +------------------------------------------------------------------ + 0 7 Bra + 4 \x{100} + 7 7 Ket + 11 End +------------------------------------------------------------------ + +/\x{1000}/utf +Memory allocation (code space): 13 +------------------------------------------------------------------ + 0 8 Bra + 4 \x{1000} + 8 8 Ket + 12 End +------------------------------------------------------------------ + +/\x{10000}/utf +Memory allocation (code space): 14 +------------------------------------------------------------------ + 0 9 Bra + 4 \x{10000} + 9 9 Ket + 13 End +------------------------------------------------------------------ + +/\x{100000}/utf +Memory allocation (code space): 14 +------------------------------------------------------------------ + 0 9 Bra + 4 \x{100000} + 9 9 Ket + 13 End +------------------------------------------------------------------ + +/\x{10ffff}/utf +Memory allocation (code space): 14 +------------------------------------------------------------------ + 0 9 Bra + 4 \x{10ffff} + 9 9 Ket + 13 End +------------------------------------------------------------------ + +/\x{110000}/utf +Failed: error 134 at offset 9: character code point value in \x{} or 
\o{} is too large + +/[\x{ff}]/utf +Memory allocation (code space): 12 +------------------------------------------------------------------ + 0 7 Bra + 4 \x{ff} + 7 7 Ket + 11 End +------------------------------------------------------------------ + +/[\x{100}]/utf +Memory allocation (code space): 12 +------------------------------------------------------------------ + 0 7 Bra + 4 \x{100} + 7 7 Ket + 11 End +------------------------------------------------------------------ + +/\x80/utf +Memory allocation (code space): 12 +------------------------------------------------------------------ + 0 7 Bra + 4 \x{80} + 7 7 Ket + 11 End +------------------------------------------------------------------ + +/\xff/utf +Memory allocation (code space): 12 +------------------------------------------------------------------ + 0 7 Bra + 4 \x{ff} + 7 7 Ket + 11 End +------------------------------------------------------------------ + +/\x{0041}\x{2262}\x{0391}\x{002e}/I,utf +Memory allocation (code space): 20 +------------------------------------------------------------------ + 0 15 Bra + 4 A\x{2262}\x{391}. + 15 15 Ket + 19 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First code unit = 'A' +Last code unit = '.' 
+Subject length lower bound = 4 + +/\x{D55c}\x{ad6d}\x{C5B4}/I,utf +Memory allocation (code space): 21 +------------------------------------------------------------------ + 0 16 Bra + 4 \x{d55c}\x{ad6d}\x{c5b4} + 16 16 Ket + 20 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First code unit = \xed +Last code unit = \xb4 +Subject length lower bound = 3 + +/\x{65e5}\x{672c}\x{8a9e}/I,utf +Memory allocation (code space): 21 +------------------------------------------------------------------ + 0 16 Bra + 4 \x{65e5}\x{672c}\x{8a9e} + 16 16 Ket + 20 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First code unit = \xe6 +Last code unit = \x9e +Subject length lower bound = 3 + +/[\x{100}]/utf +Memory allocation (code space): 12 +------------------------------------------------------------------ + 0 7 Bra + 4 \x{100} + 7 7 Ket + 11 End +------------------------------------------------------------------ + +/[Z\x{100}]/utf +Memory allocation (code space): 50 +------------------------------------------------------------------ + 0 45 Bra + 4 [Z\x{100}] + 45 45 Ket + 49 End +------------------------------------------------------------------ + +/^[\x{100}\E-\Q\E\x{150}]/utf +Memory allocation (code space): 21 +------------------------------------------------------------------ + 0 16 Bra + 4 ^ + 5 [\x{100}-\x{150}] + 16 16 Ket + 20 End +------------------------------------------------------------------ + +/^[\QÄ€\E-\QÅ\E]/utf +Memory allocation (code space): 21 +------------------------------------------------------------------ + 0 16 Bra + 4 ^ + 5 [\x{100}-\x{150}] + 16 16 Ket + 20 End +------------------------------------------------------------------ + +/^[\QÄ€\E-\QÅ\E/utf +Failed: error 106 at offset 15: missing terminating ] for character class + +/[\p{L}]/ +Memory allocation (code space): 18 
+------------------------------------------------------------------ + 0 13 Bra + 4 [\p{L}] + 13 13 Ket + 17 End +------------------------------------------------------------------ + +/[\p{^L}]/ +Memory allocation (code space): 18 +------------------------------------------------------------------ + 0 13 Bra + 4 [\P{L}] + 13 13 Ket + 17 End +------------------------------------------------------------------ + +/[\P{L}]/ +Memory allocation (code space): 18 +------------------------------------------------------------------ + 0 13 Bra + 4 [\P{L}] + 13 13 Ket + 17 End +------------------------------------------------------------------ + +/[\P{^L}]/ +Memory allocation (code space): 18 +------------------------------------------------------------------ + 0 13 Bra + 4 [\p{L}] + 13 13 Ket + 17 End +------------------------------------------------------------------ + +/[abc\p{L}\x{0660}]/utf +Memory allocation (code space): 53 +------------------------------------------------------------------ + 0 48 Bra + 4 [a-c\p{L}\x{660}] + 48 48 Ket + 52 End +------------------------------------------------------------------ + +/[\p{Nd}]/utf +Memory allocation (code space): 18 +------------------------------------------------------------------ + 0 13 Bra + 4 [\p{Nd}] + 13 13 Ket + 17 End +------------------------------------------------------------------ + +/[\p{Nd}+-]+/utf +Memory allocation (code space): 51 +------------------------------------------------------------------ + 0 46 Bra + 4 [+\-\p{Nd}]++ + 46 46 Ket + 50 End +------------------------------------------------------------------ + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf +Memory allocation (code space): 27 +------------------------------------------------------------------ + 0 22 Bra + 4 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} + 22 22 Ket + 26 End +------------------------------------------------------------------ + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf +Memory allocation (code space): 27 
+------------------------------------------------------------------ + 0 22 Bra + 4 A\x{391}\x{10427}\x{ff3a}\x{1fb0} + 22 22 Ket + 26 End +------------------------------------------------------------------ + +/[\x{105}-\x{109}]/i,utf +Memory allocation (code space): 20 +------------------------------------------------------------------ + 0 15 Bra + 4 [\x{104}-\x{109}] + 15 15 Ket + 19 End +------------------------------------------------------------------ + +/( ( (?(1)0|) )* )/x +Memory allocation (code space): 47 +------------------------------------------------------------------ + 0 42 Bra + 4 34 CBra 1 + 10 Brazero + 11 23 SCBra 2 + 17 9 Cond + 21 1 Cond ref + 24 0 + 26 4 Alt + 30 13 Ket + 34 23 KetRmax + 38 34 Ket + 42 42 Ket + 46 End +------------------------------------------------------------------ + +/( (?(1)0|)* )/x +Memory allocation (code space): 37 +------------------------------------------------------------------ + 0 32 Bra + 4 24 CBra 1 + 10 Brazero + 11 9 SCond + 15 1 Cond ref + 18 0 + 20 4 Alt + 24 13 KetRmax + 28 24 Ket + 32 32 Ket + 36 End +------------------------------------------------------------------ + +/[a]/ +Memory allocation (code space): 11 +------------------------------------------------------------------ + 0 6 Bra + 4 a + 6 6 Ket + 10 End +------------------------------------------------------------------ + +/[a]/utf +Memory allocation (code space): 11 +------------------------------------------------------------------ + 0 6 Bra + 4 a + 6 6 Ket + 10 End +------------------------------------------------------------------ + +/[\xaa]/ +Memory allocation (code space): 11 +------------------------------------------------------------------ + 0 6 Bra + 4 \x{aa} + 6 6 Ket + 10 End +------------------------------------------------------------------ + +/[\xaa]/utf +Memory allocation (code space): 12 +------------------------------------------------------------------ + 0 7 Bra + 4 \x{aa} + 7 7 Ket + 11 End 
+------------------------------------------------------------------ + +/[^a]/ +Memory allocation (code space): 11 +------------------------------------------------------------------ + 0 6 Bra + 4 [^a] + 6 6 Ket + 10 End +------------------------------------------------------------------ + +/[^a]/utf +Memory allocation (code space): 11 +------------------------------------------------------------------ + 0 6 Bra + 4 [^a] + 6 6 Ket + 10 End +------------------------------------------------------------------ + +/[^\xaa]/ +Memory allocation (code space): 11 +------------------------------------------------------------------ + 0 6 Bra + 4 [^\x{aa}] + 6 6 Ket + 10 End +------------------------------------------------------------------ + +/[^\xaa]/utf +Memory allocation (code space): 12 +------------------------------------------------------------------ + 0 7 Bra + 4 [^\x{aa}] + 7 7 Ket + 11 End +------------------------------------------------------------------ + +#pattern -memory + +/[^\d]/utf,ucp +------------------------------------------------------------------ + 0 13 Bra + 4 [^\p{Nd}] + 13 13 Ket + 17 End +------------------------------------------------------------------ + +/[[:^alpha:][:^cntrl:]]+/utf,ucp +------------------------------------------------------------------ + 0 17 Bra + 4 [\P{L}\P{Cc}]++ + 17 17 Ket + 21 End +------------------------------------------------------------------ + +/[[:^cntrl:][:^alpha:]]+/utf,ucp +------------------------------------------------------------------ + 0 17 Bra + 4 [\P{Cc}\P{L}]++ + 17 17 Ket + 21 End +------------------------------------------------------------------ + +/[[:alpha:]]+/utf,ucp +------------------------------------------------------------------ + 0 14 Bra + 4 [\p{L}]++ + 14 14 Ket + 18 End +------------------------------------------------------------------ + +/[[:^alpha:]\S]+/utf,ucp +------------------------------------------------------------------ + 0 17 Bra + 4 [\P{L}\P{Xsp}]++ + 17 17 Ket + 21 End 
+------------------------------------------------------------------ + +/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/ +------------------------------------------------------------------ + 0 83 Bra + 4 abc + 10 8 CBra 1 + 16 d + 18 6 Alt + 22 e + 24 14 Ket + 28 *THEN + 29 x + 31 15 CBra 2 + 37 123 + 43 *THEN + 44 4 + 46 33 Alt + 50 567 + 56 8 CBra 3 + 62 b + 64 6 Alt + 68 q + 70 14 Ket + 74 *THEN + 75 xx + 79 48 Ket + 83 83 Ket + 87 End +------------------------------------------------------------------ + +/(((a\2)|(a*)\g<-1>))*a?/ +------------------------------------------------------------------ + 0 70 Bra + 4 Brazero + 5 59 SCBra 1 + 11 49 Once + 15 21 CBra 2 + 21 11 CBra 3 + 27 a + 29 \2 + 32 11 Ket + 36 20 Alt + 40 8 CBra 4 + 46 a* + 48 8 Ket + 52 40 Recurse + 56 41 Ket + 60 49 Ket + 64 59 KetRmax + 68 a?+ + 70 70 Ket + 74 End +------------------------------------------------------------------ + +/((?+1)(\1))/ +------------------------------------------------------------------ + 0 39 Bra + 4 31 Once + 8 23 CBra 1 + 14 18 Recurse + 18 9 CBra 2 + 24 \1 + 27 9 Ket + 31 23 Ket + 35 31 Ket + 39 39 Ket + 43 End +------------------------------------------------------------------ + +"(?1)(?#?'){2}(a)" +------------------------------------------------------------------ + 0 40 Bra + 4 8 Once + 8 28 Recurse + 12 8 Ket + 16 8 Once + 20 28 Recurse + 24 8 Ket + 28 8 CBra 1 + 34 a + 36 8 Ket + 40 40 Ket + 44 End +------------------------------------------------------------------ + +/.((?2)(?R)|\1|$)()/ +------------------------------------------------------------------ + 0 53 Bra + 4 Any + 5 34 Once + 9 14 CBra 1 + 15 43 Recurse + 19 0 Recurse + 23 7 Alt + 27 \1 + 30 5 Alt + 34 $ + 35 26 Ket + 39 34 Ket + 43 6 CBra 2 + 49 6 Ket + 53 53 Ket + 57 End +------------------------------------------------------------------ + +/.((?3)(?R)()(?2)|\1|$)()/ +------------------------------------------------------------------ + 0 67 Bra + 4 Any + 5 48 Once + 9 28 CBra 1 + 15 57 Recurse 
+ 19 0 Recurse + 23 6 CBra 2 + 29 6 Ket + 33 23 Recurse + 37 7 Alt + 41 \1 + 44 5 Alt + 48 $ + 49 40 Ket + 53 48 Ket + 57 6 CBra 3 + 63 6 Ket + 67 67 Ket + 71 End +------------------------------------------------------------------ + +/(?1)()((((((\1++))\x85)+)|))/ +------------------------------------------------------------------ + 0 96 Bra + 4 8 Recurse + 8 6 CBra 1 + 14 6 Ket + 18 74 CBra 2 + 24 60 CBra 3 + 30 50 CBra 4 + 36 40 CBra 5 + 42 28 CBra 6 + 48 18 CBra 7 + 54 8 Once + 58 \1+ + 62 8 Ket + 66 18 Ket + 70 28 Ket + 74 \x{85} + 76 40 KetRmax + 80 50 Ket + 84 4 Alt + 88 64 Ket + 92 74 Ket + 96 96 Ket +100 End +------------------------------------------------------------------ + +# Check the absolute limit on nesting (?| etc. This varies with code unit +# width because the workspace is a different number of bytes. It will fail +# with link size 2 in 8-bit and 16-bit but not in 32-bit. + +/(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(
?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?| +))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) +/parens_nest_limit=1000,-fullbincode + +# Use "expand" to create some very long patterns with nested parentheses, in +# order to test workspace overflow. Again, this varies with code unit width, +# and even when it fails in two modes, the error offset differs. It also varies +# with link size - hence multiple tests with different values. 
+ +/(?'ABC'\[[bar](]{792}*THEN:\[A]{255}\[)]{793}/expand,-fullbincode,parens_nest_limit=1000 + +/(?'ABC'\[[bar](]{793}*THEN:\[A]{255}\[)]{794}/expand,-fullbincode,parens_nest_limit=1000 + +/(?'ABC'\[[bar](]{1793}*THEN:\[A]{255}\[)]{1794}/expand,-fullbincode,parens_nest_limit=2000 +Failed: error 186 at offset 12820: regular expression is too complicated + +/(?(1)(?1)){8,}+()/debug +------------------------------------------------------------------ + 0 150 Bra + 4 132 Once + 8 11 Cond + 12 1 Cond ref + 15 140 Recurse + 19 11 Ket + 23 11 Cond + 27 1 Cond ref + 30 140 Recurse + 34 11 Ket + 38 11 Cond + 42 1 Cond ref + 45 140 Recurse + 49 11 Ket + 53 11 Cond + 57 1 Cond ref + 60 140 Recurse + 64 11 Ket + 68 11 Cond + 72 1 Cond ref + 75 140 Recurse + 79 11 Ket + 83 11 Cond + 87 1 Cond ref + 90 140 Recurse + 94 11 Ket + 98 11 Cond +102 1 Cond ref +105 140 Recurse +109 11 Ket +113 19 SBraPos +117 11 SCond +121 1 Cond ref +124 140 Recurse +128 11 Ket +132 19 KetRpos +136 132 Ket +140 6 CBra 1 +146 6 Ket +150 150 Ket +154 End +------------------------------------------------------------------ +Capturing subpattern count = 1 +Max back reference = 1 +May match empty string +Subject length lower bound = 0 + abcd + 0: + 1: + +/(?(1)|a(?1)b){2,}+()/debug +------------------------------------------------------------------ + 0 76 Bra + 4 58 Once + 8 7 Cond + 12 1 Cond ref + 15 12 Alt + 19 a + 21 66 Recurse + 25 b + 27 19 Ket + 31 27 SBraPos + 35 7 SCond + 39 1 Cond ref + 42 12 Alt + 46 a + 48 66 Recurse + 52 b + 54 19 Ket + 58 27 KetRpos + 62 58 Ket + 66 6 CBra 1 + 72 6 Ket + 76 76 Ket + 80 End +------------------------------------------------------------------ +Capturing subpattern count = 1 +Max back reference = 1 +May match empty string +Subject length lower bound = 0 + abcde +No match + +/((?1)(?2)(?3)(?4)(?5)(?6)(?7)(?8)(?9)(?9)(?8)(?7)(?6)(?5)(?4)(?3)(?2)(?1)(?0)){2,}()()()()()()()()()/debug +------------------------------------------------------------------ + 0 266 Bra + 4 
82 CBra 1 + 10 4 Recurse + 14 176 Recurse + 18 186 Recurse + 22 196 Recurse + 26 206 Recurse + 30 216 Recurse + 34 226 Recurse + 38 236 Recurse + 42 246 Recurse + 46 246 Recurse + 50 236 Recurse + 54 226 Recurse + 58 216 Recurse + 62 206 Recurse + 66 196 Recurse + 70 186 Recurse + 74 176 Recurse + 78 4 Recurse + 82 0 Recurse + 86 82 Ket + 90 82 SCBra 1 + 96 4 Recurse +100 176 Recurse +104 186 Recurse +108 196 Recurse +112 206 Recurse +116 216 Recurse +120 226 Recurse +124 236 Recurse +128 246 Recurse +132 246 Recurse +136 236 Recurse +140 226 Recurse +144 216 Recurse +148 206 Recurse +152 196 Recurse +156 186 Recurse +160 176 Recurse +164 4 Recurse +168 0 Recurse +172 82 KetRmax +176 6 CBra 2 +182 6 Ket +186 6 CBra 3 +192 6 Ket +196 6 CBra 4 +202 6 Ket +206 6 CBra 5 +212 6 Ket +216 6 CBra 6 +222 6 Ket +226 6 CBra 7 +232 6 Ket +236 6 CBra 8 +242 6 Ket +246 6 CBra 9 +252 6 Ket +256 6 CBra 10 +262 6 Ket +266 266 Ket +270 End +------------------------------------------------------------------ +Capturing subpattern count = 10 +May match empty string +Subject length lower bound = 0 + +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ +Failed: error 114 at offset 509: missing closing parenthesis + 
+/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))/-fullbincode + +# End of testinput8 diff --git a/pcre2/testdata/testoutput8-8-4 b/pcre2/testdata/testoutput8-8-4 new file mode 100644 index 000000000..435301495 --- /dev/null +++ b/pcre2/testdata/testoutput8-8-4 @@ -0,0 +1,1026 @@ +# There are two sorts of patterns in this test. A number of them are +# representative patterns whose lengths and offsets are checked. This is just a +# doublecheck test to ensure the sizes don't go horribly wrong when something +# is changed. The operation of these patterns is checked in other tests. +# +# This file also contains tests whose output varies with code unit size and/or +# link size. Unicode support is required for these tests. There are separate +# output files for each code unit size and link size. 
+ +#pattern fullbincode,memory + +/((?i)b)/ +Memory allocation (code space): 25 +------------------------------------------------------------------ + 0 19 Bra + 5 9 CBra 1 + 12 /i b + 14 9 Ket + 19 19 Ket + 24 End +------------------------------------------------------------------ + +/(?s)(.*X|^B)/ +Memory allocation (code space): 35 +------------------------------------------------------------------ + 0 29 Bra + 5 11 CBra 1 + 12 AllAny* + 14 X + 16 8 Alt + 21 ^ + 22 B + 24 19 Ket + 29 29 Ket + 34 End +------------------------------------------------------------------ + +/(?s:.*X|^B)/ +Memory allocation (code space): 33 +------------------------------------------------------------------ + 0 27 Bra + 5 9 Bra + 10 AllAny* + 12 X + 14 8 Alt + 19 ^ + 20 B + 22 17 Ket + 27 27 Ket + 32 End +------------------------------------------------------------------ + +/^[[:alnum:]]/ +Memory allocation (code space): 45 +------------------------------------------------------------------ + 0 39 Bra + 5 ^ + 6 [0-9A-Za-z] + 39 39 Ket + 44 End +------------------------------------------------------------------ + +/#/Ix +Memory allocation (code space): 11 +------------------------------------------------------------------ + 0 5 Bra + 5 5 Ket + 10 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +May match empty string +Options: extended +Subject length lower bound = 0 + +/a#/Ix +Memory allocation (code space): 13 +------------------------------------------------------------------ + 0 7 Bra + 5 a + 7 7 Ket + 12 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: extended +First code unit = 'a' +Subject length lower bound = 1 + +/x?+/ +Memory allocation (code space): 13 +------------------------------------------------------------------ + 0 7 Bra + 5 x?+ + 7 7 Ket + 12 End +------------------------------------------------------------------ + +/x++/ +Memory allocation (code 
space): 13 +------------------------------------------------------------------ + 0 7 Bra + 5 x++ + 7 7 Ket + 12 End +------------------------------------------------------------------ + +/x{1,3}+/ +Memory allocation (code space): 17 +------------------------------------------------------------------ + 0 11 Bra + 5 x + 7 x{0,2}+ + 11 11 Ket + 16 End +------------------------------------------------------------------ + +/(x)*+/ +Memory allocation (code space): 26 +------------------------------------------------------------------ + 0 20 Bra + 5 Braposzero + 6 9 CBraPos 1 + 13 x + 15 9 KetRpos + 20 20 Ket + 25 End +------------------------------------------------------------------ + +/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/ +Memory allocation (code space): 144 +------------------------------------------------------------------ + 0 138 Bra + 5 ^ + 6 127 CBra 1 + 13 9 CBra 2 + 20 a+ + 22 9 Ket + 27 41 CBra 3 + 34 [ab]+? + 68 41 Ket + 73 41 CBra 4 + 80 [bc]+ +114 41 Ket +119 9 CBra 5 +126 \w*+ +128 9 Ket +133 127 Ket +138 138 Ket +143 End +------------------------------------------------------------------ + +"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" +Memory allocation (code space): 830 +------------------------------------------------------------------ + 0 824 Bra + 5 
8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X +823 \b +824 824 Ket +829 End +------------------------------------------------------------------ + +"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" +Memory allocation (code space): 820 +------------------------------------------------------------------ + 0 814 Bra + 5 $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X +813 \b +814 814 Ket +819 End +------------------------------------------------------------------ + +/(a(?1)b)/ +Memory allocation (code space): 32 +------------------------------------------------------------------ + 0 26 Bra + 5 16 CBra 1 + 12 a + 14 5 Recurse + 19 b + 21 16 Ket + 26 26 Ket + 31 End +------------------------------------------------------------------ + +/(a(?1)+b)/ +Memory allocation (code space): 42 
+------------------------------------------------------------------ + 0 36 Bra + 5 26 CBra 1 + 12 a + 14 10 Once + 19 5 Recurse + 24 10 KetRmax + 29 b + 31 26 Ket + 36 36 Ket + 41 End +------------------------------------------------------------------ + +/a(?Pb|c)d(?Pe)/ +Memory allocation (code space): 50 +------------------------------------------------------------------ + 0 44 Bra + 5 a + 7 9 CBra 1 + 14 b + 16 7 Alt + 21 c + 23 16 Ket + 28 d + 30 9 CBra 2 + 37 e + 39 9 Ket + 44 44 Ket + 49 End +------------------------------------------------------------------ + +/(?:a(?Pc(?Pd)))(?Pa)/ +Memory allocation (code space): 65 +------------------------------------------------------------------ + 0 59 Bra + 5 35 Bra + 10 a + 12 23 CBra 1 + 19 c + 21 9 CBra 2 + 28 d + 30 9 Ket + 35 23 Ket + 40 35 Ket + 45 9 CBra 3 + 52 a + 54 9 Ket + 59 59 Ket + 64 End +------------------------------------------------------------------ + +/(?Pa)...(?P=a)bbb(?P>a)d/ +Memory allocation (code space): 44 +------------------------------------------------------------------ + 0 38 Bra + 5 9 CBra 1 + 12 a + 14 9 Ket + 19 Any + 20 Any + 21 Any + 22 \1 + 25 bbb + 31 5 Recurse + 36 d + 38 38 Ket + 43 End +------------------------------------------------------------------ + +/abc(?C255)de(?C)f/ +Memory allocation (code space): 43 +------------------------------------------------------------------ + 0 37 Bra + 5 abc + 11 Callout 255 10 1 + 21 de + 25 Callout 0 16 1 + 35 f + 37 37 Ket + 42 End +------------------------------------------------------------------ + +/abcde/auto_callout +Memory allocation (code space): 81 +------------------------------------------------------------------ + 0 75 Bra + 5 Callout 255 0 1 + 15 a + 17 Callout 255 1 1 + 27 b + 29 Callout 255 2 1 + 39 c + 41 Callout 255 3 1 + 51 d + 53 Callout 255 4 1 + 63 e + 65 Callout 255 5 0 + 75 75 Ket + 80 End +------------------------------------------------------------------ + +/\x{100}/utf +Memory allocation (code space): 14 
+------------------------------------------------------------------ + 0 8 Bra + 5 \x{100} + 8 8 Ket + 13 End +------------------------------------------------------------------ + +/\x{1000}/utf +Memory allocation (code space): 15 +------------------------------------------------------------------ + 0 9 Bra + 5 \x{1000} + 9 9 Ket + 14 End +------------------------------------------------------------------ + +/\x{10000}/utf +Memory allocation (code space): 16 +------------------------------------------------------------------ + 0 10 Bra + 5 \x{10000} + 10 10 Ket + 15 End +------------------------------------------------------------------ + +/\x{100000}/utf +Memory allocation (code space): 16 +------------------------------------------------------------------ + 0 10 Bra + 5 \x{100000} + 10 10 Ket + 15 End +------------------------------------------------------------------ + +/\x{10ffff}/utf +Memory allocation (code space): 16 +------------------------------------------------------------------ + 0 10 Bra + 5 \x{10ffff} + 10 10 Ket + 15 End +------------------------------------------------------------------ + +/\x{110000}/utf +Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large + +/[\x{ff}]/utf +Memory allocation (code space): 14 +------------------------------------------------------------------ + 0 8 Bra + 5 \x{ff} + 8 8 Ket + 13 End +------------------------------------------------------------------ + +/[\x{100}]/utf +Memory allocation (code space): 14 +------------------------------------------------------------------ + 0 8 Bra + 5 \x{100} + 8 8 Ket + 13 End +------------------------------------------------------------------ + +/\x80/utf +Memory allocation (code space): 14 +------------------------------------------------------------------ + 0 8 Bra + 5 \x{80} + 8 8 Ket + 13 End +------------------------------------------------------------------ + +/\xff/utf +Memory allocation (code space): 14 
+------------------------------------------------------------------ + 0 8 Bra + 5 \x{ff} + 8 8 Ket + 13 End +------------------------------------------------------------------ + +/\x{0041}\x{2262}\x{0391}\x{002e}/I,utf +Memory allocation (code space): 22 +------------------------------------------------------------------ + 0 16 Bra + 5 A\x{2262}\x{391}. + 16 16 Ket + 21 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First code unit = 'A' +Last code unit = '.' +Subject length lower bound = 4 + +/\x{D55c}\x{ad6d}\x{C5B4}/I,utf +Memory allocation (code space): 23 +------------------------------------------------------------------ + 0 17 Bra + 5 \x{d55c}\x{ad6d}\x{c5b4} + 17 17 Ket + 22 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First code unit = \xed +Last code unit = \xb4 +Subject length lower bound = 3 + +/\x{65e5}\x{672c}\x{8a9e}/I,utf +Memory allocation (code space): 23 +------------------------------------------------------------------ + 0 17 Bra + 5 \x{65e5}\x{672c}\x{8a9e} + 17 17 Ket + 22 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First code unit = \xe6 +Last code unit = \x9e +Subject length lower bound = 3 + +/[\x{100}]/utf +Memory allocation (code space): 14 +------------------------------------------------------------------ + 0 8 Bra + 5 \x{100} + 8 8 Ket + 13 End +------------------------------------------------------------------ + +/[Z\x{100}]/utf +Memory allocation (code space): 53 +------------------------------------------------------------------ + 0 47 Bra + 5 [Z\x{100}] + 47 47 Ket + 52 End +------------------------------------------------------------------ + +/^[\x{100}\E-\Q\E\x{150}]/utf +Memory allocation (code space): 24 +------------------------------------------------------------------ + 0 18 Bra + 5 ^ + 6 [\x{100}-\x{150}] 
+ 18 18 Ket + 23 End +------------------------------------------------------------------ + +/^[\QÄ€\E-\QÅ\E]/utf +Memory allocation (code space): 24 +------------------------------------------------------------------ + 0 18 Bra + 5 ^ + 6 [\x{100}-\x{150}] + 18 18 Ket + 23 End +------------------------------------------------------------------ + +/^[\QÄ€\E-\QÅ\E/utf +Failed: error 106 at offset 15: missing terminating ] for character class + +/[\p{L}]/ +Memory allocation (code space): 21 +------------------------------------------------------------------ + 0 15 Bra + 5 [\p{L}] + 15 15 Ket + 20 End +------------------------------------------------------------------ + +/[\p{^L}]/ +Memory allocation (code space): 21 +------------------------------------------------------------------ + 0 15 Bra + 5 [\P{L}] + 15 15 Ket + 20 End +------------------------------------------------------------------ + +/[\P{L}]/ +Memory allocation (code space): 21 +------------------------------------------------------------------ + 0 15 Bra + 5 [\P{L}] + 15 15 Ket + 20 End +------------------------------------------------------------------ + +/[\P{^L}]/ +Memory allocation (code space): 21 +------------------------------------------------------------------ + 0 15 Bra + 5 [\p{L}] + 15 15 Ket + 20 End +------------------------------------------------------------------ + +/[abc\p{L}\x{0660}]/utf +Memory allocation (code space): 56 +------------------------------------------------------------------ + 0 50 Bra + 5 [a-c\p{L}\x{660}] + 50 50 Ket + 55 End +------------------------------------------------------------------ + +/[\p{Nd}]/utf +Memory allocation (code space): 21 +------------------------------------------------------------------ + 0 15 Bra + 5 [\p{Nd}] + 15 15 Ket + 20 End +------------------------------------------------------------------ + +/[\p{Nd}+-]+/utf +Memory allocation (code space): 54 +------------------------------------------------------------------ + 0 48 Bra + 5 
[+\-\p{Nd}]++ + 48 48 Ket + 53 End +------------------------------------------------------------------ + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf +Memory allocation (code space): 29 +------------------------------------------------------------------ + 0 23 Bra + 5 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} + 23 23 Ket + 28 End +------------------------------------------------------------------ + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf +Memory allocation (code space): 29 +------------------------------------------------------------------ + 0 23 Bra + 5 A\x{391}\x{10427}\x{ff3a}\x{1fb0} + 23 23 Ket + 28 End +------------------------------------------------------------------ + +/[\x{105}-\x{109}]/i,utf +Memory allocation (code space): 23 +------------------------------------------------------------------ + 0 17 Bra + 5 [\x{104}-\x{109}] + 17 17 Ket + 22 End +------------------------------------------------------------------ + +/( ( (?(1)0|) )* )/x +Memory allocation (code space): 56 +------------------------------------------------------------------ + 0 50 Bra + 5 40 CBra 1 + 12 Brazero + 13 27 SCBra 2 + 20 10 Cond + 25 1 Cond ref + 28 0 + 30 5 Alt + 35 15 Ket + 40 27 KetRmax + 45 40 Ket + 50 50 Ket + 55 End +------------------------------------------------------------------ + +/( (?(1)0|)* )/x +Memory allocation (code space): 44 +------------------------------------------------------------------ + 0 38 Bra + 5 28 CBra 1 + 12 Brazero + 13 10 SCond + 18 1 Cond ref + 21 0 + 23 5 Alt + 28 15 KetRmax + 33 28 Ket + 38 38 Ket + 43 End +------------------------------------------------------------------ + +/[a]/ +Memory allocation (code space): 13 +------------------------------------------------------------------ + 0 7 Bra + 5 a + 7 7 Ket + 12 End +------------------------------------------------------------------ + +/[a]/utf +Memory allocation (code space): 13 +------------------------------------------------------------------ + 0 7 Bra + 5 a + 7 7 Ket + 12 End 
+------------------------------------------------------------------ + +/[\xaa]/ +Memory allocation (code space): 13 +------------------------------------------------------------------ + 0 7 Bra + 5 \x{aa} + 7 7 Ket + 12 End +------------------------------------------------------------------ + +/[\xaa]/utf +Memory allocation (code space): 14 +------------------------------------------------------------------ + 0 8 Bra + 5 \x{aa} + 8 8 Ket + 13 End +------------------------------------------------------------------ + +/[^a]/ +Memory allocation (code space): 13 +------------------------------------------------------------------ + 0 7 Bra + 5 [^a] + 7 7 Ket + 12 End +------------------------------------------------------------------ + +/[^a]/utf +Memory allocation (code space): 13 +------------------------------------------------------------------ + 0 7 Bra + 5 [^a] + 7 7 Ket + 12 End +------------------------------------------------------------------ + +/[^\xaa]/ +Memory allocation (code space): 13 +------------------------------------------------------------------ + 0 7 Bra + 5 [^\x{aa}] + 7 7 Ket + 12 End +------------------------------------------------------------------ + +/[^\xaa]/utf +Memory allocation (code space): 14 +------------------------------------------------------------------ + 0 8 Bra + 5 [^\x{aa}] + 8 8 Ket + 13 End +------------------------------------------------------------------ + +#pattern -memory + +/[^\d]/utf,ucp +------------------------------------------------------------------ + 0 15 Bra + 5 [^\p{Nd}] + 15 15 Ket + 20 End +------------------------------------------------------------------ + +/[[:^alpha:][:^cntrl:]]+/utf,ucp +------------------------------------------------------------------ + 0 19 Bra + 5 [\P{L}\P{Cc}]++ + 19 19 Ket + 24 End +------------------------------------------------------------------ + +/[[:^cntrl:][:^alpha:]]+/utf,ucp +------------------------------------------------------------------ + 0 19 Bra + 5 [\P{Cc}\P{L}]++ 
+ 19 19 Ket + 24 End +------------------------------------------------------------------ + +/[[:alpha:]]+/utf,ucp +------------------------------------------------------------------ + 0 16 Bra + 5 [\p{L}]++ + 16 16 Ket + 21 End +------------------------------------------------------------------ + +/[[:^alpha:]\S]+/utf,ucp +------------------------------------------------------------------ + 0 19 Bra + 5 [\P{L}\P{Xsp}]++ + 19 19 Ket + 24 End +------------------------------------------------------------------ + +/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/ +------------------------------------------------------------------ + 0 93 Bra + 5 abc + 11 9 CBra 1 + 18 d + 20 7 Alt + 25 e + 27 16 Ket + 32 *THEN + 33 x + 35 16 CBra 2 + 42 123 + 48 *THEN + 49 4 + 51 37 Alt + 56 567 + 62 9 CBra 3 + 69 b + 71 7 Alt + 76 q + 78 16 Ket + 83 *THEN + 84 xx + 88 53 Ket + 93 93 Ket + 98 End +------------------------------------------------------------------ + +/(((a\2)|(a*)\g<-1>))*a?/ +------------------------------------------------------------------ + 0 83 Bra + 5 Brazero + 6 70 SCBra 1 + 13 58 Once + 18 24 CBra 2 + 25 12 CBra 3 + 32 a + 34 \2 + 37 12 Ket + 42 24 Alt + 47 9 CBra 4 + 54 a* + 56 9 Ket + 61 47 Recurse + 66 48 Ket + 71 58 Ket + 76 70 KetRmax + 81 a?+ + 83 83 Ket + 88 End +------------------------------------------------------------------ + +/((?+1)(\1))/ +------------------------------------------------------------------ + 0 47 Bra + 5 37 Once + 10 27 CBra 1 + 17 22 Recurse + 22 10 CBra 2 + 29 \1 + 32 10 Ket + 37 27 Ket + 42 37 Ket + 47 47 Ket + 52 End +------------------------------------------------------------------ + +"(?1)(?#?'){2}(a)" +------------------------------------------------------------------ + 0 49 Bra + 5 10 Once + 10 35 Recurse + 15 10 Ket + 20 10 Once + 25 35 Recurse + 30 10 Ket + 35 9 CBra 1 + 42 a + 44 9 Ket + 49 49 Ket + 54 End +------------------------------------------------------------------ + +/.((?2)(?R)|\1|$)()/ 
+------------------------------------------------------------------ + 0 64 Bra + 5 Any + 6 41 Once + 11 17 CBra 1 + 18 52 Recurse + 23 0 Recurse + 28 8 Alt + 33 \1 + 36 6 Alt + 41 $ + 42 31 Ket + 47 41 Ket + 52 7 CBra 2 + 59 7 Ket + 64 64 Ket + 69 End +------------------------------------------------------------------ + +/.((?3)(?R)()(?2)|\1|$)()/ +------------------------------------------------------------------ + 0 81 Bra + 5 Any + 6 58 Once + 11 34 CBra 1 + 18 69 Recurse + 23 0 Recurse + 28 7 CBra 2 + 35 7 Ket + 40 28 Recurse + 45 8 Alt + 50 \1 + 53 6 Alt + 58 $ + 59 48 Ket + 64 58 Ket + 69 7 CBra 3 + 76 7 Ket + 81 81 Ket + 86 End +------------------------------------------------------------------ + +/(?1)()((((((\1++))\x85)+)|))/ +------------------------------------------------------------------ + 0 115 Bra + 5 10 Recurse + 10 7 CBra 1 + 17 7 Ket + 22 88 CBra 2 + 29 71 CBra 3 + 36 59 CBra 4 + 43 47 CBra 5 + 50 33 CBra 6 + 57 21 CBra 7 + 64 9 Once + 69 \1+ + 73 9 Ket + 78 21 Ket + 83 33 Ket + 88 \x{85} + 90 47 KetRmax + 95 59 Ket +100 5 Alt +105 76 Ket +110 88 Ket +115 115 Ket +120 End +------------------------------------------------------------------ + +# Check the absolute limit on nesting (?| etc. This varies with code unit +# width because the workspace is a different number of bytes. It will fail +# with link size 2 in 8-bit and 16-bit but not in 32-bit. 
+ +/(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?| 
+))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) +/parens_nest_limit=1000,-fullbincode + +# Use "expand" to create some very long patterns with nested parentheses, in +# order to test workspace overflow. Again, this varies with code unit width, +# and even when it fails in two modes, the error offset differs. It also varies +# with link size - hence multiple tests with different values. + +/(?'ABC'\[[bar](]{792}*THEN:\[A]{255}\[)]{793}/expand,-fullbincode,parens_nest_limit=1000 + +/(?'ABC'\[[bar](]{793}*THEN:\[A]{255}\[)]{794}/expand,-fullbincode,parens_nest_limit=1000 + +/(?'ABC'\[[bar](]{1793}*THEN:\[A]{255}\[)]{1794}/expand,-fullbincode,parens_nest_limit=2000 +Failed: error 186 at offset 12820: regular expression is too complicated + +/(?(1)(?1)){8,}+()/debug +------------------------------------------------------------------ + 0 181 Bra + 5 159 Once + 10 13 Cond + 15 1 Cond ref + 18 169 Recurse + 23 13 Ket + 28 13 Cond + 33 1 Cond ref + 36 169 Recurse + 41 13 Ket + 46 13 Cond + 51 1 Cond ref + 54 169 Recurse + 59 13 Ket + 64 13 Cond + 69 1 Cond ref + 72 169 Recurse + 77 13 Ket + 82 13 Cond + 87 1 Cond ref + 90 169 Recurse + 95 13 Ket +100 13 Cond +105 1 Cond ref +108 169 Recurse +113 13 Ket +118 13 Cond +123 1 Cond ref +126 169 Recurse +131 13 Ket +136 23 SBraPos +141 13 SCond +146 1 Cond ref +149 169 Recurse +154 13 Ket +159 23 KetRpos +164 159 Ket +169 7 CBra 1 +176 7 Ket +181 181 Ket +186 End +------------------------------------------------------------------ +Capturing subpattern count = 1 +Max back 
reference = 1 +May match empty string +Subject length lower bound = 0 + abcd + 0: + 1: + +/(?(1)|a(?1)b){2,}+()/debug +------------------------------------------------------------------ + 0 91 Bra + 5 69 Once + 10 8 Cond + 15 1 Cond ref + 18 14 Alt + 23 a + 25 79 Recurse + 30 b + 32 22 Ket + 37 32 SBraPos + 42 8 SCond + 47 1 Cond ref + 50 14 Alt + 55 a + 57 79 Recurse + 62 b + 64 22 Ket + 69 32 KetRpos + 74 69 Ket + 79 7 CBra 1 + 86 7 Ket + 91 91 Ket + 96 End +------------------------------------------------------------------ +Capturing subpattern count = 1 +Max back reference = 1 +May match empty string +Subject length lower bound = 0 + abcde +No match + +/((?1)(?2)(?3)(?4)(?5)(?6)(?7)(?8)(?9)(?9)(?8)(?7)(?6)(?5)(?4)(?3)(?2)(?1)(?0)){2,}()()()()()()()()()/debug +------------------------------------------------------------------ + 0 327 Bra + 5 102 CBra 1 + 12 5 Recurse + 17 219 Recurse + 22 231 Recurse + 27 243 Recurse + 32 255 Recurse + 37 267 Recurse + 42 279 Recurse + 47 291 Recurse + 52 303 Recurse + 57 303 Recurse + 62 291 Recurse + 67 279 Recurse + 72 267 Recurse + 77 255 Recurse + 82 243 Recurse + 87 231 Recurse + 92 219 Recurse + 97 5 Recurse +102 0 Recurse +107 102 Ket +112 102 SCBra 1 +119 5 Recurse +124 219 Recurse +129 231 Recurse +134 243 Recurse +139 255 Recurse +144 267 Recurse +149 279 Recurse +154 291 Recurse +159 303 Recurse +164 303 Recurse +169 291 Recurse +174 279 Recurse +179 267 Recurse +184 255 Recurse +189 243 Recurse +194 231 Recurse +199 219 Recurse +204 5 Recurse +209 0 Recurse +214 102 KetRmax +219 7 CBra 2 +226 7 Ket +231 7 CBra 3 +238 7 Ket +243 7 CBra 4 +250 7 Ket +255 7 CBra 5 +262 7 Ket +267 7 CBra 6 +274 7 Ket +279 7 CBra 7 +286 7 Ket +291 7 CBra 8 +298 7 Ket +303 7 CBra 9 +310 7 Ket +315 7 CBra 10 +322 7 Ket +327 327 Ket +332 End +------------------------------------------------------------------ +Capturing subpattern count = 10 +May match empty string +Subject length lower bound = 0 + 
+/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ +Failed: error 114 at offset 509: missing closing parenthesis + +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))/-fullbincode + +# End of testinput8 diff --git a/pcre2/testdata/testoutput9 b/pcre2/testdata/testoutput9 index 3aa2071f3..6b014e58d 100644 --- a/pcre2/testdata/testoutput9 +++ b/pcre2/testdata/testoutput9 @@ -2,14 +2,10 @@ # UTF-8 or Unicode property support. */ #forbid_utf +#newline_default lf any anycrlf -/a\Cb/ - aXb - 0: aXb - a\nb - 0: a\x0ab - ** Failers (too big char) -No match +/ab/ +\= Expect error message (too big char) and no match A\x{123}B ** Character \x{123} is greater than 255 and UTF-8 mode is not enabled. ** Truncation will probably give the wrong result. 
@@ -311,22 +307,31 @@ Subject length lower bound = 1 ------------------------------------------------------------------ /\777/I -Failed: error 151 at offset 3: octal value is greater than \377 in 8-bit non-UTF-8 mode +Failed: error 151 at offset 4: octal value is greater than \377 in 8-bit non-UTF-8 mode /(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN) XX +/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark,alt_verbnames +Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN) + XX + /(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark XX 0: XX MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE +/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark,alt_verbnames + XX + 0: XX +MK: 
0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE + /\u0100/alt_bsux,allow_empty_class,match_unset_backref,dupnames -Failed: error 177 at offset 5: character code point value in \u.... sequence is too large +Failed: error 177 at offset 6: character code point value in \u.... sequence is too large /[\u0100-\u0200]/alt_bsux,allow_empty_class,match_unset_backref,dupnames -Failed: error 177 at offset 6: character code point value in \u.... sequence is too large +Failed: error 177 at offset 7: character code point value in \u.... sequence is too large /[^\x00-a]{12,}[^b-\xff]*/B ------------------------------------------------------------------ @@ -356,4 +361,10 @@ Failed: error 177 at offset 6: character code point value in \u.... sequence is End ------------------------------------------------------------------ +/(*MARK:a\x{100}b)z/alt_verbnames +Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large + +/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':Æ¿)/ +Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN) + # End of testinput9 diff --git a/pcre2/testdata/valgrind-jit.supp b/pcre2/testdata/valgrind-jit.supp new file mode 100644 index 000000000..f1d267183 --- /dev/null +++ b/pcre2/testdata/valgrind-jit.supp @@ -0,0 +1,15 @@ +{ + name + Memcheck:Addr16 + obj:??? + obj:??? + obj:??? +} + +{ + name + Memcheck:Cond + obj:??? + obj:??? + obj:??? 
+} diff --git a/pcre2/testdata/wintestoutput3 b/pcre2/testdata/wintestoutput3 index 0d8a69016..be856b167 100644 --- a/pcre2/testdata/wintestoutput3 +++ b/pcre2/testdata/wintestoutput3 @@ -159,7 +159,7 @@ No match /[[:alpha:]][[:lower:]][[:upper:]]/IB ------------------------------------------------------------------ Bra - [A-Za-z\x83\x8a\x8c\x8e\x9a\x9c\x9e\x9f\xaa\xb2\xb3\xb5\xb9\xba\xc0-\xd6\xd8-\xf6\xf8-\xff] + [A-Za-z\x83\x8a\x8c\x8e\x9a\x9c\x9e\x9f\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\xff] [a-z\x83\x9a\x9c\x9e\xaa\xb5\xba\xdf-\xf6\xf8-\xff] [A-Z\x8a\x8c\x8e\x9f\xc0-\xd6\xd8-\xde] Ket @@ -167,9 +167,9 @@ No match ------------------------------------------------------------------ Capturing subpattern count = 0 Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z - a b c d e f g h i j k l m n o p q r s t u v w x y z ƒ Š Œ Ž š œ ž Ÿ ª ² ³ - µ ¹ º À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â - ã ä å æ ç è é ê ë ì í î ï ð ñ ò ó ô õ ö ø ù ú û ü ý þ ÿ + a b c d e f g h i j k l m n o p q r s t u v w x y z ƒ Š Œ Ž š œ ž Ÿ ª µ º + À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â ã ä å + æ ç è é ê ë ì í î ï ð ñ ò ó ô õ ö ø ù ú û ü ý þ ÿ Subject length lower bound = 3 # End of testinput3 diff --git a/plugins/nagios/check_maxscale_monitors.pl b/plugins/nagios/check_maxscale_monitors.pl index 8ec9674aa..e9636af06 100755 --- a/plugins/nagios/check_maxscale_monitors.pl +++ b/plugins/nagios/check_maxscale_monitors.pl @@ -5,7 +5,7 @@ # Use of this software is governed by the Business Source License included # in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
# -# Change Date: 2019-07-01 +# Change Date: 2020-01-01 # # On the date above, in accordance with the Business Source License, use # of this software will be governed by version 2 or later of the General diff --git a/plugins/nagios/check_maxscale_resources.pl b/plugins/nagios/check_maxscale_resources.pl index 79fdf6613..59e3368d9 100755 --- a/plugins/nagios/check_maxscale_resources.pl +++ b/plugins/nagios/check_maxscale_resources.pl @@ -5,7 +5,7 @@ # Use of this software is governed by the Business Source License included # in the LICENSE.TXT file and at www.mariadb.com/bsl11. # -# Change Date: 2019-07-01 +# Change Date: 2020-01-01 # # On the date above, in accordance with the Business Source License, use # of this software will be governed by version 2 or later of the General diff --git a/plugins/nagios/check_maxscale_threads.pl b/plugins/nagios/check_maxscale_threads.pl index 973a22db7..82449d892 100755 --- a/plugins/nagios/check_maxscale_threads.pl +++ b/plugins/nagios/check_maxscale_threads.pl @@ -5,7 +5,7 @@ # Use of this software is governed by the Business Source License included # in the LICENSE.TXT file and at www.mariadb.com/bsl11. # -# Change Date: 2019-07-01 +# Change Date: 2020-01-01 # # On the date above, in accordance with the Business Source License, use # of this software will be governed by version 2 or later of the General diff --git a/query_classifier/qc_dummy/qc_dummy.cc b/query_classifier/qc_dummy/qc_dummy.cc index 564588db2..358411b30 100644 --- a/query_classifier/qc_dummy/qc_dummy.cc +++ b/query_classifier/qc_dummy/qc_dummy.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/query_classifier/qc_sqlite/qc_sqlite.c b/query_classifier/qc_sqlite/qc_sqlite.c index 4204c674c..2f561d1d0 100644 --- a/query_classifier/qc_sqlite/qc_sqlite.c +++ b/query_classifier/qc_sqlite/qc_sqlite.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/query_classifier/qc_sqlite/qc_sqlite3.c b/query_classifier/qc_sqlite/qc_sqlite3.c index 3893257dc..808e08d33 100644 --- a/query_classifier/qc_sqlite/qc_sqlite3.c +++ b/query_classifier/qc_sqlite/qc_sqlite3.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/query_classifier/test/canonical_tests/canonizer.c b/query_classifier/test/canonical_tests/canonizer.c index ad8e5fcba..7084d559c 100644 --- a/query_classifier/test/canonical_tests/canonizer.c +++ b/query_classifier/test/canonical_tests/canonizer.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/query_classifier/test/classify.c b/query_classifier/test/classify.c index 533f42b65..79bfeeb50 100644 --- a/query_classifier/test/classify.c +++ b/query_classifier/test/classify.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/query_classifier/test/compare.cc b/query_classifier/test/compare.cc index 7b6fc611e..899739cf0 100644 --- a/query_classifier/test/compare.cc +++ b/query_classifier/test/compare.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/query_classifier/test/crash_qc_sqlite.c b/query_classifier/test/crash_qc_sqlite.c index d4ac4e321..f54fb7add 100644 --- a/query_classifier/test/crash_qc_sqlite.c +++ b/query_classifier/test/crash_qc_sqlite.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-01-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/query_classifier/test/testreader.cc b/query_classifier/test/testreader.cc index b7aadc057..6b61e35fc 100644 --- a/query_classifier/test/testreader.cc +++ b/query_classifier/test/testreader.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/query_classifier/test/testreader.hh b/query_classifier/test/testreader.hh index 68b930a5c..3bde69461 100644 --- a/query_classifier/test/testreader.hh +++ b/query_classifier/test/testreader.hh @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/rabbitmq_consumer/consumer.c b/rabbitmq_consumer/consumer.c index 1595c0dd9..bd5570a77 100644 --- a/rabbitmq_consumer/consumer.c +++ b/rabbitmq_consumer/consumer.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/script/create_grants b/script/create_grants index 348692a5a..091915c7f 100755 --- a/script/create_grants +++ b/script/create_grants @@ -5,7 +5,7 @@ # Use of this software is governed by the Business Source License included # in the LICENSE.TXT file and at www.mariadb.com/bsl11. # -# Change Date: 2019-07-01 +# Change Date: 2020-01-01 # # On the date above, in accordance with the Business Source License, use # of this software will be governed by version 2 or later of the General diff --git a/script/update-change-date b/script/update-change-date index 2d4eb4fdd..64b1ce032 100755 --- a/script/update-change-date +++ b/script/update-change-date @@ -5,7 +5,7 @@ # Use of this software is governed by the Business Source License included # in the LICENSE.TXT file and at www.mariadb.com/bsl11. # -# Change Date: 2019-07-01 +# Change Date: 2020-01-01 # # On the date above, in accordance with the Business Source License, use # of this software will be governed by version 2 or later of the General diff --git a/script/update-license-url b/script/update-license-url index 9ba9469da..102575d56 100755 --- a/script/update-license-url +++ b/script/update-license-url @@ -5,7 +5,7 @@ # Use of this software is governed by the Business Source License included # in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
# -# Change Date: 2019-07-01 +# Change Date: 2020-01-01 # # On the date above, in accordance with the Business Source License, use # of this software will be governed by version 2 or later of the General diff --git a/server/core/admin.cc b/server/core/admin.cc index 8f83c071b..fc484dda4 100644 --- a/server/core/admin.cc +++ b/server/core/admin.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -75,7 +75,7 @@ static inline size_t request_data_length(MHD_Connection *connection) static bool modifies_data(MHD_Connection *connection, string method) { return (method == MHD_HTTP_METHOD_POST || method == MHD_HTTP_METHOD_PUT || - method == MHD_HTTP_METHOD_DELETE) && + method == MHD_HTTP_METHOD_DELETE || method == MHD_HTTP_METHOD_PATCH) && request_data_length(connection); } @@ -105,6 +105,8 @@ int Client::process(string url, string method, const char* upload_data, size_t * HttpRequest request(m_connection, url, method, json); HttpResponse reply(MHD_HTTP_NOT_FOUND); + MXS_DEBUG("Request:\n%s", request.to_string().c_str()); + if (url == "/") { // Respond to pings with 200 OK diff --git a/server/core/adminusers.cc b/server/core/adminusers.cc index 155ea5c3d..9b6efe9ea 100644 --- a/server/core/adminusers.cc +++ b/server/core/adminusers.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/alloc.cc b/server/core/alloc.cc index 745bb1bbe..8fcc1e3ff 100644 --- a/server/core/alloc.cc +++ b/server/core/alloc.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/atomic.cc b/server/core/atomic.cc index 1445d9b65..e2a8119dd 100644 --- a/server/core/atomic.cc +++ b/server/core/atomic.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/authenticator.cc b/server/core/authenticator.cc index 3626c1255..9c54080ba 100644 --- a/server/core/authenticator.cc +++ b/server/core/authenticator.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/buffer.cc b/server/core/buffer.cc index bf19c5e6f..e77cbc1ab 100644 --- a/server/core/buffer.cc +++ b/server/core/buffer.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/config.cc b/server/core/config.cc index a392365ec..d53baa904 100644 --- a/server/core/config.cc +++ b/server/core/config.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -92,6 +92,7 @@ const char CN_LOG_THROTTLING[] = "log_throttling"; const char CN_MAXSCALE[] = "maxscale"; const char CN_MAX_CONNECTIONS[] = "max_connections"; const char CN_MAX_RETRY_INTERVAL[] = "max_retry_interval"; +const char CN_METHOD[] = "method"; const char CN_MODULE[] = "module"; const char CN_MODULES[] = "modules"; const char CN_MODULE_COMMAND[] = "module_command"; @@ -160,6 +161,11 @@ static void global_defaults(); static void feedback_defaults(); static bool check_config_objects(CONFIG_CONTEXT *context); static int maxscale_getline(char** dest, int* size, FILE* file); +static bool check_first_last_char(const char* string, char expected); +static void remove_first_last_char(char* value); +static bool test_regex_string_validity(const char* regex_string); +static bool compile_regex_string(const char* regex_string, bool jit_enabled, uint32_t options, + pcre2_code** output_code, uint32_t* output_capcount); int config_get_ifaddr(unsigned char *output); static int config_get_release_string(char* release); @@ -1232,6 +1238,27 @@ char* config_copy_string(const MXS_CONFIG_PARAMETER *params, const char *key) return rval; } +pcre2_code* config_get_compiled_regex(const MXS_CONFIG_PARAMETER *params, + const char *key, uint32_t options) +{ + pcre2_code* code = NULL; + uint32_t capcount = 0; + config_get_compiled_regex_capcount(params, key, options, &code, &capcount); + return code; +} + +bool config_get_compiled_regex_capcount(const MXS_CONFIG_PARAMETER *params, + const char *key, uint32_t options, + pcre2_code** output_code, + uint32_t* output_capcount) +{ + const char* regex_string = config_get_string(params, key); + uint32_t jit_available = 0; + pcre2_config(PCRE2_CONFIG_JIT, &jit_available); + return compile_regex_string(regex_string, jit_available, options, + 
output_code, output_capcount); +} + MXS_CONFIG_PARAMETER* config_clone_param(const MXS_CONFIG_PARAMETER* param) { MXS_CONFIG_PARAMETER *p2 = (MXS_CONFIG_PARAMETER*)MXS_MALLOC(sizeof(MXS_CONFIG_PARAMETER)); @@ -3606,12 +3633,8 @@ void config_fix_param(const MXS_MODULE_PARAM *params, MXS_CONFIG_PARAMETER *p) break; case MXS_MODULE_PARAM_QUOTEDSTRING: - { // Remove the '"':s from the ends of the string - char* value = p->value; - size_t len = strlen(value); - value[len - 1] = '\0'; - memmove(value, value + 1, len - 1); - } + case MXS_MODULE_PARAM_REGEX: + remove_first_last_char(p->value); break; default: @@ -3711,14 +3734,7 @@ bool config_param_is_valid(const MXS_MODULE_PARAM *params, const char *key, break; case MXS_MODULE_PARAM_QUOTEDSTRING: - valid = false; - { - size_t len = strlen(value); - if ((len >= 2) && (value[0] == '"') && (value[len - 1] == '"')) - { - valid = true; - } - } + valid = check_first_last_char(value, '"'); break; case MXS_MODULE_PARAM_ENUM: @@ -3798,6 +3814,10 @@ bool config_param_is_valid(const MXS_MODULE_PARAM *params, const char *key, valid = check_path_parameter(¶ms[i], value); break; + case MXS_MODULE_PARAM_REGEX: + valid = test_regex_string_validity(value); + break; + default: MXS_ERROR("Unexpected module parameter type: %d", params[i].type); ss_dassert(false); @@ -3996,3 +4016,127 @@ bool config_global_serialize() return rval; } + +/** + * Test if first and last char in the string are as expected. + * + * @param string Input string + * @param expected Required character + * @return True, if string has at least two chars and both first and last char + * equal @c expected + */ +static bool check_first_last_char(const char* string, char expected) +{ + bool valid = false; + { + size_t len = strlen(string); + if ((len >= 2) && (string[0] == expected) && (string[len - 1] == expected)) + { + valid = true; + } + } + return valid; +} + +/** + * Chop a char off from both ends of the string. 
+ * + * @param value Input string + */ +static void remove_first_last_char(char* value) +{ + size_t len = strlen(value); + value[len - 1] = '\0'; + memmove(value, value + 1, len - 1); +} + +/** + * Compile a regex string using PCRE2 using the settings provided. + * + * @param regex_string The string to compile + * @param jit_enabled Enable JIT compilation. If not available, a notice is printed. + * @param options PCRE2 compilation options + * @param output_code Output for the regex machine code + * @param output_capcount Output for the capture count of the regex. Add one to + * get the optimal ovector size. + * @return True on success. On error, nothing is written to the outputs. + */ +static bool compile_regex_string(const char* regex_string, bool jit_enabled, + uint32_t options, pcre2_code** output_code, + uint32_t* output_capcount) +{ + bool success = true; + int errorcode = -1; + PCRE2_SIZE error_offset = -1; + pcre2_code* machine = + pcre2_compile((PCRE2_SPTR) regex_string, PCRE2_ZERO_TERMINATED, options, + &errorcode, &error_offset, NULL); + if (machine) + { + if (jit_enabled) + { + // Try to compile even further for faster matching + if (pcre2_jit_compile(machine, PCRE2_JIT_COMPLETE) < 0) + { + MXS_WARNING("PCRE2 JIT compilation of pattern '%s' failed, " + "falling back to normal compilation.", regex_string); + } + + } + /* Check what is the required match_data size for this pattern. 
+ */ + uint32_t capcount = 0; + int ret_info = pcre2_pattern_info(machine, PCRE2_INFO_CAPTURECOUNT, &capcount); + if (ret_info != 0) + { + MXS_PCRE2_PRINT_ERROR(ret_info); + success = false; + } + if (success) + { + *output_code = machine; + *output_capcount = capcount; + } + } + else + { + MXS_ERROR("Invalid PCRE2 regular expression '%s' (position '%zu').", + regex_string, error_offset); + MXS_PCRE2_PRINT_ERROR(errorcode); + success = false; + } + + if (!success && machine) + { + pcre2_code_free(machine); + } + return success; +} + +/** + * Test if the given string is a valid MaxScale regular expression and can be + * compiled to a regex machine using PCRE2. + * + * @param regex_string The input string + * @return True if compilation succeeded, false if string is invalid or cannot + * be compiled. + */ +static bool test_regex_string_validity(const char* regex_string) +{ + if (!check_first_last_char(regex_string, '/')) + { + return false; + } + char regex_copy[strlen(regex_string) + 1]; + strcpy(regex_copy, regex_string); + remove_first_last_char(regex_copy); + + pcre2_code* code; + uint32_t capcount; + if (compile_regex_string(regex_copy, false, 0, &code, &capcount)) + { + pcre2_code_free(code); + return true; + } + return false; +} diff --git a/server/core/config_runtime.cc b/server/core/config_runtime.cc index 408bf2877..af61df5d5 100644 --- a/server/core/config_runtime.cc +++ b/server/core/config_runtime.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/dcb.cc b/server/core/dcb.cc index 7ec94e2e8..72c45cae8 100644 --- a/server/core/dcb.cc +++ b/server/core/dcb.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -69,19 +69,6 @@ using maxscale::Semaphore; /* A DCB with null values, used for initialization */ static DCB dcb_initialized; -/** Fake epoll event struct */ -typedef struct fake_event -{ - DCB *dcb; /*< The DCB where this event was generated */ - GWBUF *data; /*< Fake data, placed in the DCB's read queue */ - uint32_t event; /*< The EPOLL event type */ - struct fake_event *tail; /*< The last event */ - struct fake_event *next; /*< The next event */ -} fake_event_t; - -static fake_event_t **fake_events; /*< Thread-specific fake event queue */ -static SPINLOCK *fake_event_lock; - static DCB **all_dcbs; static SPINLOCK *all_dcbs_lock; static DCB **zombies; @@ -106,9 +93,7 @@ void dcb_global_init() if ((zombies = (DCB**)MXS_CALLOC(nthreads, sizeof(DCB*))) == NULL || (all_dcbs = (DCB**)MXS_CALLOC(nthreads, sizeof(DCB*))) == NULL || (all_dcbs_lock = (SPINLOCK*)MXS_CALLOC(nthreads, sizeof(SPINLOCK))) == NULL || - (nzombies = (int*)MXS_CALLOC(nthreads, sizeof(int))) == NULL || - (fake_events = (fake_event_t**)MXS_CALLOC(nthreads, sizeof(fake_event_t*))) == NULL || - (fake_event_lock = (SPINLOCK*)MXS_CALLOC(nthreads, sizeof(SPINLOCK))) == NULL) + (nzombies = (int*)MXS_CALLOC(nthreads, sizeof(int))) == NULL) { MXS_OOM(); raise(SIGABRT); @@ -118,11 +103,6 @@ void dcb_global_init() { 
spinlock_init(&all_dcbs_lock[i]); } - - for (int i = 0; i < nthreads; i++) - { - spinlock_init(&fake_event_lock[i]); - } } void dcb_finish() @@ -159,8 +139,7 @@ static DCB *dcb_find_free(); static void dcb_remove_from_list(DCB *dcb); static uint32_t dcb_poll_handler(MXS_POLL_DATA *data, int thread_id, uint32_t events); -static uint32_t dcb_process_poll_events(DCB *dcb, int thread_id, uint32_t ev); -static void dcb_process_fake_events(DCB *dcb, int thread_id); +static uint32_t dcb_process_poll_events(DCB *dcb, uint32_t ev); static bool dcb_session_check(DCB *dcb, const char *); uint64_t dcb_get_session_id(DCB *dcb) @@ -232,68 +211,6 @@ dcb_free(DCB *dcb) dcb_close(dcb); } -/* - * Clone a DCB for internal use, mostly used for specialist filters - * to create dummy clients based on real clients. - * - * @param orig The DCB to clone - * @return A DCB that can be used as a client - */ -DCB * -dcb_clone(DCB *orig) -{ - char *remote = orig->remote; - - if (remote) - { - remote = MXS_STRDUP(remote); - if (!remote) - { - return NULL; - } - } - - char *user = orig->user; - if (user) - { - user = MXS_STRDUP(user); - if (!user) - { - MXS_FREE(remote); - return NULL; - } - } - - DCB *clonedcb = dcb_alloc(orig->dcb_role, orig->listener); - - if (clonedcb) - { - clonedcb->fd = DCBFD_CLOSED; - clonedcb->flags |= DCBF_CLONE; - clonedcb->state = orig->state; - clonedcb->data = orig->data; - clonedcb->ssl_state = orig->ssl_state; - clonedcb->remote = remote; - clonedcb->user = user; - clonedcb->poll.thread.id = orig->poll.thread.id; - clonedcb->protocol = orig->protocol; - - clonedcb->func.write = dcb_null_write; - /** - * Close triggers closing of router session as well which is needed. 
- */ - clonedcb->func.close = orig->func.close; - clonedcb->func.auth = dcb_null_auth; - } - else - { - MXS_FREE(remote); - MXS_FREE(user); - } - - return clonedcb; -} - /** * Free a DCB and remove it from the chain of all DCBs * @@ -357,11 +274,11 @@ dcb_free_all_memory(DCB *dcb) { DCB_CALLBACK *cb_dcb; - if (dcb->protocol && (!DCB_IS_CLONE(dcb))) + if (dcb->protocol) { MXS_FREE(dcb->protocol); } - if (dcb->data && dcb->authfunc.free && !DCB_IS_CLONE(dcb)) + if (dcb->data && dcb->authfunc.free) { dcb->authfunc.free(dcb); dcb->data = NULL; @@ -771,8 +688,7 @@ int dcb_read(DCB *dcb, if (dcb->fd <= 0) { - MXS_ERROR("Read failed, dcb is %s.", dcb->fd == DCBFD_CLOSED ? - "closed" : "cloned, not readable"); + MXS_ERROR("Read failed, dcb is closed."); return 0; } @@ -935,8 +851,7 @@ dcb_read_SSL(DCB *dcb, GWBUF **head) if (dcb->fd <= 0) { - MXS_ERROR("Read failed, dcb is %s.", dcb->fd == DCBFD_CLOSED ? - "closed" : "cloned, not readable"); + MXS_ERROR("Read failed, dcb is closed."); return -1; } @@ -1123,8 +1038,7 @@ dcb_write_parameter_check(DCB *dcb, GWBUF *queue) if (dcb->fd <= 0) { - MXS_ERROR("Write failed, dcb is %s.", - dcb->fd == DCBFD_CLOSED ? "closed" : "cloned, not writable"); + MXS_ERROR("Write failed, dcb is closed."); gwbuf_free(queue); return false; } @@ -1552,10 +1466,7 @@ dprintOneDCB(DCB *pdcb, DCB *dcb) dcb_printf(pdcb, "\t\tNo. of Accepts: %d\n", dcb->stats.n_accepts); dcb_printf(pdcb, "\t\tNo. of High Water Events: %d\n", dcb->stats.n_high_water); dcb_printf(pdcb, "\t\tNo. of Low Water Events: %d\n", dcb->stats.n_low_water); - if (dcb->flags & DCBF_CLONE) - { - dcb_printf(pdcb, "\t\tDCB is a clone.\n"); - } + if (dcb->persistentstart) { char buff[20]; @@ -1717,10 +1628,6 @@ dprintDCB(DCB *pdcb, DCB *dcb) dcb_printf(pdcb, "\t\tPending events in the queue: %x %s\n", dcb->evq.pending_events, dcb->evq.processing ? 
"(processing)" : ""); } - if (dcb->flags & DCBF_CLONE) - { - dcb_printf(pdcb, "\t\tDCB is a clone.\n"); - } if (dcb->persistentstart) { @@ -2098,12 +2005,6 @@ static void dcb_hangup_foreach_worker(int thread_id, struct server* server) dcb->server == server) { poll_fake_hangup_event(dcb); - // dcb_hangup_foreach_worker() is called via the message loop, - // so immediately after the hangup event has been added, we can - // also process it. Indeed, it is necessary to do that because - // otherwise, unless there is a real event for the DCB descriptor, - // the fake event would not be handled. - dcb_process_fake_events(dcb, thread_id); } } } @@ -2122,41 +2023,6 @@ dcb_hangup_foreach(struct server* server) Worker::broadcast_message(MXS_WORKER_MSG_CALL, arg1, arg2); } -/** - * Null protocol write routine used for cloned dcb's. It merely consumes - * buffers written on the cloned DCB and sets the DCB_REPLIED flag. - * - * @param dcb The descriptor control block - * @param buf The buffer being written - * @return Always returns a good write operation result - */ -static int -dcb_null_write(DCB *dcb, GWBUF *buf) -{ - while (buf) - { - buf = gwbuf_consume(buf, GWBUF_LENGTH(buf)); - } - - dcb->flags |= DCBF_REPLIED; - - return 1; -} - -/** - * Null protocol auth operation for use by cloned DCB's. - * - * @param dcb The DCB being closed. 
- * @param server The server to auth against - * @param session The user session - * @param buf The buffer with the new auth request - */ -static int -dcb_null_auth(DCB *dcb, SERVER *server, MXS_SESSION *session, GWBUF *buf) -{ - return 0; -} - /** * Check persistent pool for expiry or excess size and count * @@ -3141,9 +3007,10 @@ int dcb_get_port(const DCB *dcb) return rval; } -static uint32_t dcb_process_poll_events(DCB *dcb, int thread_id, uint32_t events) +static uint32_t dcb_process_poll_events(DCB *dcb, uint32_t events) { - ss_dassert(dcb->poll.thread.id == thread_id || dcb->dcb_role == DCB_ROLE_SERVICE_LISTENER); + ss_dassert(dcb->poll.thread.id == mxs::Worker::get_current_id() || + dcb->dcb_role == DCB_ROLE_SERVICE_LISTENER); CHK_DCB(dcb); @@ -3305,89 +3172,56 @@ static uint32_t dcb_process_poll_events(DCB *dcb, int thread_id, uint32_t events return rc; } -static void dcb_process_fake_events(DCB *dcb, int thread_id) -{ - // Since this loop is now here, it will be processed once per extracted epoll - // event and not once per extraction of events, but as this is temporary code - // that's ok. Once it'll be possible to send cross-thread messages, the need - // for the fake event list will disappear. - - fake_event_t *event = NULL; - - /** It is very likely that the queue is empty so to avoid hitting the - * spinlock every time we receive events, we only do a dirty read. Currently, - * only the monitors inject fake events from external threads. 
*/ - if (fake_events[thread_id]) - { - spinlock_acquire(&fake_event_lock[thread_id]); - event = fake_events[thread_id]; - fake_events[thread_id] = NULL; - spinlock_release(&fake_event_lock[thread_id]); - } - - while (event) - { - event->dcb->dcb_fakequeue = event->data; - dcb_process_poll_events(event->dcb, thread_id, event->event); - fake_event_t *tmp = event; - event = event->next; - MXS_FREE(tmp); - } -} - static uint32_t dcb_poll_handler(MXS_POLL_DATA *data, int thread_id, uint32_t events) { DCB *dcb = (DCB*)data; - uint32_t rc = dcb_process_poll_events(dcb, thread_id, events); - - dcb_process_fake_events(dcb, thread_id); - - return rc; + return dcb_process_poll_events(dcb, events); } -static void poll_add_event_to_dcb(DCB* dcb, - GWBUF* buf, - uint32_t ev) +class FakeEventTask: public mxs::WorkerDisposableTask { - fake_event_t *event = (fake_event_t*)MXS_MALLOC(sizeof(*event)); + FakeEventTask(const FakeEventTask&); + FakeEventTask& operator=(const FakeEventTask&); - if (event) +public: + FakeEventTask(DCB* dcb, GWBUF* buf, uint32_t ev): + m_dcb(dcb), + m_buffer(buf), + m_ev(ev) { - event->data = buf; - event->dcb = dcb; - event->event = ev; - event->next = NULL; - event->tail = event; + } - int thr = dcb->poll.thread.id; + void execute(Worker& worker) + { + m_dcb->dcb_fakequeue = m_buffer; + dcb_process_poll_events(m_dcb, m_ev); + } - /** It is possible that a housekeeper or a monitor thread inserts a fake - * event into the thread's event queue which is why the operation needs - * to be protected by a spinlock */ - spinlock_acquire(&fake_event_lock[thr]); +private: + DCB* m_dcb; + GWBUF* m_buffer; + uint32_t m_ev; +}; - if (fake_events[thr]) - { - fake_events[thr]->tail->next = event; - fake_events[thr]->tail = event; - } - else - { - fake_events[thr] = event; - } +static void poll_add_event_to_dcb(DCB* dcb, GWBUF* buf, uint32_t ev) +{ + FakeEventTask* task = new (std::nothrow) FakeEventTask(dcb, buf, ev); - spinlock_release(&fake_event_lock[thr]); + if 
(task) + { + Worker* worker = Worker::get(dcb->poll.thread.id); + worker->post(std::auto_ptr(task), mxs::Worker::EXECUTE_QUEUED); + } + else + { + MXS_OOM(); } } void poll_add_epollin_event_to_dcb(DCB* dcb, GWBUF* buf) { - uint32_t ev; - - ev = EPOLLIN; - - poll_add_event_to_dcb(dcb, buf, ev); + poll_add_event_to_dcb(dcb, buf, EPOLLIN); } void poll_fake_write_event(DCB *dcb) @@ -3565,12 +3399,9 @@ int poll_remove_dcb(DCB *dcb) /** * Only positive fds can be removed from epoll set. - * Cloned DCBs can have a state of DCB_STATE_POLLING but are not in - * the epoll set and do not have a valid file descriptor. Hence the - * only action for them is already done - the change of state to - * DCB_STATE_NOPOLLING. */ dcbfd = dcb->fd; + ss_dassert(dcbfd > 0); if (dcbfd > 0) { diff --git a/server/core/doxygen.c b/server/core/doxygen.c index 227ac454a..6a1538ea2 100644 --- a/server/core/doxygen.c +++ b/server/core/doxygen.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/externcmd.cc b/server/core/externcmd.cc index abfea8b48..a922dc6fd 100644 --- a/server/core/externcmd.cc +++ b/server/core/externcmd.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/filter.cc b/server/core/filter.cc index 845ca7d11..68711a591 100644 --- a/server/core/filter.cc +++ b/server/core/filter.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/gateway.cc b/server/core/gateway.cc index e097f2b93..58fe215bc 100644 --- a/server/core/gateway.cc +++ b/server/core/gateway.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/hashtable.cc b/server/core/hashtable.cc index 2658498be..82e4fe91a 100644 --- a/server/core/hashtable.cc +++ b/server/core/hashtable.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/hint.cc b/server/core/hint.cc index 08d4932be..d1409537f 100644 --- a/server/core/hint.cc +++ b/server/core/hint.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/housekeeper.cc b/server/core/housekeeper.cc index 092e6e494..41d365a1e 100644 --- a/server/core/housekeeper.cc +++ b/server/core/housekeeper.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/httprequest.cc b/server/core/httprequest.cc index 2611dcfc1..26408b858 100644 --- a/server/core/httprequest.cc +++ b/server/core/httprequest.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -16,6 +16,7 @@ #include #include +#include using std::string; using std::deque; @@ -128,3 +129,78 @@ bool HttpRequest::validate_api_version() return rval; } + +namespace +{ +struct ValueFormatter +{ + std::stringstream ss; + const char* separator; + const char* terminator; + + ValueFormatter(const char* sep, const char* term): + separator(sep), terminator(term) + { + } +}; +} + +static int value_combine_cb(void *cls, + enum MHD_ValueKind kind, + const char *key, + const char *value) +{ + ValueFormatter& cnf = *(ValueFormatter*)cls; + + cnf.ss << key; + + if (value) + { + cnf.ss << cnf.separator << value; + } + + cnf.ss << cnf.terminator; + + return MHD_YES; +} + +std::string HttpRequest::to_string() const +{ + std::stringstream req; + req << m_verb << " " << m_resource; + + ValueFormatter opts("=", "&"); + MHD_get_connection_values(m_connection, MHD_GET_ARGUMENT_KIND, + value_combine_cb, &opts); + + std::string optstr = opts.ss.str(); + size_t len = optstr.length(); + + if (len) + { + req << "?"; + + if (optstr[len - 1] == '&') + { + optstr.erase(len - 1); + } + } + + req << optstr << " " << "HTTP/1.1" << "\r\n"; + + ValueFormatter hdr(": ", "\r\n"); + MHD_get_connection_values(m_connection, MHD_HEADER_KIND, + value_combine_cb, &hdr); + + std::string hdrstr = hdr.ss.str(); + + if (hdrstr.length()) + { + req << hdrstr; + } + + req << "\r\n"; + + req << m_json_string; + return req.str(); +} diff --git a/server/core/httpresponse.cc b/server/core/httpresponse.cc index 557ce4c8e..aee90ef67 100644 --- a/server/core/httpresponse.cc +++ b/server/core/httpresponse.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/json_api.cc b/server/core/json_api.cc index 8205afbc5..555af8fb4 100644 --- a/server/core/json_api.cc +++ b/server/core/json_api.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/listener.cc b/server/core/listener.cc index 3f537b83e..51d6509a9 100644 --- a/server/core/listener.cc +++ b/server/core/listener.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/load_utils.cc b/server/core/load_utils.cc index 0f6995d6a..45c8f6881 100644 --- a/server/core/load_utils.cc +++ b/server/core/load_utils.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -417,6 +417,12 @@ struct cb_param bool modulecmd_cb(const MODULECMD *cmd, void *data) { + if (modulecmd_requires_output_dcb(cmd)) + { + /** Module requires an output DCB, don't print it */ + return true; + } + cb_param* d = static_cast(data); json_t* obj = json_object(); @@ -424,6 +430,8 @@ bool modulecmd_cb(const MODULECMD *cmd, void *data) json_object_set_new(obj, CN_TYPE, json_string(CN_MODULE_COMMAND)); json_t* attr = json_object(); + const char* method = MODULECMD_MODIFIES_DATA(cmd) ? "POST" : "GET"; + json_object_set_new(attr, CN_METHOD, json_string(method)); json_object_set_new(attr, CN_ARG_MIN, json_integer(cmd->arg_count_min)); json_object_set_new(attr, CN_ARG_MAX, json_integer(cmd->arg_count_max)); diff --git a/server/core/log_manager.cc b/server/core/log_manager.cc index 3176481f0..bec97ab72 100644 --- a/server/core/log_manager.cc +++ b/server/core/log_manager.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/maxkeys.c b/server/core/maxkeys.c index 3446f7bfd..ad928099b 100644 --- a/server/core/maxkeys.c +++ b/server/core/maxkeys.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/maxpasswd.c b/server/core/maxpasswd.c index fcb1bf4e4..652925157 100644 --- a/server/core/maxpasswd.c +++ b/server/core/maxpasswd.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/maxscale/admin.hh b/server/core/maxscale/admin.hh index 46959145a..876c75b29 100644 --- a/server/core/maxscale/admin.hh +++ b/server/core/maxscale/admin.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/maxscale/config.h b/server/core/maxscale/config.h index 3fb239b6d..667fa651e 100644 --- a/server/core/maxscale/config.h +++ b/server/core/maxscale/config.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/maxscale/config_runtime.h b/server/core/maxscale/config_runtime.h index d52f2c0ff..5014bddc0 100644 --- a/server/core/maxscale/config_runtime.h +++ b/server/core/maxscale/config_runtime.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/maxscale/externcmd.h b/server/core/maxscale/externcmd.h index 346c5fda6..c23b3ac45 100644 --- a/server/core/maxscale/externcmd.h +++ b/server/core/maxscale/externcmd.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/maxscale/filter.h b/server/core/maxscale/filter.h index 4ebc40602..0c9751175 100644 --- a/server/core/maxscale/filter.h +++ b/server/core/maxscale/filter.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/maxscale/httprequest.hh b/server/core/maxscale/httprequest.hh index 3939932a0..bb1b2a2a8 100644 --- a/server/core/maxscale/httprequest.hh +++ b/server/core/maxscale/httprequest.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -44,6 +44,35 @@ static int value_iterator(void *cls, return MHD_YES; } +static int value_sum_iterator(void *cls, + enum MHD_ValueKind kind, + const char *key, + const char *value) +{ + size_t& count = *(size_t*)cls; + count++; + return MHD_YES; +} + +static int value_copy_iterator(void *cls, + enum MHD_ValueKind kind, + const char *key, + const char *value) +{ + std::string k = key; + if (value) + { + k += "="; + k += value; + } + + char**& dest = *(char***) cls; + *dest = MXS_STRDUP_A(k.c_str()); + dest++; + + return MHD_YES; +} + class HttpRequest { HttpRequest(const HttpRequest&); @@ -106,6 +135,34 @@ public: return p.second; } + /** + * @brief Get request option count + * + * @return Number of options in the request + */ + size_t get_option_count() const + { + size_t rval = 0; + MHD_get_connection_values(m_connection, MHD_GET_ARGUMENT_KIND, + value_sum_iterator, &rval); + + return rval; + } + + /** + * @brief Copy options to an array + * + * The @c dest parameter must be able to hold at least get_option_count() + * pointers. The values stored need to be freed by the caller. 
+ * + * @param dest Destination where options are copied + */ + void copy_options(char** dest) const + { + MHD_get_connection_values(m_connection, MHD_GET_ARGUMENT_KIND, + value_copy_iterator, &dest); + } + /** * @brief Return request body * @@ -131,7 +188,7 @@ public: * * @return The complete request URI */ - const std::string& get_uri() const + std::string get_uri() const { return m_resource; } @@ -143,11 +200,39 @@ public: * * @return The request URI part or empty string if no part was found */ - const std::string uri_part(uint32_t idx) const + std::string uri_part(uint32_t idx) const { return m_resource_parts.size() > idx ? m_resource_parts[idx] : ""; } + /** + * @brief Return a segment of the URI + * + * Combines a range of parts into a segment of the URI. Each part is + * separated by a forward slash. + * + * @param start Start of range + * @param end End of range, not inclusive + * + * @return The URI segment that matches this range + */ + std::string uri_segment(uint32_t start, uint32_t end) const + { + std::string rval; + + for (uint32_t i = start; i < end && i < m_resource_parts.size(); i++) + { + if (i > start) + { + rval += "/"; + } + + rval += m_resource_parts[i]; + } + + return rval; + } + /** * @brief Return how many parts are in the URI * @@ -158,21 +243,35 @@ public: return m_resource_parts.size(); } - /** - * @brief Return the last part of the URI - * - * @return The last URI part - */ - const std::string last_uri_part() const + /** + * @brief Return the last part of the URI + * + * @return The last URI part + */ + std::string last_uri_part() const { return m_resource_parts.size() > 0 ? m_resource_parts[m_resource_parts.size() - 1] : ""; } + /** + * @brief Return the value of the Host header + * + * @return The value of the Host header + */ const char* host() const { return m_hostname.c_str(); } + /** + * @brief Convert request to string format + * + * The returned string should be logically equivalent to the original request. 
+ * + * @return The request in string format + */ + std::string to_string() const; + /** * @brief Drop the API version prefix * diff --git a/server/core/maxscale/httpresponse.hh b/server/core/maxscale/httpresponse.hh index 4ed545ff5..cedbccd12 100644 --- a/server/core/maxscale/httpresponse.hh +++ b/server/core/maxscale/httpresponse.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/maxscale/maxscale.h b/server/core/maxscale/maxscale.h index 988444497..e4ec7772d 100644 --- a/server/core/maxscale/maxscale.h +++ b/server/core/maxscale/maxscale.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/maxscale/messagequeue.hh b/server/core/maxscale/messagequeue.hh index 7aa2cd9ad..8278f670b 100644 --- a/server/core/maxscale/messagequeue.hh +++ b/server/core/maxscale/messagequeue.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/maxscale/mlist.h b/server/core/maxscale/mlist.h index 5a9b8ddbd..949d8aacc 100644 --- a/server/core/maxscale/mlist.h +++ b/server/core/maxscale/mlist.h @@ -7,7 +7,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/maxscale/modules.h b/server/core/maxscale/modules.h index 2d6748c16..f962543db 100644 --- a/server/core/maxscale/modules.h +++ b/server/core/maxscale/modules.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/maxscale/monitor.h b/server/core/maxscale/monitor.h index 938c03d5d..c9229258e 100644 --- a/server/core/maxscale/monitor.h +++ b/server/core/maxscale/monitor.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/maxscale/poll.h b/server/core/maxscale/poll.h index 1188f3daf..c5cb95104 100644 --- a/server/core/maxscale/poll.h +++ b/server/core/maxscale/poll.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/maxscale/query_classifier.h b/server/core/maxscale/query_classifier.h index 3de89dff9..13c72ecac 100644 --- a/server/core/maxscale/query_classifier.h +++ b/server/core/maxscale/query_classifier.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/maxscale/queuemanager.h b/server/core/maxscale/queuemanager.h index a013f4d2b..32851d50b 100644 --- a/server/core/maxscale/queuemanager.h +++ b/server/core/maxscale/queuemanager.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/maxscale/resource.hh b/server/core/maxscale/resource.hh index 15a4dc284..7b8e3f2c3 100644 --- a/server/core/maxscale/resource.hh +++ b/server/core/maxscale/resource.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -64,6 +64,7 @@ private: ResourceCallback m_cb; /**< Resource handler callback */ std::deque m_path; /**< Path components */ + bool m_is_glob; /**< Does this path glob? */ }; /** diff --git a/server/core/maxscale/secrets.h b/server/core/maxscale/secrets.h index 2ccae9996..97bdbd901 100644 --- a/server/core/maxscale/secrets.h +++ b/server/core/maxscale/secrets.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/maxscale/service.h b/server/core/maxscale/service.h index b8347fb1b..1310b5db6 100644 --- a/server/core/maxscale/service.h +++ b/server/core/maxscale/service.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/maxscale/session.h b/server/core/maxscale/session.h index 44f9f4dd4..8471d397b 100644 --- a/server/core/maxscale/session.h +++ b/server/core/maxscale/session.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/maxscale/session.hh b/server/core/maxscale/session.hh index f5e0b48e0..f74b7efff 100644 --- a/server/core/maxscale/session.hh +++ b/server/core/maxscale/session.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/maxscale/skygw_utils.h b/server/core/maxscale/skygw_utils.h index 3a4b08328..d752950e5 100644 --- a/server/core/maxscale/skygw_utils.h +++ b/server/core/maxscale/skygw_utils.h @@ -7,7 +7,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/maxscale/statistics.h b/server/core/maxscale/statistics.h index 6567316be..7b838ac09 100644 --- a/server/core/maxscale/statistics.h +++ b/server/core/maxscale/statistics.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/maxscale/trxboundaryparser.hh b/server/core/maxscale/trxboundaryparser.hh index a311f2699..197648a8a 100644 --- a/server/core/maxscale/trxboundaryparser.hh +++ b/server/core/maxscale/trxboundaryparser.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/maxscale/worker.h b/server/core/maxscale/worker.h index 12de89b0b..bae71d9bf 100644 --- a/server/core/maxscale/worker.h +++ b/server/core/maxscale/worker.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/maxscale/worker.hh b/server/core/maxscale/worker.hh index 98ab353f3..7ee5808da 100644 --- a/server/core/maxscale/worker.hh +++ b/server/core/maxscale/worker.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/maxscale/workertask.hh b/server/core/maxscale/workertask.hh index 408533d39..7510ee8d8 100644 --- a/server/core/maxscale/workertask.hh +++ b/server/core/maxscale/workertask.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/maxscale_pcre2.cc b/server/core/maxscale_pcre2.cc index 9459930e0..b1391e615 100644 --- a/server/core/maxscale_pcre2.cc +++ b/server/core/maxscale_pcre2.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/messagequeue.cc b/server/core/messagequeue.cc index 32ba5c46a..1da2b142d 100644 --- a/server/core/messagequeue.cc +++ b/server/core/messagequeue.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/misc.cc b/server/core/misc.cc index 101da1aca..43b931eb6 100644 --- a/server/core/misc.cc +++ b/server/core/misc.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/mlist.cc b/server/core/mlist.cc index 29de3a9ed..1c5557379 100644 --- a/server/core/mlist.cc +++ b/server/core/mlist.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/modulecmd.cc b/server/core/modulecmd.cc index b0fbeeac8..e6ccf39c1 100644 --- a/server/core/modulecmd.cc +++ b/server/core/modulecmd.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -137,8 +137,8 @@ static MODULECMD_DOMAIN* get_or_create_domain(const char *domain) } static MODULECMD* command_create(const char *identifier, const char *domain, - MODULECMDFN entry_point, int argc, - modulecmd_arg_type_t* argv) + enum modulecmd_type type, MODULECMDFN entry_point, + int argc, modulecmd_arg_type_t* argv) { ss_dassert((argc && argv) || (argc == 0 && argv == NULL)); MODULECMD *rval = (MODULECMD*)MXS_MALLOC(sizeof(*rval)); @@ -166,6 +166,7 @@ static MODULECMD* command_create(const char *identifier, const char *domain, types[0].description = ""; } + rval->type = type; rval->func = entry_point; rval->identifier = id; rval->domain = dm; @@ -413,7 +414,8 @@ static void free_argument(struct arg_node *arg) */ bool modulecmd_register_command(const char *domain, const char *identifier, - MODULECMDFN entry_point, int argc, modulecmd_arg_type_t *argv) + enum modulecmd_type type, MODULECMDFN entry_point, + int argc, modulecmd_arg_type_t *argv) { reset_error(); bool rval = false; @@ -430,7 +432,7 @@ bool modulecmd_register_command(const char *domain, const char *identifier, } else { - MODULECMD *cmd = command_create(identifier, domain, entry_point, argc, argv); + MODULECMD *cmd = command_create(identifier, domain, type, entry_point, argc, argv); if (cmd) 
{ @@ -687,3 +689,17 @@ bool modulecmd_arg_is_present(const MODULECMD_ARG *arg, int idx) return arg->argc > idx && MODULECMD_GET_TYPE(&arg->argv[idx].type) != MODULECMD_ARG_NONE; } + +bool modulecmd_requires_output_dcb(const MODULECMD* cmd) +{ + for (int i = 0; i < cmd->arg_count_max; i++) + { + if (cmd->arg_types[i].type == MODULECMD_ARG_OUTPUT) + { + /** We can't call this as it requires a DCB for output so don't show it */ + return true; + } + } + + return false; +} diff --git a/server/core/modutil.cc b/server/core/modutil.cc index 91b734494..2ca4108cc 100644 --- a/server/core/modutil.cc +++ b/server/core/modutil.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/monitor.cc b/server/core/monitor.cc index 7e35a0c99..57abaf2d7 100644 --- a/server/core/monitor.cc +++ b/server/core/monitor.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/mysql_binlog.cc b/server/core/mysql_binlog.cc index 951e6eace..c51fbbce6 100644 --- a/server/core/mysql_binlog.cc +++ b/server/core/mysql_binlog.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11.
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/mysql_utils.cc b/server/core/mysql_utils.cc index 83cdd8f78..94f87bc3e 100644 --- a/server/core/mysql_utils.cc +++ b/server/core/mysql_utils.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/paths.cc b/server/core/paths.cc index 036c49c1b..7b9eed855 100644 --- a/server/core/paths.cc +++ b/server/core/paths.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/poll.cc b/server/core/poll.cc index 9085d4149..dfe16e87c 100644 --- a/server/core/poll.cc +++ b/server/core/poll.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/query_classifier.cc b/server/core/query_classifier.cc index 17b8c41fa..aa69e2ddf 100644 --- a/server/core/query_classifier.cc +++ b/server/core/query_classifier.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/queuemanager.cc b/server/core/queuemanager.cc index 72c2a1d37..937e32440 100644 --- a/server/core/queuemanager.cc +++ b/server/core/queuemanager.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/random_jkiss.cc b/server/core/random_jkiss.cc index 0ace1d816..2be0a9044 100644 --- a/server/core/random_jkiss.cc +++ b/server/core/random_jkiss.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/resource.cc b/server/core/resource.cc index 52ef6f0b0..f69542b8b 100644 --- a/server/core/resource.cc +++ b/server/core/resource.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -23,6 +23,7 @@ #include #include #include +#include #include "maxscale/httprequest.hh" #include "maxscale/httpresponse.hh" @@ -103,7 +104,8 @@ private: }; Resource::Resource(ResourceCallback cb, int components, ...) : - m_cb(cb) + m_cb(cb), + m_is_glob(false) { va_list args; va_start(args, components); @@ -112,6 +114,10 @@ Resource::Resource(ResourceCallback cb, int components, ...) 
: { string part = va_arg(args, const char*); m_path.push_back(part); + if (part == "?") + { + m_is_glob = true; + } } va_end(args); } @@ -124,11 +130,12 @@ bool Resource::match(const HttpRequest& request) const { bool rval = false; - if (request.uri_part_count() == m_path.size()) + if (request.uri_part_count() == m_path.size() || m_is_glob) { rval = true; + size_t parts = MXS_MIN(request.uri_part_count(), m_path.size()); - for (size_t i = 0; i < request.uri_part_count(); i++) + for (size_t i = 0; i < parts; i++) { if (m_path[i] != request.uri_part(i) && !matching_variable_path(m_path[i], request.uri_part(i))) @@ -185,6 +192,11 @@ bool Resource::matching_variable_path(const string& path, const string& target) } } } + else if (path == "?") + { + /** Wildcard match */ + rval = true; + } return rval; } @@ -550,6 +562,43 @@ HttpResponse cb_delete_user(const HttpRequest& request) return HttpResponse(MHD_HTTP_FORBIDDEN, runtime_get_json_error()); } +HttpResponse cb_modulecmd(const HttpRequest& request) +{ + std::string module = request.uri_part(2); + std::string identifier = request.uri_segment(3, request.uri_part_count()); + std::string verb = request.get_verb(); + + const MODULECMD* cmd = modulecmd_find_command(module.c_str(), identifier.c_str()); + + if (cmd && !modulecmd_requires_output_dcb(cmd)) + { + if ((!MODULECMD_MODIFIES_DATA(cmd) && verb == MHD_HTTP_METHOD_GET) || + (MODULECMD_MODIFIES_DATA(cmd) && verb == MHD_HTTP_METHOD_POST)) + { + int n_opts = (int)request.get_option_count(); + char* opts[n_opts]; + request.copy_options(opts); + + MODULECMD_ARG* args = modulecmd_arg_parse(cmd, n_opts, (const void**)opts); + bool rval = false; + + if (args) + { + rval = modulecmd_call_command(cmd, args); + } + + for (int i = 0; i < n_opts; i++) + { + MXS_FREE(opts[i]); + } + + return HttpResponse(rval ? 
MHD_HTTP_OK : MHD_HTTP_INTERNAL_SERVER_ERROR); + } + } + + return HttpResponse(MHD_HTTP_NOT_FOUND); +} + HttpResponse cb_send_ok(const HttpRequest& request) { return HttpResponse(MHD_HTTP_OK); @@ -563,6 +612,18 @@ public: typedef std::shared_ptr SResource; typedef list ResourceList; + /** + * Create REST API resources + * + * Each resource represents either a collection of resources, an individual + * resource, a sub-resource of a resource or an "action" endpoint which + * executes an action. + * + * The resources are defined by the Resource class. Each resource maps to an + * HTTP method and one or more paths. The path components can contain either + * an explicit string, a colon-prefixed object type or a question mark for + * a path component that matches everything. + */ RootResource() { // Special resources required by OPTION etc. @@ -594,6 +655,9 @@ public: m_get.push_back(SResource(new Resource(cb_all_modules, 2, "maxscale", "modules"))); m_get.push_back(SResource(new Resource(cb_module, 3, "maxscale", "modules", ":module"))); + /** For all read-only module commands */ + m_get.push_back(SResource(new Resource(cb_modulecmd, 4, "maxscale", "modules", ":module", "?"))); + m_get.push_back(SResource(new Resource(cb_all_users, 1, "users"))); m_get.push_back(SResource(new Resource(cb_all_inet_users, 2, "users", "inet"))); m_get.push_back(SResource(new Resource(cb_all_unix_users, 2, "users", "unix"))); @@ -609,12 +673,15 @@ public: m_post.push_back(SResource(new Resource(cb_create_user, 2, "users", "inet"))); m_post.push_back(SResource(new Resource(cb_create_user, 2, "users", "unix"))); + /** For all module commands that modify state/data */ + m_post.push_back(SResource(new Resource(cb_modulecmd, 4, "maxscale", "modules", ":module", "?"))); + /** Update resources */ - m_put.push_back(SResource(new Resource(cb_alter_server, 2, "servers", ":server"))); - m_put.push_back(SResource(new Resource(cb_alter_monitor, 2, "monitors", ":monitor"))); -
m_put.push_back(SResource(new Resource(cb_alter_service, 2, "services", ":service"))); - m_put.push_back(SResource(new Resource(cb_alter_logs, 2, "maxscale", "logs"))); - m_put.push_back(SResource(new Resource(cb_alter_maxscale, 1, "maxscale"))); + m_patch.push_back(SResource(new Resource(cb_alter_server, 2, "servers", ":server"))); + m_patch.push_back(SResource(new Resource(cb_alter_monitor, 2, "monitors", ":monitor"))); + m_patch.push_back(SResource(new Resource(cb_alter_service, 2, "services", ":service"))); + m_patch.push_back(SResource(new Resource(cb_alter_logs, 2, "maxscale", "logs"))); + m_patch.push_back(SResource(new Resource(cb_alter_maxscale, 1, "maxscale"))); /** Change resource states */ m_put.push_back(SResource(new Resource(cb_stop_monitor, 3, "monitors", ":monitor", "stop"))); @@ -707,6 +774,10 @@ public: { return process_request_type(m_put, request); } + else if (request.get_verb() == MHD_HTTP_METHOD_PATCH) + { + return process_request_type(m_patch, request); + } else if (request.get_verb() == MHD_HTTP_METHOD_POST) { return process_request_type(m_post, request); @@ -743,6 +814,7 @@ private: ResourceList m_put; /**< PUT request handlers */ ResourceList m_post; /**< POST request handlers */ ResourceList m_delete; /**< DELETE request handlers */ + ResourceList m_patch; /**< PATCH request handlers */ }; static RootResource resources; /**< Core resource set */ @@ -753,7 +825,8 @@ static bool request_modifies_data(const string& verb) { return verb == MHD_HTTP_METHOD_POST || verb == MHD_HTTP_METHOD_PUT || - verb == MHD_HTTP_METHOD_DELETE; + verb == MHD_HTTP_METHOD_DELETE || + verb == MHD_HTTP_METHOD_PATCH; } static bool request_reads_data(const string& verb) diff --git a/server/core/resultset.cc b/server/core/resultset.cc index 8abb0c9c7..f029e8b88 100644 --- a/server/core/resultset.cc +++ b/server/core/resultset.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at 
www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/router.cc b/server/core/router.cc index e189680c5..489e3a3fb 100644 --- a/server/core/router.cc +++ b/server/core/router.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/secrets.cc b/server/core/secrets.cc index 3f75596d0..a63f29cdb 100644 --- a/server/core/secrets.cc +++ b/server/core/secrets.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/semaphore.cc b/server/core/semaphore.cc index 82c40e1e2..01ef1e51f 100644 --- a/server/core/semaphore.cc +++ b/server/core/semaphore.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/server.cc b/server/core/server.cc index 8d5aa164e..d5be37d62 100644 --- a/server/core/server.cc +++ b/server/core/server.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/service.cc b/server/core/service.cc index 237f256c8..e7ced7df0 100644 --- a/server/core/service.cc +++ b/server/core/service.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/session.cc b/server/core/session.cc index a5693c1ec..e96946adb 100644 --- a/server/core/session.cc +++ b/server/core/session.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -178,7 +178,6 @@ MXS_SESSION* session_alloc_with_id(SERVICE *service, DCB *client_dcb, uint64_t i static MXS_SESSION* session_alloc_body(SERVICE* service, DCB* client_dcb, MXS_SESSION* session) { - session->ses_is_child = (bool) DCB_IS_CLONE(client_dcb); session->service = service; session->client_dcb = client_dcb; session->stats.connect = time(0); @@ -309,7 +308,6 @@ session_set_dummy(DCB *client_dcb) session = &session_dummy_struct; session->ses_chk_top = CHK_NUM_SESSION; session->ses_chk_tail = CHK_NUM_SESSION; - session->ses_is_child = false; session->service = NULL; session->client_dcb = NULL; session->n_filters = 0; @@ -359,7 +357,7 @@ static void session_simple_free(MXS_SESSION *session, DCB *dcb) { /* Does this possibly need a lock? */ - if (dcb->data && !DCB_IS_CLONE(dcb)) + if (dcb->data) { void * clientdata = dcb->data; dcb->data = NULL; @@ -385,7 +383,7 @@ session_simple_free(MXS_SESSION *session, DCB *dcb) void session_close(MXS_SESSION *session) { - if (!session->ses_is_child && session->router_session) + if (session->router_session) { if (session->state != SESSION_STATE_STOPPING) { @@ -421,7 +419,7 @@ static void session_free(MXS_SESSION *session) * If session is not child of some other session, free router_session. * Otherwise let the parent free it. 
*/ - if (!session->ses_is_child && session->router_session) + if (session->router_session) { session->service->router->freeSession(session->service->router_instance, session->router_session); @@ -450,12 +448,8 @@ static void session_free(MXS_SESSION *session) MXS_INFO("Stopped %s client session [%" PRIu64 "]", session->service->name, session->ses_id); - /** If session doesn't have parent referencing to it, it can be freed */ - if (!session->ses_is_child) - { - session->state = SESSION_STATE_FREE; - session_final_free(session); - } + session->state = SESSION_STATE_FREE; + session_final_free(session); } static void diff --git a/server/core/skygw_utils.cc b/server/core/skygw_utils.cc index 87853bb10..32694dee4 100644 --- a/server/core/skygw_utils.cc +++ b/server/core/skygw_utils.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/spinlock.cc b/server/core/spinlock.cc index b286662ad..d03213a4d 100644 --- a/server/core/spinlock.cc +++ b/server/core/spinlock.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/ssl.cc b/server/core/ssl.cc index b75359f10..59bb8bdd3 100644 --- a/server/core/ssl.cc +++ b/server/core/ssl.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/statistics.cc b/server/core/statistics.cc index 0ab4e8239..67ba4d0e7 100644 --- a/server/core/statistics.cc +++ b/server/core/statistics.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/test/rest-api/test/auth.js b/server/core/test/rest-api/test/auth.js index b8489b905..aa0eaf783 100644 --- a/server/core/test/rest-api/test/auth.js +++ b/server/core/test/rest-api/test/auth.js @@ -6,7 +6,7 @@ function set_auth(auth, value) { .then(function(resp) { var d = JSON.parse(resp) d.data.attributes.parameters.admin_auth = value; - return request.put(auth + host + "/maxscale", { json: d }) + return request.patch(auth + host + "/maxscale", { json: d }) }) .then(function() { return request.get(auth + host + "/maxscale") diff --git a/server/core/test/rest-api/test/core.js b/server/core/test/rest-api/test/core.js index 46fa3f279..ad8a4a12a 100644 --- a/server/core/test/rest-api/test/core.js +++ b/server/core/test/rest-api/test/core.js @@ -6,7 +6,7 @@ function set_value(key, value) { .then(function(resp) { var d = JSON.parse(resp) d.data.attributes.parameters[key] = value; - return request.put(base_url + "/maxscale", { json: d }) + return request.patch(base_url + "/maxscale", { json: d }) }) .then(function() { return request.get(base_url + "/maxscale") diff --git a/server/core/test/rest-api/test/errors.js b/server/core/test/rest-api/test/errors.js index c8885a81e..a1d498048 100644 --- a/server/core/test/rest-api/test/errors.js 
+++ b/server/core/test/rest-api/test/errors.js @@ -7,7 +7,7 @@ describe("Errors", function() it("error on invalid PUT request", function() { - return request.put(base_url + "/servers/server1", { json: {this_is: "a test"}}) + return request.patch(base_url + "/servers/server1", { json: {this_is: "a test"}}) .should.be.rejected }) diff --git a/server/core/test/rest-api/test/http.js b/server/core/test/rest-api/test/http.js index 8287e5826..9a0104a9a 100644 --- a/server/core/test/rest-api/test/http.js +++ b/server/core/test/rest-api/test/http.js @@ -10,7 +10,7 @@ describe("HTTP Headers", function() { resp.headers.etag.should.be.equal("\"0\"") var srv = JSON.parse(resp.body) delete srv.data.relationships - return request.put(base_url + "/servers/server1", {json: srv}) + return request.patch(base_url + "/servers/server1", {json: srv}) }) .then(function() { return request.get(base_url + "/servers/server1", {resolveWithFullResponse: true}) @@ -42,7 +42,7 @@ describe("HTTP Headers", function() { } } - request.put(base_url + "/servers/server1", {json: srv}) + request.patch(base_url + "/servers/server1", {json: srv}) .then(function() { return request.get(base_url + "/servers/server1", {resolveWithFullResponse: true}) }) diff --git a/server/core/test/rest-api/test/logs.js b/server/core/test/rest-api/test/logs.js index 10bd2d4ed..2c2e24326 100644 --- a/server/core/test/rest-api/test/logs.js +++ b/server/core/test/rest-api/test/logs.js @@ -17,7 +17,7 @@ describe("Logs", function() { logs.data.attributes.parameters.throttling.suppress_ms = 1 logs.data.attributes.parameters.throttling.window_ms = 1 - return request.put(base_url + "/maxscale/logs", {json: logs}) + return request.patch(base_url + "/maxscale/logs", {json: logs}) }) .then(function(resp) { return request.get(base_url + "/maxscale/logs") diff --git a/server/core/test/rest-api/test/monitor.js b/server/core/test/rest-api/test/monitor.js index 4d28bec3e..7c4deadcc 100644 --- a/server/core/test/rest-api/test/monitor.js +++ 
b/server/core/test/rest-api/test/monitor.js @@ -27,7 +27,7 @@ describe("Monitor", function() { monitor.data.attributes.parameters = { monitor_interval: 1000 } - return request.put(base_url + "/monitors/" + monitor.data.id, {json:monitor}) + return request.patch(base_url + "/monitors/" + monitor.data.id, {json:monitor}) .should.be.fulfilled }); @@ -53,7 +53,7 @@ describe("Monitor Relationships", function() { .then(function(resp) { var mon = JSON.parse(resp) delete mon.data.relationships.servers - return request.put(base_url + "/monitors/MySQL-Monitor", {json: mon}) + return request.patch(base_url + "/monitors/MySQL-Monitor", {json: mon}) }) .should.be.fulfilled }); @@ -69,7 +69,7 @@ describe("Monitor Relationships", function() { {id: "server3", type: "servers"}, {id: "server4", type: "servers"}, ] - return request.put(base_url + "/monitors/" + monitor.data.id, {json: mon}) + return request.patch(base_url + "/monitors/" + monitor.data.id, {json: mon}) }) .should.be.fulfilled }); @@ -80,7 +80,7 @@ describe("Monitor Relationships", function() { .then(function(resp) { var mon = JSON.parse(resp) delete mon.data.relationships.servers - return request.put(base_url + "/monitors/" + monitor.data.id, {json: mon}) + return request.patch(base_url + "/monitors/" + monitor.data.id, {json: mon}) }) .then(function() { return request.get(base_url + "/monitors/MySQL-Monitor") @@ -93,7 +93,7 @@ describe("Monitor Relationships", function() { {id: "server3", type: "servers"}, {id: "server4", type: "servers"}, ] - return request.put(base_url + "/monitors/MySQL-Monitor", {json: mon}) + return request.patch(base_url + "/monitors/MySQL-Monitor", {json: mon}) }) .should.be.fulfilled }); diff --git a/server/core/test/rest-api/test/server.js b/server/core/test/rest-api/test/server.js index 30f4ae9d4..4e0b86360 100644 --- a/server/core/test/rest-api/test/server.js +++ b/server/core/test/rest-api/test/server.js @@ -38,7 +38,7 @@ describe("Server", function() { it("update server", function() { 
server.data.attributes.parameters.weight = 10 - return request.put(base_url + "/servers/" + server.data.id, { json: server}) + return request.patch(base_url + "/servers/" + server.data.id, { json: server}) .should.be.fulfilled }); @@ -69,7 +69,7 @@ describe("Server Relationships", function() { it("remove relationships", function() { delete rel_server.data["relationships"] - return request.put(base_url + "/servers/" + rel_server.data.id, {json: rel_server}) + return request.patch(base_url + "/servers/" + rel_server.data.id, {json: rel_server}) .should.be.fulfilled }); diff --git a/server/core/test/rest-api/test/service.js b/server/core/test/rest-api/test/service.js index 2aac1fdff..6f445a120 100644 --- a/server/core/test/rest-api/test/service.js +++ b/server/core/test/rest-api/test/service.js @@ -8,7 +8,7 @@ describe("Service", function() { .then(function(resp) { var svc = JSON.parse(resp) svc.data.attributes.parameters.enable_root_user = true - return request.put(base_url + "/services/RW-Split-Router", {json: svc}) + return request.patch(base_url + "/services/RW-Split-Router", {json: svc}) }) .then(function(resp) { return request.get(base_url + "/services/RW-Split-Router") @@ -25,7 +25,7 @@ describe("Service", function() { .then(function(resp) { var svc = JSON.parse(resp) delete svc.data.relationships - return request.put(base_url + "/services/RW-Split-Router", {json: svc}) + return request.patch(base_url + "/services/RW-Split-Router", {json: svc}) }) .then(function(resp) { return request.get(base_url + "/services/RW-Split-Router") @@ -51,7 +51,7 @@ describe("Service", function() { } } - return request.put(base_url + "/services/RW-Split-Router", {json: svc}) + return request.patch(base_url + "/services/RW-Split-Router", {json: svc}) }) .then(function(resp) { return request.get(base_url + "/services/RW-Split-Router") diff --git a/server/core/test/test_utils.h b/server/core/test/test_utils.h index 172461108..f3e99eaea 100644 --- a/server/core/test/test_utils.h +++ 
b/server/core/test/test_utils.h @@ -7,7 +7,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/test/testadminusers.cc b/server/core/test/testadminusers.cc index d1daeeb73..e501c511e 100644 --- a/server/core/test/testadminusers.cc +++ b/server/core/test/testadminusers.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/test/testatomic.cc b/server/core/test/testatomic.cc index f50dbb45b..30988fadd 100644 --- a/server/core/test/testatomic.cc +++ b/server/core/test/testatomic.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/test/testbuffer.cc b/server/core/test/testbuffer.cc index 6a5ee9f80..a7fc3d8bb 100644 --- a/server/core/test/testbuffer.cc +++ b/server/core/test/testbuffer.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/test/testconfig.cc b/server/core/test/testconfig.cc index 564c9356b..215ff1951 100644 --- a/server/core/test/testconfig.cc +++ b/server/core/test/testconfig.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/test/testdcb.cc b/server/core/test/testdcb.cc index d6ece4e9c..011700645 100644 --- a/server/core/test/testdcb.cc +++ b/server/core/test/testdcb.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/test/testfeedback.cc b/server/core/test/testfeedback.cc index 30587ced6..de1dd8e0f 100644 --- a/server/core/test/testfeedback.cc +++ b/server/core/test/testfeedback.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/test/testfilter.cc b/server/core/test/testfilter.cc index 24b603f74..c7c7b267a 100644 --- a/server/core/test/testfilter.cc +++ b/server/core/test/testfilter.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/test/testhash.cc b/server/core/test/testhash.cc index fa0198957..a40cde0d2 100644 --- a/server/core/test/testhash.cc +++ b/server/core/test/testhash.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/test/testhint.cc b/server/core/test/testhint.cc index 01b04a9ee..9db0fafff 100644 --- a/server/core/test/testhint.cc +++ b/server/core/test/testhint.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/test/testhttp.cc b/server/core/test/testhttp.cc index a27e46feb..1e55624a4 100644 --- a/server/core/test/testhttp.cc +++ b/server/core/test/testhttp.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/test/testjson.cc b/server/core/test/testjson.cc index 48df4ba57..50e33a1b2 100644 --- a/server/core/test/testjson.cc +++ b/server/core/test/testjson.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/test/testlog.cc b/server/core/test/testlog.cc index 61c171d1c..0af32ae53 100644 --- a/server/core/test/testlog.cc +++ b/server/core/test/testlog.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/test/testlogorder.cc b/server/core/test/testlogorder.cc index 6790da08b..161ca6944 100644 --- a/server/core/test/testlogorder.cc +++ b/server/core/test/testlogorder.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/test/testlogthrottling.cc b/server/core/test/testlogthrottling.cc index 685d8ea7b..1d6faf6ee 100644 --- a/server/core/test/testlogthrottling.cc +++ b/server/core/test/testlogthrottling.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/test/testmaxscalepcre2.cc b/server/core/test/testmaxscalepcre2.cc index 4ea1c2d9b..773224c51 100644 --- a/server/core/test/testmaxscalepcre2.cc +++ b/server/core/test/testmaxscalepcre2.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/test/testmodulecmd.cc b/server/core/test/testmodulecmd.cc index c757e136c..e98ba442d 100644 --- a/server/core/test/testmodulecmd.cc +++ b/server/core/test/testmodulecmd.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -66,10 +66,10 @@ int test_arguments() TEST(modulecmd_find_command(ns, id) == NULL, "The registered command should not yet be found"); TEST(strlen(modulecmd_get_error()), "Error message should not be empty"); - TEST(modulecmd_register_command(ns, id, test_fn, 2, args1), + TEST(modulecmd_register_command(ns, id, MODULECMD_TYPE_ACTIVE, test_fn, 2, args1), "Registering a command should succeed"); - TEST(!modulecmd_register_command(ns, id, test_fn, 2, args1), + TEST(!modulecmd_register_command(ns, id, MODULECMD_TYPE_ACTIVE, test_fn, 2, args1), "Registering the command a second time should fail"); TEST(strlen(modulecmd_get_error()), "Error message should not be empty"); @@ -162,7 +162,7 @@ int test_optional_arguments() {MODULECMD_ARG_BOOLEAN | MODULECMD_ARG_OPTIONAL, ""} }; - TEST(modulecmd_register_command(ns, id, test_fn2, 2, args1), + TEST(modulecmd_register_command(ns, id, MODULECMD_TYPE_ACTIVE, test_fn2, 2, args1), "Registering a command should succeed"); const MODULECMD *cmd = modulecmd_find_command(ns, id); @@ -234,7 +234,7 @@ int test_module_errors() const char *ns = "test_module_errors"; const char *id = "test_module_errors"; - TEST(modulecmd_register_command(ns, id, test_fn3, 0, NULL), + 
TEST(modulecmd_register_command(ns, id, MODULECMD_TYPE_ACTIVE, test_fn3, 0, NULL), "Registering a command should succeed"); const MODULECMD *cmd = modulecmd_find_command(ns, id); @@ -266,7 +266,7 @@ int test_map() { char id[200]; sprintf(id, "test_map%d", i + 1); - TEST(modulecmd_register_command(map_dom, id, test_fn_map, 0, NULL), + TEST(modulecmd_register_command(map_dom, id, MODULECMD_TYPE_ACTIVE, test_fn_map, 0, NULL), "Registering a command should succeed"); } @@ -332,7 +332,8 @@ int test_pointers() {MODULECMD_ARG_DCB, ""} }; - TEST(modulecmd_register_command(ns, id, ptrfn, 1, args), "Registering a command should succeed"); + TEST(modulecmd_register_command(ns, id, MODULECMD_TYPE_ACTIVE, ptrfn, 1, args), + "Registering a command should succeed"); TEST(strlen(modulecmd_get_error()) == 0, "Error message should be empty"); const MODULECMD *cmd = modulecmd_find_command(ns, id); @@ -366,7 +367,8 @@ int test_domain_matching() {MODULECMD_ARG_MONITOR | MODULECMD_ARG_NAME_MATCHES_DOMAIN, ""} }; - TEST(modulecmd_register_command(ns, id, monfn, 1, args), "Registering a command should succeed"); + TEST(modulecmd_register_command(ns, id, MODULECMD_TYPE_ACTIVE, monfn, 1, args), + "Registering a command should succeed"); TEST(strlen(modulecmd_get_error()) == 0, "Error message should be empty"); const MODULECMD *cmd = modulecmd_find_command(ns, id); diff --git a/server/core/test/testmodutil.cc b/server/core/test/testmodutil.cc index f6b0a6b42..350a1320e 100644 --- a/server/core/test/testmodutil.cc +++ b/server/core/test/testmodutil.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/test/testpoll.cc b/server/core/test/testpoll.cc index 4ef1c0eda..278ab9870 100644 --- a/server/core/test/testpoll.cc +++ b/server/core/test/testpoll.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/test/testqueuemanager.cc b/server/core/test/testqueuemanager.cc index 9f5e22f9e..75d233144 100644 --- a/server/core/test/testqueuemanager.cc +++ b/server/core/test/testqueuemanager.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/test/testsemaphore.cc b/server/core/test/testsemaphore.cc index f5e2b6c59..eadd2be39 100644 --- a/server/core/test/testsemaphore.cc +++ b/server/core/test/testsemaphore.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/test/testserver.cc b/server/core/test/testserver.cc index c87e58f91..4a49df488 100644 --- a/server/core/test/testserver.cc +++ b/server/core/test/testserver.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/test/testservice.cc b/server/core/test/testservice.cc index d0503ba5d..6b76d7d3f 100644 --- a/server/core/test/testservice.cc +++ b/server/core/test/testservice.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/test/testsession.cc b/server/core/test/testsession.cc index dfa626079..d121e4e04 100644 --- a/server/core/test/testsession.cc +++ b/server/core/test/testsession.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/test/testspinlock.cc b/server/core/test/testspinlock.cc index 0553a7453..52d935cc0 100644 --- a/server/core/test/testspinlock.cc +++ b/server/core/test/testspinlock.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/test/testtrxcompare.cc b/server/core/test/testtrxcompare.cc index ae85f91c5..23120e60b 100644 --- a/server/core/test/testtrxcompare.cc +++ b/server/core/test/testtrxcompare.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/test/testtrxtracking.cc b/server/core/test/testtrxtracking.cc index 7ee715f15..88b43732a 100644 --- a/server/core/test/testtrxtracking.cc +++ b/server/core/test/testtrxtracking.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/test/testusers.cc b/server/core/test/testusers.cc index 8d57127f2..695ed2305 100644 --- a/server/core/test/testusers.cc +++ b/server/core/test/testusers.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/test/trxboundaryparser_profile.cc b/server/core/test/trxboundaryparser_profile.cc index be9a6043f..6d02d4b86 100644 --- a/server/core/test/trxboundaryparser_profile.cc +++ b/server/core/test/trxboundaryparser_profile.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/thread.cc b/server/core/thread.cc index 84c86579b..b0856dce0 100644 --- a/server/core/thread.cc +++ b/server/core/thread.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/users.cc b/server/core/users.cc index 6ac253c21..0c56da0eb 100644 --- a/server/core/users.cc +++ b/server/core/users.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/utils.cc b/server/core/utils.cc index daa563345..22db13f12 100644 --- a/server/core/utils.cc +++ b/server/core/utils.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/worker.cc b/server/core/worker.cc index fe27244de..9728aea45 100644 --- a/server/core/worker.cc +++ b/server/core/worker.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/core/workertask.cc b/server/core/workertask.cc index 2b264238c..ecd4be373 100644 --- a/server/core/workertask.cc +++ b/server/core/workertask.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/authenticator/CDCPlainAuth/cdc_plain_auth.c b/server/modules/authenticator/CDCPlainAuth/cdc_plain_auth.c index 30d7e1dfd..dff5dadc5 100644 --- a/server/modules/authenticator/CDCPlainAuth/cdc_plain_auth.c +++ b/server/modules/authenticator/CDCPlainAuth/cdc_plain_auth.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -154,7 +154,7 @@ MXS_MODULE* MXS_CREATE_MODULE() { MODULECMD_ARG_STRING, "Password of the user"} }; - modulecmd_register_command("cdc", "add_user", cdc_add_new_user, 3, args); + modulecmd_register_command("cdc", "add_user", MODULECMD_TYPE_ACTIVE, cdc_add_new_user, 3, args); static MXS_AUTHENTICATOR MyObject = { diff --git a/server/modules/authenticator/GSSAPI/GSSAPIAuth/gssapi_auth.c b/server/modules/authenticator/GSSAPI/GSSAPIAuth/gssapi_auth.c index d16141120..a63c9d7ed 100644 --- a/server/modules/authenticator/GSSAPI/GSSAPIAuth/gssapi_auth.c +++ b/server/modules/authenticator/GSSAPI/GSSAPIAuth/gssapi_auth.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/authenticator/GSSAPI/GSSAPIBackendAuth/gssapi_backend_auth.c b/server/modules/authenticator/GSSAPI/GSSAPIBackendAuth/gssapi_backend_auth.c index 8396b1bde..992964c75 100644 --- a/server/modules/authenticator/GSSAPI/GSSAPIBackendAuth/gssapi_backend_auth.c +++ b/server/modules/authenticator/GSSAPI/GSSAPIBackendAuth/gssapi_backend_auth.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/authenticator/GSSAPI/gssapi_auth.h b/server/modules/authenticator/GSSAPI/gssapi_auth.h index 6a8bd90de..883a1ee0a 100644 --- a/server/modules/authenticator/GSSAPI/gssapi_auth.h +++ b/server/modules/authenticator/GSSAPI/gssapi_auth.h @@ -7,7 +7,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/authenticator/GSSAPI/gssapi_auth_common.c b/server/modules/authenticator/GSSAPI/gssapi_auth_common.c index d4dca714d..3cda02c80 100644 --- a/server/modules/authenticator/GSSAPI/gssapi_auth_common.c +++ b/server/modules/authenticator/GSSAPI/gssapi_auth_common.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/authenticator/HTTPAuth/http_auth.c b/server/modules/authenticator/HTTPAuth/http_auth.c index 1475953a4..62dfcf5ae 100644 --- a/server/modules/authenticator/HTTPAuth/http_auth.c +++ b/server/modules/authenticator/HTTPAuth/http_auth.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/authenticator/MaxAdminAuth/max_admin_auth.c b/server/modules/authenticator/MaxAdminAuth/max_admin_auth.c index d06447f8a..a14f6bb66 100644 --- a/server/modules/authenticator/MaxAdminAuth/max_admin_auth.c +++ b/server/modules/authenticator/MaxAdminAuth/max_admin_auth.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/authenticator/MySQLAuth/dbusers.c b/server/modules/authenticator/MySQLAuth/dbusers.c index 3286562be..9228edfe7 100644 --- a/server/modules/authenticator/MySQLAuth/dbusers.c +++ b/server/modules/authenticator/MySQLAuth/dbusers.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/authenticator/MySQLAuth/mysql_auth.c b/server/modules/authenticator/MySQLAuth/mysql_auth.c index 1515df5b6..b74785cdf 100644 --- a/server/modules/authenticator/MySQLAuth/mysql_auth.c +++ b/server/modules/authenticator/MySQLAuth/mysql_auth.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/authenticator/MySQLAuth/mysql_auth.h b/server/modules/authenticator/MySQLAuth/mysql_auth.h index ec10c5401..b453704c2 100644 --- a/server/modules/authenticator/MySQLAuth/mysql_auth.h +++ b/server/modules/authenticator/MySQLAuth/mysql_auth.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/authenticator/MySQLBackendAuth/mysql_backend_auth.c b/server/modules/authenticator/MySQLBackendAuth/mysql_backend_auth.c index e034d3c23..2564a4f97 100644 --- a/server/modules/authenticator/MySQLBackendAuth/mysql_backend_auth.c +++ b/server/modules/authenticator/MySQLBackendAuth/mysql_backend_auth.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/authenticator/NullAuthAllow/null_auth_allow.c b/server/modules/authenticator/NullAuthAllow/null_auth_allow.c index 596b1a045..1d6dbd63c 100644 --- a/server/modules/authenticator/NullAuthAllow/null_auth_allow.c +++ b/server/modules/authenticator/NullAuthAllow/null_auth_allow.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/authenticator/NullAuthDeny/null_auth_deny.c b/server/modules/authenticator/NullAuthDeny/null_auth_deny.c index 721702751..c2ff64215 100644 --- a/server/modules/authenticator/NullAuthDeny/null_auth_deny.c +++ b/server/modules/authenticator/NullAuthDeny/null_auth_deny.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/cache.cc b/server/modules/filter/cache/cache.cc index 2c24441d2..1542e03f5 100644 --- a/server/modules/filter/cache/cache.cc +++ b/server/modules/filter/cache/cache.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/cache.hh b/server/modules/filter/cache/cache.hh index da84a2b9b..84653745d 100644 --- a/server/modules/filter/cache/cache.hh +++ b/server/modules/filter/cache/cache.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/cache_storage_api.c b/server/modules/filter/cache/cache_storage_api.c index 93e3ea84a..13b5185c4 100644 --- a/server/modules/filter/cache/cache_storage_api.c +++ b/server/modules/filter/cache/cache_storage_api.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/cache_storage_api.cc b/server/modules/filter/cache/cache_storage_api.cc index 637013d7b..107b4b51c 100644 --- a/server/modules/filter/cache/cache_storage_api.cc +++ b/server/modules/filter/cache/cache_storage_api.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/cache_storage_api.h b/server/modules/filter/cache/cache_storage_api.h index ff065a92c..479420fb8 100644 --- a/server/modules/filter/cache/cache_storage_api.h +++ b/server/modules/filter/cache/cache_storage_api.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/cache_storage_api.hh b/server/modules/filter/cache/cache_storage_api.hh index ac26e7f81..e5c7dd081 100644 --- a/server/modules/filter/cache/cache_storage_api.hh +++ b/server/modules/filter/cache/cache_storage_api.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/cachefilter.cc b/server/modules/filter/cache/cachefilter.cc index 5417dec2b..c851bad8e 100644 --- a/server/modules/filter/cache/cachefilter.cc +++ b/server/modules/filter/cache/cachefilter.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -147,8 +147,8 @@ extern "C" MXS_MODULE* MXS_CREATE_MODULE() { MODULECMD_ARG_FILTER | MODULECMD_ARG_NAME_MATCHES_DOMAIN, "Cache name" } }; - modulecmd_register_command(MXS_MODULE_NAME, "show", cache_command_show, - MXS_ARRAY_NELEMS(show_argv), show_argv); + modulecmd_register_command(MXS_MODULE_NAME, "show", MODULECMD_TYPE_PASSIVE, + cache_command_show, MXS_ARRAY_NELEMS(show_argv), show_argv); MXS_NOTICE("Initialized cache module %s.\n", VERSION_STRING); diff --git a/server/modules/filter/cache/cachefilter.h b/server/modules/filter/cache/cachefilter.h index ba7b8636f..cda1c2130 100644 --- a/server/modules/filter/cache/cachefilter.h +++ b/server/modules/filter/cache/cachefilter.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/cachefilter.hh b/server/modules/filter/cache/cachefilter.hh index b62968c53..c927ea478 100644 --- a/server/modules/filter/cache/cachefilter.hh +++ b/server/modules/filter/cache/cachefilter.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/cachefiltersession.cc b/server/modules/filter/cache/cachefiltersession.cc index e63fb22d5..5eb5f5d4f 100644 --- a/server/modules/filter/cache/cachefiltersession.cc +++ b/server/modules/filter/cache/cachefiltersession.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/cachefiltersession.hh b/server/modules/filter/cache/cachefiltersession.hh index b488f61c0..d1e51985b 100644 --- a/server/modules/filter/cache/cachefiltersession.hh +++ b/server/modules/filter/cache/cachefiltersession.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/cachemt.cc b/server/modules/filter/cache/cachemt.cc index 57cb3571d..274ced1af 100644 --- a/server/modules/filter/cache/cachemt.cc +++ b/server/modules/filter/cache/cachemt.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/cachemt.hh b/server/modules/filter/cache/cachemt.hh index 23a78a5ca..0965ed628 100644 --- a/server/modules/filter/cache/cachemt.hh +++ b/server/modules/filter/cache/cachemt.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/cachept.cc b/server/modules/filter/cache/cachept.cc index 64ebf17c1..bf6433b85 100644 --- a/server/modules/filter/cache/cachept.cc +++ b/server/modules/filter/cache/cachept.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/cachept.hh b/server/modules/filter/cache/cachept.hh index e9f0e544a..21266a910 100644 --- a/server/modules/filter/cache/cachept.hh +++ b/server/modules/filter/cache/cachept.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/cachesimple.cc b/server/modules/filter/cache/cachesimple.cc index 6c64f3e20..b0e6902fd 100644 --- a/server/modules/filter/cache/cachesimple.cc +++ b/server/modules/filter/cache/cachesimple.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/cachesimple.hh b/server/modules/filter/cache/cachesimple.hh index 4a2d29e3d..0fee6f7e1 100644 --- a/server/modules/filter/cache/cachesimple.hh +++ b/server/modules/filter/cache/cachesimple.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/cachest.cc b/server/modules/filter/cache/cachest.cc index ed31a4a02..c622cfa92 100644 --- a/server/modules/filter/cache/cachest.cc +++ b/server/modules/filter/cache/cachest.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/cachest.hh b/server/modules/filter/cache/cachest.hh index 97c293d4e..f7ee25fdc 100644 --- a/server/modules/filter/cache/cachest.hh +++ b/server/modules/filter/cache/cachest.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/lrustorage.cc b/server/modules/filter/cache/lrustorage.cc index f1e948879..d220317f2 100644 --- a/server/modules/filter/cache/lrustorage.cc +++ b/server/modules/filter/cache/lrustorage.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/lrustorage.hh b/server/modules/filter/cache/lrustorage.hh index b13ab93c7..117ba12f4 100644 --- a/server/modules/filter/cache/lrustorage.hh +++ b/server/modules/filter/cache/lrustorage.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/lrustoragemt.cc b/server/modules/filter/cache/lrustoragemt.cc index e864b20ff..a4a029c90 100644 --- a/server/modules/filter/cache/lrustoragemt.cc +++ b/server/modules/filter/cache/lrustoragemt.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/lrustoragemt.hh b/server/modules/filter/cache/lrustoragemt.hh index 06de6c090..501f28c99 100644 --- a/server/modules/filter/cache/lrustoragemt.hh +++ b/server/modules/filter/cache/lrustoragemt.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/lrustoragest.cc b/server/modules/filter/cache/lrustoragest.cc index fca2e27b3..19ba0bbcc 100644 --- a/server/modules/filter/cache/lrustoragest.cc +++ b/server/modules/filter/cache/lrustoragest.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/lrustoragest.hh b/server/modules/filter/cache/lrustoragest.hh index eb118c893..c391495bc 100644 --- a/server/modules/filter/cache/lrustoragest.hh +++ b/server/modules/filter/cache/lrustoragest.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/rules.cc b/server/modules/filter/cache/rules.cc index 3e4e7dd6f..84f964c3c 100644 --- a/server/modules/filter/cache/rules.cc +++ b/server/modules/filter/cache/rules.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/rules.h b/server/modules/filter/cache/rules.h index 34048cd27..ae4ff208f 100644 --- a/server/modules/filter/cache/rules.h +++ b/server/modules/filter/cache/rules.h @@ -7,7 +7,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/storage.cc b/server/modules/filter/cache/storage.cc index 6dbf6e7aa..d6a503fad 100644 --- a/server/modules/filter/cache/storage.cc +++ b/server/modules/filter/cache/storage.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/storage.hh b/server/modules/filter/cache/storage.hh index ecaba0809..8213f0d9c 100644 --- a/server/modules/filter/cache/storage.hh +++ b/server/modules/filter/cache/storage.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/storage/storage_inmemory/inmemorystorage.cc b/server/modules/filter/cache/storage/storage_inmemory/inmemorystorage.cc index 6bd33f51e..a3163d23f 100644 --- a/server/modules/filter/cache/storage/storage_inmemory/inmemorystorage.cc +++ b/server/modules/filter/cache/storage/storage_inmemory/inmemorystorage.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/storage/storage_inmemory/inmemorystorage.hh b/server/modules/filter/cache/storage/storage_inmemory/inmemorystorage.hh index 0ec2f6e4b..f778b0c36 100644 --- a/server/modules/filter/cache/storage/storage_inmemory/inmemorystorage.hh +++ b/server/modules/filter/cache/storage/storage_inmemory/inmemorystorage.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragemt.cc b/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragemt.cc index 161734ab4..f42ebbc34 100644 --- a/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragemt.cc +++ b/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragemt.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragemt.hh b/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragemt.hh index 2c4392ba5..c73bcef85 100644 --- a/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragemt.hh +++ b/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragemt.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragest.cc b/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragest.cc index f3d49d64c..989189913 100644 --- a/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragest.cc +++ b/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragest.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragest.hh b/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragest.hh index 35a42448e..cc0259456 100644 --- a/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragest.hh +++ b/server/modules/filter/cache/storage/storage_inmemory/inmemorystoragest.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/storage/storage_inmemory/storage_inmemory.cc b/server/modules/filter/cache/storage/storage_inmemory/storage_inmemory.cc index c50c0a04b..3672c88d0 100644 --- a/server/modules/filter/cache/storage/storage_inmemory/storage_inmemory.cc +++ b/server/modules/filter/cache/storage/storage_inmemory/storage_inmemory.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/storage/storage_rocksdb/rocksdbinternals.cc b/server/modules/filter/cache/storage/storage_rocksdb/rocksdbinternals.cc index 4874df7aa..fa2151dfc 100644 --- a/server/modules/filter/cache/storage/storage_rocksdb/rocksdbinternals.cc +++ b/server/modules/filter/cache/storage/storage_rocksdb/rocksdbinternals.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/storage/storage_rocksdb/rocksdbinternals.hh b/server/modules/filter/cache/storage/storage_rocksdb/rocksdbinternals.hh index 14582df32..f3ff307fe 100644 --- a/server/modules/filter/cache/storage/storage_rocksdb/rocksdbinternals.hh +++ b/server/modules/filter/cache/storage/storage_rocksdb/rocksdbinternals.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/storage/storage_rocksdb/rocksdbstorage.cc b/server/modules/filter/cache/storage/storage_rocksdb/rocksdbstorage.cc index 64524f6a4..c289a868a 100644 --- a/server/modules/filter/cache/storage/storage_rocksdb/rocksdbstorage.cc +++ b/server/modules/filter/cache/storage/storage_rocksdb/rocksdbstorage.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/storage/storage_rocksdb/rocksdbstorage.hh b/server/modules/filter/cache/storage/storage_rocksdb/rocksdbstorage.hh index 80275158e..a18154342 100644 --- a/server/modules/filter/cache/storage/storage_rocksdb/rocksdbstorage.hh +++ b/server/modules/filter/cache/storage/storage_rocksdb/rocksdbstorage.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/storage/storage_rocksdb/storage_rocksdb.cc b/server/modules/filter/cache/storage/storage_rocksdb/storage_rocksdb.cc index 160d1b256..556ebd8ec 100644 --- a/server/modules/filter/cache/storage/storage_rocksdb/storage_rocksdb.cc +++ b/server/modules/filter/cache/storage/storage_rocksdb/storage_rocksdb.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/storage/storagemodule.hh b/server/modules/filter/cache/storage/storagemodule.hh index dae4eb2d5..bd87336f7 100644 --- a/server/modules/filter/cache/storage/storagemodule.hh +++ b/server/modules/filter/cache/storage/storagemodule.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/storagefactory.cc b/server/modules/filter/cache/storagefactory.cc index 9c22b396e..a6f1890cb 100644 --- a/server/modules/filter/cache/storagefactory.cc +++ b/server/modules/filter/cache/storagefactory.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/storagefactory.hh b/server/modules/filter/cache/storagefactory.hh index 3c8d59d5f..5273827a2 100644 --- a/server/modules/filter/cache/storagefactory.hh +++ b/server/modules/filter/cache/storagefactory.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/storagereal.cc b/server/modules/filter/cache/storagereal.cc index 44bb8be9c..12571b805 100644 --- a/server/modules/filter/cache/storagereal.cc +++ b/server/modules/filter/cache/storagereal.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/storagereal.hh b/server/modules/filter/cache/storagereal.hh index 79b5b2370..26d96bb98 100644 --- a/server/modules/filter/cache/storagereal.hh +++ b/server/modules/filter/cache/storagereal.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/test/tester.cc b/server/modules/filter/cache/test/tester.cc index 38adaf648..104f4031f 100644 --- a/server/modules/filter/cache/test/tester.cc +++ b/server/modules/filter/cache/test/tester.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/test/tester.hh b/server/modules/filter/cache/test/tester.hh index 46149d552..72dcc6614 100644 --- a/server/modules/filter/cache/test/tester.hh +++ b/server/modules/filter/cache/test/tester.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/test/testerlrustorage.cc b/server/modules/filter/cache/test/testerlrustorage.cc index 1a8cbdca2..cd27169a9 100644 --- a/server/modules/filter/cache/test/testerlrustorage.cc +++ b/server/modules/filter/cache/test/testerlrustorage.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/test/testerlrustorage.hh b/server/modules/filter/cache/test/testerlrustorage.hh index 3f28941a0..eee412aee 100644 --- a/server/modules/filter/cache/test/testerlrustorage.hh +++ b/server/modules/filter/cache/test/testerlrustorage.hh @@ -6,7 +6,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/test/testerrawstorage.cc b/server/modules/filter/cache/test/testerrawstorage.cc index 890fd15f4..4794d75c1 100644 --- a/server/modules/filter/cache/test/testerrawstorage.cc +++ b/server/modules/filter/cache/test/testerrawstorage.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/test/testerrawstorage.hh b/server/modules/filter/cache/test/testerrawstorage.hh index 9e122b803..560057efb 100644 --- a/server/modules/filter/cache/test/testerrawstorage.hh +++ b/server/modules/filter/cache/test/testerrawstorage.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/test/testerstorage.cc b/server/modules/filter/cache/test/testerstorage.cc index 2877a8bf0..cfc1dc808 100644 --- a/server/modules/filter/cache/test/testerstorage.cc +++ b/server/modules/filter/cache/test/testerstorage.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/test/testerstorage.hh b/server/modules/filter/cache/test/testerstorage.hh index 8ec627cb4..ccf9b962d 100644 --- a/server/modules/filter/cache/test/testerstorage.hh +++ b/server/modules/filter/cache/test/testerstorage.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/test/testkeygeneration.cc b/server/modules/filter/cache/test/testkeygeneration.cc index a17518505..37d86c004 100644 --- a/server/modules/filter/cache/test/testkeygeneration.cc +++ b/server/modules/filter/cache/test/testkeygeneration.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/test/testlrustorage.cc b/server/modules/filter/cache/test/testlrustorage.cc index 75431726c..200bb45c8 100644 --- a/server/modules/filter/cache/test/testlrustorage.cc +++ b/server/modules/filter/cache/test/testlrustorage.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/test/testrawstorage.cc b/server/modules/filter/cache/test/testrawstorage.cc index 6f07691ca..45fc4f866 100644 --- a/server/modules/filter/cache/test/testrawstorage.cc +++ b/server/modules/filter/cache/test/testrawstorage.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/test/testrules.cc b/server/modules/filter/cache/test/testrules.cc index 932b51870..4a85688f5 100644 --- a/server/modules/filter/cache/test/testrules.cc +++ b/server/modules/filter/cache/test/testrules.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/test/teststorage.cc b/server/modules/filter/cache/test/teststorage.cc index 8f243b85d..3f7e996c5 100644 --- a/server/modules/filter/cache/test/teststorage.cc +++ b/server/modules/filter/cache/test/teststorage.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/cache/test/teststorage.hh b/server/modules/filter/cache/test/teststorage.hh index 727015091..3f223b07e 100644 --- a/server/modules/filter/cache/test/teststorage.hh +++ b/server/modules/filter/cache/test/teststorage.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/ccrfilter/ccrfilter.c b/server/modules/filter/ccrfilter/ccrfilter.c index c4bca0773..8dd4dcc02 100644 --- a/server/modules/filter/ccrfilter/ccrfilter.c +++ b/server/modules/filter/ccrfilter/ccrfilter.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/dbfwfilter/dbfw_rule_check.c b/server/modules/filter/dbfwfilter/dbfw_rule_check.c index 6da37ec36..77a1d9854 100644 --- a/server/modules/filter/dbfwfilter/dbfw_rule_check.c +++ b/server/modules/filter/dbfwfilter/dbfw_rule_check.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/dbfwfilter/dbfwfilter.c b/server/modules/filter/dbfwfilter/dbfwfilter.c index e6c58c540..094e4d199 100644 --- a/server/modules/filter/dbfwfilter/dbfwfilter.c +++ b/server/modules/filter/dbfwfilter/dbfwfilter.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -826,7 +826,8 @@ MXS_MODULE* MXS_CREATE_MODULE() {MODULECMD_ARG_STRING | MODULECMD_ARG_OPTIONAL, "Path to rule file"} }; - modulecmd_register_command(MXS_MODULE_NAME, "rules/reload", dbfw_reload_rules, 2, args_rules_reload); + modulecmd_register_command(MXS_MODULE_NAME, "rules/reload", MODULECMD_TYPE_ACTIVE, + dbfw_reload_rules, 2, args_rules_reload); modulecmd_arg_type_t args_rules_show[] = { @@ -834,7 +835,8 @@ MXS_MODULE* MXS_CREATE_MODULE() {MODULECMD_ARG_FILTER | MODULECMD_ARG_NAME_MATCHES_DOMAIN, "Filter to inspect"} }; - modulecmd_register_command(MXS_MODULE_NAME, "rules", dbfw_show_rules, 2, args_rules_show); + modulecmd_register_command(MXS_MODULE_NAME, "rules", MODULECMD_TYPE_PASSIVE, + dbfw_show_rules, 2, args_rules_show); static MXS_FILTER_OBJECT MyObject = { diff --git a/server/modules/filter/dbfwfilter/dbfwfilter.h b/server/modules/filter/dbfwfilter/dbfwfilter.h index 09de750dc..4c40353cf 100644 --- a/server/modules/filter/dbfwfilter/dbfwfilter.h +++ b/server/modules/filter/dbfwfilter/dbfwfilter.h @@ -7,7 +7,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/dbfwfilter/ruleparser.y b/server/modules/filter/dbfwfilter/ruleparser.y index 33ad53cfe..f9b34ea9b 100644 --- a/server/modules/filter/dbfwfilter/ruleparser.y +++ b/server/modules/filter/dbfwfilter/ruleparser.y @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/dbfwfilter/token.l b/server/modules/filter/dbfwfilter/token.l index 324e2cbf5..2fe221db4 100644 --- a/server/modules/filter/dbfwfilter/token.l +++ b/server/modules/filter/dbfwfilter/token.l @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/hintfilter/hintfilter.c b/server/modules/filter/hintfilter/hintfilter.c index 7765cf719..87541e148 100644 --- a/server/modules/filter/hintfilter/hintfilter.c +++ b/server/modules/filter/hintfilter/hintfilter.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/hintfilter/hintparser.c b/server/modules/filter/hintfilter/hintparser.c index 23d87e167..3881df8bb 100644 --- a/server/modules/filter/hintfilter/hintparser.c +++ b/server/modules/filter/hintfilter/hintparser.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/hintfilter/mysqlhint.h b/server/modules/filter/hintfilter/mysqlhint.h index cb6ac899d..38004e6ef 100644 --- a/server/modules/filter/hintfilter/mysqlhint.h +++ b/server/modules/filter/hintfilter/mysqlhint.h @@ -7,7 +7,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/insertstream/insertstream.c b/server/modules/filter/insertstream/insertstream.c index 3a0172bbc..e61b2236d 100644 --- a/server/modules/filter/insertstream/insertstream.c +++ b/server/modules/filter/insertstream/insertstream.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/luafilter/luafilter.c b/server/modules/filter/luafilter/luafilter.c index 721116bb5..8706de491 100644 --- a/server/modules/filter/luafilter/luafilter.c +++ b/server/modules/filter/luafilter/luafilter.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/masking/maskingfilter.cc b/server/modules/filter/masking/maskingfilter.cc index 325e3d13e..c1bf792d5 100644 --- a/server/modules/filter/masking/maskingfilter.cc +++ b/server/modules/filter/masking/maskingfilter.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -64,7 +64,8 @@ extern "C" MXS_MODULE* MXS_CREATE_MODULE() { MODULECMD_ARG_FILTER | MODULECMD_ARG_NAME_MATCHES_DOMAIN, "Masking name" } }; - modulecmd_register_command(MXS_MODULE_NAME, "reload", masking_command_reload, + modulecmd_register_command(MXS_MODULE_NAME, "reload", + MODULECMD_TYPE_ACTIVE, masking_command_reload, MXS_ARRAY_NELEMS(reload_argv), reload_argv); MXS_NOTICE("Masking module %s initialized.", VERSION_STRING); diff --git a/server/modules/filter/masking/maskingfilter.hh b/server/modules/filter/masking/maskingfilter.hh index a540adcb9..816258556 100644 --- a/server/modules/filter/masking/maskingfilter.hh +++ b/server/modules/filter/masking/maskingfilter.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/masking/maskingfilterconfig.cc b/server/modules/filter/masking/maskingfilterconfig.cc index 0ef0de005..1a2dc46d6 100644 --- a/server/modules/filter/masking/maskingfilterconfig.cc +++ b/server/modules/filter/masking/maskingfilterconfig.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/masking/maskingfilterconfig.hh b/server/modules/filter/masking/maskingfilterconfig.hh index 540ff5f99..9d5c475ef 100644 --- a/server/modules/filter/masking/maskingfilterconfig.hh +++ b/server/modules/filter/masking/maskingfilterconfig.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/masking/maskingfiltersession.cc b/server/modules/filter/masking/maskingfiltersession.cc index 1f0c4595e..9aa6a61d0 100644 --- a/server/modules/filter/masking/maskingfiltersession.cc +++ b/server/modules/filter/masking/maskingfiltersession.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/masking/maskingfiltersession.hh b/server/modules/filter/masking/maskingfiltersession.hh index b80ab26bd..0c9ae5f3c 100644 --- a/server/modules/filter/masking/maskingfiltersession.hh +++ b/server/modules/filter/masking/maskingfiltersession.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/masking/maskingrules.cc b/server/modules/filter/masking/maskingrules.cc index a3241402a..c46636498 100644 --- a/server/modules/filter/masking/maskingrules.cc +++ b/server/modules/filter/masking/maskingrules.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -413,19 +413,34 @@ auto_ptr create_rule_from_elements(json_t* pReplace, json_t* pValue = json_object_get(pWith, KEY_VALUE); json_t* pFill = json_object_get(pWith, KEY_FILL); - if ((pValue || pFill) && - (!pValue || json_is_string(pValue)) && - (!pFill || json_is_string(pFill))) + if (!pFill) { - sRule = create_rule_from_elements(pColumn, pTable, pDatabase, - pValue, pFill, - pApplies_to, pExempted); + // Allowed. Use default value for fill and add it to pWith. 
+ pFill = json_string("X"); + if (pFill) + { + json_object_set_new(pWith, KEY_FILL, pFill); + } + else + { + MXS_ERROR("json_string() error, cannot produce a valid rule."); + } } - else + if (pFill) { - MXS_ERROR("The '%s' object of a masking rule does not have either '%s' " - "or '%s' as keys, or their values are not strings.", - KEY_WITH, KEY_VALUE, KEY_FILL); + if ((!pValue || (json_is_string(pValue) && json_string_length(pValue))) && + (json_is_string(pFill) && json_string_length(pFill))) + { + sRule = create_rule_from_elements(pColumn, pTable, pDatabase, + pValue, pFill, + pApplies_to, pExempted); + } + else + { + MXS_ERROR("One of the keys '%s' or '%s' of masking rule object '%s' " + "has a non-string value or the string is empty.", + KEY_VALUE, KEY_FILL, KEY_WITH); + } } } else diff --git a/server/modules/filter/masking/maskingrules.hh b/server/modules/filter/masking/maskingrules.hh index 61efcc28c..823559e92 100644 --- a/server/modules/filter/masking/maskingrules.hh +++ b/server/modules/filter/masking/maskingrules.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/masking/mysql.hh b/server/modules/filter/masking/mysql.hh index 351be6b7d..44b5b65a5 100644 --- a/server/modules/filter/masking/mysql.hh +++ b/server/modules/filter/masking/mysql.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/masking/test/testrules.cc b/server/modules/filter/masking/test/testrules.cc index 05286a1d1..279c05d03 100644 --- a/server/modules/filter/masking/test/testrules.cc +++ b/server/modules/filter/masking/test/testrules.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/maxrows/maxrows.c b/server/modules/filter/maxrows/maxrows.c index 6021bc7ae..e9a6d56cf 100644 --- a/server/modules/filter/maxrows/maxrows.c +++ b/server/modules/filter/maxrows/maxrows.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/maxrows/maxrows.h b/server/modules/filter/maxrows/maxrows.h index 72a524297..d5aad0da6 100644 --- a/server/modules/filter/maxrows/maxrows.h +++ b/server/modules/filter/maxrows/maxrows.h @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/mqfilter/mqfilter.c b/server/modules/filter/mqfilter/mqfilter.c index 2c26baacd..71622b4d3 100644 --- a/server/modules/filter/mqfilter/mqfilter.c +++ b/server/modules/filter/mqfilter/mqfilter.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/namedserverfilter/namedserverfilter.cc b/server/modules/filter/namedserverfilter/namedserverfilter.cc index a67f0c8de..9328b821b 100644 --- a/server/modules/filter/namedserverfilter/namedserverfilter.cc +++ b/server/modules/filter/namedserverfilter/namedserverfilter.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/namedserverfilter/namedserverfilter.hh b/server/modules/filter/namedserverfilter/namedserverfilter.hh index f9d4f0f8d..3a2175e98 100644 --- a/server/modules/filter/namedserverfilter/namedserverfilter.hh +++ b/server/modules/filter/namedserverfilter/namedserverfilter.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/nullfilter/nullfilter.cc b/server/modules/filter/nullfilter/nullfilter.cc index 55256c592..696636781 100644 --- a/server/modules/filter/nullfilter/nullfilter.cc +++ b/server/modules/filter/nullfilter/nullfilter.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/nullfilter/nullfilter.hh b/server/modules/filter/nullfilter/nullfilter.hh index 6344b5cea..f5f139a78 100644 --- a/server/modules/filter/nullfilter/nullfilter.hh +++ b/server/modules/filter/nullfilter/nullfilter.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/nullfilter/nullfiltersession.cc b/server/modules/filter/nullfilter/nullfiltersession.cc index 5fbc48472..a43e35fa2 100644 --- a/server/modules/filter/nullfilter/nullfiltersession.cc +++ b/server/modules/filter/nullfilter/nullfiltersession.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/nullfilter/nullfiltersession.hh b/server/modules/filter/nullfilter/nullfiltersession.hh index 3102b8cb3..9d455c278 100644 --- a/server/modules/filter/nullfilter/nullfiltersession.hh +++ b/server/modules/filter/nullfilter/nullfiltersession.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/qlafilter/qlafilter.c b/server/modules/filter/qlafilter/qlafilter.c index 0b852891a..0f4b9ee1a 100644 --- a/server/modules/filter/qlafilter/qlafilter.c +++ b/server/modules/filter/qlafilter/qlafilter.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/regexfilter/regexfilter.c b/server/modules/filter/regexfilter/regexfilter.c index 3e1991155..525b0e9d3 100644 --- a/server/modules/filter/regexfilter/regexfilter.c +++ b/server/modules/filter/regexfilter/regexfilter.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/tee/CMakeLists.txt b/server/modules/filter/tee/CMakeLists.txt index 8673d68e0..16bc63e8e 100644 --- a/server/modules/filter/tee/CMakeLists.txt +++ b/server/modules/filter/tee/CMakeLists.txt @@ -1,4 +1,4 @@ -add_library(tee SHARED tee.c) -target_link_libraries(tee maxscale-common) +add_library(tee SHARED tee.cc teesession.cc local_client.cc) +target_link_libraries(tee maxscale-common MySQLCommon) set_target_properties(tee PROPERTIES VERSION "1.0.0") install_module(tee core) diff --git a/server/modules/filter/tee/local_client.cc b/server/modules/filter/tee/local_client.cc new file mode 100644 index 000000000..263379031 --- /dev/null +++ b/server/modules/filter/tee/local_client.cc @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2016 MariaDB Corporation Ab + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file and at www.mariadb.com/bsl11. + * + * Change Date: 2020-01-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2 or later of the General + * Public License. 
+ */ + +#include "local_client.hh" + +#include + +// TODO: Find a way to cleanly expose this +#include "../../../core/maxscale/worker.hh" + +#ifdef EPOLLRDHUP +#define ERROR_EVENTS (EPOLLRDHUP | EPOLLHUP) +#else +#define ERROR_EVENTS EPOLLHUP +#endif + +static const uint32_t poll_events = EPOLLIN | EPOLLOUT | EPOLLET | ERROR_EVENTS; + +LocalClient::LocalClient(MXS_SESSION* session, int fd): + m_state(VC_WAITING_HANDSHAKE), + m_sock(fd), + m_expected_bytes(0), + m_session(session) +{ + MXS_POLL_DATA::handler = LocalClient::poll_handler; + MySQLProtocol* client = (MySQLProtocol*)m_session->client_dcb->protocol; + m_protocol = {}; + m_protocol.charset = client->charset; + m_protocol.client_capabilities = client->client_capabilities; + m_protocol.extra_capabilities = client->extra_capabilities; +} + +LocalClient::~LocalClient() +{ + if (m_state != VC_ERROR) + { + close(m_sock); + } +} + +bool LocalClient::queue_query(GWBUF* buffer) +{ + GWBUF* my_buf = NULL; + + if (m_state != VC_ERROR && (my_buf = gwbuf_clone(buffer))) + { + m_queue.push_back(my_buf); + + if (m_state == VC_OK) + { + drain_queue(); + } + } + + return my_buf != NULL; +} + +void LocalClient::error() +{ + close(m_sock); + m_state = VC_ERROR; +} + +void LocalClient::process(uint32_t events) +{ + + if (events & EPOLLIN) + { + GWBUF* buf = read_complete_packet(); + + if (buf) + { + if (m_state == VC_WAITING_HANDSHAKE) + { + if (gw_decode_mysql_server_handshake(&m_protocol, GWBUF_DATA(buf) + MYSQL_HEADER_LEN) == 0) + { + GWBUF* response = gw_generate_auth_response(m_session, &m_protocol, false, false); + m_queue.push_front(response); + m_state = VC_RESPONSE_SENT; + } + else + { + error(); + } + } + else if (m_state == VC_RESPONSE_SENT) + { + if (mxs_mysql_is_ok_packet(buf)) + { + m_state = VC_OK; + } + else + { + error(); + } + } + + gwbuf_free(buf); + } + } + + if (events & EPOLLOUT) + { + /** Queue is drained */ + } + + if (events & ERROR_EVENTS) + { + error(); + } + + if (m_queue.size() && m_state != 
VC_ERROR) + { + drain_queue(); + } +} + +GWBUF* LocalClient::read_complete_packet() +{ + GWBUF* rval = NULL; + + while (true) + { + uint8_t buffer[1024]; + int rc = read(m_sock, buffer, sizeof(buffer)); + + if (rc == -1) + { + if (errno != EAGAIN && errno != EWOULDBLOCK) + { + MXS_ERROR("Failed to read from backend: %d, %s", errno, mxs_strerror(errno)); + error(); + } + break; + } + + mxs::Buffer chunk(buffer, rc); + m_partial.append(chunk); + size_t len = m_partial.length(); + + if (m_expected_bytes == 0 && len >= 3) + { + mxs::Buffer::iterator iter = m_partial.begin(); + m_expected_bytes = MYSQL_HEADER_LEN; + m_expected_bytes += *iter++; + m_expected_bytes += (*iter++ << 8); + m_expected_bytes += (*iter++ << 16); + } + + if (len >= m_expected_bytes) + { + /** Read complete packet. Reset expected byte count and make + * the buffer contiguous. */ + m_expected_bytes = 0; + m_partial.make_contiguous(); + rval = m_partial.release(); + break; + } + + } + + return rval; +} + +void LocalClient::drain_queue() +{ + bool more = true; + + while (m_queue.size() && more) + { + /** Grab a buffer from the queue */ + GWBUF* buf = m_queue.front().release(); + m_queue.pop_front(); + + while (buf) + { + int rc = write(m_sock, GWBUF_DATA(buf), GWBUF_LENGTH(buf)); + + if (rc > 0) + { + buf = gwbuf_consume(buf, rc); + } + else + { + if (rc == -1 && errno != EAGAIN && errno != EWOULDBLOCK) + { + MXS_ERROR("Failed to write to backend: %d, %s", errno, mxs_strerror(errno)); + error(); + } + + m_queue.push_front(buf); + more = false; + break; + } + } + } +} + +uint32_t LocalClient::poll_handler(struct mxs_poll_data* data, int wid, uint32_t events) +{ + LocalClient* client = static_cast(data); + client->process(events); + return 0; +} + +LocalClient* LocalClient::create(MXS_SESSION* session, SERVICE* service) +{ + LocalClient* rval = NULL; + LISTENER_ITERATOR iter; + + for (SERV_LISTENER* listener = listener_iterator_init(service, &iter); + listener; listener = listener_iterator_next(&iter)) 
+ { + if (listener->port > 0) + { + /** Pick the first network listener */ + sockaddr_storage addr; + int fd = open_network_socket(MXS_SOCKET_NETWORK, &addr, "127.0.0.1", + service->ports->port); + + if (connect(fd, (struct sockaddr*)&addr, sizeof(addr)) == 0 || errno == EINPROGRESS) + { + LocalClient* relay = new (std::nothrow) LocalClient(session, fd); + + if (relay) + { + mxs::Worker* worker = mxs::Worker::get_current(); + + if (worker->add_fd(fd, poll_events, (MXS_POLL_DATA*)relay)) + { + rval = relay; + } + else + { + delete rval; + rval = NULL; + } + } + } + + break; + } + } + + return rval; +} diff --git a/server/modules/filter/tee/local_client.hh b/server/modules/filter/tee/local_client.hh new file mode 100644 index 000000000..b20af3c7f --- /dev/null +++ b/server/modules/filter/tee/local_client.hh @@ -0,0 +1,75 @@ +#pragma once +/* + * Copyright (c) 2016 MariaDB Corporation Ab + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file and at www.mariadb.com/bsl11. + * + * Change Date: 2020-01-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2 or later of the General + * Public License. 
+ */ + +#include + +#include + +#include +#include +#include + +/** A DCB-like client abstraction which ignores responses */ +class LocalClient: public MXS_POLL_DATA +{ + LocalClient(const LocalClient&); + LocalClient& operator=(const LocalClient&); + +public: + ~LocalClient(); + + /** + * Create a local client for a service + * + * @param session Client session + * @param service Service to connect to + * + * @return New virtual client or NULL on error + */ + static LocalClient* create(MXS_SESSION* session, SERVICE* service); + + /** + * Queue a new query for execution + * + * @param buffer Buffer containing the query + * + * @return True if query was successfully queued + */ + bool queue_query(GWBUF* buffer); + +private: + LocalClient(MXS_SESSION* session, int fd); + static uint32_t poll_handler(struct mxs_poll_data* data, int wid, uint32_t events); + void process(uint32_t events); + GWBUF* read_complete_packet(); + void drain_queue(); + void error(); + + /** Client states */ + enum vc_state + { + VC_WAITING_HANDSHAKE, // Initial state + VC_RESPONSE_SENT, // Handshake received and response sent + VC_OK, // Authentication is complete, ready for queries + VC_ERROR // Something went wrong + }; + + vc_state m_state; + int m_sock; + mxs::Buffer m_partial; + size_t m_expected_bytes; + std::deque m_queue; + MXS_SESSION* m_session; + MySQLProtocol m_protocol; +}; diff --git a/server/modules/filter/tee/tee.c b/server/modules/filter/tee/tee.c deleted file mode 100644 index c16a37b19..000000000 --- a/server/modules/filter/tee/tee.c +++ /dev/null @@ -1,1001 +0,0 @@ -/* - * Copyright (c) 2016 MariaDB Corporation Ab - * - * Use of this software is governed by the Business Source License included - * in the LICENSE.TXT file and at www.mariadb.com/bsl11. - * - * Change Date: 2019-07-01 - * - * On the date above, in accordance with the Business Source License, use - * of this software will be governed by version 2 or later of the General - * Public License. 
- */ - -/** - * @file tee.c A filter that splits the processing pipeline in two - * @verbatim - * - * Conditionally duplicate requests and send the duplicates to another service - * within MaxScale. - * - * Parameters - * ========== - * - * service The service to send the duplicates to - * source The source address to match in order to duplicate (optional) - * match A regular expression to match in order to perform duplication - * of the request (optional) - * nomatch A regular expression to match in order to prevent duplication - * of the request (optional) - * user A user name to match against. If present only requests that - * originate from this user will be duplciated (optional) - * - * Revision History - * ================ - * - * Date Who Description - * 20/06/2014 Mark Riddoch Initial implementation - * 24/06/2014 Mark Riddoch Addition of support for multi-packet queries - * 12/12/2014 Mark Riddoch Add support for otehr packet types - * - * @endverbatim - */ - -#define MXS_MODULE_NAME "tee" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define MYSQL_COM_QUIT 0x01 -#define MYSQL_COM_INITDB 0x02 -#define MYSQL_COM_FIELD_LIST 0x04 -#define MYSQL_COM_CHANGE_USER 0x11 -#define MYSQL_COM_STMT_PREPARE 0x16 -#define MYSQL_COM_STMT_EXECUTE 0x17 -#define MYSQL_COM_STMT_SEND_LONG_DATA 0x18 -#define MYSQL_COM_STMT_CLOSE 0x19 -#define MYSQL_COM_STMT_RESET 0x1a -#define MYSQL_COM_CONNECT 0x1b - -#define REPLY_TIMEOUT_SECOND 5 -#define REPLY_TIMEOUT_MILLISECOND 1 -#define PARENT 0 -#define CHILD 1 - -#ifdef SS_DEBUG -static int debug_seq = 0; -#endif - -static unsigned char required_packets[] = -{ - MYSQL_COM_QUIT, - MYSQL_COM_INITDB, - MYSQL_COM_CHANGE_USER, - MYSQL_COM_STMT_PREPARE, - MYSQL_COM_STMT_EXECUTE, - MYSQL_COM_STMT_SEND_LONG_DATA, - MYSQL_COM_STMT_CLOSE, - MYSQL_COM_STMT_RESET, - MYSQL_COM_CONNECT, - 0 -}; - -/* - * The filter entry 
points - */ -static MXS_FILTER *createInstance(const char* name, char **options, MXS_CONFIG_PARAMETER *); -static MXS_FILTER_SESSION *newSession(MXS_FILTER *instance, MXS_SESSION *session); -static void closeSession(MXS_FILTER *instance, MXS_FILTER_SESSION *session); -static void freeSession(MXS_FILTER *instance, MXS_FILTER_SESSION *session); -static void setDownstream(MXS_FILTER *instance, MXS_FILTER_SESSION *fsession, MXS_DOWNSTREAM *downstream); -static void setUpstream(MXS_FILTER *instance, MXS_FILTER_SESSION *fsession, MXS_UPSTREAM *upstream); -static int routeQuery(MXS_FILTER *instance, MXS_FILTER_SESSION *fsession, GWBUF *queue); -static int clientReply(MXS_FILTER *instance, MXS_FILTER_SESSION *fsession, GWBUF *queue); -static void diagnostic(MXS_FILTER *instance, MXS_FILTER_SESSION *fsession, DCB *dcb); -static json_t* diagnostic_json(const MXS_FILTER *instance, const MXS_FILTER_SESSION *fsession); -static uint64_t getCapabilities(MXS_FILTER* instance); - -/** - * The instance structure for the TEE filter - this holds the configuration - * information for the filter. - */ -typedef struct -{ - SERVICE *service; /* The service to duplicate requests to */ - char *source; /* The source of the client connection */ - char *userName; /* The user name to filter on */ - char *match; /* Optional text to match against */ - regex_t re; /* Compiled regex text */ - char *nomatch; /* Optional text to match against for exclusion */ - regex_t nore; /* Compiled regex nomatch text */ -} TEE_INSTANCE; - -/** - * The session structure for this TEE filter. - * This stores the downstream filter information, such that the - * filter is able to pass the query on to the next filter (or router) - * in the chain. - * - * It also holds the file descriptor to which queries are written. - */ -typedef struct -{ - MXS_DOWNSTREAM down; /* The downstream filter */ - MXS_UPSTREAM up; /* The upstream filter */ - int active; /* filter is active? 
*/ - bool use_ok; - int client_multistatement; - bool multipacket[2]; - unsigned char command; - bool waiting[2]; /* if the client is waiting for a reply */ - int eof[2]; - int replies[2]; /* Number of queries received */ - int reply_packets[2]; /* Number of OK, ERR, LOCAL_INFILE_REQUEST or RESULT_SET packets received */ - DCB *branch_dcb; /* Client DCB for "branch" service */ - MXS_SESSION *branch_session; /* The branch service session */ - TEE_INSTANCE *instance; - int n_duped; /* Number of duplicated queries */ - int n_rejected; /* Number of rejected queries */ - int residual; /* Any outstanding SQL text */ - GWBUF* tee_replybuf; /* Buffer for reply */ - GWBUF* tee_partials[2]; - GWBUF* queue; - SPINLOCK tee_lock; - DCB* client_dcb; - -#ifdef SS_DEBUG - long d_id; -#endif -} TEE_SESSION; - -typedef struct orphan_session_tt -{ - MXS_SESSION* session; /*< The child branch session whose parent was freed before - * the child session was in a suitable state. */ - struct orphan_session_tt* next; -} orphan_session_t; - -#ifdef SS_DEBUG -static SPINLOCK debug_lock; -static long debug_id = 0; -#endif - -static orphan_session_t* allOrphans = NULL; - -static SPINLOCK orphanLock; -static int packet_is_required(GWBUF *queue); -static int detect_loops(TEE_INSTANCE *instance, HASHTABLE* ht, SERVICE* session); -int internal_route(DCB* dcb); -GWBUF* clone_query(TEE_INSTANCE* my_instance, TEE_SESSION* my_session, GWBUF* buffer); -int route_single_query(TEE_INSTANCE* my_instance, - TEE_SESSION* my_session, - GWBUF* buffer, - GWBUF* clone); -int reset_session_state(TEE_SESSION* my_session, GWBUF* buffer); -void create_orphan(MXS_SESSION* ses); - -static void -orphan_free(void* data) -{ - spinlock_acquire(&orphanLock); - orphan_session_t *ptr = allOrphans, *finished = NULL, *tmp = NULL; -#ifdef SS_DEBUG - int o_stopping = 0, o_ready = 0, o_freed = 0; -#endif - while (ptr) - { - if (ptr->session->state == SESSION_STATE_TO_BE_FREED) - { - if (ptr == allOrphans) - { - tmp = ptr; - 
allOrphans = ptr->next; - } - else - { - tmp = allOrphans; - while (tmp && tmp->next != ptr) - { - tmp = tmp->next; - } - if (tmp) - { - tmp->next = ptr->next; - tmp = ptr; - } - } - } - - /* - * The session has been unlinked from all the DCBs and it is ready to be freed. - */ - - if (ptr->session->state == SESSION_STATE_STOPPING && - ptr->session->refcount == 0 && ptr->session->client_dcb == NULL) - { - ptr->session->state = SESSION_STATE_TO_BE_FREED; - } -#ifdef SS_DEBUG - else if (ptr->session->state == SESSION_STATE_STOPPING) - { - o_stopping++; - } - else if (ptr->session->state == SESSION_STATE_ROUTER_READY) - { - o_ready++; - } -#endif - ptr = ptr->next; - if (tmp) - { - tmp->next = finished; - finished = tmp; - tmp = NULL; - } - } - - spinlock_release(&orphanLock); - -#ifdef SS_DEBUG - if (o_stopping + o_ready > 0) - { - MXS_DEBUG("%d orphans in " - "SESSION_STATE_STOPPING, %d orphans in " - "SESSION_STATE_ROUTER_READY. ", o_stopping, o_ready); - } -#endif - - while (finished) - { -#ifdef SS_DEBUG - o_freed++; -#endif - tmp = finished; - finished = finished->next; - - tmp->session->service->router->freeSession( - tmp->session->service->router_instance, - tmp->session->router_session); - - tmp->session->state = SESSION_STATE_FREE; - MXS_FREE(tmp->session); - MXS_FREE(tmp); - } - -#ifdef SS_DEBUG - MXS_DEBUG("%d orphans freed.", o_freed); -#endif -} - -static const MXS_ENUM_VALUE option_values[] = -{ - {"ignorecase", REG_ICASE}, - {"case", 0}, - {"extended", REG_EXTENDED}, - {NULL} -}; - -/** - * The module entry point routine. It is this routine that - * must populate the structure that is referred to as the - * "module object", this is a structure with the set of - * external entry points for this module. 
- * - * @return The module object - */ -MXS_MODULE* MXS_CREATE_MODULE() -{ - spinlock_init(&orphanLock); -#ifdef SS_DEBUG - spinlock_init(&debug_lock); -#endif - - static MXS_FILTER_OBJECT MyObject = - { - createInstance, - newSession, - closeSession, - freeSession, - setDownstream, - setUpstream, - routeQuery, - clientReply, - diagnostic, - diagnostic_json, - getCapabilities, - NULL, // No destroyInstance - }; - - static MXS_MODULE info = - { - MXS_MODULE_API_FILTER, - MXS_MODULE_GA, - MXS_FILTER_VERSION, - "A tee piece in the filter plumbing", - "V1.0.0", - RCAP_TYPE_CONTIGUOUS_INPUT, - &MyObject, - NULL, /* Process init. */ - NULL, /* Process finish. */ - NULL, /* Thread init. */ - NULL, /* Thread finish. */ - { - {"service", MXS_MODULE_PARAM_SERVICE, NULL, MXS_MODULE_OPT_REQUIRED}, - {"match", MXS_MODULE_PARAM_STRING}, - {"exclude", MXS_MODULE_PARAM_STRING}, - {"source", MXS_MODULE_PARAM_STRING}, - {"user", MXS_MODULE_PARAM_STRING}, - { - "options", - MXS_MODULE_PARAM_ENUM, - "ignorecase", - MXS_MODULE_OPT_NONE, - option_values - }, - {MXS_END_MODULE_PARAMS} - } - }; - - return &info; -} - -/** - * Create an instance of the filter for a particular service - * within MaxScale. - * - * @param name The name of the instance (as defined in the config file). 
- * @param options The options for this filter - * @param params The array of name/value pair parameters for the filter - * - * @return The instance data for this new instance - */ -static MXS_FILTER * -createInstance(const char *name, char **options, MXS_CONFIG_PARAMETER *params) -{ - TEE_INSTANCE *my_instance = MXS_CALLOC(1, sizeof(TEE_INSTANCE)); - - if (my_instance) - { - my_instance->service = config_get_service(params, "service"); - my_instance->source = config_copy_string(params, "source"); - my_instance->userName = config_copy_string(params, "user"); - my_instance->match = config_copy_string(params, "match"); - my_instance->nomatch = config_copy_string(params, "exclude"); - - int cflags = config_get_enum(params, "options", option_values); - - if (my_instance->match && regcomp(&my_instance->re, my_instance->match, cflags)) - { - MXS_ERROR("Invalid regular expression '%s' for the match parameter.", - my_instance->match); - MXS_FREE(my_instance->match); - MXS_FREE(my_instance->nomatch); - MXS_FREE(my_instance->source); - MXS_FREE(my_instance->userName); - MXS_FREE(my_instance); - return NULL; - } - - if (my_instance->nomatch && regcomp(&my_instance->nore, my_instance->nomatch, cflags)) - { - MXS_ERROR("Invalid regular expression '%s' for the nomatch paramter.", - my_instance->nomatch); - if (my_instance->match) - { - regfree(&my_instance->re); - MXS_FREE(my_instance->match); - } - MXS_FREE(my_instance->nomatch); - MXS_FREE(my_instance->source); - MXS_FREE(my_instance->userName); - MXS_FREE(my_instance); - return NULL; - } - } - - return (MXS_FILTER *) my_instance; -} - -/** - * Associate a new session with this instance of the filter. - * - * Create the file to log to and open it. 
- * - * @param instance The filter instance data - * @param session The session itself - * @return Session specific data for this session - */ -static MXS_FILTER_SESSION * -newSession(MXS_FILTER *instance, MXS_SESSION *session) -{ - TEE_INSTANCE *my_instance = (TEE_INSTANCE *) instance; - TEE_SESSION *my_session; - const char *remote, *userName; - - if (strcmp(my_instance->service->name, session->service->name) == 0) - { - MXS_ERROR("%s: Recursive use of tee filter in service.", - session->service->name); - my_session = NULL; - goto retblock; - } - - HASHTABLE* ht = hashtable_alloc(100, hashtable_item_strhash, hashtable_item_strcmp); - bool is_loop = detect_loops(my_instance, ht, session->service); - hashtable_free(ht); - - if (is_loop) - { - MXS_ERROR("%s: Recursive use of tee filter in service.", - session->service->name); - my_session = NULL; - goto retblock; - } - - if ((my_session = MXS_CALLOC(1, sizeof(TEE_SESSION))) != NULL) - { - my_session->active = 1; - my_session->residual = 0; - my_session->tee_replybuf = NULL; - my_session->client_dcb = session->client_dcb; - my_session->instance = my_instance; - my_session->client_multistatement = false; - my_session->queue = NULL; - spinlock_init(&my_session->tee_lock); - if (my_instance->source && - (remote = session_get_remote(session)) != NULL) - { - if (strcmp(remote, my_instance->source)) - { - my_session->active = 0; - - MXS_WARNING("Tee filter is not active."); - } - } - userName = session_get_user(session); - - if (my_instance->userName && - userName && - strcmp(userName, my_instance->userName)) - { - my_session->active = 0; - - MXS_WARNING("Tee filter is not active."); - } - - if (my_session->active) - { - DCB* dcb; - MXS_SESSION* ses; - if ((dcb = dcb_clone(session->client_dcb)) == NULL) - { - freeSession(instance, (MXS_FILTER_SESSION *) my_session); - my_session = NULL; - - MXS_ERROR("Creating client DCB for Tee " - "filter failed. 
Terminating session."); - - goto retblock; - } - - dcb->service = my_instance->service; - - if ((ses = session_alloc(my_instance->service, dcb)) == NULL) - { - dcb_close(dcb); - freeSession(instance, (MXS_FILTER_SESSION *) my_session); - my_session = NULL; - MXS_ERROR("Creating client session for Tee " - "filter failed. Terminating session."); - - goto retblock; - } - - ss_dassert(ses->ses_is_child); - - my_session->branch_session = ses; - my_session->branch_dcb = dcb; - } - } -retblock: - return (MXS_FILTER_SESSION*)my_session; -} - -/** - * Close a session with the filter, this is the mechanism - * by which a filter may cleanup data structure etc. - * In the case of the tee filter we need to close down the - * "branch" session. - * - * @param instance The filter instance data - * @param session The session being closed - */ -static void -closeSession(MXS_FILTER *instance, MXS_FILTER_SESSION *session) -{ - TEE_SESSION *my_session = (TEE_SESSION *) session; - MXS_ROUTER_OBJECT *router; - void *router_instance, *rsession; - MXS_SESSION *bsession; -#ifdef SS_DEBUG - MXS_INFO("Tee close: %d", atomic_add(&debug_seq, 1)); -#endif - if (my_session->active) - { - - if ((bsession = my_session->branch_session) != NULL) - { - CHK_SESSION(bsession); - bsession->ses_is_child = false; - session_close(bsession); - } - - if (my_session->waiting[PARENT]) - { - if (my_session->command != 0x01 && - my_session->client_dcb && - my_session->client_dcb->state == DCB_STATE_POLLING) - { - MXS_INFO("Tee session closed mid-query."); - GWBUF* errbuf = modutil_create_mysql_err_msg(1, 0, 1, "00000", "Session closed."); - my_session->client_dcb->func.write(my_session->client_dcb, errbuf); - } - } - - - my_session->active = 0; - } -} - -/** - * Free the memory associated with the session - * - * @param instance The filter instance - * @param session The filter session - */ -static void -freeSession(MXS_FILTER *instance, MXS_FILTER_SESSION *session) -{ - TEE_SESSION *my_session = (TEE_SESSION *) 
session; - MXS_SESSION* ses = my_session->branch_session; - mxs_session_state_t state; -#ifdef SS_DEBUG - MXS_INFO("Tee free: %d", atomic_add(&debug_seq, 1)); -#endif - if (ses != NULL) - { - state = ses->state; - - if (state == SESSION_STATE_ROUTER_READY) - { - session_put_ref(ses); - } - else if (state == SESSION_STATE_TO_BE_FREED) - { - /** Free branch router session */ - ses->service->router->freeSession( - ses->service->router_instance, - ses->router_session); - /** Free memory of branch client session */ - ses->state = SESSION_STATE_FREE; - MXS_FREE(ses); - /** This indicates that branch session is not available anymore */ - my_session->branch_session = NULL; - } - else if (state == SESSION_STATE_STOPPING) - { - create_orphan(ses); - } - } - if (my_session->tee_replybuf) - { - gwbuf_free(my_session->tee_replybuf); - } - MXS_FREE(session); - - orphan_free(NULL); - - return; -} - -/** - * Set the downstream filter or router to which queries will be - * passed from this filter. - * - * @param instance The filter instance data - * @param session The filter session - * @param downstream The downstream filter or router. - */ -static void -setDownstream(MXS_FILTER *instance, MXS_FILTER_SESSION *session, MXS_DOWNSTREAM *downstream) -{ - TEE_SESSION *my_session = (TEE_SESSION *) session; - my_session->down = *downstream; -} - -/** - * Set the downstream filter or router to which queries will be - * passed from this filter. - * - * @param instance The filter instance data - * @param session The filter session - * @param downstream The downstream filter or router. - */ -static void -setUpstream(MXS_FILTER *instance, MXS_FILTER_SESSION *session, MXS_UPSTREAM *upstream) -{ - TEE_SESSION *my_session = (TEE_SESSION *) session; - my_session->up = *upstream; -} - -/** - * The routeQuery entry point. This is passed the query buffer - * to which the filter should be applied. 
Once applied the - * query should normally be passed to the downstream component - * (filter or router) in the filter chain. - * - * If my_session->residual is set then duplicate that many bytes - * and send them to the branch. - * - * If my_session->residual is zero then this must be a new request - * Extract the SQL text if possible, match against that text and forward - * the request. If the requets is not contained witin the packet we have - * then set my_session->residual to the number of outstanding bytes - * - * @param instance The filter instance data - * @param session The filter session - * @param queue The query data - */ -static int -routeQuery(MXS_FILTER *instance, MXS_FILTER_SESSION *session, GWBUF *queue) -{ - TEE_INSTANCE *my_instance = (TEE_INSTANCE *) instance; - TEE_SESSION *my_session = (TEE_SESSION *) session; - GWBUF *clone = clone_query(my_instance, my_session, queue); - - return route_single_query(my_instance, my_session, queue, clone); -} - -/** - * The clientReply entry point. This is passed the response buffer - * to which the filter should be applied. Once processed the - * query is passed to the upstream component - * (filter or router) in the filter chain. - * - * @param instance The filter instance data - * @param session The filter session - * @param reply The response data - */ -static int -clientReply(MXS_FILTER* instance, MXS_FILTER_SESSION *session, GWBUF *reply) -{ - int rc = 1, branch, eof; - TEE_SESSION *my_session = (TEE_SESSION *) session; - - return my_session->up.clientReply(my_session->up.instance, - my_session->up.session, - reply); -} - -/** - * Diagnostics routine - * - * If fsession is NULL then print diagnostics on the filter - * instance as a whole, otherwise print diagnostics for the - * particular session. 
- * - * @param instance The filter instance - * @param fsession Filter session, may be NULL - * @param dcb The DCB for diagnostic output - */ -static void -diagnostic(MXS_FILTER *instance, MXS_FILTER_SESSION *fsession, DCB *dcb) -{ - TEE_INSTANCE *my_instance = (TEE_INSTANCE *) instance; - TEE_SESSION *my_session = (TEE_SESSION *) fsession; - - if (my_instance->source) - { - dcb_printf(dcb, "\t\tLimit to connections from %s\n", - my_instance->source); - } - dcb_printf(dcb, "\t\tDuplicate statements to service %s\n", - my_instance->service->name); - if (my_instance->userName) - { - dcb_printf(dcb, "\t\tLimit to user %s\n", - my_instance->userName); - } - if (my_instance->match) - { - dcb_printf(dcb, "\t\tInclude queries that match %s\n", - my_instance->match); - } - if (my_instance->nomatch) - { - dcb_printf(dcb, "\t\tExclude queries that match %s\n", - my_instance->nomatch); - } - if (my_session) - { - dcb_printf(dcb, "\t\tNo. of statements duplicated: %d.\n", - my_session->n_duped); - dcb_printf(dcb, "\t\tNo. of statements rejected: %d.\n", - my_session->n_rejected); - } -} - -/** - * Diagnostics routine - * - * If fsession is NULL then print diagnostics on the filter - * instance as a whole, otherwise print diagnostics for the - * particular session. 
- * - * @param instance The filter instance - * @param fsession Filter session, may be NULL - */ -static json_t* diagnostic_json(const MXS_FILTER *instance, const MXS_FILTER_SESSION *fsession) -{ - TEE_INSTANCE *my_instance = (TEE_INSTANCE*)instance; - TEE_SESSION *my_session = (TEE_SESSION*)fsession; - - json_t* rval = json_object(); - - if (my_instance->source) - { - json_object_set_new(rval, "source", json_string(my_instance->source)); - } - - json_object_set_new(rval, "service", json_string(my_instance->service->name)); - - if (my_instance->userName) - { - json_object_set_new(rval, "user", json_string(my_instance->userName)); - } - - if (my_instance->match) - { - json_object_set_new(rval, "match", json_string(my_instance->match)); - } - - if (my_instance->nomatch) - { - json_object_set_new(rval, "exclude", json_string(my_instance->nomatch)); - } - - if (my_session) - { - json_object_set_new(rval, "duplicated", json_integer(my_session->n_duped)); - json_object_set_new(rval, "rejected", json_integer(my_session->n_duped)); - } - - return rval; -} - -/** - * Capability routine. - * - * @return The capabilities of the filter. - */ -static uint64_t getCapabilities(MXS_FILTER* instance) -{ - return RCAP_TYPE_NONE; -} - -/** - * Determine if the packet is a command that must be sent to the branch - * to maintain the session consistancy. These are COM_INIT_DB, - * COM_CHANGE_USER and COM_QUIT packets. - * - * @param queue The buffer to check - * @return non-zero if the packet should be sent to the branch - */ -static int -packet_is_required(GWBUF *queue) -{ - uint8_t *ptr; - int i; - - ptr = GWBUF_DATA(queue); - if (GWBUF_LENGTH(queue) > 4) - { - for (i = 0; required_packets[i]; i++) - { - if (ptr[4] == required_packets[i]) - { - return 1; - } - } - } - return 0; -} - -/** - * Detects possible loops in the query cloning chain. 
- */ -int detect_loops(TEE_INSTANCE *instance, HASHTABLE* ht, SERVICE* service) -{ - SERVICE* svc = service; - int i; - - if (ht == NULL) - { - return -1; - } - - if (hashtable_add(ht, (void*) service->name, (void*) true) == 0) - { - return true; - } - - for (i = 0; i < svc->n_filters; i++) - { - const char* module = filter_def_get_module_name(svc->filters[i]); - if (strcmp(module, "tee") == 0) - { - /* - * Found a Tee filter, recurse down its path - * if the service name isn't already in the hashtable. - */ - - TEE_INSTANCE* ninst = (TEE_INSTANCE*)filter_def_get_instance(svc->filters[i]); - if (ninst == NULL) - { - /** - * This tee instance hasn't been initialized yet and full - * resolution of recursion cannot be done now. - */ - continue; - } - SERVICE* tgt = ninst->service; - - if (detect_loops(ninst, ht, tgt)) - { - return true; - } - - } - } - - return false; -} - -GWBUF* clone_query(TEE_INSTANCE* my_instance, TEE_SESSION* my_session, GWBUF* buffer) -{ - GWBUF* clone = NULL; - - if ((!my_instance->match && !my_instance->nomatch) || packet_is_required(buffer)) - { - clone = gwbuf_clone(buffer); - } - else - { - char *ptr = modutil_get_SQL(buffer); - - if (ptr) - { - if ((my_instance->match && regexec(&my_instance->re, ptr, 0, NULL, 0) == 0) || - (my_instance->nomatch && regexec(&my_instance->nore, ptr, 0, NULL, 0) != 0)) - { - clone = gwbuf_clone(buffer); - } - MXS_FREE(ptr); - } - } - - return clone; -} - -/** - * Route the main query downstream along the main filter chain and possibly route - * a clone of the buffer to the branch session. If the clone buffer is NULL, nothing - * is routed to the branch session. - * @param my_instance Tee instance - * @param my_session Tee session - * @param buffer Main buffer - * @param clone Cloned buffer - * @return 1 on success, 0 on failure. 
- */ -int route_single_query(TEE_INSTANCE* my_instance, TEE_SESSION* my_session, GWBUF* buffer, GWBUF* clone) -{ - int rval = 0; - - if (my_session->active && my_session->branch_session && - my_session->branch_session->state == SESSION_STATE_ROUTER_READY) - { - - rval = my_session->down.routeQuery(my_session->down.instance, - my_session->down.session, - buffer); - if (clone) - { - my_session->n_duped++; - - if (my_session->branch_session->state == SESSION_STATE_ROUTER_READY) - { - MXS_SESSION_ROUTE_QUERY(my_session->branch_session, clone); - } - else - { - /** Close tee session */ - my_session->active = 0; - rval = 0; - MXS_INFO("Closed tee filter session: Child session in invalid state."); - gwbuf_free(clone); - } - } - } - - return rval; -} - -/** - * Reset the session's internal counters. - * @param my_session Tee session - * @param buffer Buffer with the query of the main branch in it - * @return 1 on success, 0 on error - */ -int reset_session_state(TEE_SESSION* my_session, GWBUF* buffer) -{ - if (gwbuf_length(buffer) < 5) - { - return 0; - } - - unsigned char command = *((unsigned char*) buffer->start + 4); - - switch (command) - { - case 0x1b: - my_session->client_multistatement = *((unsigned char*) buffer->start + 5); - MXS_INFO("client %s multistatements", - my_session->client_multistatement ? 
"enabled" : "disabled"); - case 0x03: - case 0x16: - case 0x17: - case 0x04: - case 0x0a: - memset(my_session->multipacket, (char) true, 2 * sizeof(bool)); - break; - default: - memset(my_session->multipacket, (char) false, 2 * sizeof(bool)); - break; - } - - memset(my_session->replies, 0, 2 * sizeof(int)); - memset(my_session->reply_packets, 0, 2 * sizeof(int)); - memset(my_session->eof, 0, 2 * sizeof(int)); - memset(my_session->waiting, 1, 2 * sizeof(bool)); - my_session->command = command; - - return 1; -} - -void create_orphan(MXS_SESSION* ses) -{ - orphan_session_t* orphan = MXS_MALLOC(sizeof(orphan_session_t)); - if (orphan) - { - orphan->session = ses; - spinlock_acquire(&orphanLock); - orphan->next = allOrphans; - allOrphans = orphan; - spinlock_release(&orphanLock); - } -} diff --git a/server/modules/filter/tee/tee.cc b/server/modules/filter/tee/tee.cc new file mode 100644 index 000000000..d92fb2c70 --- /dev/null +++ b/server/modules/filter/tee/tee.cc @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2016 MariaDB Corporation Ab + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file and at www.mariadb.com/bsl11. + * + * Change Date: 2020-01-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2 or later of the General + * Public License. 
+ */ + +/** + * @file tee.cc A filter that splits the processing pipeline in two + */ + +#define MXS_MODULE_NAME "tee" + +#include + +#include +#include +#include + +#include "tee.hh" +#include "local_client.hh" +#include "teesession.hh" + +static const MXS_ENUM_VALUE option_values[] = +{ + {"ignorecase", REG_ICASE}, + {"case", 0}, + {"extended", REG_EXTENDED}, + {NULL} +}; + +Tee::Tee(SERVICE* service, const char* user, const char* remote, + const char* match, const char* nomatch, int cflags): + m_service(service), + m_user(user), + m_source(remote), + m_match(match), + m_nomatch(nomatch) +{ + if (*match) + { + ss_debug(int rc = )regcomp(&m_re, match, cflags); + ss_dassert(rc == 0); + } + + if (*nomatch) + { + ss_debug(int rc = )regcomp(&m_nore, nomatch, cflags); + ss_dassert(rc == 0); + } +} + +/** + * Create an instance of the filter for a particular service + * within MaxScale. + * + * @param name The name of the instance (as defined in the config file). + * @param options The options for this filter + * @param params The array of name/value pair parameters for the filter + * + * @return The instance data for this new instance + */ +Tee* Tee::create(const char *name, char **options, MXS_CONFIG_PARAMETER *params) +{ + Tee *my_instance = NULL; + + SERVICE* service = config_get_service(params, "service"); + const char* source = config_get_string(params, "source"); + const char* user = config_get_string(params, "user"); + const char* match = config_get_string(params, "match"); + const char* nomatch = config_get_string(params, "exclude"); + + int cflags = config_get_enum(params, "options", option_values); + regex_t re; + regex_t nore; + + if (*match && regcomp(&re, match, cflags) != 0) + { + MXS_ERROR("Invalid regular expression '%s' for the match parameter.", match); + } + else if (*nomatch && regcomp(&nore, nomatch, cflags) != 0) + { + MXS_ERROR("Invalid regular expression '%s' for the nomatch parameter.", nomatch); + + if (*match) + { + regfree(&re); + } + } + 
else + { + my_instance = new (std::nothrow) Tee(service, user, source, match, nomatch, cflags); /* constructor order: user, then source */ + } + + return my_instance; +} + +TeeSession* Tee::newSession(MXS_SESSION* pSession) +{ + return TeeSession::create(this, pSession); +} + +/** + * Diagnostics routine + * + * Print the configuration of this filter instance to the DCB: the + * target service always, and the source, user, match and exclude + * settings only when they have a non-empty value. + * + * This is the instance level output; it does not depend on any + * particular filter session. + * @param dcb The DCB for diagnostic output + */ +void Tee::diagnostics(DCB *dcb) +{ + if (m_source.length()) + { + dcb_printf(dcb, "\t\tLimit to connections from %s\n", + m_source.c_str()); + } + dcb_printf(dcb, "\t\tDuplicate statements to service %s\n", + m_service->name); + if (m_user.length()) + { + dcb_printf(dcb, "\t\tLimit to user %s\n", + m_user.c_str()); + } + if (m_match.length()) + { + dcb_printf(dcb, "\t\tInclude queries that match %s\n", + m_match.c_str()); + } + if (m_nomatch.length()) + { + dcb_printf(dcb, "\t\tExclude queries that match %s\n", + m_nomatch.c_str()); + } +} + +/** + * Diagnostics routine + * + * If fsession is NULL then print diagnostics on the filter + * instance as a whole, otherwise print diagnostics for the + * particular session.
+ * + * @param instance The filter instance + * @param fsession Filter session, may be NULL + */ +json_t* Tee::diagnostics_json() const +{ + json_t* rval = json_object(); + + if (m_source.length()) + { + json_object_set_new(rval, "source", json_string(m_source.c_str())); + } + + json_object_set_new(rval, "service", json_string(m_service->name)); + + if (m_user.length()) + { + json_object_set_new(rval, "user", json_string(m_user.c_str())); + } + + if (m_match.length()) + { + json_object_set_new(rval, "match", json_string(m_match.c_str())); + } + + if (m_nomatch.length()) + { + json_object_set_new(rval, "exclude", json_string(m_nomatch.c_str())); + } + + return rval; +} + +MXS_BEGIN_DECLS + +/** + * The module entry point routine. It is this routine that + * must populate the structure that is referred to as the + * "module object", this is a structure with the set of + * external entry points for this module. + * + * @return The module object + */ +MXS_MODULE* MXS_CREATE_MODULE() +{ + + static MXS_MODULE info = + { + MXS_MODULE_API_FILTER, + MXS_MODULE_GA, + MXS_FILTER_VERSION, + "A tee piece in the filter plumbing", + "V1.1.0", + RCAP_TYPE_CONTIGUOUS_INPUT, + &Tee::s_object, + NULL, /* Process init. */ + NULL, /* Process finish. */ + NULL, /* Thread init. */ + NULL, /* Thread finish. 
*/ + { + {"service", MXS_MODULE_PARAM_SERVICE, NULL, MXS_MODULE_OPT_REQUIRED}, + {"match", MXS_MODULE_PARAM_STRING}, + {"exclude", MXS_MODULE_PARAM_STRING}, + {"source", MXS_MODULE_PARAM_STRING}, + {"user", MXS_MODULE_PARAM_STRING}, + { + "options", + MXS_MODULE_PARAM_ENUM, + "ignorecase", + MXS_MODULE_OPT_NONE, + option_values + }, + {MXS_END_MODULE_PARAMS} + } + }; + + return &info; +} + +MXS_END_DECLS diff --git a/server/modules/filter/tee/tee.hh b/server/modules/filter/tee/tee.hh new file mode 100644 index 000000000..df096583c --- /dev/null +++ b/server/modules/filter/tee/tee.hh @@ -0,0 +1,71 @@ +#pragma once +/* + * Copyright (c) 2016 MariaDB Corporation Ab + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file and at www.mariadb.com/bsl11. + * + * Change Date: 2020-01-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2 or later of the General + * Public License. + */ + +#include + +#include +#include + +#include +#include + +#include "teesession.hh" + +/** + * The instance structure for the TEE filter - this holds the configuration + * information for the filter. 
+ */ +class Tee: public mxs::Filter +{ + Tee(const Tee&); + const Tee& operator=(const Tee&); +public: + + static Tee* create(const char* zName, char** pzOptions, MXS_CONFIG_PARAMETER* ppParams); + TeeSession* newSession(MXS_SESSION* session); + void diagnostics(DCB* pDcb); + json_t* diagnostics_json() const; + + uint64_t getCapabilities() + { + return RCAP_TYPE_CONTIGUOUS_INPUT; + } + + bool user_matches(const char* user)const + { + return m_user.length() == 0 || strcmp(user, m_user.c_str()) == 0; + } + + bool remote_matches(const char* remote)const + { + return m_source.length() == 0 || strcmp(remote, m_source.c_str()) == 0; + } + + SERVICE* get_service() const + { + return m_service; + } + +private: + Tee(SERVICE* service, const char* user, const char* remote, + const char* match, const char* nomatch, int cflags); + + SERVICE* m_service; + std::string m_user; /* The user name to filter on */ + std::string m_source; /* The source of the client connection */ + std::string m_match; /* Optional text to match against */ + std::string m_nomatch; /* Optional text to match against for exclusion */ + regex_t m_re; /* Compiled regex text */ + regex_t m_nore; /* Compiled regex nomatch text */ +}; diff --git a/server/modules/filter/tee/teesession.cc b/server/modules/filter/tee/teesession.cc new file mode 100644 index 000000000..59a8bef78 --- /dev/null +++ b/server/modules/filter/tee/teesession.cc @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2016 MariaDB Corporation Ab + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file and at www.mariadb.com/bsl11. + * + * Change Date: 2020-01-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2 or later of the General + * Public License. + */ + +#include "teesession.hh" +#include "tee.hh" + +#include +#include + +/** + * Detect loops in the filter chain. 
+ */ +bool recursive_tee_usage(std::set& services, SERVICE* service) +{ + if (!services.insert(service->name).second) + { + /** The service name was already in the set */ + return true; + } + + for (int i = 0; i < service->n_filters; i++) + { + const char* module = filter_def_get_module_name(service->filters[i]); + + if (strcmp(module, "tee") == 0) + { + /* + * Found a Tee filter, recurse down its path + * if the service name isn't already in the hashtable. + */ + Tee* inst = (Tee*)filter_def_get_instance(service->filters[i]); + + if (inst == NULL) + { + /** + * This tee instance hasn't been initialized yet and full + * resolution of recursion cannot be done now. + */ + } + else if (recursive_tee_usage(services, inst->get_service())) + { + return true; + } + } + } + + return false; +} + +TeeSession::TeeSession(MXS_SESSION* session, LocalClient* client): + mxs::FilterSession(session), + m_client(client) +{ +} + +TeeSession* TeeSession::create(Tee* my_instance, MXS_SESSION* session) +{ + std::set services; + + if (recursive_tee_usage(services, my_instance->get_service())) + { + MXS_ERROR("%s: Recursive use of tee filter in service.", + session->service->name); + return NULL; + } + + LocalClient* client = NULL; + + if (my_instance->user_matches(session_get_user(session)) && + my_instance->remote_matches(session_get_remote(session))) + { + if ((client = LocalClient::create(session, my_instance->get_service())) == NULL) + { + return NULL; + } + } + + return new (std::nothrow) TeeSession(session, client); +} + +TeeSession::~TeeSession() +{ + delete m_client; +} + +void TeeSession::close() +{ +} + +int TeeSession::routeQuery(GWBUF* queue) +{ + if (m_client) + { + m_client->queue_query(queue); + } + + return mxs::FilterSession::routeQuery(queue); +} + +void TeeSession::diagnostics(DCB *pDcb) +{ +} + +json_t* TeeSession::diagnostics_json() const +{ + return NULL; +} diff --git a/server/modules/filter/tee/teesession.hh b/server/modules/filter/tee/teesession.hh new file mode 
100644 index 000000000..9d731b0b7 --- /dev/null +++ b/server/modules/filter/tee/teesession.hh @@ -0,0 +1,43 @@ +#pragma once +/* + * Copyright (c) 2016 MariaDB Corporation Ab + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file and at www.mariadb.com/bsl11. + * + * Change Date: 2020-01-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2 or later of the General + * Public License. + */ + +#include + +#include + +#include "local_client.hh" + +class Tee; + +/** + * A Tee session + */ +class TeeSession: public mxs::FilterSession +{ + TeeSession(const TeeSession&); + const TeeSession& operator=(const TeeSession&); + +public: + ~TeeSession(); + static TeeSession* create(Tee* my_instance, MXS_SESSION* session); + + void close(); + int routeQuery(GWBUF* pPacket); + void diagnostics(DCB *pDcb); + json_t* diagnostics_json() const; + +private: + TeeSession(MXS_SESSION* session, LocalClient* client); + LocalClient* m_client; /**< The client connection to the local service */ +}; diff --git a/server/modules/filter/topfilter/topfilter.c b/server/modules/filter/topfilter/topfilter.c index 7e9ba8505..03f598f22 100644 --- a/server/modules/filter/topfilter/topfilter.c +++ b/server/modules/filter/topfilter/topfilter.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/filter/tpmfilter/tpmfilter.c b/server/modules/filter/tpmfilter/tpmfilter.c index 739929c5e..5c666e11a 100644 --- a/server/modules/filter/tpmfilter/tpmfilter.c +++ b/server/modules/filter/tpmfilter/tpmfilter.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/include/binlog_common.h b/server/modules/include/binlog_common.h index a961351e0..e881e57d5 100644 --- a/server/modules/include/binlog_common.h +++ b/server/modules/include/binlog_common.h @@ -7,7 +7,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/include/blr_constants.h b/server/modules/include/blr_constants.h index 1fd18eaa1..e88c89aa0 100644 --- a/server/modules/include/blr_constants.h +++ b/server/modules/include/blr_constants.h @@ -7,7 +7,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/include/cdc.h b/server/modules/include/cdc.h index bf8393ca5..df1b61d8e 100644 --- a/server/modules/include/cdc.h +++ b/server/modules/include/cdc.h @@ -7,7 +7,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/include/debugcli.h b/server/modules/include/debugcli.h index 411065566..2f425ebd4 100644 --- a/server/modules/include/debugcli.h +++ b/server/modules/include/debugcli.h @@ -8,7 +8,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/include/telnetd.h b/server/modules/include/telnetd.h index d1ff037a4..d4105a749 100644 --- a/server/modules/include/telnetd.h +++ b/server/modules/include/telnetd.h @@ -7,7 +7,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/monitor/auroramon/auroramon.c b/server/modules/monitor/auroramon/auroramon.c index 34f1c867b..43e430738 100644 --- a/server/modules/monitor/auroramon/auroramon.c +++ b/server/modules/monitor/auroramon/auroramon.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/monitor/galeramon/galeramon.c b/server/modules/monitor/galeramon/galeramon.c index 9ed720010..93d1d8d3d 100644 --- a/server/modules/monitor/galeramon/galeramon.c +++ b/server/modules/monitor/galeramon/galeramon.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/monitor/galeramon/galeramon.h b/server/modules/monitor/galeramon/galeramon.h index d7f305085..924289035 100644 --- a/server/modules/monitor/galeramon/galeramon.h +++ b/server/modules/monitor/galeramon/galeramon.h @@ -7,7 +7,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/monitor/mmmon/mmmon.c b/server/modules/monitor/mmmon/mmmon.c index f3a18145b..3253e164e 100644 --- a/server/modules/monitor/mmmon/mmmon.c +++ b/server/modules/monitor/mmmon/mmmon.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/monitor/mmmon/mmmon.h b/server/modules/monitor/mmmon/mmmon.h index 5af8ac671..cbf434e1f 100644 --- a/server/modules/monitor/mmmon/mmmon.h +++ b/server/modules/monitor/mmmon/mmmon.h @@ -7,7 +7,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/monitor/mysqlmon.h b/server/modules/monitor/mysqlmon.h index 73f214e2a..46d43cdba 100644 --- a/server/modules/monitor/mysqlmon.h +++ b/server/modules/monitor/mysqlmon.h @@ -7,7 +7,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/monitor/mysqlmon/mysql_mon.c b/server/modules/monitor/mysqlmon/mysql_mon.c index 84ba2f40f..c8c00e498 100644 --- a/server/modules/monitor/mysqlmon/mysql_mon.c +++ b/server/modules/monitor/mysqlmon/mysql_mon.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -1540,8 +1540,7 @@ static void set_master_heartbeat(MYSQL_MONITOR *handle, MXS_MONITOR_SERVERS *dat "(maxscale_id INT NOT NULL, " "master_server_id INT NOT NULL, " "master_timestamp INT UNSIGNED NOT NULL, " - "PRIMARY KEY ( master_server_id, maxscale_id ) ) " - "ENGINE=MYISAM DEFAULT CHARSET=latin1")) + "PRIMARY KEY ( master_server_id, maxscale_id ) )")) { MXS_ERROR("Error creating maxscale_schema.replication_heartbeat " "table in Master server: %s", mysql_error(database->con)); diff --git a/server/modules/monitor/mysqlmon/mysql_mon_journal.c b/server/modules/monitor/mysqlmon/mysql_mon_journal.c index 09047e707..f339faa2e 100644 --- a/server/modules/monitor/mysqlmon/mysql_mon_journal.c +++ b/server/modules/monitor/mysqlmon/mysql_mon_journal.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/monitor/ndbclustermon/ndbclustermon.c b/server/modules/monitor/ndbclustermon/ndbclustermon.c index 26b36677d..7840fe5ac 100644 --- a/server/modules/monitor/ndbclustermon/ndbclustermon.c +++ b/server/modules/monitor/ndbclustermon/ndbclustermon.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/monitor/ndbclustermon/ndbclustermon.h b/server/modules/monitor/ndbclustermon/ndbclustermon.h index 91a0d64a5..d01064d98 100644 --- a/server/modules/monitor/ndbclustermon/ndbclustermon.h +++ b/server/modules/monitor/ndbclustermon/ndbclustermon.h @@ -6,7 +6,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/protocol/CDC/cdc.c b/server/modules/protocol/CDC/cdc.c index e306b8065..80927a06b 100644 --- a/server/modules/protocol/CDC/cdc.c +++ b/server/modules/protocol/CDC/cdc.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/protocol/HTTPD/httpd.c b/server/modules/protocol/HTTPD/httpd.c index 9dcaeb18f..aac27aa0c 100644 --- a/server/modules/protocol/HTTPD/httpd.c +++ b/server/modules/protocol/HTTPD/httpd.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/protocol/HTTPD/httpd.h b/server/modules/protocol/HTTPD/httpd.h index 0a6c4e605..e5c402d9c 100644 --- a/server/modules/protocol/HTTPD/httpd.h +++ b/server/modules/protocol/HTTPD/httpd.h @@ -7,7 +7,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c b/server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c index 6417eb00f..2e8fc4a59 100644 --- a/server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c +++ b/server/modules/protocol/MySQL/MySQLBackend/mysql_backend.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -26,30 +26,6 @@ /* * MySQL Protocol module for handling the protocol between the gateway * and the backend MySQL database. - * - * Revision History - * Date Who Description - * 14/06/2013 Mark Riddoch Initial version - * 17/06/2013 Massimiliano Pinto Added MaxScale To Backends routines - * 01/07/2013 Massimiliano Pinto Put Log Manager example code behind SS_DEBUG macros. - * 03/07/2013 Massimiliano Pinto Added delayq for incoming data before mysql connection - * 04/07/2013 Massimiliano Pinto Added asynchronous MySQL protocol connection to backend - * 05/07/2013 Massimiliano Pinto Added closeSession if backend auth fails - * 12/07/2013 Massimiliano Pinto Added Mysql Change User via dcb->func.auth() - * 15/07/2013 Massimiliano Pinto Added Mysql session change via dcb->func.session() - * 17/07/2013 Massimiliano Pinto Added dcb->command update from gwbuf->command for proper routing - * server replies to client via router->clientReply - * 04/09/2013 Massimiliano Pinto Added dcb->session and dcb->session->client checks for NULL - * 12/09/2013 Massimiliano Pinto Added checks in gw_read_backend_event() for gw_read_backend_handshake - * 27/09/2013 Massimiliano Pinto Changed in gw_read_backend_event the check for dcb_read(), - * now is if rc less than 0 - * 24/10/2014 Massimiliano Pinto Added Mysql user@host @db authentication support - * 10/11/2014 Massimiliano Pinto Client charset is passed to backend - * 19/06/2015 Martin Brampton Persistent connection handling - * 07/10/2015 Martin Brampton Remove calls to dcb_close - should be done by routers - * 27/10/2015 Martin Brampton Test for RCAP_TYPE_NO_RSESSION before calling clientReply - * 23/05/2016 Martin Brampton Provide for backend SSL - * */ static int gw_create_backend_connection(DCB *backend, SERVER *server, 
MXS_SESSION *in_session); @@ -68,7 +44,6 @@ extern char* create_auth_failed_msg(GWBUF* readbuf, char* hostaddr, uint8_t* sha static bool sescmd_response_complete(DCB* dcb); static void gw_reply_on_error(DCB *dcb, mxs_auth_state_t state); static int gw_read_and_write(DCB *dcb); -static int gw_decode_mysql_server_handshake(MySQLProtocol *conn, uint8_t *payload); static int gw_do_connect_to_backend(char *host, int port, int *fd); static void inline close_socket(int socket); static GWBUF *gw_create_change_user_packet(MYSQL_session* mses, @@ -1476,10 +1451,7 @@ static GWBUF* process_response_data(DCB* dcb, /** Get command which was stored in gw_MySQLWrite_backend */ p = DCB_PROTOCOL(dcb, MySQLProtocol); - if (!DCB_IS_CLONE(dcb)) - { - CHK_PROTOCOL(p); - } + CHK_PROTOCOL(p); /** All buffers processed here are sescmd responses */ gwbuf_set_type(*readbuf, GWBUF_TYPE_SESCMD_RESPONSE); @@ -1625,10 +1597,7 @@ static bool sescmd_response_complete(DCB* dcb) bool succp; p = DCB_PROTOCOL(dcb, MySQLProtocol); - if (!DCB_IS_CLONE(dcb)) - { - CHK_PROTOCOL(p); - } + CHK_PROTOCOL(p); protocol_get_response_status(p, &npackets_left, &nbytes_left); diff --git a/server/modules/protocol/MySQL/MySQLClient/mysql_client.c b/server/modules/protocol/MySQL/MySQLClient/mysql_client.c index 1ba2954ac..3421256d4 100644 --- a/server/modules/protocol/MySQL/MySQLClient/mysql_client.c +++ b/server/modules/protocol/MySQL/MySQLClient/mysql_client.c @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/protocol/MySQL/mysql_common.c b/server/modules/protocol/MySQL/mysql_common.c index d593ed6a5..d0b67c604 100644 --- a/server/modules/protocol/MySQL/mysql_common.c +++ b/server/modules/protocol/MySQL/mysql_common.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -13,33 +13,6 @@ /* * MySQL Protocol common routines for client to gateway and gateway to backend - * - * Revision History - * Date Who Description - * 17/06/2013 Massimiliano Pinto Common MySQL protocol routines - * 02/06/2013 Massimiliano Pinto MySQL connect asynchronous phases - * 04/09/2013 Massimiliano Pinto Added dcb NULL assert in mysql_send_custom_error - * 12/09/2013 Massimiliano Pinto Added checks in gw_decode_mysql_server_handshake and - * gw_read_backend_handshake - * 10/02/2014 Massimiliano Pinto Added MySQL Authentication with user@host - * 10/09/2014 Massimiliano Pinto Added MySQL Authentication option enabling localhost - * match with any host (wildcard %) - * Backend server configuration may differ so default is 0, - * don't match and an explicit - * localhost entry should be added for the selected user - * in the backends. - * Setting to 1 allow localhost (127.0.0.1 or socket) to - * match the any host grant via - * user@% - * 29/09/2014 Massimiliano Pinto Added Mysql user@host authentication with wildcard in IPv4 hosts: - * x.y.z.%, x.y.%.%, x.%.%.% - * 03/10/2014 Massimiliano Pinto Added netmask for wildcard in IPv4 hosts. 
- * 24/10/2014 Massimiliano Pinto Added Mysql user@host @db authentication support - * 10/11/2014 Massimiliano Pinto Charset at connect is passed to backend during authentication - * 07/07/2015 Martin Brampton Fix problem recognising null password - * 07/02/2016 Martin Brampton Remove authentication functions to mysql_auth.c - * 31/05/2016 Martin Brampton Add mysql_create_standard_error function - * */ #include @@ -1137,18 +1110,20 @@ int mxs_mysql_send_ok(DCB *dcb, int sequence, uint8_t affected_rows, const char* * Otherwise, the packet size is computed, based on the minimum size and * increased by the optional or variable elements. * - * @param conn The MySQLProtocol structure for the connection - * @param user Name of the user seeking to connect - * @param passwd Password for the user seeking to connect - * @param dbname Name of the database to be made default, if any + * @param with_ssl SSL is used + * @param ssl_established SSL is established + * @param user Name of the user seeking to connect + * @param passwd Password for the user seeking to connect + * @param dbname Name of the database to be made default, if any + * * @return The length of the response packet */ -static int -response_length(MySQLProtocol *conn, char *user, uint8_t *passwd, char *dbname, const char *auth_module) +static int response_length(bool with_ssl, bool ssl_established, char *user, uint8_t *passwd, + char *dbname, const char *auth_module) { long bytes; - if (conn->owner_dcb->server->server_ssl && conn->owner_dcb->ssl_state != SSL_ESTABLISHED) + if (with_ssl && !ssl_established) { return MYSQL_AUTH_PACKET_BASE_SIZE; } @@ -1243,14 +1218,14 @@ load_hashed_password(uint8_t *scramble, uint8_t *payload, uint8_t *passwd) * @note Capability bits are defined in maxscale/protocol/mysql.h */ static uint32_t -create_capabilities(MySQLProtocol *conn, bool db_specified, bool compress) +create_capabilities(MySQLProtocol *conn, bool with_ssl, bool db_specified, bool compress) { uint32_t 
final_capabilities; /** Copy client's flags to backend but with the known capabilities mask */ final_capabilities = (conn->client_capabilities & (uint32_t)GW_MYSQL_CAPABILITIES_CLIENT); - if (conn->owner_dcb->server->server_ssl) + if (with_ssl) { final_capabilities |= (uint32_t)GW_MYSQL_CAPABILITIES_SSL; /* Unclear whether we should include this */ @@ -1259,12 +1234,10 @@ create_capabilities(MySQLProtocol *conn, bool db_specified, bool compress) } /* Compression is not currently supported */ + ss_dassert(!compress); if (compress) { final_capabilities |= (uint32_t)GW_MYSQL_CAPABILITIES_COMPRESS; -#ifdef DEBUG_MYSQL_CONN - fprintf(stderr, ">>>> Backend Connection with compression\n"); -#endif } if (db_specified) @@ -1283,35 +1256,21 @@ create_capabilities(MySQLProtocol *conn, bool db_specified, bool compress) return final_capabilities; } -/** - * Write MySQL authentication packet to backend server - * - * @param dcb Backend DCB - * @return True on success, false on failure - */ -mxs_auth_state_t gw_send_backend_auth(DCB *dcb) +GWBUF* gw_generate_auth_response(MXS_SESSION* session, MySQLProtocol *conn, + bool with_ssl, bool ssl_established) { - MYSQL_session local_session; - gw_get_shared_session_auth_info(dcb, &local_session); + MYSQL_session client; + gw_get_shared_session_auth_info(session->client_dcb, &client); uint8_t client_capabilities[4] = {0, 0, 0, 0}; - uint8_t *curr_passwd = memcmp(local_session.client_sha1, null_client_sha1, MYSQL_SCRAMBLE_LEN) ? - local_session.client_sha1 : NULL; + uint8_t *curr_passwd = NULL; - /** - * If session is stopping or has failed return with error. 
- */ - if (dcb->session == NULL || - (dcb->session->state != SESSION_STATE_READY && - dcb->session->state != SESSION_STATE_ROUTER_READY) || - (dcb->server->server_ssl && - dcb->ssl_state == SSL_HANDSHAKE_FAILED)) + if (memcmp(client.client_sha1, null_client_sha1, MYSQL_SCRAMBLE_LEN) != 0) { - return MXS_AUTH_STATE_FAILED; + curr_passwd = client.client_sha1; } - MySQLProtocol *conn = (MySQLProtocol*)dcb->protocol; - uint32_t capabilities = create_capabilities(conn, (local_session.db && strlen(local_session.db)), false); + uint32_t capabilities = create_capabilities(conn, with_ssl, client.db[0], false); gw_mysql_set_byte4(client_capabilities, capabilities); /** @@ -1319,10 +1278,10 @@ mxs_auth_state_t gw_send_backend_auth(DCB *dcb) * different authentication mechanism, it will send an AuthSwitchRequest * packet. */ - const char* auth_plugin_name = DEFAULT_MYSQL_AUTH_PLUGIN; + const char* auth_plugin_name = DEFAULT_MYSQL_AUTH_PLUGIN; - long bytes = response_length(conn, local_session.user, curr_passwd, - local_session.db, auth_plugin_name); + long bytes = response_length(with_ssl, ssl_established, client.user, + curr_passwd, client.db, auth_plugin_name); // allocating the GWBUF GWBUF *buffer = gwbuf_alloc(bytes); @@ -1335,7 +1294,7 @@ mxs_auth_state_t gw_send_backend_auth(DCB *dcb) gw_mysql_set_byte3(payload, (bytes - 4)); // set packet # = 1 - payload[3] = (SSL_ESTABLISHED == dcb->ssl_state) ? '\x02' : '\x01'; + payload[3] = ssl_established ? 
'\x02' : '\x01'; payload += 4; // set client capabilities @@ -1358,53 +1317,79 @@ mxs_auth_state_t gw_send_backend_auth(DCB *dcb) memcpy(payload, &conn->extra_capabilities, sizeof(conn->extra_capabilities)); payload += 4; - if (dcb->server->server_ssl && dcb->ssl_state != SSL_ESTABLISHED) + if (!with_ssl || ssl_established) { - if (dcb_write(dcb, buffer) && dcb_connect_SSL(dcb) >= 0) + // 4 + 4 + 4 + 1 + 23 = 36, this includes the 4 bytes packet header + memcpy(payload, client.user, strlen(client.user)); + payload += strlen(client.user); + payload++; + + if (curr_passwd) { - return MXS_AUTH_STATE_CONNECTED; + payload = load_hashed_password(conn->scramble, payload, curr_passwd); + } + else + { + payload++; } - return MXS_AUTH_STATE_FAILED; + // if the db is not NULL append it + if (client.db[0]) + { + memcpy(payload, client.db, strlen(client.db)); + payload += strlen(client.db); + payload++; + } + + memcpy(payload, auth_plugin_name, strlen(auth_plugin_name)); + } - // 4 + 4 + 4 + 1 + 23 = 36, this includes the 4 bytes packet header - memcpy(payload, local_session.user, strlen(local_session.user)); - payload += strlen(local_session.user); - payload++; - - if (curr_passwd != NULL) - { - payload = load_hashed_password(conn->scramble, payload, curr_passwd); - } - else - { - payload++; - } - - // if the db is not NULL append it - if (local_session.db[0]) - { - memcpy(payload, local_session.db, strlen(local_session.db)); - payload += strlen(local_session.db); - payload++; - } - - memcpy(payload, auth_plugin_name, strlen(auth_plugin_name)); - - return dcb_write(dcb, buffer) ? 
MXS_AUTH_STATE_RESPONSE_SENT : MXS_AUTH_STATE_FAILED; + return buffer; } /** - * Decode mysql server handshake - * - * @param conn The MySQLProtocol structure - * @param payload The bytes just read from the net - * @return 0 on success, < 0 on failure + * Write MySQL authentication packet to backend server * + * @param dcb Backend DCB + * @return Authentication state after sending handshake response */ -static int -gw_decode_mysql_server_handshake(MySQLProtocol *conn, uint8_t *payload) +mxs_auth_state_t gw_send_backend_auth(DCB *dcb) +{ + mxs_auth_state_t rval = MXS_AUTH_STATE_FAILED; + + if (dcb->session == NULL || + (dcb->session->state != SESSION_STATE_READY && + dcb->session->state != SESSION_STATE_ROUTER_READY) || + (dcb->server->server_ssl && + dcb->ssl_state == SSL_HANDSHAKE_FAILED)) + { + return rval; + } + + bool with_ssl = dcb->server->server_ssl; + bool ssl_established = dcb->ssl_state == SSL_ESTABLISHED; + + GWBUF* buffer = gw_generate_auth_response(dcb->session, dcb->protocol, + with_ssl, ssl_established); + ss_dassert(buffer); + + if (with_ssl) + { + if (dcb_write(dcb, buffer) && dcb_connect_SSL(dcb) >= 0) + { + rval = MXS_AUTH_STATE_CONNECTED; + } + } + else if (dcb_write(dcb, buffer)) + { + rval = MXS_AUTH_STATE_RESPONSE_SENT; + } + + return rval; +} + +int gw_decode_mysql_server_handshake(MySQLProtocol *conn, uint8_t *payload) { uint8_t *server_version_end = NULL; uint16_t mysql_server_capabilities_one = 0; diff --git a/server/modules/protocol/examples/cdc.py b/server/modules/protocol/examples/cdc.py index df5853d9f..420f5546e 100755 --- a/server/modules/protocol/examples/cdc.py +++ b/server/modules/protocol/examples/cdc.py @@ -5,7 +5,7 @@ # Use of this software is governed by the Business Source License included # in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
# -# Change Date: 2019-07-01 +# Change Date: 2020-01-01 # # On the date above, in accordance with the Business Source License, use # of this software will be governed by version 2 or later of the General diff --git a/server/modules/protocol/examples/cdc_kafka_producer.py b/server/modules/protocol/examples/cdc_kafka_producer.py index 4dcc360eb..8e78a1c42 100755 --- a/server/modules/protocol/examples/cdc_kafka_producer.py +++ b/server/modules/protocol/examples/cdc_kafka_producer.py @@ -5,7 +5,7 @@ # Use of this software is governed by the Business Source License included # in the LICENSE.TXT file and at www.mariadb.com/bsl11. # -# Change Date: 2019-07-01 +# Change Date: 2020-01-01 # # On the date above, in accordance with the Business Source License, use # of this software will be governed by version 2 or later of the General @@ -16,7 +16,6 @@ # pip install kafka-python # -import json import sys import argparse from kafka import KafkaProducer @@ -30,8 +29,6 @@ parser.add_argument("-T", "--kafka-topic", dest="kafka_topic", default=None, required=True) opts = parser.parse_args(sys.argv[1:]) -decoder = json.JSONDecoder() -rbuf = bytes() producer = KafkaProducer(bootstrap_servers=[opts.kafka_broker]) while True: @@ -41,18 +38,9 @@ while True: if len(buf) == 0: break - rbuf += buf.encode() - - while True: - rbuf = rbuf.lstrip() - data = decoder.raw_decode(rbuf.decode('utf_8')) - rbuf = rbuf[data[1]:] - producer.send(topic=opts.kafka_topic, value=json.dumps(data[0]).encode()) - producer.flush() - - # JSONDecoder will return a ValueError if a partial JSON object is read - except ValueError as err: - pass + data = buf.encode().strip() + producer.send(topic=opts.kafka_topic, value=data) + producer.flush() # All other errors should interrupt the processing except Exception as ex: diff --git a/server/modules/protocol/examples/cdc_last_transaction.py b/server/modules/protocol/examples/cdc_last_transaction.py index 41f82304d..e3ea73c02 100755 --- 
a/server/modules/protocol/examples/cdc_last_transaction.py +++ b/server/modules/protocol/examples/cdc_last_transaction.py @@ -5,7 +5,7 @@ # Use of this software is governed by the Business Source License included # in the LICENSE.TXT file and at www.mariadb.com/bsl11. # -# Change Date: 2019-07-01 +# Change Date: 2020-01-01 # # On the date above, in accordance with the Business Source License, use # of this software will be governed by version 2 or later of the General diff --git a/server/modules/protocol/examples/cdc_schema.go b/server/modules/protocol/examples/cdc_schema.go index bcd9c43e1..44910d500 100644 --- a/server/modules/protocol/examples/cdc_schema.go +++ b/server/modules/protocol/examples/cdc_schema.go @@ -3,7 +3,7 @@ // Use of this software is governed by the Business Source License included // in the LICENSE.TXT file and at www.mariadb.com/bsl11. // -// Change Date: 2019-07-01 +// Change Date: 2020-01-01 // // On the date above, in accordance with the Business Source License, use // of this software will be governed by version 2 or later of the General diff --git a/server/modules/protocol/examples/cdc_schema.py b/server/modules/protocol/examples/cdc_schema.py index 1f4c1bd84..5701a2bcb 100755 --- a/server/modules/protocol/examples/cdc_schema.py +++ b/server/modules/protocol/examples/cdc_schema.py @@ -5,7 +5,7 @@ # Use of this software is governed by the Business Source License included # in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
# -# Change Date: 2019-07-01 +# Change Date: 2020-01-01 # # On the date above, in accordance with the Business Source License, use # of this software will be governed by version 2 or later of the General diff --git a/server/modules/protocol/examples/cdc_users.py b/server/modules/protocol/examples/cdc_users.py index 7462be552..fefc0d6e7 100755 --- a/server/modules/protocol/examples/cdc_users.py +++ b/server/modules/protocol/examples/cdc_users.py @@ -5,7 +5,7 @@ # Use of this software is governed by the Business Source License included # in the LICENSE.TXT file and at www.mariadb.com/bsl11. # -# Change Date: 2019-07-01 +# Change Date: 2020-01-01 # # On the date above, in accordance with the Business Source License, use # of this software will be governed by version 2 or later of the General diff --git a/server/modules/protocol/maxscaled/maxscaled.c b/server/modules/protocol/maxscaled/maxscaled.c index 2b07d345b..bc6248f74 100644 --- a/server/modules/protocol/maxscaled/maxscaled.c +++ b/server/modules/protocol/maxscaled/maxscaled.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/protocol/maxscaled/maxscaled.h b/server/modules/protocol/maxscaled/maxscaled.h index 91f9985fe..cfe84e35f 100644 --- a/server/modules/protocol/maxscaled/maxscaled.h +++ b/server/modules/protocol/maxscaled/maxscaled.h @@ -7,7 +7,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/protocol/telnetd/telnetd.c b/server/modules/protocol/telnetd/telnetd.c index d989e9dc2..eebbb32cd 100644 --- a/server/modules/protocol/telnetd/telnetd.c +++ b/server/modules/protocol/telnetd/telnetd.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/avrorouter/avro.c b/server/modules/routing/avrorouter/avro.c index 233258adb..0812eb5fe 100644 --- a/server/modules/routing/avrorouter/avro.c +++ b/server/modules/routing/avrorouter/avro.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -148,7 +148,7 @@ MXS_MODULE* MXS_CREATE_MODULE() { MODULECMD_ARG_SERVICE | MODULECMD_ARG_NAME_MATCHES_DOMAIN, "The avrorouter service" }, { MODULECMD_ARG_STRING, "Action, whether to 'start' or 'stop' the conversion process" } }; - modulecmd_register_command(MXS_MODULE_NAME, "convert", avro_handle_convert, 2, args); + modulecmd_register_command(MXS_MODULE_NAME, "convert", MODULECMD_TYPE_ACTIVE, avro_handle_convert, 2, args); static MXS_ROUTER_OBJECT MyObject = { @@ -426,6 +426,12 @@ createInstance(SERVICE *service, char **options) inst->block_size = config_get_integer(params, "block_size"); MXS_CONFIG_PARAMETER *param = config_get_param(params, "source"); + inst->gtid.domain = 0; + inst->gtid.event_num = 0; + inst->gtid.seq = 0; + inst->gtid.server_id = 0; + inst->gtid.timestamp = 0; + memset(&inst->active_maps, 0, sizeof(inst->active_maps)); bool err = false; if (param) @@ -719,11 +725,9 @@ static void freeSession(MXS_ROUTER* router_instance, MXS_ROUTER_SESSION* router_ { AVRO_INSTANCE *router = (AVRO_INSTANCE *) router_instance; AVRO_CLIENT *client = (AVRO_CLIENT *) router_client_ses; - int prev_val; - prev_val = atomic_add(&router->stats.n_clients, -1); + ss_debug(int prev_val = )atomic_add(&router->stats.n_clients, -1); ss_dassert(prev_val > 0); - (void) prev_val; free(client->uuid); maxavro_file_close(client->file_handle); @@ -778,9 +782,6 @@ static void closeSession(MXS_ROUTER *instance, MXS_ROUTER_SESSION *router_sessio spinlock_release(&client->file_lock); spinlock_release(&client->catch_lock); - - /* decrease server registered slaves counter */ - atomic_add(&router->stats.n_clients, -1); } /** diff --git a/server/modules/routing/avrorouter/avro_client.c b/server/modules/routing/avrorouter/avro_client.c index 9f8fca5ac..bc454477d 100644 --- 
a/server/modules/routing/avrorouter/avro_client.c +++ b/server/modules/routing/avrorouter/avro_client.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/avrorouter/avro_file.c b/server/modules/routing/avrorouter/avro_file.c index da4014281..6550a5156 100644 --- a/server/modules/routing/avrorouter/avro_file.c +++ b/server/modules/routing/avrorouter/avro_file.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -114,6 +114,7 @@ AVRO_TABLE* avro_table_alloc(const char* filepath, const char* json_schema, cons &table->avro_schema)) { MXS_ERROR("Avro error: %s", avro_strerror()); + MXS_INFO("Avro schema: %s", json_schema); MXS_FREE(table); return NULL; } diff --git a/server/modules/routing/avrorouter/avro_index.c b/server/modules/routing/avrorouter/avro_index.c index 44e0e1b43..4de727e3d 100644 --- a/server/modules/routing/avrorouter/avro_index.c +++ b/server/modules/routing/avrorouter/avro_index.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -140,6 +140,7 @@ void avro_index_file(AVRO_INSTANCE *router, const char* filename) errmsg = NULL; prev_gtid = gtid; } + json_decref(row); } else { @@ -205,7 +206,7 @@ void avro_update_index(AVRO_INSTANCE* router) /** The SQL for the in-memory used_tables table */ static const char *insert_sql = "INSERT OR IGNORE INTO "MEMORY_TABLE_NAME "(domain, server_id, sequence, binlog_timestamp, table_name)" - " VALUES (%lu, %lu, %lu, %lu, \"%s\")"; + " VALUES (%lu, %lu, %lu, %u, \"%s\")"; /** * @brief Add a used table to the current transaction diff --git a/server/modules/routing/avrorouter/avro_rbr.c b/server/modules/routing/avrorouter/avro_rbr.c index f30cd2f12..b1d076a7e 100644 --- a/server/modules/routing/avrorouter/avro_rbr.c +++ b/server/modules/routing/avrorouter/avro_rbr.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/avrorouter/avro_schema.c b/server/modules/routing/avrorouter/avro_schema.c index dc81d06ca..a82c37c38 100644 --- a/server/modules/routing/avrorouter/avro_schema.c +++ b/server/modules/routing/avrorouter/avro_schema.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -102,16 +102,16 @@ char* json_new_schema_from_table(TABLE_MAP *map) json_object_set_new(schema, "name", json_string("ChangeRecord")); json_t *array = json_array(); - json_array_append(array, json_pack_ex(&err, 0, "{s:s, s:s}", "name", - avro_domain, "type", "int")); - json_array_append(array, json_pack_ex(&err, 0, "{s:s, s:s}", "name", - avro_server_id, "type", "int")); - json_array_append(array, json_pack_ex(&err, 0, "{s:s, s:s}", "name", - avro_sequence, "type", "int")); - json_array_append(array, json_pack_ex(&err, 0, "{s:s, s:s}", "name", - avro_event_number, "type", "int")); - json_array_append(array, json_pack_ex(&err, 0, "{s:s, s:s}", "name", - avro_timestamp, "type", "int")); + json_array_append_new(array, json_pack_ex(&err, 0, "{s:s, s:s}", "name", + avro_domain, "type", "int")); + json_array_append_new(array, json_pack_ex(&err, 0, "{s:s, s:s}", "name", + avro_server_id, "type", "int")); + json_array_append_new(array, json_pack_ex(&err, 0, "{s:s, s:s}", "name", + avro_sequence, "type", "int")); + json_array_append_new(array, json_pack_ex(&err, 0, "{s:s, s:s}", "name", + avro_event_number, "type", "int")); + json_array_append_new(array, json_pack_ex(&err, 0, "{s:s, s:s}", "name", + avro_timestamp, "type", "int")); /** Enums and other complex types are defined with complete JSON objects * instead of string values */ @@ -119,16 +119,19 @@ char* json_new_schema_from_table(TABLE_MAP *map) "name", "EVENT_TYPES", "symbols", "insert", "update_before", "update_after", "delete"); - json_array_append(array, json_pack_ex(&err, 0, "{s:s, s:o}", "name", avro_event_type, - "type", event_types)); + // Ownership of `event_types` is stolen when using the `o` format + json_array_append_new(array, json_pack_ex(&err, 0, "{s:s, s:o}", "name", avro_event_type, + "type", 
event_types)); for (uint64_t i = 0; i < map->columns; i++) { - json_array_append(array, json_pack_ex(&err, 0, "{s:s, s:s, s:s, s:i}", - "name", create->column_names[i], - "type", column_type_to_avro_type(map->column_types[i]), - "real_type", create->column_types[i], - "length", create->column_lengths[i])); + ss_info_dassert(create->column_names[i] && *create->column_names[i], + "Column name should not be empty or NULL"); + json_array_append_new(array, json_pack_ex(&err, 0, "{s:s, s:s, s:s, s:i}", + "name", create->column_names[i], + "type", column_type_to_avro_type(map->column_types[i]), + "real_type", create->column_types[i], + "length", create->column_lengths[i])); } json_object_set_new(schema, "fields", array); char* rval = json_dumps(schema, JSON_PRESERVE_ORDER); @@ -543,6 +546,7 @@ static const char *extract_field_name(const char* ptr, char* dest, size_t size) dest[bytes] = '\0'; make_valid_avro_identifier(dest); + ss_dassert(strlen(dest) > 0); } else { @@ -555,7 +559,7 @@ static const char *extract_field_name(const char* ptr, char* dest, size_t size) int extract_type_length(const char* ptr, char *dest) { /** Skip any leading whitespace */ - while (isspace(*ptr) || *ptr == '`') + while (*ptr && (isspace(*ptr) || *ptr == '`')) { ptr++; } @@ -565,7 +569,7 @@ int extract_type_length(const char* ptr, char *dest) /** Skip characters until we either hit a whitespace character or the start * of the length definition. 
*/ - while (!isspace(*ptr) && *ptr != '(') + while (*ptr && !isspace(*ptr) && *ptr != '(') { ptr++; } @@ -576,7 +580,7 @@ int extract_type_length(const char* ptr, char *dest) dest[typelen] = '\0'; /** Skip whitespace */ - while (isspace(*ptr)) + while (*ptr && isspace(*ptr)) { ptr++; } @@ -641,6 +645,7 @@ static int process_column_definition(const char *nameptr, char*** dest, char*** lengths[i] = len; types[i] = MXS_STRDUP_A(type); names[i] = MXS_STRDUP_A(colname); + ss_info_dassert(*names[i] && *types[i], "`name` and `type` must not be empty"); i++; } diff --git a/server/modules/routing/avrorouter/avrorouter.h b/server/modules/routing/avrorouter/avrorouter.h index 7c09bbfd3..2209d8bb6 100644 --- a/server/modules/routing/avrorouter/avrorouter.h +++ b/server/modules/routing/avrorouter/avrorouter.h @@ -7,7 +7,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/binlogrouter/binlog_common.c b/server/modules/routing/binlogrouter/binlog_common.c index 31a1af443..af376e8a1 100644 --- a/server/modules/routing/binlogrouter/binlog_common.c +++ b/server/modules/routing/binlogrouter/binlog_common.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/binlogrouter/blr.c b/server/modules/routing/binlogrouter/blr.c index b9b433bd2..368d7b35a 100644 --- a/server/modules/routing/binlogrouter/blr.c +++ b/server/modules/routing/binlogrouter/blr.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/binlogrouter/blr.h b/server/modules/routing/binlogrouter/blr.h index 4b7ba2bca..b4f565115 100644 --- a/server/modules/routing/binlogrouter/blr.h +++ b/server/modules/routing/binlogrouter/blr.h @@ -7,7 +7,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/binlogrouter/blr_cache.c b/server/modules/routing/binlogrouter/blr_cache.c index 7e1732156..20c03db13 100644 --- a/server/modules/routing/binlogrouter/blr_cache.c +++ b/server/modules/routing/binlogrouter/blr_cache.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/binlogrouter/blr_file.c b/server/modules/routing/binlogrouter/blr_file.c index 5be5ccaec..c34fd23d5 100644 --- a/server/modules/routing/binlogrouter/blr_file.c +++ b/server/modules/routing/binlogrouter/blr_file.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/binlogrouter/blr_master.c b/server/modules/routing/binlogrouter/blr_master.c index 6fd18c575..9a70254ac 100644 --- a/server/modules/routing/binlogrouter/blr_master.c +++ b/server/modules/routing/binlogrouter/blr_master.c @@ -7,7 +7,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/binlogrouter/blr_slave.c b/server/modules/routing/binlogrouter/blr_slave.c index 78edea8f8..8afbd20b2 100644 --- a/server/modules/routing/binlogrouter/blr_slave.c +++ b/server/modules/routing/binlogrouter/blr_slave.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/binlogrouter/maxbinlogcheck.c b/server/modules/routing/binlogrouter/maxbinlogcheck.c index c31afb2d6..e1ebe6ff8 100644 --- a/server/modules/routing/binlogrouter/maxbinlogcheck.c +++ b/server/modules/routing/binlogrouter/maxbinlogcheck.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/binlogrouter/test/testbinlog.c b/server/modules/routing/binlogrouter/test/testbinlog.c index 2125299b3..dca7492ed 100644 --- a/server/modules/routing/binlogrouter/test/testbinlog.c +++ b/server/modules/routing/binlogrouter/test/testbinlog.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/cli/cli.c b/server/modules/routing/cli/cli.c index dbd7e3919..2d9fdddb9 100644 --- a/server/modules/routing/cli/cli.c +++ b/server/modules/routing/cli/cli.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/debugcli/debugcli.c b/server/modules/routing/debugcli/debugcli.c index ef3610be5..c7f717b5b 100644 --- a/server/modules/routing/debugcli/debugcli.c +++ b/server/modules/routing/debugcli/debugcli.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/debugcli/debugcmd.c b/server/modules/routing/debugcli/debugcmd.c index 6c2a8aac3..7962e1cf7 100644 --- a/server/modules/routing/debugcli/debugcmd.c +++ b/server/modules/routing/debugcli/debugcmd.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -1575,8 +1575,8 @@ struct subcommand alteroptions[] = "\n" "address Server address\n" "port Server port\n" - "monuser Monitor user for this server\n" - "monpw Monitor password for this server\n" + "monitoruser Monitor user for this server\n" + "monitorpw Monitor password for this server\n" "ssl Enable SSL, value must be 'required'\n" "ssl_key Path to SSL private key\n" "ssl_cert Path to SSL certificate\n" diff --git a/server/modules/routing/hintrouter/dcb.cc b/server/modules/routing/hintrouter/dcb.cc index 76b0b22b2..baae227da 100644 --- a/server/modules/routing/hintrouter/dcb.cc +++ b/server/modules/routing/hintrouter/dcb.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/hintrouter/dcb.hh b/server/modules/routing/hintrouter/dcb.hh index fbb462449..21fb3cf57 100644 --- a/server/modules/routing/hintrouter/dcb.hh +++ b/server/modules/routing/hintrouter/dcb.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/hintrouter/hintrouter.cc b/server/modules/routing/hintrouter/hintrouter.cc index 79db39110..f15489960 100644 --- a/server/modules/routing/hintrouter/hintrouter.cc +++ b/server/modules/routing/hintrouter/hintrouter.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/hintrouter/hintrouter.hh b/server/modules/routing/hintrouter/hintrouter.hh index e072ba837..61e6fa5a0 100644 --- a/server/modules/routing/hintrouter/hintrouter.hh +++ b/server/modules/routing/hintrouter/hintrouter.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/hintrouter/hintrouterdefs.hh b/server/modules/routing/hintrouter/hintrouterdefs.hh index ce8030a02..efce4b4ad 100644 --- a/server/modules/routing/hintrouter/hintrouterdefs.hh +++ b/server/modules/routing/hintrouter/hintrouterdefs.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/hintrouter/hintroutersession.cc b/server/modules/routing/hintrouter/hintroutersession.cc index 18ad85af1..27e0cd9ec 100644 --- a/server/modules/routing/hintrouter/hintroutersession.cc +++ b/server/modules/routing/hintrouter/hintroutersession.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/hintrouter/hintroutersession.hh b/server/modules/routing/hintrouter/hintroutersession.hh index 003890470..5e428d670 100644 --- a/server/modules/routing/hintrouter/hintroutersession.hh +++ b/server/modules/routing/hintrouter/hintroutersession.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/maxinfo/maxinfo.c b/server/modules/routing/maxinfo/maxinfo.c index c84093aac..8a26bf01d 100644 --- a/server/modules/routing/maxinfo/maxinfo.c +++ b/server/modules/routing/maxinfo/maxinfo.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/maxinfo/maxinfo.h b/server/modules/routing/maxinfo/maxinfo.h index 0d1070b51..22d68d400 100644 --- a/server/modules/routing/maxinfo/maxinfo.h +++ b/server/modules/routing/maxinfo/maxinfo.h @@ -7,7 +7,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/maxinfo/maxinfo_error.c b/server/modules/routing/maxinfo/maxinfo_error.c index 894181797..4ebfaa748 100644 --- a/server/modules/routing/maxinfo/maxinfo_error.c +++ b/server/modules/routing/maxinfo/maxinfo_error.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/maxinfo/maxinfo_exec.c b/server/modules/routing/maxinfo/maxinfo_exec.c index 7e465f899..c93fd8d25 100644 --- a/server/modules/routing/maxinfo/maxinfo_exec.c +++ b/server/modules/routing/maxinfo/maxinfo_exec.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/maxinfo/maxinfo_parse.c b/server/modules/routing/maxinfo/maxinfo_parse.c index 31cc125c2..1f00c6871 100644 --- a/server/modules/routing/maxinfo/maxinfo_parse.c +++ b/server/modules/routing/maxinfo/maxinfo_parse.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/readconnroute/readconnection.h b/server/modules/routing/readconnroute/readconnection.h index 1c4ad3b94..74498ae2b 100644 --- a/server/modules/routing/readconnroute/readconnection.h +++ b/server/modules/routing/readconnroute/readconnection.h @@ -7,7 +7,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/readconnroute/readconnroute.c b/server/modules/routing/readconnroute/readconnroute.c index 86edd3fa4..43d51cc98 100644 --- a/server/modules/routing/readconnroute/readconnroute.c +++ b/server/modules/routing/readconnroute/readconnroute.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/readwritesplit/readwritesplit.c b/server/modules/routing/readwritesplit/readwritesplit.c index 5d9eca1d0..982b773d0 100644 --- a/server/modules/routing/readwritesplit/readwritesplit.c +++ b/server/modules/routing/readwritesplit/readwritesplit.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/readwritesplit/readwritesplit.h b/server/modules/routing/readwritesplit/readwritesplit.h index 5bfc3cc67..276366e50 100644 --- a/server/modules/routing/readwritesplit/readwritesplit.h +++ b/server/modules/routing/readwritesplit/readwritesplit.h @@ -7,7 +7,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/readwritesplit/rwsplit_internal.h b/server/modules/routing/readwritesplit/rwsplit_internal.h index d93472d26..a61dc59e5 100644 --- a/server/modules/routing/readwritesplit/rwsplit_internal.h +++ b/server/modules/routing/readwritesplit/rwsplit_internal.h @@ -7,7 +7,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/readwritesplit/rwsplit_mysql.c b/server/modules/routing/readwritesplit/rwsplit_mysql.c index 14b1eed17..d772cac5a 100644 --- a/server/modules/routing/readwritesplit/rwsplit_mysql.c +++ b/server/modules/routing/readwritesplit/rwsplit_mysql.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/readwritesplit/rwsplit_route_stmt.c b/server/modules/routing/readwritesplit/rwsplit_route_stmt.c index 730265fd7..0eec6d8f8 100644 --- a/server/modules/routing/readwritesplit/rwsplit_route_stmt.c +++ b/server/modules/routing/readwritesplit/rwsplit_route_stmt.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General @@ -728,13 +728,6 @@ route_target_t get_route_target(ROUTER_CLIENT_SES *rses, { target = TARGET_MASTER; } - /** - * A cloned session, route everything to the master - */ - else if (DCB_IS_CLONE(rses->client_dcb)) - { - target = TARGET_MASTER; - } /** * These queries are not affected by hints */ diff --git a/server/modules/routing/readwritesplit/rwsplit_select_backends.c b/server/modules/routing/readwritesplit/rwsplit_select_backends.c index 09fd36714..ca3dc6b04 100644 --- a/server/modules/routing/readwritesplit/rwsplit_select_backends.c +++ b/server/modules/routing/readwritesplit/rwsplit_select_backends.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/readwritesplit/rwsplit_session_cmd.c b/server/modules/routing/readwritesplit/rwsplit_session_cmd.c index 6416212cc..82ccbddf1 100644 --- a/server/modules/routing/readwritesplit/rwsplit_session_cmd.c +++ b/server/modules/routing/readwritesplit/rwsplit_session_cmd.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/readwritesplit/rwsplit_tmp_table_multi.c b/server/modules/routing/readwritesplit/rwsplit_tmp_table_multi.c index ecdd0e2a4..c9b9f0a44 100644 --- a/server/modules/routing/readwritesplit/rwsplit_tmp_table_multi.c +++ b/server/modules/routing/readwritesplit/rwsplit_tmp_table_multi.c @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/schemarouter/schemarouter.cc b/server/modules/routing/schemarouter/schemarouter.cc index 21cca1614..dc61bdf68 100644 --- a/server/modules/routing/schemarouter/schemarouter.cc +++ b/server/modules/routing/schemarouter/schemarouter.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/schemarouter/schemarouter.hh b/server/modules/routing/schemarouter/schemarouter.hh index a3ed197f2..b9ce7b66c 100644 --- a/server/modules/routing/schemarouter/schemarouter.hh +++ b/server/modules/routing/schemarouter/schemarouter.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/schemarouter/schemarouterinstance.cc b/server/modules/routing/schemarouter/schemarouterinstance.cc index 2b71b16e8..a488dccae 100644 --- a/server/modules/routing/schemarouter/schemarouterinstance.cc +++ b/server/modules/routing/schemarouter/schemarouterinstance.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/schemarouter/schemarouterinstance.hh b/server/modules/routing/schemarouter/schemarouterinstance.hh index 3b9856119..b3cb3ca8b 100644 --- a/server/modules/routing/schemarouter/schemarouterinstance.hh +++ b/server/modules/routing/schemarouter/schemarouterinstance.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/schemarouter/schemaroutersession.cc b/server/modules/routing/schemarouter/schemaroutersession.cc index 0bad5ecde..e34415b6d 100644 --- a/server/modules/routing/schemarouter/schemaroutersession.cc +++ b/server/modules/routing/schemarouter/schemaroutersession.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/schemarouter/schemaroutersession.hh b/server/modules/routing/schemarouter/schemaroutersession.hh index 8bfb7803f..43908e6a7 100644 --- a/server/modules/routing/schemarouter/schemaroutersession.hh +++ b/server/modules/routing/schemarouter/schemaroutersession.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/schemarouter/session_command.cc b/server/modules/routing/schemarouter/session_command.cc index 07d3832fe..a5d7c520c 100644 --- a/server/modules/routing/schemarouter/session_command.cc +++ b/server/modules/routing/schemarouter/session_command.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/schemarouter/session_command.hh b/server/modules/routing/schemarouter/session_command.hh index 10e981ebf..7fa37e754 100644 --- a/server/modules/routing/schemarouter/session_command.hh +++ b/server/modules/routing/schemarouter/session_command.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. 
* - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/schemarouter/shard_map.cc b/server/modules/routing/schemarouter/shard_map.cc index f6025997d..56bc1977b 100644 --- a/server/modules/routing/schemarouter/shard_map.cc +++ b/server/modules/routing/schemarouter/shard_map.cc @@ -4,7 +4,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/modules/routing/schemarouter/shard_map.hh b/server/modules/routing/schemarouter/shard_map.hh index e45206ea0..e7ae5cc2a 100644 --- a/server/modules/routing/schemarouter/shard_map.hh +++ b/server/modules/routing/schemarouter/shard_map.hh @@ -5,7 +5,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General diff --git a/server/test/maxscale_test.h.in b/server/test/maxscale_test.h.in index 2a21d4752..d17b73a77 100644 --- a/server/test/maxscale_test.h.in +++ b/server/test/maxscale_test.h.in @@ -7,7 +7,7 @@ * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * - * Change Date: 2019-07-01 + * Change Date: 2020-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General