mirror of
https://git.postgresql.org/git/postgresql.git
synced 2026-02-26 00:07:01 +08:00
pg_regprefix was doing nothing with lookahead constraints, which would be fine if it were the right kind of nothing, but it isn't: we have to terminate our search for a fixed prefix, not just pretend the LACON arc isn't there. Otherwise, if the current state has both a LACON outarc and a single plain-color outarc, we'd falsely conclude that the color represents an addition to the fixed prefix, and generate an extracted index condition that restricts the indexscan too much. (See added regression test case.) Terminating the search is conservative: we could traverse the LACON arc (thus assuming that the constraint can be satisfied at runtime) and then examine the outarcs of the linked-to state. But that would be a lot more work than it seems worth, because writing a LACON followed by a single plain character is a pretty silly thing to do. This makes a difference only in rather contrived cases, but it's a bug, so back-patch to all supported branches.
85 lines
3.2 KiB
SQL
85 lines
3.2 KiB
SQL
--
|
|
-- Regular expression tests
|
|
--
|
|
|
|
-- Don't want to have to double backslashes in regexes
-- (the patterns in this script use escapes such as \1 and \w inside ordinary
-- '...' literals, so backslashes must be taken literally by the string parser)
|
|
set standard_conforming_strings = on;
|
|
|
|
-- Test simple quantified backrefs
-- Pattern: one character from [bc], followed by zero or more repeats of that
-- same character (\1*), anchored at both ends.  The column alias (t or f)
-- names the boolean result each query is expected to produce.
|
|
select 'bbbbb' ~ '^([bc])\1*$' as t;
|
|
select 'ccc' ~ '^([bc])\1*$' as t;
|
|
-- first character is not in [bc]
select 'xxx' ~ '^([bc])\1*$' as f;
|
|
-- trailing c differs from the captured b
select 'bbc' ~ '^([bc])\1*$' as f;
|
|
-- zero repetitions of the backref
select 'b' ~ '^([bc])\1*$' as t;
|
|
|
|
-- Test quantified backref within a larger expression
-- The first group captures some text, which must then recur one or more times,
-- each time preceded by a single space.  The three inputs are run once with
-- \w+ and once with .+ as the capturing subexpression; alias t/f is the
-- expected result.
|
|
select 'abc abc abc' ~ '^(\w+)( \1)+$' as t;
|
|
select 'abc abd abc' ~ '^(\w+)( \1)+$' as f;
|
|
select 'abc abc abd' ~ '^(\w+)( \1)+$' as f;
|
|
select 'abc abc abc' ~ '^(.+)( \1)+$' as t;
|
|
select 'abc abd abc' ~ '^(.+)( \1)+$' as f;
|
|
select 'abc abc abd' ~ '^(.+)( \1)+$' as f;
|
|
|
|
-- Test some cases that crashed in 9.2beta1 due to pmatch[] array overrun
-- (each pattern has nested capturing parentheses combined with a + quantifier;
-- substring(... from ...) is used so that the captured text is reported)
|
|
select substring('asd TO foo' from ' TO (([a-z0-9._]+|"([^"]+|"")+")+)');
|
|
-- quantifier applied outside both capture groups
select substring('a' from '((a))+');
|
|
-- quantifier applied to the inner capture group only
select substring('a' from '((a)+)');
|
|
|
|
-- Test conversion of regex patterns to indexable conditions
-- Each EXPLAIN prints the plan for a regex match against pg_proc.proname;
-- "costs off" omits cost estimates so only the plan shape and the extracted
-- conditions appear in the output.
|
|
-- no ^ anchor
explain (costs off) select * from pg_proc where proname ~ 'abc';
|
|
explain (costs off) select * from pg_proc where proname ~ '^abc';
|
|
-- anchored at both ends
explain (costs off) select * from pg_proc where proname ~ '^abc$';
|
|
-- d is optional (d*), so it is not a guaranteed part of the leading text
explain (costs off) select * from pg_proc where proname ~ '^abcd*e';
|
|
-- c repeats (c+); at least one c is required
explain (costs off) select * from pg_proc where proname ~ '^abc+d';
|
|
-- literal text split across capture groups
explain (costs off) select * from pg_proc where proname ~ '^(abc)(def)';
|
|
explain (costs off) select * from pg_proc where proname ~ '^(abc)$';
|
|
-- the whole group is optional
explain (costs off) select * from pg_proc where proname ~ '^(abc)?d';
|
|
-- After abcd there is both a plain alternative (x) and a lookahead
-- constraint (?=\w\w) followed by q.  The fixed-prefix search has to stop at
-- the lookahead rather than ignore it; otherwise x would wrongly be treated
-- as part of the prefix and the index condition would be too restrictive.
explain (costs off) select * from pg_proc where proname ~ '^abcd(x|(?=\w\w)q)';
|
|
|
|
-- Test for infinite loop in pullback() (CVE-2007-4772)
-- (a group containing only the constraints $ and ^, under a * quantifier)
|
|
select 'a' ~ '($|^)*';
|
|
|
|
-- These cases expose a bug in the original fix for CVE-2007-4772
-- (a quantified group of anchors adjacent to another anchor of the same kind)
|
|
select 'a' ~ '(^)+^';
|
|
select 'a' ~ '$($$)+';
|
|
|
|
-- More cases of infinite loop in pullback(), not fixed by CVE-2007-4772 fix
-- (quantified groups that contain only constraints: mixed ^ and $ anchors, or
-- ^ followed by one or more negative lookahead constraints (?!...))
|
|
select 'a' ~ '($^)+';
|
|
select 'a' ~ '(^$)*';
|
|
select 'aa bb cc' ~ '(^(?!aa))+';
|
|
-- same three-lookahead pattern against inputs starting with aa, bb, cc, and
-- with dd (which none of the lookaheads excludes)
select 'aa x' ~ '(^(?!aa)(?!bb)(?!cc))+';
|
|
select 'bb x' ~ '(^(?!aa)(?!bb)(?!cc))+';
|
|
select 'cc x' ~ '(^(?!aa)(?!bb)(?!cc))+';
|
|
select 'dd x' ~ '(^(?!aa)(?!bb)(?!cc))+';
|
|
|
|
-- Test for infinite loop in fixempties() (Tcl bugs 3604074, 3606683)
-- (six levels of nested groups, each of which can match the empty string:
-- first via nested * quantifiers, then via empty alternation branches)
|
|
select 'a' ~ '((((((a)*)*)*)*)*)*';
|
|
select 'a' ~ '((((((a+|)+|)+|)+|)+|)+|)';
|
|
|
|
-- These cases used to give too-many-states failures
-- (patterns mix quantified constraints with nested empty alternatives;
-- \m = beginning of word, \M = end of word, \Y = not a word boundary)
|
|
select 'x' ~ 'abcd(\m)+xyz';
|
|
select 'a' ~ '^abcd*(((((^(a c(e?d)a+|)+|)+|)+|)+|a)+|)';
|
|
select 'x' ~ 'a^(^)bcd*xy(((((($a+|)+|)+|)+$|)+|)+|)^$';
|
|
select 'x' ~ 'xyz(\Y\Y)+';
|
|
select 'x' ~ 'x|(?:\M)+';
|
|
|
|
-- This generates O(N) states but O(N^2) arcs, so it causes problems
|
|
-- if arc count is not constrained
-- (repeat() builds the pattern as 1000 copies of 'x*y*z*', i.e. 3000
-- consecutive starred single-character atoms)
|
|
select 'x' ~ repeat('x*y*z*', 1000);
|
|
|
|
-- Test backref in combination with non-greedy quantifier
|
|
-- https://core.tcl.tk/tcl/tktview/6585b21ca8fa6f3678d442b97241fdd43dba2ec0
-- (a captured word character, a non-greedy .*?, then a backref to the capture)
|
|
select 'Programmer' ~ '(\w).*?\1' as t;
|
|
-- the 'g' flag asks regexp_matches for every match, not just the first
select regexp_matches('Programmer', '(\w)(.*?\1)', 'g');
|
|
|
|
-- Test for proper matching of non-greedy iteration (bug #11478)
-- (three slash-separated non-greedy components, the last one optional; the
-- flags argument is an empty string, i.e. no flags)
|
|
select regexp_matches('foo/bar/baz',
|
|
'^([^/]+?)(?:/([^/]+?))(?:/([^/]+?))?$', '');
|
|
|
|
-- Test for infinite loop in cfindloop with zero-length possible match
|
|
-- but no actual match (can only happen in the presence of backrefs)
-- (every pattern below pairs an empty capture group () with a backref \1)
|
|
select 'a' ~ '$()|^\1';
|
|
select 'a' ~ '.. ()|\1';
|
|
select 'a' ~ '()*\1';
|
|
select 'a' ~ '()+\1';
|