fix regex error

This commit is contained in:
ganyang
2022-06-15 10:55:50 +08:00
parent 57bc5c4a37
commit bddf4cedd0
5 changed files with 142 additions and 63 deletions

View File

@ -705,7 +705,7 @@ static void moveins(struct nfa* nfa, struct state* oldState, struct state* newSt
*
* Either all arcs, or only non-empty ones as determined by all value.
*/
static void copyins(struct nfa* nfa, struct state* oldState, struct state* newState, int all)
static void copyins(struct nfa* nfa, struct state* oldState, struct state* newState)
{
Assert(oldState != newState);
@ -714,9 +714,9 @@ static void copyins(struct nfa* nfa, struct state* oldState, struct state* newSt
/* With not too many arcs, just do them one at a time */
struct arc* a = NULL;
for (a = oldState->ins; a != NULL; a = a->inchain)
if (all || a->type != EMPTY)
cparc(nfa, a, a->from, newState);
for (a = oldState->ins; a != NULL; a = a->inchain) {
cparc(nfa, a, a->from, newState);
}
} else {
/*
* With many arcs, use a sort-merge approach. Note that createarc()
@ -735,10 +735,6 @@ static void copyins(struct nfa* nfa, struct state* oldState, struct state* newSt
while (oa != NULL && na != NULL) {
struct arc* a = oa;
if (!all && a->type == EMPTY) {
oa = oa->inchain;
continue;
}
switch (sortins_cmp(&oa, &na)) {
case -1:
@ -763,11 +759,6 @@ static void copyins(struct nfa* nfa, struct state* oldState, struct state* newSt
/* newState does not have anything matching oa */
struct arc* a = oa;
if (!all && a->type == EMPTY) {
oa = oa->inchain;
continue;
}
oa = oa->inchain;
createarc(nfa, a->type, a->co, a->from, newState);
}
@ -931,7 +922,7 @@ static void moveouts(struct nfa* nfa, struct state* oldState, struct state* newS
*
* Either all arcs, or only non-empty ones as determined by all value.
*/
static void copyouts(struct nfa* nfa, struct state* oldState, struct state* newState, int all)
static void copyouts(struct nfa* nfa, struct state* oldState, struct state* newState)
{
Assert(oldState != newState);
@ -939,9 +930,9 @@ static void copyouts(struct nfa* nfa, struct state* oldState, struct state* newS
/* With not too many arcs, just do them one at a time */
struct arc* a = NULL;
for (a = oldState->outs; a != NULL; a = a->outchain)
if (all || a->type != EMPTY)
cparc(nfa, a, newState, a->to);
for (a = oldState->outs; a != NULL; a = a->outchain) {
cparc(nfa, a, newState, a->to);
}
} else {
/*
* With many arcs, use a sort-merge approach. Note that createarc()
@ -960,11 +951,6 @@ static void copyouts(struct nfa* nfa, struct state* oldState, struct state* newS
while (oa != NULL && na != NULL) {
struct arc* a = oa;
if (!all && a->type == EMPTY) {
oa = oa->outchain;
continue;
}
switch (sortouts_cmp(&oa, &na)) {
case -1:
/* newState does not have anything matching oa */
@ -988,11 +974,6 @@ static void copyouts(struct nfa* nfa, struct state* oldState, struct state* newS
/* newState does not have anything matching oa */
struct arc* a = oa;
if (!all && a->type == EMPTY) {
oa = oa->outchain;
continue;
}
oa = oa->outchain;
createarc(nfa, a->type, a->co, newState, a->to);
}
@ -1262,6 +1243,10 @@ static long /* re_info bits */
fprintf(f, "\nfinal cleanup:\n");
#endif
cleanup(nfa); /* final tidying */
#ifdef REG_DEBUG
if (verbose)
dumpnfa(nfa, f);
#endif
return analyze(nfa); /* and analysis */
}
@ -1274,6 +1259,7 @@ static void pullback(struct nfa* nfa, FILE* f) /* for debug output; NULL none */
struct state* nexts = NULL;
struct arc* a = NULL;
struct arc* nexta = NULL;
struct state* intermediates;
int progress;
/* find and pull until there are no more */
@ -1281,13 +1267,23 @@ static void pullback(struct nfa* nfa, FILE* f) /* for debug output; NULL none */
progress = 0;
for (s = nfa->states; s != NULL && !NISERR(); s = nexts) {
nexts = s->next;
intermediates = NULL;
for (a = s->outs; a != NULL && !NISERR(); a = nexta) {
nexta = a->outchain;
if (a->type == '^' || a->type == BEHIND)
if (pull(nfa, a))
if (pull(nfa, a, &intermediates))
progress = 1;
Assert(nexta == NULL || s->no != FREESTATE);
}
/* clear tmp fields of intermediate states created here */
while (intermediates != NULL) {
struct state* ns = intermediates->tmp;
intermediates->tmp = NULL;
intermediates = ns;
}
/* if s is now useless, get rid of it */
if ((s->nins == 0 || s->nouts == 0) && !s->flag)
dropstate(nfa, s);
}
if (progress && f != NULL)
dumpnfa(nfa, f);
@ -1318,7 +1314,7 @@ static void pullback(struct nfa* nfa, FILE* f) /* for debug output; NULL none */
* was that state's last outarc.
*/
static int /* 0 couldn't, 1 could */
pull(struct nfa* nfa, struct arc* con)
pull(struct nfa* nfa, struct arc* con, struct state** intermediates)
{
struct state* from = con->from;
struct state* to = con->to;
@ -1339,16 +1335,18 @@ static int /* 0 couldn't, 1 could */
s = newstate(nfa);
if (NISERR())
return 0;
copyins(nfa, from, s, 1); /* duplicate inarcs */
cparc(nfa, con, s, to); /* move constraint arc */
copyins(nfa, from, s); /* duplicate inarcs */
cparc(nfa, con, s, to); /* move constraint arc */
freearc(nfa, con);
if (NISERR())
return 0;
from = s;
con = from->outs;
}
Assert(from->nouts == 1);
/* propagate the constraint into the from state's inarcs */
for (a = from->ins; a != NULL; a = nexta) {
for (a = from->ins; a != NULL && !NISERR(); a = nexta) {
nexta = a->inchain;
switch (combine(con, a)) {
case INCOMPATIBLE: /* destroy the arc */
@ -1357,13 +1355,21 @@ static int /* 0 couldn't, 1 could */
case SATISFIED: /* no action needed */
break;
case COMPATIBLE: /* swap the two arcs, more or less */
s = newstate(nfa);
if (NISERR())
return 0;
cparc(nfa, a, s, to); /* anticipate move */
/* need an intermediate state, but might have one already */
for (s = *intermediates; s != NULL; s = s->tmp) {
assert(s->nins > 0 && s->nouts > 0);
if (s->ins->from == a->from && s->outs->to == to)
break;
}
if (s == NULL) {
s = newstate(nfa);
if (NISERR())
return 0;
s->tmp = *intermediates;
*intermediates = s;
}
cparc(nfa, con, a->from, s);
if (NISERR())
return 0;
cparc(nfa, a, s, to);
freearc(nfa, a);
break;
default:
@ -1374,7 +1380,7 @@ static int /* 0 couldn't, 1 could */
/* remaining inarcs, if any, incorporate the constraint */
moveins(nfa, from, to);
dropstate(nfa, from); /* will free the constraint */
freearc(nfa, con);
return 1;
}
@ -1387,6 +1393,7 @@ static void pushfwd(struct nfa* nfa, FILE* f) /* for debug output; NULL none */
struct state* nexts = NULL;
struct arc* a = NULL;
struct arc* nexta = NULL;
struct state* intermediates;
int progress;
/* find and push until there are no more */
@ -1394,13 +1401,23 @@ static void pushfwd(struct nfa* nfa, FILE* f) /* for debug output; NULL none */
progress = 0;
for (s = nfa->states; s != NULL && !NISERR(); s = nexts) {
nexts = s->next;
intermediates = NULL;
for (a = s->ins; a != NULL && !NISERR(); a = nexta) {
nexta = a->inchain;
if (a->type == '$' || a->type == AHEAD)
if (push(nfa, a))
if (push(nfa, a, &intermediates))
progress = 1;
Assert(nexta == NULL || s->no != FREESTATE);
}
/* clear tmp fields of intermediate states created here */
while (intermediates != NULL) {
struct state* ns = intermediates->tmp;
intermediates->tmp = NULL;
intermediates = ns;
}
/* if s is now useless, get rid of it */
if ((s->nins == 0 || s->nouts == 0) && !s->flag)
dropstate(nfa, s);
}
if (progress && f != NULL)
dumpnfa(nfa, f);
@ -1431,7 +1448,7 @@ static void pushfwd(struct nfa* nfa, FILE* f) /* for debug output; NULL none */
* was that state's last inarc.
*/
static int /* 0 couldn't, 1 could */
push(struct nfa* nfa, struct arc* con)
push(struct nfa* nfa, struct arc* con, struct state** intermediates)
{
struct state* from = con->from;
struct state* to = con->to;
@ -1452,16 +1469,18 @@ static int /* 0 couldn't, 1 could */
s = newstate(nfa);
if (NISERR())
return 0;
copyouts(nfa, to, s, 1); /* duplicate outarcs */
copyouts(nfa, to, s); /* duplicate outarcs */
cparc(nfa, con, from, s); /* move constraint */
freearc(nfa, con);
if (NISERR())
return 0;
to = s;
con = to->ins;
}
Assert(to->nins == 1);
/* propagate the constraint into the to state's outarcs */
for (a = to->outs; a != NULL; a = nexta) {
for (a = to->outs; a != NULL && !NISERR(); a = nexta) {
nexta = a->outchain;
switch (combine(con, a)) {
case INCOMPATIBLE: /* destroy the arc */
@ -1470,13 +1489,21 @@ static int /* 0 couldn't, 1 could */
case SATISFIED: /* no action needed */
break;
case COMPATIBLE: /* swap the two arcs, more or less */
s = newstate(nfa);
if (NISERR())
return 0;
cparc(nfa, con, s, a->to); /* anticipate move */
/* need an intermediate state, but might have one already */
for (s = *intermediates; s != NULL; s = s->tmp) {
assert(s->nins > 0 && s->nouts > 0);
if (s->ins->from == from && s->outs->to == a->to)
break;
}
if (s == NULL) {
s = newstate(nfa);
if (NISERR())
return 0;
s->tmp = *intermediates;
*intermediates = s;
}
cparc(nfa, con, s, a->to);
cparc(nfa, a, from, s);
if (NISERR())
return 0;
freearc(nfa, a);
break;
default:
@ -1487,7 +1514,7 @@ static int /* 0 couldn't, 1 could */
/* remaining outarcs, if any, incorporate the constraint */
moveouts(nfa, to, from);
dropstate(nfa, to); /* will free the constraint */
freearc(nfa, con);
return 1;
}
@ -2543,6 +2570,8 @@ static void dumpnfa(struct nfa* nfa, FILE* f)
{
#ifdef REG_DEBUG
struct state* s = NULL;
int nstates = 0;
int narcs = 0;
fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no);
if (nfa->bos[0] != COLORLESS)
@ -2554,8 +2583,12 @@ static void dumpnfa(struct nfa* nfa, FILE* f)
if (nfa->eos[1] != COLORLESS)
fprintf(f, ", eol [%ld]", (long)nfa->eos[1]);
fprintf(f, "\n");
for (s = nfa->states; s != NULL; s = s->next)
for (s = nfa->states; s != NULL; s = s->next) {
dumpstate(s, f);
nstates++;
narcs += s->nouts;
}
fprintf(f, "total of %d states, %d arcs\n", nstates, narcs);
if (nfa->parent == NULL)
dumpcolors(nfa->cm, f);
fflush(f);

View File

@ -135,10 +135,10 @@ static int sortins_cmp(const void*, const void*);
static void sortouts(struct nfa*, struct state*);
static int sortouts_cmp(const void*, const void*);
static void moveins(struct nfa*, struct state*, struct state*);
static void copyins(struct nfa*, struct state*, struct state*, int);
static void copyins(struct nfa*, struct state*, struct state*);
static void mergeins(struct nfa*, struct state*, struct arc**, int);
static void moveouts(struct nfa*, struct state*, struct state*);
static void copyouts(struct nfa*, struct state*, struct state*, int);
static void copyouts(struct nfa*, struct state*, struct state*);
static void cloneouts(struct nfa*, struct state*, struct state*, struct state*, int);
static void delsub(struct nfa*, struct state*, struct state*);
static void deltraverse(struct nfa*, struct state*, struct state*);
@ -149,9 +149,9 @@ static struct state* single_color_transition(struct state*, struct state*);
static void specialcolors(struct nfa*);
static long optimize(struct nfa*, FILE*);
static void pullback(struct nfa*, FILE*);
static int pull(struct nfa*, struct arc*);
static int pull(struct nfa *, struct arc *, struct state **);
static void pushfwd(struct nfa*, FILE*);
static int push(struct nfa*, struct arc*);
static int push(struct nfa *, struct arc *, struct state **);
#define INCOMPATIBLE 1 /* destroys arc */
#define SATISFIED 2 /* constraint satisfied */
@ -179,7 +179,6 @@ static void dumpnfa(struct nfa*, FILE*);
#ifdef REG_DEBUG
static void dumpstate(struct state*, FILE*);
static void dumparcs(struct state*, FILE*);
static int dumprarcs(struct arc*, struct state*, FILE*, int);
static void dumparc(struct arc*, struct state*, FILE*);
static void dumpcnfa(struct cnfa*, FILE*);
static void dumpcstate(int, struct cnfa*, FILE*);
@ -597,7 +596,9 @@ static void makesearch(struct vars* v, struct nfa* nfa)
/* do the splits */
for (s = slist; s != NULL; s = s2) {
s2 = newstate(nfa);
copyouts(nfa, s, s2, 1);
NOERR();
copyouts(nfa, s, s2);
NOERR();
for (a = s->ins; a != NULL; a = b) {
b = a->inchain;
if (a->from != pre) {
@ -1730,7 +1731,7 @@ static void cleanst(struct vars* v)
/*
* nfatree - turn a subRE subtree into a tree of compacted NFAs
* f��for debug output
* for debug output
* return optimize results from top node
*/
static long nfatree(struct vars* v, struct subre* t, FILE* f)
@ -1895,7 +1896,7 @@ static void dump(regex_t* re, FILE* f)
dumpcolors(&g->cmap, f);
if (!NULLCNFA(g->search)) {
printf("\nsearch:\n");
fprintf(f, "\nsearch:\n");
dumpcnfa(&g->search, f);
}
for (i = 1; i < g->nlacons; i++) {

View File

@ -0,0 +1,35 @@
-- These cases used to give too-many-states failures
select 'x' ~ 'abcd(\m)+xyz';
?column?
----------
f
(1 row)
select 'a' ~ '^abcd*(((((^(a c(e?d)a+|)+|)+|)+|)+|a)+|)';
?column?
----------
f
(1 row)
select 'x' ~ 'a^(^)bcd*xy(((((($a+|)+|)+|)+$|)+|)+|)^$';
?column?
----------
f
(1 row)
select 'x' ~ 'xyz(\Y\Y)+';
?column?
----------
f
(1 row)
select 'x' ~ 'x|(?:\M)+';
?column?
----------
t
(1 row)
-- This generates O(N) states but O(N^2) arcs, so it causes problems
-- if arc count is not constrained
select 'x' ~ repeat('x*y*z*', 1000);
ERROR: invalid regular expression: regular expression is too complex

View File

@ -687,7 +687,7 @@ test: hw_to_timestamp hw_view_privilege
test: hw_identifier
#test: hw_hashint1 hw_smalldatetime_hash hw_rawtype_hash
#test: hw_nvarchar2_hash cmpr_smallint cmpr_prefix_150left cmpr_uint32_oid
test: oidjoins opr_sanity_2 regex
test: oidjoins opr_sanity_2 regex regex2
#test: opr_sanity_1
test: pmk
@ -915,4 +915,4 @@ test: subscription
test: fdw_audit
test: gs_global_config_audit
test: detail
test: gs_dump_encrypt
test: gs_dump_encrypt

View File

@ -0,0 +1,10 @@
-- These cases used to give too-many-states failures
select 'x' ~ 'abcd(\m)+xyz';
select 'a' ~ '^abcd*(((((^(a c(e?d)a+|)+|)+|)+|)+|a)+|)';
select 'x' ~ 'a^(^)bcd*xy(((((($a+|)+|)+|)+$|)+|)+|)^$';
select 'x' ~ 'xyz(\Y\Y)+';
select 'x' ~ 'x|(?:\M)+';
-- This generates O(N) states but O(N^2) arcs, so it causes problems
-- if arc count is not constrained
select 'x' ~ repeat('x*y*z*', 1000);