Files
oceanbase/deps/oblib/unittest/lib/regex/test_regex.cpp
gm 4a92b6d7df reformat source code
according to code styles, 'AccessModifierOffset' should be -2.
2021-06-17 10:40:36 +08:00

584 lines
14 KiB
C++

/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include <gtest/gtest.h>
#include "lib/ob_define.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include "lib/regex/regex/ob_regex.h"
#include <assert.h>
#include "test_regex.ih"
#include "lib/utility/ob_macro_utils.h"
using namespace oceanbase::common;
// Shared state of the regression driver below.
// When nonzero, regress() echoes each processed input line number to stdout.
int debug = 0;
// Number of the input line currently being processed; incremented by regress()
// and used as the prefix of diagnostics.
int line = 0;
// Sticky failure flag: set to 1 by any failed check; regress() returns (0 == status).
int status = 0;
// Baseline compile flags that options('c', ...) starts from.
int copts = OB_REG_EXTENDED;
// Baseline exec flags that options('e', ...) starts from.
int eopts = 0;
// NOTE(review): startoff/endoff are not referenced in the visible part of this file.
ob_regoff_t startoff = 0;
ob_regoff_t endoff = 0;
// extern void regprint();
/*
- split - divide a string into fields, like awk split()
= int split(char *string, char *fields[], int nfields, char *sep);
*/
/*
 * Splits `string` IN PLACE into fields: separators are overwritten with '\0'
 * and pointers to the field starts are stored in fields[0..nfields-1].
 *
 * sep selects the mode:
 *   ""    - white space (blank/tab): leading and trailing white space is
 *           ignored and runs of white space count as one separator;
 *   "c"   - exactly one separator character; adjacent separators yield empty
 *           fields;
 *   "ab"  - two separator characters; a run of them counts as one separator;
 *   else  - any number of separator characters; a run counts as one separator.
 *
 * Returns the number of fields found, which may exceed nfields; only the
 * first nfields pointers are stored, and in the one- and two-separator modes
 * the last stored field then keeps the unsplit remainder of the string.
 * An empty input (after white-space skipping in "" mode) returns 0 and stores
 * nothing.  The one- and two-separator modes assume nfields >= 1.
 *
 * The obsolete `register` storage class was dropped: it is ill-formed in C++17.
 */
int /* number of fields, including overflow */
split(char* string, char* fields[], /* list is not NULL-terminated */
    int nfields,                    /* number of entries available in fields[] */
    char* sep                       /* "" white, "c" single char, "ab" [ab]+ */
)
{
  char* p = string;
  char c; /* latest character */
  char sepc = sep[0];
  char sepc2;
  int fn;
  char** fp = fields;
  char* sepp;
  int trimtrail;

  /* white space */
  if (sepc == '\0') {
    while ((c = *p++) == ' ' || c == '\t')
      continue;
    p--;
    trimtrail = 1;
    static char static_sep[5] = " \t";
    sep = static_sep; /* note, code below knows this is 2 long */
    sepc = ' ';
  } else {
    trimtrail = 0;
  }
  sepc2 = sep[1]; /* now we can safely pick this up */

  /* catch empties */
  if (*p == '\0')
    return (0);

  /* single separator */
  if (sepc2 == '\0') {
    fn = nfields;
    for (;;) {
      *fp++ = p;
      fn--;
      if (fn == 0)
        break;
      while ((c = *p++) != sepc) {
        if (c == '\0')
          return (nfields - fn);
      }
      *(p - 1) = '\0';
    }
    /* we have overflowed the fields vector -- just count them */
    fn = nfields;
    for (;;) {
      while ((c = *p++) != sepc) {
        if (c == '\0')
          return (fn);
      }
      fn++;
    }
    /* not reached */
  }

  /* two separators */
  if (sep[2] == '\0') {
    fn = nfields;
    for (;;) {
      *fp++ = p;
      fn--;
      while ((c = *p++) != sepc && c != sepc2) {
        if (c == '\0') {
          /* a trailing empty field produced by trailing white space does not count */
          if (trimtrail && **(fp - 1) == '\0')
            fn++;
          return (nfields - fn);
        }
      }
      if (fn == 0)
        break;
      *(p - 1) = '\0';
      while ((c = *p++) == sepc || c == sepc2)
        continue;
      p--;
    }
    /* we have overflowed the fields vector -- just count them */
    fn = nfields;
    while (c != '\0') {
      while ((c = *p++) == sepc || c == sepc2)
        continue;
      p--;
      fn++;
      while ((c = *p++) != '\0' && c != sepc && c != sepc2)
        continue;
    }
    /* might have to trim trailing white space */
    if (trimtrail) {
      p--;
      while ((c = *--p) == sepc || c == sepc2)
        continue;
      p++;
      if (*p != '\0') {
        if (fn == nfields + 1)
          *p = '\0';
        fn--;
      }
    }
    return (fn);
  }

  /* n separators */
  fn = 0;
  for (;;) {
    if (fn < nfields)
      *fp++ = p;
    fn++;
    /* scan to the end of the current field */
    for (;;) {
      c = *p++;
      if (c == '\0')
        return (fn);
      sepp = sep;
      while ((sepc = *sepp++) != '\0' && sepc != c)
        continue;
      if (sepc != '\0') /* it was a separator */
        break;
    }
    if (fn < nfields)
      *(p - 1) = '\0';
    /* skip the rest of the separator run */
    for (;;) {
      c = *p++;
      sepp = sep;
      while ((sepc = *sepp++) != '\0' && sepc != c)
        continue;
      if (sepc == '\0') /* it wasn't a separator */
        break;
    }
    p--;
  }
  /* not reached */
}
/*
- regress - main loop of regression test
== bool regress(FILE *in);
*/
/*
 * Reads test cases from `in`, one per line, and runs each through try_case();
 * afterwards it checks a few ob_regerror() behaviours directly.
 *
 * Lines starting with '#' and empty lines are skipped.  Every other line is
 * split on runs of tabs into at least three fields (f[0], f[1], f[2], plus the
 * optional f[3] and f[4]) that are handed to try_case(); a field consisting of
 * the two characters "" stands for the empty string.  When f[1] contains '&'
 * the case is run a second time with OB_REG_EXTENDED cleared.
 *
 * Returns false immediately on a malformed line (fewer than three fields);
 * otherwise returns true iff the global `status` is still 0 at the end.
 */
bool regress(FILE* in)
{
  char inbuf[1000];
#define MAXF 10
  char* f[MAXF];
  int nf;
  int nstored; /* number of entries of f[] that split() actually filled in */
  int i;
  char erbuf[100];
  size_t ne;
  size_t inlen;
  const char* badpat = "invalid regular expression";
#define SHORT 10
  const char* bpname = "OB_REG_BADPAT";
  ob_regex_t re;
  /* two identical separators make split() treat a run of tabs as one separator */
  char sep[5] = "\t\t";

  while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
    line++;
    if (inbuf[0] == '#' || inbuf[0] == '\n')
      continue; /* NOTE CONTINUE */
    /* strip the newline only if one is present: the final line of the file
     * (or an over-long line) may not end in one */
    inlen = strlen(inbuf);
    if (inlen > 0 && inbuf[inlen - 1] == '\n')
      inbuf[inlen - 1] = '\0';
    if (debug)
      fprintf(stdout, "%d:\n", line);
    nf = split(inbuf, f, MAXF, sep);
    if (nf < 3) {
      fprintf(stderr, "bad input, line %d\n", line);
      return false;
    }
    /* split() counts overflow fields too, but only MAXF pointers were stored */
    nstored = (nf < MAXF) ? nf : MAXF;
    for (i = 0; i < nstored; i++) {
      if (strcmp(f[i], "\"\"") == 0)
        f[i][0] = '\0';
    }
    if (nf <= 3)
      f[3] = NULL;
    if (nf <= 4)
      f[4] = NULL;
    try_case(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
    if (opt('&', f[1])) /* try with either type of RE */
      try_case(f[0], f[1], f[2], f[3], f[4], options('c', f[1]) & ~OB_REG_EXTENDED);
  }

  /* full-size buffer: message and reported length must both match */
  ne = ob_regerror(OB_REG_BADPAT, (ob_regex_t*)NULL, erbuf, sizeof(erbuf));
  if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat) + 1) {
    fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n", erbuf, badpat);
    status = 1;
  }

  /* short buffer: message must be truncated and terminated, length still the full one */
  ne = ob_regerror(OB_REG_BADPAT, (ob_regex_t*)NULL, erbuf, (size_t)SHORT);
  if (strncmp(erbuf, badpat, SHORT - 1) != 0 || erbuf[SHORT - 1] != '\0' || ne != strlen(badpat) + 1) {
    fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n", erbuf, SHORT - 1, badpat);
    status = 1;
  }

  /* code -> name */
  ne = ob_regerror(OB_REG_ITOA | OB_REG_BADPAT, (ob_regex_t*)NULL, erbuf, sizeof(erbuf));
  if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname) + 1) {
    fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n", erbuf, bpname);
    status = 1;
  }

  /* name -> code; the name is passed through re.re_endp */
  re.re_endp = bpname;
  ne = ob_regerror(OB_REG_ATOI, &re, erbuf, sizeof(erbuf));
  if (atoi(erbuf) != (int)OB_REG_BADPAT) {
    fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n", erbuf, (long)OB_REG_BADPAT);
    status = 1;
  } else if (ne != strlen(erbuf) + 1) {
    /* report the length that was actually returned */
    fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n", erbuf, (long)ne);
    status = 1;
  }
  return 0 == status;
}
/*
- try_case - try it, and report on problems
== void try_case(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
*/
/*
 * Runs one regression case and reports every discrepancy on stderr, setting
 * the global `status` to 1 when something is wrong.
 *
 *   f0   - the pattern (magic characters are expanded by fixstr() on a copy)
 *   f1   - option letters; 'C' means compilation is expected to fail
 *   f2   - the subject string, or with 'C' the expected error name
 *          (without the "OB_REG_" prefix)
 *   f3   - expected overall match, or NULL when no match is expected
 *   f4   - comma-separated expected sub-matches, or NULL to skip that check
 *   opts - compile flags to use; may differ from what f1 alone would give
 *
 * f0 and f2 are copied into 1000-byte buffers, so they must be shorter than
 * that; regress() guarantees it by reading lines into a buffer of that size.
 * f4 is split in place.
 */
void try_case(char* f0, char* f1, char* f2, char* f3, char* f4, int opts /* may not match f1 */
)
{
  ob_regex_t re;
#define NSUBS 10
  ob_regmatch_t subs[NSUBS];
#define NSHOULD 15
  char* should[NSHOULD];
  int nshould;
  char erbuf[100];
  int err;
  int len;
  const char* type = (opts & OB_REG_EXTENDED) ? "ERE" : "BRE";
  int i;
  char* grump;
  char f0copy[1000];
  char f2copy[1000];
  char sep[5] = ",";
  static char empty_field[1] = ""; /* writable "" for an empty sub-match list */

  strcpy(f0copy, f0);
  /* the end pointer must be taken before fixstr(), which may plant '\0' bytes */
  re.re_endp = (opts & OB_REG_PEND) ? f0copy + strlen(f0copy) : NULL;
  fixstr(f0copy);
  err = ob_regcomp(&re, f0copy, opts, &ob_charset_utf8mb4_general_ci);
  if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
    /* unexpected error or wrong error */
    len = (int)ob_regerror(err, &re, erbuf, sizeof(erbuf));
    fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n", line, type, eprint(err), len, (int)sizeof(erbuf), erbuf);
    status = 1;
  } else if (err == 0 && opt('C', f1)) {
    /* unexpected success */
    fprintf(stderr, "%d: %s should have given OB_REG_%s\n", line, type, f2);
    status = 1;
    err = 1; /* so we won't try regexec */
  }
  if (err != 0) {
    ob_regfree(&re);
    return;
  }

  strcpy(f2copy, f2);
  fixstr(f2copy);
  if (options('e', f1) & OB_REG_STARTEND) {
    /* the region to search is marked in f2 as "...(region)..." */
    char* open_paren = strchr(f2, '(');
    char* close_paren = strchr(f2, ')');
    if (open_paren == NULL || close_paren == NULL) {
      /* malformed test line: offsets cannot be derived from a NULL pointer */
      fprintf(stderr, "%d: bad STARTEND syntax\n", line);
      status = 1;
      ob_regfree(&re);
      return;
    }
    subs[0].rm_so = open_paren - f2 + 1;
    subs[0].rm_eo = close_paren - f2;
  }
  err = ob_regexec(&re, f2copy, NSUBS, subs, options('e', f1));
  if (err != 0 && (f3 != NULL || err != OB_REG_NOMATCH)) {
    /* unexpected error or wrong error */
    len = (int)ob_regerror(err, &re, erbuf, sizeof(erbuf));
    fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n", line, type, eprint(err), len, (int)sizeof(erbuf), erbuf);
    status = 1;
  } else if (err != 0) {
    /* nothing more to check */
  } else if (f3 == NULL) {
    /* unexpected success */
    fprintf(stderr, "%d: %s exec should have failed\n", line, type);
    status = 1;
    err = 1; /* just on principle */
  } else if (opts & OB_REG_NOSUB) {
    /* nothing more to check */
  } else if ((grump = check(f2, subs[0], f3)) != NULL) {
    fprintf(stderr, "%d: %s %s\n", line, type, grump);
    status = 1;
    err = 1;
  }
  if (err != 0 || f4 == NULL) {
    ob_regfree(&re);
    return;
  }

  /* should[0] is unused so that should[i] lines up with subs[i] */
  for (i = 1; i < NSHOULD; i++)
    should[i] = NULL;
  nshould = split(f4, should + 1, NSHOULD - 1, sep);
  if (nshould == 0) {
    /* split() stores nothing for an empty f4, so should[1] is still NULL here;
     * point it at an empty string instead of writing through the NULL pointer */
    nshould = 1;
    should[1] = empty_field;
  }
  for (i = 1; i < NSUBS; i++) {
    grump = check(f2, subs[i], should[i]);
    if (grump != NULL) {
      fprintf(stderr, "%d: %s $%d %s\n", line, type, i, grump);
      status = 1;
      err = 1;
    }
  }
  ob_regfree(&re);
}
/*
- options - pick options out of a regression-test string
== int options(int type, char *s);
*/
/*
 * Translates the option letters in `s` into a flag word.
 *
 * type 'c' starts from the global `copts` and honours the compile letters
 * "bisnmp"; any other type starts from `eopts` and honours the exec letters
 * "^$#tl".  Letters outside the legal set for the chosen type are ignored.
 * Returns the resulting flags.
 *
 * The obsolete `register` storage class was dropped: it is ill-formed in C++17.
 */
int options(int type, /* 'c' compile, 'e' exec */
    char* s)
{
  char* p;
  int o = (type == 'c') ? copts : eopts;
  const char* legal = (type == 'c') ? "bisnmp" : "^$#tl";

  for (p = s; *p != '\0'; p++) {
    if (strchr(legal, *p) == NULL)
      continue; /* not an option letter for this type */
    switch (*p) {
      case 'b':
        o &= ~OB_REG_EXTENDED;
        break;
      case 'i':
        o |= OB_REG_ICASE;
        break;
      case 's':
        o |= OB_REG_NOSUB;
        break;
      case 'n':
        o |= OB_REG_NEWLINE;
        break;
      case 'm':
        o &= ~OB_REG_EXTENDED;
        o |= OB_REG_NOSPEC;
        break;
      case 'p':
        o |= OB_REG_PEND;
        break;
      case '^':
        o |= OB_REG_NOTBOL;
        break;
      case '$':
        o |= OB_REG_NOTEOL;
        break;
      case '#':
        o |= OB_REG_STARTEND;
        break;
      case 't': /* trace */
        o |= OB_REG_TRACE;
        break;
      case 'l': /* force long representation */
        o |= OB_REG_LARGE;
        break;
      /* NOTE(review): 'r' is in neither legal set above, so this case is
       * never reached as written; kept unchanged to preserve behaviour. */
      case 'r': /* force backref use */
        o |= OB_REG_BACKR;
        break;
    }
  }
  return (o);
}
/*
- opt - is a particular option in a regression string?
== int opt(int c, char *s);
*/
/* Reports whether option character `c` occurs in the option string `s`:
 * 1 when it does, 0 when it does not. */
int /* predicate */
opt(int c, char* s)
{
  const char* hit = strchr(s, c);
  if (hit == NULL) {
    return 0;
  }
  return 1;
}
/*
- fixstr - transform magic characters in strings
== void fixstr(register char *p);
*/
/*
 * Expands the test file's magic characters in place:
 *   'N' -> newline, 'T' -> tab, 'S' -> space, 'Z' -> '\0'.
 * A NULL argument is ignored.
 *
 * The scan stops at the string's ORIGINAL terminator: a '\0' planted for 'Z'
 * does not end it, because the terminator test is made on the next character
 * after the replacement.
 *
 * The obsolete `register` storage class was dropped from the parameter: it is
 * ill-formed in C++17 and is not part of the function's type.
 */
void fixstr(char* p)
{
  if (p == NULL)
    return;
  for (; *p != '\0'; p++) {
    switch (*p) {
      case 'N':
        *p = '\n';
        break;
      case 'T':
        *p = '\t';
        break;
      case 'S':
        *p = ' ';
        break;
      case 'Z':
        *p = '\0';
        break;
      default:
        break;
    }
  }
}
/*
- check - check a substring match
== char *check(char *str, ob_regmatch_t sub, char *should);
*/
/*
 * Compares one reported (sub)match against what the test expects.
 *
 *   str    - the subject string the offsets in `sub` refer to
 *   sub    - the match offsets to verify (rm_so/rm_eo, -1/-1 meaning no match)
 *   should - expected matched text; NULL or "-" means "must not match";
 *            "@rest" means an empty match located where `rest` begins
 *            (a bare "@" means at the end of the string).  In the '@' form
 *            should[0] is overwritten with '\0'.
 *
 * Returns NULL when everything agrees, otherwise a complaint held in static
 * storage (overwritten by the next call).
 *
 * Complaints are formatted with snprintf so that a long matched substring
 * cannot overrun the complaint buffer, and the expected length is only taken
 * once `should` is known to be non-NULL.
 */
char* /* NULL or complaint */
check(char* str, ob_regmatch_t sub, char* should)
{
  int len;
  int shlen;
  char* p;
  static char grump[500];
  char* at = NULL;

  if (should != NULL && strcmp(should, "-") == 0)
    should = NULL;
  if (should != NULL && should[0] == '@') {
    at = should + 1;
    should[0] = '\0';
  }

  /* check rm_so and rm_eo for consistency */
  if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) || (sub.rm_so != -1 && sub.rm_eo == -1) ||
      (sub.rm_so != -1 && sub.rm_so < 0) || (sub.rm_eo != -1 && sub.rm_eo < 0)) {
    snprintf(grump, sizeof(grump), "start %ld end %ld", (long)sub.rm_so, (long)sub.rm_eo);
    return (grump);
  }

  /* check for no match */
  if (sub.rm_so == -1 && should == NULL)
    return (NULL);
  if (sub.rm_so == -1) {
    static char ret_err_buf[50] = "did not match";
    return (ret_err_buf);
  }

  /* check for in range; rm_eo is known to be non-negative at this point */
  if ((size_t)sub.rm_eo > strlen(str)) {
    snprintf(grump, sizeof(grump), "start %ld end %ld, past end of string", (long)sub.rm_so, (long)sub.rm_eo);
    return (grump);
  }

  len = (int)(sub.rm_eo - sub.rm_so);
  p = str + sub.rm_so;

  /* check for not supposed to match */
  if (should == NULL) {
    snprintf(grump, sizeof(grump), "matched `%.*s'", len, p);
    return (grump);
  }

  /* check for wrong match */
  shlen = (int)strlen(should);
  if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
    snprintf(grump, sizeof(grump), "matched `%.*s' instead", len, p);
    return (grump);
  }
  if (shlen > 0)
    return (NULL);

  /* check null match in right place */
  if (at == NULL)
    return (NULL);
  shlen = (int)strlen(at);
  if (shlen == 0)
    shlen = 1; /* force check for end-of-string */
  if (strncmp(p, at, shlen) != 0) {
    snprintf(grump, sizeof(grump), "matched null at `%.20s'", p);
    return (grump);
  }
  return (NULL);
}
/*
- eprint - convert error number to name
== static char *eprint(int err);
*/
static char* eprint(int err)
{
static char epbuf[100];
size_t len;
len = ob_regerror(OB_REG_ITOA | err, (ob_regex_t*)NULL, epbuf, sizeof(epbuf));
assert(len <= sizeof(epbuf));
UNUSED(len);
return (epbuf);
}
/*
- efind - convert error name to number
== static int efind(char *name);
*/
/*
 * Converts an error name given without its "OB_REG_" prefix (e.g. "BADPAT")
 * into the numeric code, by asking ob_regerror() in OB_REG_ATOI mode.  The
 * full name is passed through re.re_endp and the answer is read back from the
 * same buffer with atoi().
 *
 * The name is formatted with snprintf so that an over-long name from the test
 * file cannot overflow the static buffer; it is truncated instead (and
 * rejected by the assert in debug builds).
 */
static int efind(char* name)
{
  static char efbuf[100];
  ob_regex_t re;
  int written;

  written = snprintf(efbuf, sizeof(efbuf), "OB_REG_%s", name);
  assert(written >= 0 && (size_t)written < sizeof(efbuf));
  (void)written; /* only inspected by the assert */
  re.re_endp = efbuf;
  (void)ob_regerror(OB_REG_ATOI, &re, efbuf, sizeof(efbuf));
  return (atoi(efbuf));
}
// Test fixture for the regex regression test. It carries no data and needs no
// per-test preparation, so every hook is empty.
class ObRegexTest : public ::testing::Test {
public:
  ObRegexTest()
  {}
  virtual ~ObRegexTest()
  {}
  // Nothing to prepare before a test.
  virtual void SetUp()
  {}
  // Nothing to clean up after a test.
  virtual void TearDown()
  {}

private:
  // Copying is disallowed: both members are declared but never defined.
  ObRegexTest(const ObRegexTest& other);
  ObRegexTest& operator=(const ObRegexTest& other);
};
// Runs the regression cases stored in "regex/tests" (resolved against the
// current working directory) through regress().
TEST_F(ObRegexTest, basic_test)
{
  char test_filename[] = "regex/tests";
  FILE* test_file = fopen(test_filename, "r");
  if (NULL == test_file) {
    // NOTE(review): a missing data file is only logged, so the test passes
    // without checking anything -- confirm that this is intended.
    fprintf(stderr, "fail to open file '%s'\n", test_filename);
  } else {
    // Close the file before asserting: a failing ASSERT_* returns from the
    // test body immediately and would otherwise leak the handle.
    const bool all_passed = regress(test_file);
    fclose(test_file);
    ASSERT_TRUE(all_passed);
  }
}
// Test entry point: sets the log level to INFO, lets googletest consume its
// command-line flags, then runs every registered test and returns the result
// as the process exit status.
int main(int argc, char** argv)
{
  OB_LOGGER.set_log_level("INFO");
  // ob_init_memory_pool();
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}